Spaces:

broadfield-dev
/

screenshot

Running

File size: 13,038 Bytes

be8b6de
a457c97
be8b6de
 
 
 
a98acba
360569f
 
6935192
3b38290
7d2610b
be8b6de
6741cd4
6c870fa
534e13f
6741cd4
be8b6de
 
 
6741cd4
be8b6de
 
 
df66a58
c485c19
df66a58
be8b6de
 
360569f
be8b6de
534e13f
1a7b6de
 
 
be8b6de
6741cd4
d1685d3
4084cf0
be8b6de
 
a98acba
 
be8b6de
3b38290
 
 
 
4084cf0
d1685d3
be8b6de
7d2610b
534e13f
7d2610b
 
 
 
 
 
 
be8b6de
4084cf0
7d2610b
6741cd4
a98acba
360569f
4084cf0
360569f
fcfb056
 
 
 
 
 
 
37adb99
fcfb056
7d2610b
fcfb056
4084cf0
fcfb056
 
 
37adb99
fcfb056
 
 
 
 
 
7d2610b
534e13f
8871397
 
534e13f
fcfb056
360569f
37adb99
360569f
 
 
37adb99
360569f
 
 
3452e4e
360569f
 
8871397
7d2610b
360569f
3452e4e
7d2610b
 
 
 
7e3ef54
4084cf0
be8b6de
6741cd4
4084cf0
be8b6de
a98acba
be8b6de
 
 
7e3ef54
 
 
a622f66
fcfb056
4084cf0
fcfb056
360569f
 
a622f66
 
 
 
360569f
 
fcfb056
4084cf0
7e3ef54
 
 
 
 
 
a622f66
be8b6de
4084cf0
df66a58
a98acba
4084cf0
be8b6de
7d2610b
be8b6de
7d2610b
3452e4e
534e13f
3452e4e
534e13f
 
 
 
3452e4e
8871397
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3452e4e
 
 
 
64bf601
3452e4e
534e13f
3452e4e
80d93f2
3452e4e
8871397
 
3452e4e
 
64bf601
80d93f2
3452e4e
534e13f
3452e4e
7e3ef54
4084cf0
8871397
 
 
a622f66
534e13f
a622f66
3452e4e
80d93f2
4084cf0
7d2610b
534e13f
be8b6de
4084cf0
7d2610b
7e3ef54
3452e4e
6741cd4
 
 
 
be8b6de
6741cd4
 
7d2610b
6741cd4
 
 
534e13f
6741cd4
 
 
7e3ef54
 
6741cd4
 
 
 
 
 
 
4084cf0
6741cd4
 
4084cf0
6741cd4
7e3ef54
 
6741cd4
 
 
3452e4e
7d2610b
 
 
6741cd4
3452e4e

import os
from flask import Flask, render_template, request, jsonify
from PIL import Image
import google.generativeai as genai
import base64
import io
import logging
import json
import re
import random
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
from flask_socketio import SocketIO, emit

app = Flask(__name__)
# The secret key signs session data, so it must be unpredictable. The `random`
# module is not cryptographically secure; use the OS CSPRNG instead. An
# explicit FLASK_SECRET_KEY environment variable takes precedence so that
# several worker processes (or restarts) can share one stable key.
app.config['SECRET_KEY'] = os.environ.get("FLASK_SECRET_KEY") or os.urandom(32).hex()
socketio = SocketIO(app, cors_allowed_origins="*")  # For development; restrict in production.

# --- Gemini credentials: the module refuses to load without a key ---
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None or GOOGLE_API_KEY == "":
    raise ValueError("GOOGLE_API_KEY environment variable not set.")

genai.configure(api_key=GOOGLE_API_KEY)

# --- Models offered to the UI (free tier, per the original author's note) ---
DEFAULT_MODEL = "gemini-1.5-flash"
AVAILABLE_MODELS = [DEFAULT_MODEL]

# --- Screenshot defaults ---
DEFAULT_MAX_HEIGHT = 1000      # screenshots taller than this are scaled down
DEFAULT_IMAGE_FORMAT = "PNG"   # lower-cased before being handed to Playwright
DEFAULT_TIMEOUT = 10_000       # milliseconds

# --- Point Playwright at a fixed browser cache directory at runtime ---
os.environ.update(PLAYWRIGHT_BROWSERS_PATH="/app/.cache/playwright")

# --- Utility Functions ---
def screenshot_from_url(url: str, max_height: int = DEFAULT_MAX_HEIGHT, image_format: str = DEFAULT_IMAGE_FORMAT, timeout: int = DEFAULT_TIMEOUT) -> Image.Image:
    """Render ``url`` in headless Chromium and return a full-page screenshot.

    Args:
        url: Page to capture. Only ``http://`` and ``https://`` URLs are
            accepted, because the value comes from the request form and other
            schemes (e.g. ``file://``) would expose local resources.
        max_height: Maximum height of the returned image. Taller screenshots
            are scaled down proportionally. Must be positive.
        image_format: Screenshot type, case-insensitive. ``"jpg"`` is treated
            as ``"jpeg"``, the spelling Playwright expects.
        timeout: Timeout in milliseconds applied to navigation, the
            network-idle wait and the screenshot call.

    Returns:
        The screenshot as a PIL image, resized if it exceeded ``max_height``.

    Raises:
        Exception: On any failure, with the original error chained as the
            cause. A ``progress`` event carrying the error is emitted first.
    """
    from urllib.parse import urlparse  # local import: only this function needs it

    app.logger.info(f"Taking screenshot of {url} with timeout {timeout}ms")
    try:
        if not url or urlparse(url).scheme not in ("http", "https"):
            raise ValueError("URL must start with http:// or https://")
        if max_height <= 0:
            raise ValueError("max_height must be a positive integer")

        screenshot_type = image_format.lower()
        if screenshot_type == "jpg":
            screenshot_type = "jpeg"

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()
                try:
                    page.goto(url, timeout=timeout)
                    page.wait_for_load_state("networkidle", timeout=timeout)
                except PlaywrightTimeoutError:
                    # Best effort: a slow page still yields whatever has rendered so far.
                    app.logger.warning(f"Timeout waiting for networkidle on {url}. Capturing partial screenshot.")
                screenshot_bytes = page.screenshot(full_page=True, type=screenshot_type, timeout=timeout)
            finally:
                # Always release the browser, even when the capture itself failed.
                browser.close()

        socketio.emit('progress', {'percent': 25, 'message': 'Screenshot taken'})
        image = Image.open(io.BytesIO(screenshot_bytes))
        if image.height > max_height:
            ratio = max_height / image.height
            # Very tall pages can push the scaled width below one pixel.
            new_width = max(1, int(image.width * ratio))
            image = image.resize((new_width, max_height), Image.LANCZOS)
        app.logger.info("Screenshot captured successfully")
        return image
    except Exception as e:
        app.logger.error(f"Error taking screenshot: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        raise Exception(f"Failed to capture screenshot: {str(e)}") from e

def _files_from_markdown(response_text: str) -> dict:
    """Fallback parser: read ``### <filename>`` headings followed by a fenced code block.

    Raises:
        ValueError: If no heading/code-block pair is found.
    """
    pattern = r"### (.+?)\n```(?:\w+)?\n(.*?)\n```"
    matches = re.findall(pattern, response_text, re.DOTALL)
    if not matches:
        raise ValueError(f"Could not parse response into files. Response start: {response_text[:200]}")
    return {
        "files": {
            filename.strip(): {"content": content.strip()}
            for filename, content in matches
        }
    }


def _files_from_json(json_content: str) -> dict:
    """Decode the text found between the START/STOP tokens and validate its shape.

    Raises:
        json.JSONDecodeError: If the payload is not valid JSON.
        ValueError: If the decoded value is not ``{"files": {...}}``.
    """
    payload = json_content.strip()
    # Tolerate a markdown code fence wrapped around the JSON object.
    payload = re.sub(r"\A```[A-Za-z]*\s*", "", payload)
    payload = re.sub(r"\s*```\Z", "", payload)

    files = json.loads(payload)
    if not isinstance(files, dict) or "files" not in files or not isinstance(files["files"], dict):
        raise ValueError("Invalid JSON structure")

    # Accept {"name": "text"} as shorthand for {"name": {"content": "text"}} so
    # that consumers can rely on each entry being a mapping.
    files["files"] = {
        name: {"content": entry} if isinstance(entry, str) else entry
        for name, entry in files["files"].items()
    }
    return files


def parse_model_response(response_text: str) -> dict:
    """Turn the model's raw text into ``{"files": {name: {"content": ...}}}``.

    The JSON block delimited by ``==START_JSON==`` / ``==STOP_JSON==`` is
    preferred. When the tokens are missing or the JSON does not decode, the
    markdown fallback format is tried instead. A ``progress`` event is emitted
    on success (90%) and on failure (100% with the error text).

    Args:
        response_text: Full text returned by the model.

    Returns:
        A dict with a ``"files"`` mapping. ``index.html`` content is returned
        untouched.

    Raises:
        ValueError: If the JSON has the wrong structure or neither format
            yields any file.
    """
    app.logger.info("Parsing model response")
    fallback_message = 'Parsed with fallback method'
    try:
        match = re.search(r"==START_JSON==(.*?)==STOP_JSON==", response_text, re.DOTALL)
        if match:
            try:
                files = _files_from_json(match.group(1))
                message = 'JSON parsed'
            except json.JSONDecodeError:
                app.logger.warning(f"Response is not valid JSON, attempting custom parsing. Response start: {response_text[:200]}")
                files = _files_from_markdown(response_text)
                message = fallback_message
        else:
            app.logger.warning("No JSON found within START/STOP tokens, attempting custom parsing")
            files = _files_from_markdown(response_text)
            message = fallback_message
    except ValueError as e:
        # Every failure path reports the same way before propagating.
        app.logger.error(f"ValueError in parsing: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        raise

    socketio.emit('progress', {'percent': 90, 'message': message})
    # Keep the original index.html content intact (full HTML with <link> and <script>)
    app.logger.info("Preserving original index.html content")
    return files

# Image modes that can be written to PNG without conversion.
_PNG_SAFE_MODES = frozenset({"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"})


def _file_content(files: dict, name: str) -> str:
    """Return the text of ``files["files"][name]``, or ``""`` when absent or malformed."""
    entry = files["files"].get(name, {})
    if isinstance(entry, dict):
        return entry.get("content", "") or ""
    return entry if isinstance(entry, str) else ""


def _extract_body_content(index_html_content: str) -> str:
    """Return the markup that belongs inside ``<body>`` of the combined document."""
    flags = re.DOTALL | re.IGNORECASE
    body_match = re.search(r"<body[^>]*>(.*?)</body>", index_html_content, flags)
    if body_match:
        app.logger.info("Extracted body content for combined HTML")
        return body_match.group(1).strip()

    if "<html" in index_html_content.lower():
        # Unclosed <body>, or no <body> at all: take what follows the opening
        # tag. A capturing group is used because `re` rejects variable-width
        # look-behind, and group(1) keeps the opening tag itself out of the result.
        content_match = (
            re.search(r"<body[^>]*>(.*)$", index_html_content, flags)
            or re.search(r"<html[^>]*>(.*)$", index_html_content, flags)
        )
        app.logger.warning("No explicit body tag found, using inferred content for combined HTML")
        if not content_match:
            return index_html_content
        inferred = content_match.group(1).strip()
        # Drop a dangling </html> so it is not nested inside the new document.
        return re.sub(r"</html\s*>\s*$", "", inferred, flags=re.IGNORECASE).strip()

    app.logger.info("No HTML structure detected, using raw content as-is for combined HTML")
    return index_html_content


def _build_combined_html(css_content: str, body_content: str, js_content: str) -> str:
    """Assemble one standalone HTML document with the CSS and JS inlined."""
    parts = [
        "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n",
        "    <meta charset=\"UTF-8\">\n",
        "    <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n",
        "    <title>Recreated Webpage</title>\n",
    ]
    if css_content:
        parts += ["    <style>\n", f"        {css_content}\n", "    </style>\n"]
    parts.append("</head>\n<body>\n")
    parts.append(f"    {body_content}\n" if body_content else "    <!-- No HTML content generated -->\n")
    if js_content:
        parts += ["    <script>\n", f"        {js_content}\n", "    </script>\n"]
    parts.append("</body>\n</html>")
    return "".join(parts)


def image_to_html(image: Image.Image, model_name: str) -> tuple[dict, str, str]:
    """Ask a Gemini model to recreate the webpage shown in ``image``.

    Args:
        image: Screenshot to recreate.
        model_name: Name passed to ``genai.GenerativeModel``.

    Returns:
        ``(files, html_content, combined_html)`` where ``files`` is the parsed
        ``{"files": {...}}`` mapping, ``html_content`` is the generated
        ``index.html`` (or the first non-empty ``.html`` file, or a
        placeholder) and ``combined_html`` is a single document with CSS and
        JS inlined. If the model call or the post-processing fails, the error
        is NOT raised: an ``error.txt`` entry and two error strings are
        returned instead.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    app.logger.info(f"Converting image to HTML with model {model_name}")
    if image is None:
        raise ValueError("Image is None.")

    # Uploaded images may use a mode PNG cannot store (e.g. CMYK JPEGs).
    if image.mode not in _PNG_SAFE_MODES:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()

    prompt = """
    Analyze this webpage screenshot and generate a complete and fully functional code package to recreate it **exactly** as it appears in the image. It is extremely important that the generated HTML, CSS, and JavaScript, when combined, will produce a visual output that is as close as possible to the original screenshot. Do not omit any important elements or styling details. Pay very close attention to fonts, colors, spacing, layout, and interactive elements. Preserve the original structure and avoid making unnecessary changes.

    If there are buttons, forms, or other interactive elements, create JavaScript code that makes them functional or simulates the expected behavior.

    Return your response in the following JSON format, enclosed within `==START_JSON==` and `==STOP_JSON==` tokens:

    ==START_JSON==
    {
        "files": {
            "index.html": {"content": "<html>...</html>"},
            "style.css": {"content": "body { ... }"},
            "script.js": {"content": "console.log('...');"},
            "and_any_other_files.needed": {"content": "file content"}
        }
    }
    ==STOP_JSON==

    Include **all** necessary files to recreate the webpage, including HTML, CSS (either inline or in separate `.css` files), JavaScript (either inline or in separate `.js` files), and any other assets (images, fonts, etc.) if present in the screenshot.  Name the main HTML file `index.html`. Use `style.css` for CSS and `script.js` for JavaScript, unless there are multiple CSS or JavaScript files, in which case, name them descriptively (e.g., `responsive.css`, `animations.js`). Prioritize external CSS and JS files for better organization unless inline styling or scripting is clearly more appropriate based on the screenshot.

    Generate JavaScript code to handle basic interactions, such as button clicks, form submissions, and any other dynamic behavior visible in the screenshot.

    It is CRITICAL that the HTML is well-formed and valid, that CSS styles are complete and accurate, and that JavaScript code functions as intended to produce the webpage shown in the screenshot.  **The appearance and functionality of the outputted code should match the screenshot, and this is your primary objective.**

    DO NOT INCLUDE any introductory or explanatory text outside of the JSON block. Only the JSON block should be present in your response.
    """

    model = genai.GenerativeModel(model_name)
    contents = [prompt, {"mime_type": "image/png", "data": img_str}]

    try:
        socketio.emit('progress', {'percent': 50, 'message': 'Sending request to Gemini...'})
        response = model.generate_content(contents)
        socketio.emit('progress', {'percent': 75, 'message': 'Received response from Gemini'})
        app.logger.info(f"Raw Gemini API response: {response.text}")
        files = parse_model_response(response.text)

        # Extract file contents
        css_content = _file_content(files, "style.css")
        js_content = _file_content(files, "script.js")
        index_html_content = _file_content(files, "index.html")

        # Combined document: body markup only, with CSS and JS inlined around it.
        combined_html = _build_combined_html(
            css_content,
            _extract_body_content(index_html_content),
            js_content,
        )

        # For preview and standalone file, use the original full index.html
        # content; otherwise fall back to the first non-empty .html file.
        html_content = index_html_content or next(
            (
                content
                for fname in files["files"]
                if fname.endswith(".html") and (content := _file_content(files, fname))
            ),
            "<p>No HTML file generated</p>",
        )

        app.logger.info("HTML and files extracted successfully")
        socketio.emit('progress', {'percent': 100, 'message': 'Processing complete'})
        return files, html_content, combined_html
    except Exception as e:
        # Deliberate best effort: callers receive an error payload, not an exception.
        app.logger.error(f"Error extracting files: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        return {"files": {"error.txt": {"content": f"Error: {str(e)}"}}}, f"Error: {str(e)}", f"Error: {str(e)}"

# --- Flask Routes ---
@app.route('/')
def index():
    """Render the landing page with the selectable models and the default choice."""
    template_context = {
        "models": AVAILABLE_MODELS,
        "default_model": DEFAULT_MODEL,
    }
    return render_template('index.html', **template_context)

@app.route('/process_url', methods=['POST'])
def process_url():
    """Screenshot the submitted URL and convert it to HTML/CSS/JS.

    Form fields: ``url`` (required), ``max_height``, ``image_format``,
    ``timeout`` and ``model_name`` (optional; empty values fall back to the
    module defaults).

    Returns:
        JSON with ``files``, ``preview`` and ``combined_html`` on success,
        ``{"error": ...}`` with status 400 for invalid input, or
        ``{"error": ...}`` with status 500 when processing raises.
    """
    data = request.form

    url = (data.get('url') or '').strip()
    if not url:
        return jsonify({"error": "No URL provided"}), 400

    # Validate numeric fields up front so bad input is a client error, not an
    # unhandled exception.
    try:
        max_height = int(data.get('max_height') or DEFAULT_MAX_HEIGHT)
        timeout = int(data.get('timeout') or DEFAULT_TIMEOUT)
    except (TypeError, ValueError):
        return jsonify({"error": "max_height and timeout must be integers"}), 400
    if max_height <= 0 or timeout < 0:
        return jsonify({"error": "max_height must be positive and timeout must not be negative"}), 400

    image_format = (data.get('image_format') or DEFAULT_IMAGE_FORMAT).strip().lower()
    if image_format == "jpg":
        image_format = "jpeg"
    if image_format not in ("png", "jpeg"):
        return jsonify({"error": "image_format must be PNG or JPEG"}), 400

    model_name = data.get('model_name') or DEFAULT_MODEL

    try:
        files, html_content, combined_html = image_to_html(screenshot_from_url(url, max_height, image_format, timeout), model_name)
        return jsonify({"files": files["files"], "preview": html_content, "combined_html": combined_html})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route('/process_image', methods=['POST'])
def process_image():
    """Convert an uploaded screenshot to HTML/CSS/JS.

    Expects a multipart form with an ``image`` file and an optional
    ``model_name`` field.

    Returns:
        JSON with ``files``, ``preview`` and ``combined_html`` on success,
        ``{"error": ...}`` with status 400 when no usable image was uploaded,
        or ``{"error": ...}`` with status 500 when processing raises.
    """
    image_file = request.files.get('image')
    # An empty file input still submits the field, just without a filename.
    if image_file is None or not image_file.filename:
        return jsonify({"error": "No image uploaded"}), 400

    model_name = request.form.get('model_name') or DEFAULT_MODEL

    try:
        image = Image.open(image_file)
    except OSError:
        # Covers PIL.UnidentifiedImageError: the upload is not a readable image.
        return jsonify({"error": "Uploaded file is not a valid image"}), 400
    except Exception as e:
        return jsonify({"error": str(e)}), 500

    try:
        files, html_content, combined_html = image_to_html(image, model_name)
        return jsonify({"files": files["files"], "preview": html_content, "combined_html": combined_html})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@socketio.on('connect')
def test_connect():
    """Record in the application log that a Socket.IO client has connected."""
    logger = app.logger
    logger.info("Client connected")

if __name__ == '__main__':
    # Script entry point: serve the app through Socket.IO on every interface,
    # port 7860, with debug mode off.
    socketio.run(
        app,
        host='0.0.0.0',
        port=7860,
        debug=False,
        allow_unsafe_werkzeug=True,
    )