# screenshot / app.py
# broadfield-dev's picture
# Update app.py
# 6c870fa verified
import os
from flask import Flask, render_template, request, jsonify
from PIL import Image
import google.generativeai as genai
import base64
import io
import logging
import json
import re
import random
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
from flask_socketio import SocketIO, emit
app = Flask(__name__)
# Session-signing key. Drawn from the OS CSPRNG rather than the `random`
# module, whose output is predictable and unsuitable for secrets.
# NOTE: the key is regenerated on every process start, so anything signed
# with it does not survive a restart or span multiple worker processes.
app.config['SECRET_KEY'] = os.urandom(32).hex()
socketio = SocketIO(app, cors_allowed_origins="*")  # For development; restrict in production.

# --- API Key Configuration ---
# Fail fast at import time when the Gemini key is missing.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY environment variable not set.")
genai.configure(api_key=GOOGLE_API_KEY)

# --- Free-tier Gemini Models ---
AVAILABLE_MODELS = ["gemini-1.5-flash"]
DEFAULT_MODEL = "gemini-1.5-flash"

# --- Optimization Parameters ---
DEFAULT_MAX_HEIGHT = 1000      # pixels; taller screenshots are scaled down to this height
DEFAULT_IMAGE_FORMAT = "PNG"   # lower-cased and passed to Playwright as the screenshot type
DEFAULT_TIMEOUT = 10000        # milliseconds, used for navigation, network-idle wait and capture

# --- Ensure Playwright uses the same cache path at runtime ---
os.environ["PLAYWRIGHT_BROWSERS_PATH"] = "/app/.cache/playwright"
# --- Utility Functions ---
def screenshot_from_url(url: str, max_height: int = DEFAULT_MAX_HEIGHT, image_format: str = DEFAULT_IMAGE_FORMAT, timeout: int = DEFAULT_TIMEOUT) -> Image.Image:
    """Capture a full-page screenshot of ``url`` with headless Chromium.

    Args:
        url: Address to navigate to.
        max_height: Maximum height in pixels of the returned image; taller
            captures are scaled down proportionally.
        image_format: Screenshot type; lower-cased and handed to Playwright.
        timeout: Timeout in milliseconds applied to navigation, the
            network-idle wait and the capture itself.

    Returns:
        The screenshot as a PIL image, scaled down if it exceeded ``max_height``.

    Raises:
        Exception: Any failure, wrapped as ``"Failed to capture screenshot: ..."``
            with the original exception chained as the cause. An error
            ``progress`` event is emitted before raising.
    """
    app.logger.info(f"Taking screenshot of {url} with timeout {timeout}ms")
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()
                try:
                    page.goto(url, timeout=timeout)
                    page.wait_for_load_state("networkidle", timeout=timeout)
                except PlaywrightTimeoutError:
                    # Best effort: a slow page still yields whatever has rendered so far.
                    app.logger.warning(f"Timeout waiting for networkidle on {url}. Capturing partial screenshot.")
                screenshot_bytes = page.screenshot(full_page=True, type=image_format.lower(), timeout=timeout)
            finally:
                # Close the browser even when the capture itself fails.
                browser.close()
        socketio.emit('progress', {'percent': 25, 'message': 'Screenshot taken'})
        image = Image.open(io.BytesIO(screenshot_bytes))
        if image.height > max_height:
            ratio = max_height / image.height
            # Clamp to 1px: for extremely tall pages the scaled width would
            # otherwise truncate to 0 and make resize() fail.
            new_width = max(1, int(image.width * ratio))
            image = image.resize((new_width, max_height), Image.LANCZOS)
        app.logger.info("Screenshot captured successfully")
        return image
    except Exception as e:
        app.logger.error(f"Error taking screenshot: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        raise Exception(f"Failed to capture screenshot: {str(e)}") from e
def _parse_markdown_sections(response_text: str) -> dict:
    """Fallback parser: read files from ``### name`` headings followed by fenced code blocks.

    Raises:
        ValueError: If no heading/code-fence pair is found.
    """
    pattern = r"### (.+?)\n```(?:\w+)?\n(.*?)\n```"
    matches = re.findall(pattern, response_text, re.DOTALL)
    if not matches:
        raise ValueError(f"Could not parse response into files. Response start: {response_text[:200]}")
    return {
        "files": {
            filename.strip(): {"content": content.strip()}
            for filename, content in matches
        }
    }


def parse_model_response(response_text: str) -> dict:
    """Turn the model's text response into a ``{"files": {name: {"content": ...}}}`` dict.

    The JSON payload between ``==START_JSON==`` and ``==STOP_JSON==`` is
    preferred. If the markers are missing, or the payload is not valid JSON,
    the response is parsed as markdown sections instead.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A dict with a single ``"files"`` key mapping file names to
        ``{"content": str}`` entries.

    Raises:
        ValueError: If the JSON payload has the wrong structure, or if neither
            parsing strategy yields any file. An error ``progress`` event is
            emitted before the exception propagates.
    """
    app.logger.info("Parsing model response")
    try:
        # Use regex to find content within START and STOP tokens
        match = re.search(r"==START_JSON==(.*?)==STOP_JSON==", response_text, re.DOTALL)
        if match:
            try:
                files = json.loads(match.group(1).strip())
            except json.JSONDecodeError:
                app.logger.warning(f"Response is not valid JSON, attempting custom parsing. Response start: {response_text[:200]}")
                # A failure here now reaches the ValueError handler below, so
                # it is logged and reported to the client like any other.
                files = _parse_markdown_sections(response_text)
                progress_message = 'Parsed with fallback method'
            else:
                if not isinstance(files, dict) or "files" not in files or not isinstance(files["files"], dict):
                    raise ValueError("Invalid JSON structure")
                progress_message = 'JSON parsed'
        else:
            app.logger.warning("No JSON found within START/STOP tokens, attempting custom parsing")
            files = _parse_markdown_sections(response_text)
            progress_message = 'Parsed with fallback method'
    except ValueError as e:
        app.logger.error(f"ValueError in parsing: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        raise
    socketio.emit('progress', {'percent': 90, 'message': progress_message})
    # Keep the original index.html content intact (full HTML with <link> and <script>)
    app.logger.info("Preserving original index.html content")
    return files
# Instruction sent to the model alongside the screenshot. The response format
# described here is what parse_model_response() expects.
_RECREATION_PROMPT = """
Analyze this webpage screenshot and generate a complete and fully functional code package to recreate it **exactly** as it appears in the image. It is extremely important that the generated HTML, CSS, and JavaScript, when combined, will produce a visual output that is as close as possible to the original screenshot. Do not omit any important elements or styling details. Pay very close attention to fonts, colors, spacing, layout, and interactive elements. Preserve the original structure and avoid making unnecessary changes.
If there are buttons, forms, or other interactive elements, create JavaScript code that makes them functional or simulates the expected behavior.
Return your response in the following JSON format, enclosed within `==START_JSON==` and `==STOP_JSON==` tokens:
==START_JSON==
{
"files": {
"index.html": {"content": "<html>...</html>"},
"style.css": {"content": "body { ... }"},
"script.js": {"content": "console.log('...');"},
"and_any_other_files.needed": {"content": "file content"}
}
}
==STOP_JSON==
Include **all** necessary files to recreate the webpage, including HTML, CSS (either inline or in separate `.css` files), JavaScript (either inline or in separate `.js` files), and any other assets (images, fonts, etc.) if present in the screenshot. Name the main HTML file `index.html`. Use `style.css` for CSS and `script.js` for JavaScript, unless there are multiple CSS or JavaScript files, in which case, name them descriptively (e.g., `responsive.css`, `animations.js`). Prioritize external CSS and JS files for better organization unless inline styling or scripting is clearly more appropriate based on the screenshot.
Generate JavaScript code to handle basic interactions, such as button clicks, form submissions, and any other dynamic behavior visible in the screenshot.
It is CRITICAL that the HTML is well-formed and valid, that CSS styles are complete and accurate, and that JavaScript code functions as intended to produce the webpage shown in the screenshot. **The appearance and functionality of the outputted code should match the screenshot, and this is your primary objective.**
DO NOT INCLUDE any introductory or explanatory text outside of the JSON block. Only the JSON block should be present in your response.
"""


def _extract_body_content(index_html_content: str) -> str:
    """Return the markup that belongs inside ``<body>`` of the combined page."""
    flags = re.DOTALL | re.IGNORECASE
    body_match = re.search(r"<body[^>]*>(.*?)</body>", index_html_content, flags)
    if body_match:
        app.logger.info("Extracted body content for combined HTML")
        return body_match.group(1).strip()
    if "<html" in index_html_content.lower():
        # Unclosed <body>, or no <body> at all: take everything after the
        # opening tag. A capture group is used instead of a look-behind because
        # Python's `re` rejects variable-width look-behinds, and group(1) keeps
        # the opening tag itself out of the extracted content.
        content_match = (
            re.search(r"<body[^>]*>(.*)$", index_html_content, flags)
            or re.search(r"<html[^>]*>(.*)$", index_html_content, flags)
        )
        app.logger.warning("No explicit body tag found, using inferred content for combined HTML")
        return content_match.group(1).strip() if content_match else index_html_content
    app.logger.info("No HTML structure detected, using raw content as-is for combined HTML")
    return index_html_content


def _build_combined_html(css_content: str, body_content: str, js_content: str) -> str:
    """Assemble one self-contained HTML document with the CSS and JS inlined."""
    parts = [
        "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n",
        " <meta charset=\"UTF-8\">\n",
        " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n",
        " <title>Recreated Webpage</title>\n",
    ]
    if css_content:
        parts += [" <style>\n", f" {css_content}\n", " </style>\n"]
    parts.append("</head>\n<body>\n")
    if body_content:
        parts.append(f" {body_content}\n")
    else:
        parts.append(" <!-- No HTML content generated -->\n")
    if js_content:
        parts += [" <script>\n", f" {js_content}\n", " </script>\n"]
    parts.append("</body>\n</html>")
    return "".join(parts)


def image_to_html(image: Image.Image, model_name: str) -> tuple[dict, str, str]:
    """Ask a Gemini model to recreate the page shown in ``image``.

    Args:
        image: Screenshot to recreate.
        model_name: Name passed to ``genai.GenerativeModel``.

    Returns:
        A ``(files, html_content, combined_html)`` tuple:
        ``files`` is the parsed ``{"files": {...}}`` dict, ``html_content`` is
        the full main HTML file (``index.html``, or the first other ``.html``
        file with content), and ``combined_html`` is a single document with the
        CSS and JS inlined. If the model call or the post-processing fails, the
        error is not raised: ``files`` holds a single ``error.txt`` entry and
        both strings are ``"Error: ..."``.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    app.logger.info(f"Converting image to HTML with model {model_name}")
    if image is None:
        raise ValueError("Image is None.")
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    model = genai.GenerativeModel(model_name)
    contents = [_RECREATION_PROMPT, {"mime_type": "image/png", "data": img_str}]
    try:
        socketio.emit('progress', {'percent': 50, 'message': 'Sending request to Gemini...'})
        response = model.generate_content(contents)
        socketio.emit('progress', {'percent': 75, 'message': 'Received response from Gemini'})
        app.logger.info(f"Raw Gemini API response: {response.text}")
        files = parse_model_response(response.text)

        # Extract file contents
        generated = files["files"]
        css_content = generated.get("style.css", {}).get("content", "")
        js_content = generated.get("script.js", {}).get("content", "")
        index_html_content = generated.get("index.html", {}).get("content", "")
        if not index_html_content:
            # No index.html: fall back to the first other HTML file that has
            # content, and use it for the combined document as well as the preview.
            index_html_content = next(
                (f["content"] for fname, f in generated.items()
                 if fname.endswith(".html") and f.get("content")),
                "",
            )

        # Strip the main HTML down to its body, then inline CSS and JS around it.
        body_content = _extract_body_content(index_html_content)
        combined_html = _build_combined_html(css_content, body_content, js_content)

        # For preview and standalone file, use the original full HTML content
        html_content = index_html_content or "<p>No HTML file generated</p>"
        app.logger.info("HTML and files extracted successfully")
        socketio.emit('progress', {'percent': 100, 'message': 'Processing complete'})
        return files, html_content, combined_html
    except Exception as e:
        # Deliberate best effort: callers receive an error payload, not an exception.
        app.logger.error(f"Error extracting files: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        return {"files": {"error.txt": {"content": f"Error: {str(e)}"}}}, f"Error: {str(e)}", f"Error: {str(e)}"
# --- Flask Routes ---
@app.route('/')
def index():
    """Render the landing page with the selectable models and the default choice."""
    template_context = {
        "models": AVAILABLE_MODELS,
        "default_model": DEFAULT_MODEL,
    }
    return render_template('index.html', **template_context)
@app.route('/process_url', methods=['POST'])
def process_url():
    """Screenshot the submitted URL and return the model's recreation as JSON.

    Form fields: ``url`` (required), ``max_height``, ``image_format``,
    ``timeout`` and ``model_name`` (optional, falling back to the module
    defaults).

    Returns:
        200 with ``files``, ``preview`` and ``combined_html`` on success;
        400 with ``error`` when a form field is missing or invalid;
        500 with ``error`` when the screenshot or conversion raises.
    """
    data = request.form
    url = (data.get('url') or '').strip()
    if not url:
        return jsonify({"error": "No URL provided"}), 400
    # SECURITY: the server itself navigates to this address, so only web
    # schemes are accepted (this rules out file:// and similar).
    # NOTE(review): internal/private hosts are still reachable over http(s);
    # add an allow-list or network-level restriction if this is exposed publicly.
    if not url.lower().startswith(("http://", "https://")):
        return jsonify({"error": "URL must start with http:// or https://"}), 400

    try:
        max_height = int(data.get('max_height', DEFAULT_MAX_HEIGHT))
        timeout = int(data.get('timeout', DEFAULT_TIMEOUT))
    except ValueError:
        return jsonify({"error": "max_height and timeout must be integers"}), 400
    if max_height <= 0 or timeout <= 0:
        return jsonify({"error": "max_height and timeout must be positive"}), 400

    image_format = data.get('image_format', DEFAULT_IMAGE_FORMAT)
    # The value is lower-cased and passed to Playwright as the screenshot type.
    if image_format.lower() not in ("png", "jpeg"):
        return jsonify({"error": "image_format must be PNG or JPEG"}), 400
    model_name = data.get('model_name', DEFAULT_MODEL)

    try:
        screenshot = screenshot_from_url(url, max_height, image_format, timeout)
        files, html_content, combined_html = image_to_html(screenshot, model_name)
        return jsonify({"files": files["files"], "preview": html_content, "combined_html": combined_html})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route('/process_image', methods=['POST'])
def process_image():
    """Convert an uploaded screenshot into HTML/CSS/JS and return it as JSON.

    Expects a multipart upload under ``image`` and an optional ``model_name``
    form field.

    Returns:
        200 with ``files``, ``preview`` and ``combined_html`` on success;
        400 with ``error`` when no image is uploaded or it cannot be opened;
        500 with ``error`` for any other failure.
    """
    if 'image' not in request.files:
        return jsonify({"error": "No image uploaded"}), 400
    image_file = request.files['image']
    model_name = request.form.get('model_name', DEFAULT_MODEL)
    try:
        try:
            image = Image.open(image_file)
        except OSError as e:
            # An unreadable or unrecognised upload is the client's error, not
            # a server fault (PIL's unidentified-image error is an OSError).
            return jsonify({"error": f"Invalid image: {e}"}), 400
        files, html_content, combined_html = image_to_html(image, model_name)
        return jsonify({"files": files["files"], "preview": html_content, "combined_html": combined_html})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@socketio.on('connect')
def test_connect():
    """Socket.IO ``connect`` handler: record that a client has connected."""
    logger = app.logger
    logger.info("Client connected")
if __name__ == '__main__':
    # Start the Socket.IO-aware server directly when run as a script:
    # all interfaces, port 7860, debug off, Werkzeug explicitly permitted.
    run_options = {
        "host": '0.0.0.0',
        "port": 7860,
        "debug": False,
        "allow_unsafe_werkzeug": True,
    }
    socketio.run(app, **run_options)