Spaces:
Running
Running
import os | |
from flask import Flask, render_template, request, jsonify | |
from PIL import Image | |
import google.generativeai as genai | |
import base64 | |
import io | |
import logging | |
import json | |
import re | |
import random | |
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError | |
from flask_socketio import SocketIO, emit | |
app = Flask(__name__)
# Session-signing key. Prefer a deployment-provided SECRET_KEY so every worker
# and every restart shares the same key; otherwise fall back to 32 bytes from
# the operating system's CSPRNG. The `random` module is a predictable PRNG and
# must not be used to generate secrets.
app.config['SECRET_KEY'] = os.environ.get("SECRET_KEY") or os.urandom(32).hex()
socketio = SocketIO(app, cors_allowed_origins="*")  # For development; restrict in production.
# --- API Key Configuration ---
# The Gemini client is configured once at import time; a missing key aborts
# start-up immediately instead of failing on the first request.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY:
    genai.configure(api_key=GOOGLE_API_KEY)
else:
    raise ValueError("GOOGLE_API_KEY environment variable not set.")
# --- Free-tier Gemini Models ---
DEFAULT_MODEL = "gemini-1.5-flash"
AVAILABLE_MODELS = [DEFAULT_MODEL]

# --- Optimization Parameters ---
DEFAULT_MAX_HEIGHT = 1000      # screenshots taller than this are scaled down (pixels)
DEFAULT_IMAGE_FORMAT = "PNG"   # lower-cased before being handed to Playwright
DEFAULT_TIMEOUT = 10_000       # milliseconds, used for navigation and capture

# --- Ensure Playwright uses the same cache path at runtime ---
os.environ.update(PLAYWRIGHT_BROWSERS_PATH="/app/.cache/playwright")
# --- Utility Functions --- | |
def screenshot_from_url(url: str, max_height: int = DEFAULT_MAX_HEIGHT, image_format: str = DEFAULT_IMAGE_FORMAT, timeout: int = DEFAULT_TIMEOUT) -> Image.Image:
    """Capture a full-page screenshot of *url* with headless Chromium.

    Progress is reported to clients through Socket.IO ``progress`` events.

    Args:
        url: Page to capture. Only ``http://`` and ``https://`` URLs are
            accepted, because the value comes from the client and other
            schemes (for example ``file://``) would expose server-local
            resources to the browser.
        max_height: Images taller than this are scaled down to this height,
            keeping the aspect ratio.
        image_format: Screenshot type; lower-cased and passed to Playwright.
        timeout: Timeout in milliseconds, applied separately to navigation,
            the network-idle wait and the capture itself.

    Returns:
        The (possibly down-scaled) screenshot as a PIL image.

    Raises:
        Exception: ``"Failed to capture screenshot: ..."`` wrapping any
            underlying failure; the original error is attached as the cause.
    """
    app.logger.info(f"Taking screenshot of {url} with timeout {timeout}ms")
    try:
        if not isinstance(url, str) or not url.strip().lower().startswith(("http://", "https://")):
            raise ValueError("Only http:// and https:// URLs are supported")
        url = url.strip()
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                page = context.new_page()
                try:
                    page.goto(url, timeout=timeout)
                    page.wait_for_load_state("networkidle", timeout=timeout)
                except PlaywrightTimeoutError:
                    # Best effort: a slow page still yields whatever has rendered so far.
                    app.logger.warning(f"Timeout waiting for networkidle on {url}. Capturing partial screenshot.")
                screenshot_bytes = page.screenshot(full_page=True, type=image_format.lower(), timeout=timeout)
            finally:
                # Close the browser even when navigation or capture raised.
                browser.close()
        socketio.emit('progress', {'percent': 25, 'message': 'Screenshot taken'})
        image = Image.open(io.BytesIO(screenshot_bytes))
        if image.height > max_height:
            ratio = max_height / image.height
            # Clamp to 1px so extremely tall pages cannot produce a zero-width image.
            new_width = max(1, int(image.width * ratio))
            image = image.resize((new_width, max_height), Image.LANCZOS)
        app.logger.info("Screenshot captured successfully")
        return image
    except Exception as e:
        app.logger.error(f"Error taking screenshot: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        raise Exception(f"Failed to capture screenshot: {str(e)}") from e
def _strip_code_fence(text: str) -> str:
    """Return *text* without a single surrounding Markdown code fence, if present."""
    stripped = text.strip()
    fenced = re.fullmatch(r"```[\w-]*[ \t]*\r?\n(.*?)\r?\n?```", stripped, re.DOTALL)
    return fenced.group(1).strip() if fenced else stripped


def _parse_markdown_files(response_text: str) -> dict:
    """Fallback parser: read ``### filename`` headings followed by fenced code blocks."""
    pattern = r"### (.+?)\n```(?:\w+)?\n(.*?)\n```"
    matches = re.findall(pattern, response_text, re.DOTALL)
    if not matches:
        raise ValueError(f"Could not parse response into files. Response start: {response_text[:200]}")
    return {
        "files": {
            filename.strip(): {"content": content.strip()}
            for filename, content in matches
        }
    }


def parse_model_response(response_text: str) -> dict:
    """Turn the model's reply into ``{"files": {name: {"content": ...}}}``.

    The JSON payload between ``==START_JSON==`` and ``==STOP_JSON==`` is tried
    first (a Markdown code fence around it is tolerated). If the tokens are
    missing or the payload is not valid JSON, the reply is parsed as
    ``### filename`` headings followed by fenced code blocks instead.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A dict with a ``"files"`` mapping of file name to file entry.

    Raises:
        ValueError: If the JSON has an unexpected structure or neither parsing
            strategy finds any file. An error ``progress`` event is emitted
            before the exception propagates.
    """
    app.logger.info("Parsing model response")
    try:
        files = None
        match = re.search(r"==START_JSON==(.*?)==STOP_JSON==", response_text, re.DOTALL)
        if match:
            try:
                files = json.loads(_strip_code_fence(match.group(1)))
            except json.JSONDecodeError:
                app.logger.warning(f"Response is not valid JSON, attempting custom parsing. Response start: {response_text[:200]}")
            else:
                if not isinstance(files, dict) or "files" not in files or not isinstance(files["files"], dict):
                    raise ValueError("Invalid JSON structure")
                # Callers read entry["content"]; accept a bare string as shorthand for it.
                files["files"] = {
                    name: {"content": entry} if isinstance(entry, str) else entry
                    for name, entry in files["files"].items()
                }
                socketio.emit('progress', {'percent': 90, 'message': 'JSON parsed'})
        else:
            app.logger.warning("No JSON found within START/STOP tokens, attempting custom parsing")
        if files is None:
            files = _parse_markdown_files(response_text)
            socketio.emit('progress', {'percent': 90, 'message': 'Parsed with fallback method'})
        # Keep the original index.html content intact (full HTML with <link> and <script>)
        app.logger.info("Preserving original index.html content")
        return files
    except ValueError as e:
        app.logger.error(f"ValueError in parsing: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        raise
# Instruction text sent to the model together with the screenshot.
_IMAGE_TO_HTML_PROMPT = """
Analyze this webpage screenshot and generate a complete and fully functional code package to recreate it **exactly** as it appears in the image. It is extremely important that the generated HTML, CSS, and JavaScript, when combined, will produce a visual output that is as close as possible to the original screenshot. Do not omit any important elements or styling details. Pay very close attention to fonts, colors, spacing, layout, and interactive elements. Preserve the original structure and avoid making unnecessary changes.
If there are buttons, forms, or other interactive elements, create JavaScript code that makes them functional or simulates the expected behavior.
Return your response in the following JSON format, enclosed within `==START_JSON==` and `==STOP_JSON==` tokens:
==START_JSON==
{
"files": {
"index.html": {"content": "<html>...</html>"},
"style.css": {"content": "body { ... }"},
"script.js": {"content": "console.log('...');"},
"and_any_other_files.needed": {"content": "file content"}
}
}
==STOP_JSON==
Include **all** necessary files to recreate the webpage, including HTML, CSS (either inline or in separate `.css` files), JavaScript (either inline or in separate `.js` files), and any other assets (images, fonts, etc.) if present in the screenshot. Name the main HTML file `index.html`. Use `style.css` for CSS and `script.js` for JavaScript, unless there are multiple CSS or JavaScript files, in which case, name them descriptively (e.g., `responsive.css`, `animations.js`). Prioritize external CSS and JS files for better organization unless inline styling or scripting is clearly more appropriate based on the screenshot.
Generate JavaScript code to handle basic interactions, such as button clicks, form submissions, and any other dynamic behavior visible in the screenshot.
It is CRITICAL that the HTML is well-formed and valid, that CSS styles are complete and accurate, and that JavaScript code functions as intended to produce the webpage shown in the screenshot. **The appearance and functionality of the outputted code should match the screenshot, and this is your primary objective.**
DO NOT INCLUDE any introductory or explanatory text outside of the JSON block. Only the JSON block should be present in your response.
"""


def _extract_body_content(index_html: str) -> str:
    """Return the markup to place inside ``<body>`` of the combined page."""
    flags = re.DOTALL | re.IGNORECASE
    body_match = re.search(r"<body[^>]*>(.*?)</body>", index_html, flags)
    if body_match:
        app.logger.info("Extracted body content for combined HTML")
        return body_match.group(1).strip()
    if "<html" in index_html.lower():
        # Unclosed <body>, or no <body> at all: take what follows the opening
        # tag. A capture group is used because `re` rejects variable-width
        # look-behind, and group(1) keeps the opening tag itself out of the result.
        inferred = re.search(r"<body[^>]*>(.*)", index_html, flags) or \
            re.search(r"<html[^>]*>(.*)", index_html, flags)
        app.logger.warning("No explicit body tag found, using inferred content for combined HTML")
        return inferred.group(1).strip() if inferred else index_html
    app.logger.info("No HTML structure detected, using raw content as-is for combined HTML")
    return index_html


def _build_combined_html(css_content: str, body_content: str, js_content: str) -> str:
    """Assemble one standalone HTML document with the CSS and JS inlined."""
    parts = [
        "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n",
        " <meta charset=\"UTF-8\">\n",
        " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n",
        " <title>Recreated Webpage</title>\n",
    ]
    if css_content:
        parts += [" <style>\n", f" {css_content}\n", " </style>\n"]
    parts.append("</head>\n<body>\n")
    if body_content:
        parts.append(f" {body_content}\n")
    else:
        parts.append(" <!-- No HTML content generated -->\n")
    if js_content:
        parts += [" <script>\n", f" {js_content}\n", " </script>\n"]
    parts.append("</body>\n</html>")
    return "".join(parts)


def image_to_html(image: Image.Image, model_name: str) -> tuple[dict, str, str]:
    """Ask a Gemini model to recreate the page shown in *image*.

    The image is sent as a base64-encoded PNG together with the prompt; the
    reply is parsed with ``parse_model_response``. Progress is reported through
    Socket.IO ``progress`` events.

    Args:
        image: Screenshot to recreate.
        model_name: Name of the Gemini model to call.

    Returns:
        ``(files, html_content, combined_html)`` where ``files`` is the parsed
        ``{"files": {...}}`` dict, ``html_content`` is the generated
        ``index.html`` (or the first other ``.html`` file, or a placeholder
        paragraph) and ``combined_html`` is a single document with
        ``style.css`` and ``script.js`` inlined. If the model call or the
        parsing fails, an ``error.txt`` file entry and two ``"Error: ..."``
        strings are returned instead of raising.

    Raises:
        ValueError: If *image* is ``None``.
    """
    app.logger.info(f"Converting image to HTML with model {model_name}")
    if image is None:
        raise ValueError("Image is None.")
    buffered = io.BytesIO()
    image.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    model = genai.GenerativeModel(model_name)
    contents = [_IMAGE_TO_HTML_PROMPT, {"mime_type": "image/png", "data": img_str}]
    try:
        socketio.emit('progress', {'percent': 50, 'message': 'Sending request to Gemini...'})
        response = model.generate_content(contents)
        socketio.emit('progress', {'percent': 75, 'message': 'Received response from Gemini'})
        app.logger.info(f"Raw Gemini API response: {response.text}")
        files = parse_model_response(response.text)
        # Extract file contents
        css_content = files["files"].get("style.css", {}).get("content", "")
        js_content = files["files"].get("script.js", {}).get("content", "")
        index_html_content = files["files"].get("index.html", {}).get("content", "")
        # Strip index.html to body content for combined HTML, then inline CSS and JS
        body_content = _extract_body_content(index_html_content)
        combined_html = _build_combined_html(css_content, body_content, js_content)
        # For preview and standalone file, use the original full index.html content
        html_content = index_html_content or next(
            (f["content"] for fname, f in files["files"].items() if fname.endswith(".html")),
            "<p>No HTML file generated</p>",
        )
        app.logger.info("HTML and files extracted successfully")
        socketio.emit('progress', {'percent': 100, 'message': 'Processing complete'})
        return files, html_content, combined_html
    except Exception as e:
        # Deliberate best effort: report the failure as content instead of raising.
        app.logger.error(f"Error extracting files: {e}")
        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
        return {"files": {"error.txt": {"content": f"Error: {str(e)}"}}}, f"Error: {str(e)}", f"Error: {str(e)}"
# --- Flask Routes --- | |
def index():
    """Render the landing page with the selectable models and the default one."""
    # NOTE(review): no @app.route decorator is visible on this view here —
    # confirm it is registered with the Flask app.
    template_context = {"models": AVAILABLE_MODELS, "default_model": DEFAULT_MODEL}
    return render_template('index.html', **template_context)
def process_url():
    """Screenshot the submitted URL and convert it to HTML/CSS/JS files.

    Reads ``url``, ``max_height``, ``image_format``, ``timeout`` and
    ``model_name`` from the submitted form.

    Returns:
        JSON with ``files``, ``preview`` and ``combined_html`` on success;
        JSON ``{"error": ...}`` with status 400 for invalid form input, or
        status 500 when the screenshot or conversion raises.
    """
    # NOTE(review): no @app.route decorator is visible on this view here —
    # confirm it is registered with the Flask app.
    data = request.form
    url = (data.get('url') or '').strip()
    if not url:
        return jsonify({"error": "No URL provided"}), 400
    try:
        max_height = int(data.get('max_height', DEFAULT_MAX_HEIGHT))
        timeout = int(data.get('timeout', DEFAULT_TIMEOUT))
    except (TypeError, ValueError):
        return jsonify({"error": "max_height and timeout must be integers"}), 400
    # A non-positive height breaks the resize step; a non-positive timeout
    # would let a client request an unbounded wait.
    if max_height <= 0 or timeout <= 0:
        return jsonify({"error": "max_height and timeout must be positive"}), 400
    image_format = data.get('image_format', DEFAULT_IMAGE_FORMAT)
    if image_format.lower() not in ("png", "jpeg"):
        return jsonify({"error": "image_format must be PNG or JPEG"}), 400
    model_name = data.get('model_name', DEFAULT_MODEL)
    if model_name not in AVAILABLE_MODELS:
        return jsonify({"error": f"Unsupported model: {model_name}"}), 400
    try:
        screenshot = screenshot_from_url(url, max_height, image_format, timeout)
        files, html_content, combined_html = image_to_html(screenshot, model_name)
        return jsonify({"files": files["files"], "preview": html_content, "combined_html": combined_html})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
def process_image():
    """Convert an uploaded screenshot to HTML/CSS/JS files.

    Reads the ``image`` file part and the ``model_name`` form field.

    Returns:
        JSON with ``files``, ``preview`` and ``combined_html`` on success;
        JSON ``{"error": ...}`` with status 400 when no image was uploaded or
        the model is not one of ``AVAILABLE_MODELS``, or status 500 when
        opening or converting the image raises.
    """
    # NOTE(review): no @app.route decorator is visible on this view here —
    # confirm it is registered with the Flask app.
    image_file = request.files.get('image')
    # A form submitted without choosing a file still sends an empty-named part.
    if image_file is None or not image_file.filename:
        return jsonify({"error": "No image uploaded"}), 400
    model_name = request.form.get('model_name', DEFAULT_MODEL)
    if model_name not in AVAILABLE_MODELS:
        return jsonify({"error": f"Unsupported model: {model_name}"}), 400
    try:
        files, html_content, combined_html = image_to_html(Image.open(image_file), model_name)
        return jsonify({"files": files["files"], "preview": html_content, "combined_html": combined_html})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
def test_connect():
    """Log that a client has connected."""
    # NOTE(review): presumably a Socket.IO 'connect' handler, but no
    # @socketio.on decorator is visible here — confirm it is registered.
    logger = app.logger
    logger.info("Client connected")
if __name__ == '__main__':
    # Serve through Socket.IO on all interfaces, port 7860, with debug off.
    server_options = {
        "host": '0.0.0.0',
        "port": 7860,
        "debug": False,
        "allow_unsafe_werkzeug": True,
    }
    socketio.run(app, **server_options)