Spaces:

broadfield-dev
/

screenshot

Running

App Files Files Community

screenshot / app.py

broadfield-dev

Update app.py

6c870fa verified 4 months ago

raw

history blame contribute delete

13 kB

	import base64
	import io
	import json
	import logging
	import os
	import random
	import re
	import secrets

	import google.generativeai as genai
	from flask import Flask, render_template, request, jsonify
	from flask_socketio import SocketIO, emit
	from PIL import Image
	from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError

	app = Flask(__name__)
	# Prefer an operator-supplied key so it stays stable across restarts/workers;
	# otherwise fall back to a cryptographically strong random key (the `random`
	# module is not suitable for secrets).
	app.config['SECRET_KEY'] = os.environ.get("SECRET_KEY") or secrets.token_hex(32)
	socketio = SocketIO(app, cors_allowed_origins="*")  # For development; restrict in production.

	# --- API Key Configuration ---
	# Fail fast at import time: nothing in this module works without the key.
	GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
	if not GOOGLE_API_KEY:
	    raise ValueError("GOOGLE_API_KEY environment variable not set.")

	genai.configure(api_key=GOOGLE_API_KEY)

	# --- Free-tier Gemini Models ---
	AVAILABLE_MODELS = ["gemini-1.5-flash"]
	DEFAULT_MODEL = "gemini-1.5-flash"

	# --- Optimization Parameters ---
	DEFAULT_MAX_HEIGHT = 1000  # screenshots taller than this (pixels) are scaled down
	DEFAULT_IMAGE_FORMAT = "PNG"  # lower-cased and passed to Playwright's screenshot `type`
	DEFAULT_TIMEOUT = 10000  # milliseconds; used for navigation, network-idle wait and capture

	# --- Ensure Playwright uses the same cache path at runtime ---
	os.environ["PLAYWRIGHT_BROWSERS_PATH"] = "/app/.cache/playwright"

	# --- Utility Functions ---
	def screenshot_from_url(
	    url: str,
	    max_height: int = DEFAULT_MAX_HEIGHT,
	    image_format: str = DEFAULT_IMAGE_FORMAT,
	    timeout: int = DEFAULT_TIMEOUT,
	) -> Image.Image:
	    """Capture a full-page screenshot of *url* with headless Chromium.

	    The page is loaded and given up to *timeout* milliseconds to become
	    network-idle; on a Playwright timeout a partial screenshot is captured
	    instead. Images taller than *max_height* are scaled down proportionally.
	    Progress is reported to clients through ``progress`` Socket.IO events.

	    Args:
	        url: Address of the page to capture.
	        max_height: Maximum height in pixels of the returned image.
	        image_format: Screenshot format name; lower-cased before being
	            passed to Playwright ("JPG" is accepted as an alias of "JPEG").
	        timeout: Timeout in milliseconds applied to navigation, the
	            network-idle wait and the screenshot call.

	    Returns:
	        The (possibly down-scaled) screenshot as a PIL image.

	    Raises:
	        Exception: Any failure, wrapped as "Failed to capture screenshot: ...".
	    """
	    app.logger.info(f"Taking screenshot of {url} with timeout {timeout}ms")
	    try:
	        # Playwright's screenshot `type` expects "png" or "jpeg".
	        shot_type = image_format.lower()
	        if shot_type == "jpg":
	            shot_type = "jpeg"

	        with sync_playwright() as p:
	            browser = p.chromium.launch(headless=True)
	            try:
	                context = browser.new_context()
	                page = context.new_page()
	                try:
	                    page.goto(url, timeout=timeout)
	                    page.wait_for_load_state("networkidle", timeout=timeout)
	                except PlaywrightTimeoutError:
	                    # Best effort: capture whatever has rendered so far.
	                    app.logger.warning(f"Timeout waiting for networkidle on {url}. Capturing partial screenshot.")
	                screenshot_bytes = page.screenshot(full_page=True, type=shot_type, timeout=timeout)
	            finally:
	                # Close the browser even when the capture itself fails.
	                browser.close()

	        socketio.emit('progress', {'percent': 25, 'message': 'Screenshot taken'})
	        image = Image.open(io.BytesIO(screenshot_bytes))
	        if image.height > max_height:
	            ratio = max_height / image.height
	            # Very tall pages can scale the width below one pixel; clamp it.
	            new_width = max(1, int(image.width * ratio))
	            image = image.resize((new_width, max_height), Image.LANCZOS)
	        app.logger.info("Screenshot captured successfully")
	        return image
	    except Exception as e:
	        app.logger.error(f"Error taking screenshot: {e}")
	        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
	        # Chain the cause so the original traceback is not lost.
	        raise Exception(f"Failed to capture screenshot: {str(e)}") from e

	def _parse_markdown_files(response_text: str) -> dict:
	    """Fallback parser: read ``### <filename>`` headings followed by fenced code blocks.

	    Raises:
	        ValueError: If no heading/code-block pair is found.
	    """
	    pattern = r"### (.+?)\n```(?:\w+)?\n(.*?)\n```"
	    matches = re.findall(pattern, response_text, re.DOTALL)
	    if not matches:
	        raise ValueError(f"Could not parse response into files. Response start: {response_text[:200]}")
	    return {
	        "files": {
	            filename.strip(): {"content": content.strip()}
	            for filename, content in matches
	        }
	    }


	def parse_model_response(response_text: str) -> dict:
	    """Turn the model's text reply into a ``{"files": {name: {"content": ...}}}`` dict.

	    The JSON between ``==START_JSON==`` and ``==STOP_JSON==`` is preferred.
	    If the tokens are missing, or the enclosed text is not valid JSON, the
	    reply is parsed as markdown (``### filename`` + fenced code block).
	    A ``progress`` Socket.IO event is emitted on success and on failure.

	    Args:
	        response_text: Raw text returned by the model.

	    Returns:
	        A dict with a single ``"files"`` key mapping file names to
	        ``{"content": str}`` entries.

	    Raises:
	        ValueError: If the JSON has an unexpected structure or neither
	            parsing strategy yields any file.
	    """
	    app.logger.info("Parsing model response")
	    try:
	        # Use regex to find content within START and STOP tokens
	        match = re.search(r"==START_JSON==(.*?)==STOP_JSON==", response_text, re.DOTALL)

	        if match:
	            json_content = match.group(1).strip()
	            # Models sometimes wrap the JSON in a markdown fence inside the tokens.
	            json_content = re.sub(r"^```\w*\s*|\s*```$", "", json_content)
	            try:
	                files = json.loads(json_content)
	            except json.JSONDecodeError:
	                app.logger.warning(f"Response is not valid JSON, attempting custom parsing. Response start: {response_text[:200]}")
	            else:
	                if not isinstance(files, dict) or "files" not in files or not isinstance(files["files"], dict):
	                    raise ValueError("Invalid JSON structure")
	                socketio.emit('progress', {'percent': 90, 'message': 'JSON parsed'})
	                return files
	        else:
	            app.logger.warning("No JSON found within START/STOP tokens, attempting custom parsing")

	        files = _parse_markdown_files(response_text)
	        socketio.emit('progress', {'percent': 90, 'message': 'Parsed with fallback method'})
	        return files
	    except ValueError as e:
	        # Covers both structural errors and a failed fallback parse, so the
	        # client always receives a terminal progress event.
	        app.logger.error(f"ValueError in parsing: {e}")
	        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
	        raise

	def _extract_body_content(index_html_content: str) -> str:
	    """Return the inner markup of ``<body>`` from a full HTML document.

	    Falls back to everything after an unclosed ``<body>`` or after the
	    ``<html>`` tag, and finally to the input unchanged when it does not
	    look like an HTML document at all.
	    """
	    flags = re.DOTALL | re.IGNORECASE
	    body_match = re.search(r"<body\b[^>]*>(.*?)</body>", index_html_content, flags)
	    if body_match:
	        app.logger.info("Extracted body content for combined HTML")
	        return body_match.group(1).strip()

	    if "<html" in index_html_content.lower():
	        # Capture groups are used instead of a look-behind because Python's
	        # `re` rejects variable-width look-behind patterns.
	        content_match = (
	            re.search(r"<body\b[^>]*>(.*)$", index_html_content, flags)
	            or re.search(r"<html\b[^>]*>(.*)$", index_html_content, flags)
	        )
	        app.logger.warning("No explicit body tag found, using inferred content for combined HTML")
	        return content_match.group(1).strip() if content_match else index_html_content

	    app.logger.info("No HTML structure detected, using raw content as-is for combined HTML")
	    return index_html_content


	def _build_combined_html(css_content: str, body_content: str, js_content: str) -> str:
	    """Assemble one standalone HTML document with the CSS and JS inlined."""
	    parts = [
	        "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n",
	        " <meta charset=\"UTF-8\">\n",
	        " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n",
	        " <title>Recreated Webpage</title>\n",
	    ]
	    if css_content:
	        parts.append(f" <style>\n {css_content}\n </style>\n")
	    parts.append("</head>\n<body>\n")
	    if body_content:
	        parts.append(f" {body_content}\n")
	    else:
	        parts.append(" <!-- No HTML content generated -->\n")
	    if js_content:
	        parts.append(f" <script>\n {js_content}\n </script>\n")
	    parts.append("</body>\n</html>")
	    return "".join(parts)


	def image_to_html(image: Image.Image, model_name: str) -> tuple[dict, str, str]:
	    """Ask a Gemini model to recreate the webpage shown in *image*.

	    The image is sent as base64-encoded PNG together with a fixed prompt.
	    The reply is parsed into files, and a single-file HTML variant with the
	    CSS and JavaScript inlined is built from them. Progress is reported via
	    ``progress`` Socket.IO events.

	    Args:
	        image: Screenshot to recreate.
	        model_name: Name passed to ``genai.GenerativeModel``.

	    Returns:
	        ``(files, html_content, combined_html)`` where *files* is the parsed
	        ``{"files": {...}}`` dict, *html_content* is the original
	        ``index.html`` (or the first other ``.html`` file, or a placeholder)
	        and *combined_html* is the inlined single-file version. If the model
	        call or the parsing fails, an ``error.txt`` entry and error strings
	        are returned instead of raising.

	    Raises:
	        ValueError: If *image* is ``None``. Errors while encoding the image
	            also propagate, since they occur before the guarded section.
	    """
	    app.logger.info(f"Converting image to HTML with model {model_name}")
	    if image is None:
	        raise ValueError("Image is None.")

	    buffered = io.BytesIO()
	    image.save(buffered, format="PNG")
	    img_str = base64.b64encode(buffered.getvalue()).decode()

	    prompt = """
	Analyze this webpage screenshot and generate a complete and fully functional code package to recreate it exactly as it appears in the image. It is extremely important that the generated HTML, CSS, and JavaScript, when combined, will produce a visual output that is as close as possible to the original screenshot. Do not omit any important elements or styling details. Pay very close attention to fonts, colors, spacing, layout, and interactive elements. Preserve the original structure and avoid making unnecessary changes.

	If there are buttons, forms, or other interactive elements, create JavaScript code that makes them functional or simulates the expected behavior.

	Return your response in the following JSON format, enclosed within `==START_JSON==` and `==STOP_JSON==` tokens:

	==START_JSON==
	{
	"files": {
	"index.html": {"content": "<html>...</html>"},
	"style.css": {"content": "body { ... }"},
	"script.js": {"content": "console.log('...');"},
	"and_any_other_files.needed": {"content": "file content"}
	}
	}
	==STOP_JSON==

	Include all necessary files to recreate the webpage, including HTML, CSS (either inline or in separate `.css` files), JavaScript (either inline or in separate `.js` files), and any other assets (images, fonts, etc.) if present in the screenshot. Name the main HTML file `index.html`. Use `style.css` for CSS and `script.js` for JavaScript, unless there are multiple CSS or JavaScript files, in which case, name them descriptively (e.g., `responsive.css`, `animations.js`). Prioritize external CSS and JS files for better organization unless inline styling or scripting is clearly more appropriate based on the screenshot.

	Generate JavaScript code to handle basic interactions, such as button clicks, form submissions, and any other dynamic behavior visible in the screenshot.

	It is CRITICAL that the HTML is well-formed and valid, that CSS styles are complete and accurate, and that JavaScript code functions as intended to produce the webpage shown in the screenshot. The appearance and functionality of the outputted code should match the screenshot, and this is your primary objective.

	DO NOT INCLUDE any introductory or explanatory text outside of the JSON block. Only the JSON block should be present in your response.
	"""

	    model = genai.GenerativeModel(model_name)
	    contents = [prompt, {"mime_type": "image/png", "data": img_str}]

	    try:
	        socketio.emit('progress', {'percent': 50, 'message': 'Sending request to Gemini...'})
	        response = model.generate_content(contents)
	        socketio.emit('progress', {'percent': 75, 'message': 'Received response from Gemini'})
	        app.logger.info(f"Raw Gemini API response: {response.text}")
	        files = parse_model_response(response.text)
	        file_map = files["files"]

	        # Extract file contents (`or ""` guards against JSON nulls)
	        css_content = file_map.get("style.css", {}).get("content", "") or ""
	        js_content = file_map.get("script.js", {}).get("content", "") or ""
	        index_html_content = file_map.get("index.html", {}).get("content", "") or ""

	        # Strip index.html to body content, then inline CSS and JS around it
	        body_content = _extract_body_content(index_html_content)
	        combined_html = _build_combined_html(css_content, body_content, js_content)

	        # For preview and standalone file, use the original full index.html
	        # content; otherwise the first non-empty .html file, else a placeholder.
	        html_content = index_html_content
	        if not html_content:
	            html_content = next(
	                (
	                    entry.get("content")
	                    for fname, entry in file_map.items()
	                    if fname.endswith(".html") and isinstance(entry, dict) and entry.get("content")
	                ),
	                "<p>No HTML file generated</p>",
	            )

	        app.logger.info("HTML and files extracted successfully")
	        socketio.emit('progress', {'percent': 100, 'message': 'Processing complete'})
	        return files, html_content, combined_html
	    except Exception as e:
	        # Deliberate best effort: report the failure as content, not as an exception.
	        app.logger.error(f"Error extracting files: {e}")
	        socketio.emit('progress', {'percent': 100, 'message': f'Error: {str(e)}'})
	        return {"files": {"error.txt": {"content": f"Error: {str(e)}"}}}, f"Error: {str(e)}", f"Error: {str(e)}"

	# --- Flask Routes ---
	@app.route('/')
	def index():
	    """Render the landing page, passing the selectable models and the default one."""
	    template_context = {
	        "models": AVAILABLE_MODELS,
	        "default_model": DEFAULT_MODEL,
	    }
	    return render_template('index.html', **template_context)

	@app.route('/process_url', methods=['POST'])
	def process_url():
	    """Screenshot the submitted URL and convert it to HTML/CSS/JS.

	    Form fields: ``url`` (required, http/https), ``max_height``,
	    ``image_format``, ``timeout`` and ``model_name`` (all optional, falling
	    back to the module defaults).

	    Returns:
	        JSON with ``files``, ``preview`` and ``combined_html`` on success;
	        ``{"error": ...}`` with status 400 for invalid input or 500 when
	        processing fails.
	    """
	    data = request.form

	    url = (data.get('url') or '').strip()
	    # Only web URLs are allowed: the headless browser must not be pointed at
	    # local files (file://) or other non-web schemes by a client.
	    if not url.lower().startswith(('http://', 'https://')):
	        return jsonify({"error": "A valid http(s) URL is required"}), 400

	    try:
	        # Empty fields fall back to the defaults instead of failing int('').
	        max_height = int(data.get('max_height') or DEFAULT_MAX_HEIGHT)
	        timeout = int(data.get('timeout') or DEFAULT_TIMEOUT)
	    except (TypeError, ValueError):
	        return jsonify({"error": "max_height and timeout must be integers"}), 400
	    if max_height <= 0:
	        return jsonify({"error": "max_height must be a positive integer"}), 400

	    image_format = data.get('image_format') or DEFAULT_IMAGE_FORMAT
	    model_name = data.get('model_name') or DEFAULT_MODEL

	    try:
	        screenshot = screenshot_from_url(url, max_height, image_format, timeout)
	        files, html_content, combined_html = image_to_html(screenshot, model_name)
	        return jsonify({"files": files["files"], "preview": html_content, "combined_html": combined_html})
	    except Exception as e:
	        return jsonify({"error": str(e)}), 500

	@app.route('/process_image', methods=['POST'])
	def process_image():
	    """Convert an uploaded screenshot (form file field ``image``) to HTML/CSS/JS.

	    Responds with JSON containing ``files``, ``preview`` and ``combined_html``;
	    a missing upload yields a 400 error and any processing failure a 500 error.
	    """
	    if 'image' not in request.files:
	        return jsonify({"error": "No image uploaded"}), 400

	    chosen_model = request.form.get('model_name', DEFAULT_MODEL)
	    upload = request.files['image']

	    try:
	        picture = Image.open(upload)
	        files, html_content, combined_html = image_to_html(picture, chosen_model)
	        payload = {
	            "files": files["files"],
	            "preview": html_content,
	            "combined_html": combined_html,
	        }
	        return jsonify(payload)
	    except Exception as e:
	        error_body = {"error": str(e)}
	        return jsonify(error_body), 500

	@socketio.on('connect')
	def test_connect():
	    """Log that a Socket.IO client has connected."""
	    logger = app.logger
	    logger.info("Client connected")

	if __name__ == '__main__':
	    # Start the Socket.IO server on all interfaces, port 7860, when the
	    # module is executed directly.
	    run_options = {
	        "host": '0.0.0.0',
	        "port": 7860,
	        "debug": False,
	        "allow_unsafe_werkzeug": True,
	    }
	    socketio.run(app, **run_options)