Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| import os | |
| import re # For post-processing fallback | |
# --- Configuration ---
API_TOKEN = os.getenv("HF_TOKEN", None)  # optional; set as a Space secret to avoid rate limits
MODEL = "HuggingFaceH4/zephyr-7b-beta"  # Or choose another suitable model

# --- Initialize Inference Client ---
# Fail fast at startup: if the client cannot be created, surface a Gradio
# error instead of letting every later request fail mysteriously.
try:
    print(f"Attempting to initialize Inference Client for model: {MODEL}")
    if API_TOKEN:
        print("Using HF Token found in environment.")
        client = InferenceClient(model=MODEL, token=API_TOKEN)
    else:
        print("HF Token not found. Running without token (may lead to rate limits).")
        client = InferenceClient(model=MODEL)
    print("Inference Client initialized successfully.")
except Exception as e:
    print(f"Error initializing Inference Client: {e}")
    # Chain the original exception (`from e`) so the startup traceback
    # shows the root cause, not just the wrapped gr.Error.
    raise gr.Error(f"Failed to initialize the AI model client for '{MODEL}'. Check model name, network, and HF_TOKEN secret if applicable. Error: {e}") from e
| # --- Core Code Generation Function --- | |
def generate_code(
    prompt: str,
    backend_choice: str,
    file_structure: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Generate complete website frontend code from a user description.

    Streams the model's chat-completion response, yielding the cumulative raw
    text after every chunk (for live UI updates), then yields one final
    post-processed version with markdown fences, leaked chat-template tags,
    and common conversational lead-ins stripped.

    Args:
        prompt: Natural-language description of the desired website.
        backend_choice: Backend hint ("Static", "Flask", "Node.js"); the
            output is frontend-only regardless.
        file_structure: "Single File" (everything in index.html) or
            "Multiple Files" (separate html/css/js with file markers).
        max_tokens: Maximum number of new tokens to request from the model.
        temperature: Sampling temperature (lower = focused, higher = creative).
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: Cumulative generated code during streaming, then the final
        cleaned code. On API failure, yields a markdown error message instead.
    """
    print("--- Generating Code ---")
    print(f"Prompt: {prompt[:100]}...")
    print(f"Backend Context: {backend_choice}")
    print(f"File Structure: {file_structure}")
    print(f"Settings: Max Tokens={max_tokens}, Temp={temperature}, Top-P={top_p}")

    # --- Dynamically build the system message for the chosen file structure ---
    if file_structure == "Single File":
        file_structure_instruction = (
            "- **File Structure is 'Single File':** Generate ONLY a single, complete `index.html` file. "
            "Embed ALL CSS directly within `<style>` tags inside the `<head>`. "
            "Embed ALL necessary JavaScript directly within `<script>` tags just before the closing `</body>` tag. "
            "Do NOT use file separation markers."
        )
    else:  # Multiple Files
        file_structure_instruction = (
            "- **File Structure is 'Multiple Files':** Generate code for `index.html`, `style.css`, and `script.js` (if JS is needed). "
            "Use these EXACT markers: `<!-- index.html -->`, `/* style.css */`, `// script.js` (only if JS is needed).\n"
            "- Place the corresponding code directly after each marker.\n"
            "- Inside `index.html`, link `style.css` in the `<head>` and include `script.js` before `</body>` if generated."
        )

    # Emphasizes completeness and raw-code-only output — premature stopping and
    # conversational wrappers are the two most common failure modes.
    system_message = (
        "You are an expert frontend web developer AI. Your primary goal is to generate **complete, visually appealing, modern, and well-styled** frontend code (HTML, CSS, client-side JS) based *only* on the user's description and selected options. "
        "Follow ALL these rules with EXTREME STRICTNESS:\n"
        "1. **STYLE & DETAIL:** Generate rich, detailed code. Use **plenty of CSS** for layout, spacing, typography, colors, and effects. Aim for a high-quality visual result.\n"
        "2. **COMPLETENESS:** Generate the *entire* requested code structure. Ensure all files/sections are fully generated and properly closed (e.g., closing HTML tags `</html>`, CSS braces `}`, script tags `</script>`). **DO NOT STOP GENERATING PREMATURELY.** Finish the whole task.\n"
        "3. **RAW CODE ONLY:** Your *entire* response MUST consist *only* of the requested source code. NO extra text, NO explanations, NO apologies, NO introductions, NO summaries, NO comments about the code (except standard code comments), NO MARKDOWN formatting (like ```html), and ***ABSOLUTELY NO CONVERSATIONAL TEXT OR TAGS*** like `<|user|>` or `<|assistant|>`.\n"
        "4. **IMMEDIATE CODE START:** The response MUST begin *directly* with the first character of the code (`<!DOCTYPE html>` or `<!-- index.html -->`).\n"
        "5. **IMMEDIATE CODE END:** The response MUST end *immediately* after the very last character of the generated code (`</html>`, `}`, `;`, etc.). DO NOT add *any* text, spaces, or newlines after the code concludes.\n"
        "6. **MANDATORY `index.html`:** Always generate the content for `index.html`.\n"
        f"7. **FILE STRUCTURE ({file_structure}):** Strictly follow ONLY the instructions for the *selected* file structure:\n"
        f" {file_structure_instruction}\n"
        # BUG FIX: this line previously lacked the f-prefix, so the literal
        # text "{backend_choice}" was sent to the model instead of the value.
        f"8. **BACKEND CONTEXT ({backend_choice}):** Use as a hint for frontend structure only. Generate ONLY frontend code.\n"
        "9. **FRONTEND ONLY:** Do NOT generate server-side code.\n"
        "10. **ACCURACY:** Generate functional code addressing the user's prompt.\n\n"
        "REMEMBER: Create COMPLETE, visually appealing code. Output ONLY raw code. START immediately with code. FINISH the entire code generation. END immediately with code. NO extra text/tags."
    )

    # --- Construct the messages for the API ---
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"Generate the complete website frontend code for: {prompt}"},
    ]

    # --- Stream the response from the API ---
    # Single accumulator (the original kept two identical copies of the text).
    full_response = ""
    token_count = 0  # rough proxy for tokens received; drives the cutoff warning
    try:
        print("Sending request to Hugging Face Inference API...")
        stream = client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,  # value from the UI slider
            stream=True,
            temperature=temperature,
            top_p=top_p,
        )
        for message in stream:
            token = message.choices[0].delta.content
            # Deltas can be None (e.g. role-only chunks); only append real text.
            if isinstance(token, str):
                token_count += 1
                full_response += token
                yield full_response  # cumulative text for live UI update

        print(f"API stream finished. Received ~{token_count} tokens. Raw length: {len(full_response)}")
        # Heuristic: receiving (nearly) max_tokens chunks suggests the output
        # was cut off by the token limit rather than finishing naturally.
        if token_count >= max_tokens - 10:
            print(f"WARNING: Generation might have been cut short due to reaching max_tokens limit ({max_tokens}).")

        # --- Post-processing (fallback safety net for rule violations) ---
        cleaned_response = full_response.strip()
        # Strip leading/trailing markdown code fences.
        cleaned_response = re.sub(r"^\s*```[a-z]*\s*\n?", "", cleaned_response)
        cleaned_response = re.sub(r"\n?\s*```\s*$", "", cleaned_response)
        # Strip leaked chat-template tags like <|user|> / <|assistant|>.
        cleaned_response = re.sub(r"<\s*\|?\s*(user|assistant)\s*\|?\s*>", "", cleaned_response, flags=re.IGNORECASE)
        # Strip common conversational lead-ins if the model ignored the rules.
        common_phrases = [
            "Here is the code:", "Okay, here is the code:", "Here's the code:",
            "Sure, here is the code you requested:", "Let me know if you need anything else.",
        ]
        for phrase in common_phrases:
            if cleaned_response.lower().startswith(phrase.lower()):
                cleaned_response = cleaned_response[len(phrase):].lstrip()
        # NOTE: trailing-phrase removal is deliberately omitted — it risks
        # deleting valid closing code or comments.

        yield cleaned_response.strip()  # final cleaned response
    except Exception as e:
        error_message = f"An error occurred during the API call: {e}"
        print(error_message)
        yield f"## Error\n\nFailed to generate code.\n**Reason:** {e}\n\nPlease check the model status, your connection, and API token (if applicable)."
| # --- Build Gradio Interface using Blocks --- | |
| with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo: | |
| gr.Markdown("# ✨ Website Code Generator ✨") | |
| gr.Markdown( | |
| "Describe the website you want. The AI will generate **visually styled** frontend code (HTML, CSS, JS) using **plenty of CSS**. " | |
| "The code appears live below.\n" | |
| "**Important:**\n" | |
| "1. This generator creates code based *only* on your initial description. To refine, modify your description and generate again.\n" | |
| "2. **If the code output stops abruptly**, it likely hit the 'Max New Tokens' limit. **Increase the slider value below** and try again!" # Added explanation | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| prompt_input = gr.Textbox( | |
| label="Website Description", | |
| placeholder="e.g., A modern portfolio landing page with smooth scroll nav, stylish hero, project cards with hover effects, contact form.", | |
| lines=6, | |
| ) | |
| backend_radio = gr.Radio( | |
| ["Static", "Flask", "Node.js"], label="Backend Context Hint", value="Static", | |
| info="Hint for AI (e.g., {{var}}) - generates ONLY frontend code." | |
| ) | |
| file_structure_radio = gr.Radio( | |
| ["Multiple Files", "Single File"], label="Output File Structure", value="Multiple Files", | |
| info="Choose 'Single File' (all in index.html) or 'Multiple Files' (separate css/js)." | |
| ) | |
| generate_button = gr.Button("🎨 Generate Stylish Website Code", variant="primary") | |
| with gr.Column(scale=3): | |
| code_output = gr.Code( | |
| label="Generated Code (Raw Output - Aiming for Style!)", | |
| language="html", | |
| lines=30, | |
| interactive=False, | |
| ) | |
| with gr.Accordion("Advanced Generation Settings", open=False): | |
| # INCREASED max_tokens range and default value | |
| max_tokens_slider = gr.Slider( | |
| minimum=512, | |
| maximum=4096, # Set maximum to model's limit (Zephyr 7B can handle this) | |
| value=3072, # Increased default significantly | |
| step=256, # Larger steps might be practical | |
| label="Max New Tokens", | |
| info="Max length of generated code. Increase if output is cut off!" # Updated info | |
| ) | |
| temperature_slider = gr.Slider( | |
| minimum=0.1, maximum=1.2, value=0.7, step=0.1, label="Temperature", | |
| info="Controls randomness. Lower=focused, Higher=creative." | |
| ) | |
| top_p_slider = gr.Slider( | |
| minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P", | |
| info="Alternative randomness control." | |
| ) | |
| # --- Connect Inputs/Outputs --- | |
| generate_button.click( | |
| fn=generate_code, | |
| inputs=[ | |
| prompt_input, | |
| backend_radio, | |
| file_structure_radio, | |
| max_tokens_slider, | |
| temperature_slider, | |
| top_p_slider, | |
| ], | |
| outputs=code_output, | |
| ) | |
| # --- Examples --- | |
| gr.Examples( | |
| examples=[ | |
| ["A simple counter page with a number display, an increment button, and a decrement button. Style the buttons nicely and center everything.", "Static", "Single File"], | |
| ["A responsive product grid for an e-commerce site. Each card needs an image, title, price, and 'Add to Cart' button with a hover effect. Use modern CSS.", "Static", "Multiple Files"], | |
| ["A personal blog homepage featuring a clean header with navigation, a main content area for post summaries (placeholders ok), and a simple footer. Use a nice font.", "Flask", "Multiple Files"], | |
| ["A 'Coming Soon' page with a large countdown timer (use JS), a background image, and an email signup form. Make it look sleek.", "Static", "Multiple Files"] | |
| ], | |
| inputs=[prompt_input, backend_radio, file_structure_radio], | |
| label="Example Prompts (Aiming for Style)" | |
| ) | |
# --- Launch ---
if __name__ == "__main__":
    print("Starting Gradio app...")
    # Queueing is required on Spaces so streamed updates reach the client.
    app = demo.queue(max_size=10)
    app.launch()
    print("Gradio app launched.")