Update app.py
app.py CHANGED
@@ -8,7 +8,8 @@ from typing import Union, Optional, Dict, Tuple # Import Dict and Tuple
 from synthgen import (
     generate_synthetic_text,
     generate_prompts,
-    generate_synthetic_conversation
+    generate_synthetic_conversation,
+    generate_corpus_content # Import the new function
 )
 # We no longer need to import api_key here or check it directly in app.py
 
@@ -27,6 +28,21 @@ def create_json_file(data: object, base_filename: str) -> Union[str, None]:
         print(f"Error creating JSON file {base_filename}: {e}")
         return None
 
+# Add the missing function definition
+def create_text_file(data: str, base_filename: str) -> Union[str, None]:
+    """Creates a temporary text file and returns its path."""
+    try:
+        # Ensure filename ends with .txt
+        if not base_filename.lower().endswith(".txt"):
+            base_filename += ".txt" # Append if missing for clarity, though suffix handles it
+        # Create a temporary file with a .txt extension
+        with tempfile.NamedTemporaryFile(mode='w', suffix=".txt", delete=False, encoding='utf-8') as temp_file:
+            temp_file.write(data)
+            return temp_file.name # Return the path to the temporary file
+    except Exception as e:
+        print(f"Error creating text file {base_filename}: {e}")
+        return None
+
 def parse_conversation_string(text: str) -> list[dict]:
     """Parses a multi-line conversation string into a list of message dictionaries."""
     messages = []
@@ -252,11 +268,222 @@ def run_conversation_generation_and_prepare_json(
     return (gr.update(value=output_str), gr.update(value=json_filepath))
 
 
+# Define content_type_labels globally for use in UI and wrapper functions
+content_type_labels = {
+    "Corpus Snippets": "# Snippets",
+    "Short Story": "Approx Words",
+    "Article": "Approx Words"
+}
+content_type_defaults = {
+    "Corpus Snippets": 5,
+    "Short Story": 1000, # Match new backend default
+    "Article": 1500 # Match new backend default
+}
+
+# Wrapper for Corpus/Content Generation
+def run_corpus_generation_and_prepare_file(
+    topic: str,
+    content_type: str,
+    length_param: int,
+    model: str,
+    temperature: float,
+    top_p: float,
+    max_tokens: int
+) -> Tuple[gr.update, gr.update]:
+    """Generates corpus/story/article content and prepares a file for download."""
+    temp_val = temperature if temperature > 0 else None
+    top_p_val = top_p if 0 < top_p <= 1 else None
+    max_tokens_val = max_tokens if max_tokens > 0 else None
+
+    # Use the global dictionary for error messages
+    label_for_error = content_type_labels.get(content_type, 'Length Param')
+    if not topic: return (gr.update(value="Error: Please enter a topic."), gr.update(value=None))
+    if not content_type: return (gr.update(value="Error: Please select a content type."), gr.update(value=None))
+    if length_param <= 0: return (gr.update(value=f"Error: Please enter a positive value for '{label_for_error}'."), gr.update(value=None))
+
+    print(f"Generating {content_type} about '{topic}'...")
+    output_str = f"Generating {content_type} about '{topic}' using model '{model}'...\n"
+    output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n" + "="*40 + "\n\n"
+
+    generated_content = generate_corpus_content(
+        topic=topic, content_type=content_type, length_param=length_param, model=model,
+        temperature=temp_val, top_p=top_p_val, max_tokens=max_tokens_val
+    )
+    output_str += generated_content
+
+    file_path = None
+    if not generated_content.startswith("Error:"):
+        core_content = generated_content
+        if "\n\n" in generated_content: parts = generated_content.split("\n\n", 1); core_content = parts[1] if len(parts) > 1 else generated_content
+        if content_type == "Corpus Snippets":
+            snippets = [s.strip() for s in core_content.split('---') if s.strip()]
+            if not snippets: snippets = [s.strip() for s in core_content.split('\n\n') if s.strip()]
+            corpus_data = {"topic": topic, "snippets": snippets}
+            file_path = create_json_file(corpus_data, f"{topic}_corpus.json")
+        else:
+            file_path = create_text_file(core_content, f"{topic}_{content_type.replace(' ','_')}.txt")
+
+    return (gr.update(value=output_str), gr.update(value=file_path))
+
+# NEW function to update the length parameter label and default value
+def update_length_param_ui(content_type: str) -> gr.update:
+    """Updates the label and default value of the length parameter input."""
+    new_label = content_type_labels.get(content_type, "Length Param")
+    new_value = content_type_defaults.get(content_type, 5) # Default to 5 if type unknown
+    return gr.update(label=new_label, value=new_value)
+
+
+# --- Generation Wrappers ---
+# ... (generate_prompts_ui, run_generation_and_prepare_json, run_conversation_generation_and_prepare_json remain the same) ...
+
+# NEW UI Wrapper for generating TOPICS
+def generate_topics_ui(
+    num_topics: int,
+    model: str,
+    temperature: float,
+    top_p: float,
+    max_tokens: int
+) -> str:
+    """UI Wrapper to generate diverse topics using the AI."""
+    temp_val = temperature if temperature > 0 else None
+    top_p_val = top_p if 0 < top_p <= 1 else None
+    max_tokens_val = max_tokens if max_tokens > 0 else 150 # Limit token for topic list
+
+    if not model:
+        return "Error: Please select a model for topic generation."
+    if num_topics <= 0:
+        return "Error: Number of topics to generate must be positive."
+    if num_topics > 50: # Keep limit reasonable
+        return "Error: Cannot generate more than 50 topics at a time."
+
+    print(f"Generating {num_topics} topics with settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val}")
+
+    # Instruction focused on generating topics
+    instruction = (
+        f"Generate exactly {num_topics} diverse and interesting topics suitable for generating content like articles, stories, or corpus snippets. "
+        f"Each topic should be concise (a few words to a short phrase). "
+        f"Present each topic on a new line, with no other introductory or concluding text or numbering."
+        f"\n\nExamples:\n"
+        f"The future of renewable energy\n"
+        f"The history of the Silk Road\n"
+        f"The impact of social media on mental health"
+    )
+    system_msg = "You are an expert topic generator. Follow the user's instructions precisely."
+
+    try:
+        # Use the core text generation function
+        generated_text = generate_synthetic_text(
+            instruction,
+            model,
+            system_message=system_msg,
+            temperature=temp_val,
+            top_p=top_p_val,
+            max_tokens=max_tokens_val
+        )
+
+        if generated_text.startswith("Error:"):
+            raise ValueError(generated_text) # Propagate error
+
+        # Split into lines and clean up
+        topics_list = [t.strip() for t in generated_text.strip().split('\n') if t.strip()]
+
+        if not topics_list:
+            print(f"Warning: Failed to parse topics from generated text. Raw text:\n{generated_text}")
+            raise ValueError("AI failed to generate topics in the expected format.")
+
+        # Return newline-separated string for the Textbox
+        return "\n".join(topics_list[:num_topics]) # Truncate if needed
+
+    except ValueError as e:
+        return f"Error generating topics: {e}"
+    except Exception as e:
+        print(f"Unexpected error in generate_topics_ui: {e}")
+        return f"An unexpected error occurred: {e}"
+
+# Modified Wrapper for Bulk Corpus/Content Generation
+def run_bulk_content_generation_and_prepare_json(
+    topics_text: str, # Renamed from topic
+    content_type: str,
+    length_param: int,
+    model: str,
+    temperature: float,
+    top_p: float,
+    max_tokens: int
+) -> Tuple[gr.update, gr.update]:
+    """Generates content for multiple topics and prepares a JSON file."""
+    temp_val = temperature if temperature > 0 else None
+    top_p_val = top_p if 0 < top_p <= 1 else None
+    max_tokens_val = max_tokens if max_tokens > 0 else None
+
+    # --- Input Validation ---
+    if not topics_text:
+        return (gr.update(value="Error: Please enter or generate at least one topic."), gr.update(value=None))
+    if not content_type:
+        return (gr.update(value="Error: Please select a content type."), gr.update(value=None))
+
+    topics = [t.strip() for t in topics_text.strip().split('\n') if t.strip()]
+    if not topics:
+        return (gr.update(value="Error: No valid topics found in the input."), gr.update(value=None))
+
+    label_for_error = content_type_labels.get(content_type, 'Length Param')
+    if length_param <= 0:
+        return (gr.update(value=f"Error: Please enter a positive value for '{label_for_error}'."), gr.update(value=None))
+    # --- End Validation ---
+
+    output_str = f"Generating {content_type} for {len(topics)} topics using model '{model}'...\n"
+    output_str += f"(Settings: Temp={temp_val}, Top-P={top_p_val}, MaxTokens={max_tokens_val})\n" + "="*40 + "\n\n"
+
+    bulk_results = [] # Store results for JSON
+
+    # --- Loop through topics ---
+    for i, topic in enumerate(topics):
+        print(f"Generating {content_type} for topic {i+1}/{len(topics)}: '{topic}'...")
+        output_str += f"--- Topic {i+1}/{len(topics)}: '{topic}' ---\n"
+
+        generated_content_full = generate_corpus_content( # Returns string including title/error
+            topic=topic, content_type=content_type, length_param=length_param, model=model,
+            temperature=temp_val, top_p=top_p_val, max_tokens=max_tokens_val
+        )
+
+        output_str += generated_content_full + "\n\n" # Add full result to textbox
+
+        # --- Prepare structured result for JSON ---
+        result_entry = {"topic": topic, "content_type": content_type}
+        if generated_content_full.startswith("Error:"):
+            result_entry["status"] = "error"
+            result_entry["error_message"] = generated_content_full
+            result_entry["content"] = None
+        else:
+            result_entry["status"] = "success"
+            result_entry["error_message"] = None
+            # Extract core content (remove potential title added by backend)
+            core_content = generated_content_full
+            if "\n\n" in generated_content_full:
+                parts = generated_content_full.split("\n\n", 1)
+                core_content = parts[1] if len(parts) > 1 else generated_content_full
+
+            if content_type == "Corpus Snippets":
+                snippets = [s.strip() for s in core_content.split('---') if s.strip()]
+                if not snippets: snippets = [s.strip() for s in core_content.split('\n\n') if s.strip()]
+                result_entry["content"] = snippets # Store list for corpus
+            else:
+                result_entry["content"] = core_content # Store string for story/article
+
+        bulk_results.append(result_entry)
+        # --- End JSON preparation ---
+
+    # --- Finalize ---
+    output_str += "="*40 + f"\nBulk generation complete for {len(topics)} topics."
+    json_filepath = create_json_file(bulk_results, f"{content_type.replace(' ','_')}_bulk_results.json")
+
+    return (gr.update(value=output_str), gr.update(value=json_filepath))
+
+
 # --- Gradio Interface Definition ---
 with gr.Blocks() as demo:
     gr.Markdown("# Synthetic Data Generator using OpenRouter")
     gr.Markdown(
-        "Generate synthetic text samples or
+        "Generate synthetic text samples, conversations, or other content using various models"
     )
     # Removed the api_key_loaded check and warning Markdown
 
@@ -370,10 +597,77 @@ with gr.Blocks() as demo:
             outputs=[output_conv, download_file_conv] # Output to both Textbox and File
         )
 
+    # --- Content Generation Tab (Modified for Bulk) ---
+    with gr.TabItem("Bulk Content Generation"):
+        output_content = gr.Textbox(label="Generated Content (Log)", lines=15, show_copy_button=True)
+        # Output is now always JSON
+        download_file_content = gr.File(label="Download Results as JSON")
+
+        gr.Markdown("Enter one topic per line below, or use the 'Generate Topics' button.")
+        with gr.Row():
+            # Changed to multi-line Textbox
+            topic_input_content = gr.Textbox(
+                label="Topics (one per line)",
+                lines=5,
+                placeholder="Enter topics here, one per line...\ne.g., The future of renewable energy\nThe history of the Silk Road"
+            )
+
+        # --- Topic Generation ---
+        with gr.Accordion("Topic Generation Options", open=False):
+            with gr.Row():
+                num_topics_input = gr.Number(label="# Topics to Generate", value=5, minimum=1, maximum=50, step=1)
+                # Use shared model selector below and settings
+            generate_topics_button = gr.Button("Generate Topics using AI")
+
+        # --- Generation Settings ---
+        with gr.Row():
+            content_type_choices = list(content_type_labels.keys())
+            content_type_input = gr.Dropdown(
+                label="Content Type", choices=content_type_choices, value=content_type_choices[0]
+            )
+            default_length_label = content_type_labels[content_type_choices[0]]
+            default_length_value = content_type_defaults[content_type_choices[0]]
+            length_param_input = gr.Number(
+                label=default_length_label, value=default_length_value, minimum=1, step=1
+            )
+        with gr.Row():
+            model_input_content = gr.Dropdown(label="Model", choices=model_choices, value=default_model)
+
+        # Button to trigger bulk generation
+        generate_content_button = gr.Button("Generate Bulk Content")
+
+        # --- Event Listeners ---
+        # Listener to update length param UI
+        content_type_input.change(
+            fn=update_length_param_ui,
+            inputs=content_type_input,
+            outputs=length_param_input
+        )
+        # Listener for topic generation button
+        generate_topics_button.click(
+            fn=generate_topics_ui,
+            inputs=[ # Pass necessary inputs for topic generation
+                num_topics_input, model_input_content, # Use this tab's model selector
+                temperature_slider, top_p_slider, max_tokens_slider
+            ],
+            outputs=topic_input_content # Output generated topics to the textbox
+        )
+        # Listener for main generation button
+        generate_content_button.click(
+            fn=run_bulk_content_generation_and_prepare_json, # Use the new bulk wrapper
+            inputs=[
+                topic_input_content, content_type_input, length_param_input,
+                model_input_content,
+                temperature_slider, top_p_slider, max_tokens_slider
+            ],
+            outputs=[output_content, download_file_content]
+        )
+
 
 # Launch the Gradio app
 if __name__ == "__main__":
     print("Launching Gradio App...")
     print("Make sure the OPENROUTER_API_KEY environment variable is set.")
     # Use share=True for temporary public link if running locally and need to test
-    demo.launch() # share=True
+    demo.launch() # share=True
+
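
The bulk wrapper introduced in this commit writes one entry per topic (keys "topic", "content_type", "status", "error_message", "content") and hands the list to create_json_file for the download component. As a minimal sketch of how that exported JSON could be consumed afterwards (the file name below is hypothetical; the real path is whatever temporary file create_json_file returns):

import json

# Hypothetical file name; use the path of the downloaded results file.
with open("Article_bulk_results.json", encoding="utf-8") as fh:
    bulk_results = json.load(fh)

# Keep only entries the wrapper marked as successful.
successes = [entry for entry in bulk_results if entry["status"] == "success"]

for entry in successes:
    content = entry["content"]
    # "content" is a list of snippets for "Corpus Snippets", otherwise a single string.
    size = f"{len(content)} snippets" if isinstance(content, list) else f"{len(content)} characters"
    print(f"{entry['topic']}: {size}")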