import json

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Global variable to store the loaded model
llm = None

def load_model():
    """Download a GGUF model from the Hugging Face Hub and initialize llama.cpp."""
    global llm
    try:
        # Any GGUF model from the Hugging Face Hub can be used here. This
        # small quantized chat model is just an example -- swap in the repo
        # and filename of whatever model you want to serve.
        repo_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
        filename = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"

        print("Loading llama.cpp model...")
        model_path = hf_hub_download(repo_id=repo_id, filename=filename)
        llm = Llama(
            model_path=model_path,
            n_ctx=2048,    # context window size in tokens
            n_threads=2,   # CPU threads; raise this on larger hardware
            verbose=False
        )
        print("Model loaded successfully!")
        return "Model loaded successfully!"
    except Exception as e:
        print(f"Error loading model: {e}")
        return f"Error loading model: {e}"
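
# Optional: pre-load the model at startup instead of waiting for the
# "Load Model" button. Left commented out so the Space boots quickly.
# load_model()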

def text_to_json(input_text, max_tokens=512, temperature=0.7):
    """Convert plain text to structured JSON using llama.cpp."""
    global llm
    if llm is None:
        return json.dumps(
            {"error": "Model not loaded. Please load the model first."}, indent=2
        )
    try:
        # Prompt the model to restructure the input as JSON.
        prompt = f"""Convert the following text into a structured JSON format. Extract key information and organize it logically:

Text: {input_text}

JSON:"""

        # Generate a completion; the stop sequences keep the model from
        # running past the end of the JSON object.
        response = llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["```", "\n\n\n"],
            echo=False
        )
        generated_text = response["choices"][0]["text"].strip()

        # Validate the output by round-tripping it through the json module.
        try:
            parsed_json = json.loads(generated_text)
            return json.dumps(parsed_json, indent=2)
        except json.JSONDecodeError:
            # Not valid JSON as-is; try to salvage an embedded object
            # (see extract_json_block below).
            return extract_json_block(generated_text)
    except Exception as e:
        return f"Error generating JSON: {str(e)}"
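
def extract_json_block(text):
    """Best-effort salvage of a JSON object embedded in model output.

    A minimal heuristic sketch: take the span from the first '{' to the
    last '}' and try to parse it; if that fails, return the raw text so
    the user can still see what the model produced.
    """
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        try:
            return json.dumps(json.loads(text[start:end + 1]), indent=2)
        except json.JSONDecodeError:
            pass
    return text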

def demo_without_model(input_text):
    """Demo function that works without loading a model."""
    try:
        # Simple rule-based JSON conversion for demonstration.
        words = input_text.strip().split()

        # Create a basic JSON structure from surface features of the text.
        result = {
            "input_text": input_text,
            "word_count": len(words),
            "words": words,
            "character_count": len(input_text),
            "sentences": [s.strip() for s in input_text.split(".") if s.strip()],
            "metadata": {
                "processed_by": "llama.cpp demo",
                "timestamp": "demo_mode"
            }
        }
        return json.dumps(result, indent=2)
    except Exception as e:
        return f"Error processing text: {str(e)}"
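
# Example: demo_without_model("Hello world.") returns JSON like
# {"input_text": "Hello world.", "word_count": 2,
#  "words": ["Hello", "world."], "character_count": 12,
#  "sentences": ["Hello world"], "metadata": {...}}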

# Create Gradio interface
with gr.Blocks(title="Plain Text to JSON with llama.cpp") as demo:
    gr.Markdown("# Plain Text to JSON Converter")
    gr.Markdown("Convert plain text into structured JSON format using llama.cpp")

    with gr.Tab("Text to JSON"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="Input Text",
                    placeholder="Enter your text here...",
                    lines=5
                )
                with gr.Row():
                    max_tokens = gr.Slider(
                        minimum=50,
                        maximum=1000,
                        value=512,
                        label="Max Tokens"
                    )
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.7,
                        label="Temperature"
                    )
                convert_btn = gr.Button("Convert to JSON", variant="primary")
                demo_btn = gr.Button("Demo (No Model)", variant="secondary")
            with gr.Column():
                output_json = gr.Textbox(
                    label="Generated JSON",
                    lines=10,
                    interactive=False
                )

    with gr.Tab("Model Management"):
        load_btn = gr.Button("Load Model", variant="primary")
        model_status = gr.Textbox(
            label="Model Status",
            value="Model not loaded",
            interactive=False
        )
        gr.Markdown("""
        ### Instructions:
        1. Click "Load Model" to download and initialize the GGUF model (the first load may take a while)
        2. Use "Demo (No Model)" for basic rule-based output without loading a model
        3. To serve a different model, change the repo and filename in `load_model()`

        ### Notes:
        - This Space uses llama.cpp for efficient CPU inference
        - Models must be in GGUF format
        - Adjust max_tokens and temperature to vary the output
        """)

    # Event handlers
    convert_btn.click(
        fn=text_to_json,
        inputs=[input_text, max_tokens, temperature],
        outputs=output_json
    )
    demo_btn.click(
        fn=demo_without_model,
        inputs=input_text,
        outputs=output_json
    )
    load_btn.click(
        fn=load_model,
        outputs=model_status
    )
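
# Note: demo.launch() below uses Gradio defaults, which Hugging Face Spaces
# picks up automatically; for local testing you can expose the app with,
# e.g., demo.launch(server_name="0.0.0.0", server_port=7860).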

if __name__ == "__main__":
    demo.launch()