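# NOTE: The original source lists no dependencies; inferred from the imports below,
# something like the following should work:
#   pip install gradio requests openai pillow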
# Import necessary libraries
import gradio as gr          # Gradio: library for building web interfaces
import requests              # Library for sending API requests
from openai import OpenAI    # OpenAI-compatible client for the Upstage Solar LLM
from io import BytesIO       # Tool for handling image data in memory

def extract_text_from_image(image, api_key):
    """
    Extract text from an image using the Upstage Document OCR API.
    """
    # Guard against an empty upload (the change event also fires when the image is cleared)
    if image is None:
        return ""

    # Upstage API endpoint
    url = "https://api.upstage.ai/v1/document-digitization"

    # Set up headers for API key authentication
    headers = {"Authorization": f"Bearer {api_key}"}

    # Save the image to a memory buffer (JPEG format);
    # convert to RGB first, since JPEG cannot store an alpha channel
    buffer = BytesIO()
    image.convert("RGB").save(buffer, format="JPEG")
    buffer.seek(0)

    # Prepare files and form data for the request
    files = {"document": ("image.jpg", buffer, "image/jpeg")}
    data = {"model": "ocr"}  # Model to use: OCR

    # Send POST request
    response = requests.post(url, headers=headers, files=files, data=data)

    # If the request is successful, extract the recognized text
    if response.status_code == 200:
        text = response.json().get("text", "")  # Extract text from the JSON response
        return text.strip()  # Remove leading/trailing whitespace and return
    else:
        # Return an error message on failure
        return f"OCR Failed: {response.status_code} - {response.text}"

def translate_text_with_solar(korean_text, api_key):
    """
    Translate Korean text into English using the Upstage Solar Pro API.
    """
    # Initialize an OpenAI-compatible client for calling the Solar LLM
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.upstage.ai/v1"
    )

    # Construct the prompt for the model
    prompt = f"""
    Below is a handwritten letter in Korean.\n
    {korean_text} \n
    Please translate it into English.\n\n
    Translated letter in English:
    """

    # Call the Solar LLM to perform the translation
    response = client.chat.completions.create(
        model="solar-pro",                               # Model to use
        messages=[{"role": "user", "content": prompt}],  # User message
        temperature=0.5,                                 # Creativity level (0.0~1.0)
        max_tokens=2048                                  # Max response length
    )

    # Return the translated text
    return response.choices[0].message.content

# Gradio interface layout
with gr.Blocks() as demo:
    # Header description
    gr.Markdown("# Handwritten Letter Translator")
    gr.Markdown(
        "Upload a letter image to extract Korean text using Upstage Document OCR.\n"
        "Click the Translate button to translate it into English using Solar LLM!"
    )
    gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")

    # API key input
    api_key_input = gr.Textbox(label="Upstage API Key", type="password", placeholder="Paste your API key here")

    # Layout: two-column format
    with gr.Row():
        # Left column: image upload
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Letter Image")

        # Right column: extracted text and translation
        with gr.Column(scale=2):
            korean_box = gr.Textbox(label="Extracted Korean Text", lines=10)
            translate_button = gr.Button("Translate")
            english_box = gr.Textbox(label="Translated English Text", lines=10)

    # Step 1: run OCR when an image is uploaded → display the extracted text
    image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=korean_box)

    # Step 2: run translation when the button is clicked → display the translated result
    translate_button.click(fn=translate_text_with_solar, inputs=[korean_box, api_key_input], outputs=english_box)
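
# --- Optional standalone check: a minimal sketch added for illustration, not part of
# the original app. It assumes a local test image ("sample_letter.jpg") and an
# UPSTAGE_API_KEY environment variable; both names are hypothetical. ---
def _smoke_test(image_path="sample_letter.jpg"):
    import os
    from PIL import Image

    api_key = os.environ.get("UPSTAGE_API_KEY", "")        # hypothetical env variable
    image = Image.open(image_path)                         # hypothetical test image
    korean = extract_text_from_image(image, api_key)       # Step 1: OCR
    print("Extracted Korean text:\n", korean)
    english = translate_text_with_solar(korean, api_key)   # Step 2: translation
    print("Translated English text:\n", english)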

# Run app
if __name__ == "__main__":
    demo.launch()