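# NOTE: The original source lists no dependencies; inferred from the imports below,
# something like the following should work:
#   pip install gradio requests openai pillow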
# Import necessary libraries
import gradio as gr          # Gradio: library for building web interfaces
import requests              # Library for sending API requests
from openai import OpenAI    # OpenAI-compatible client for the Upstage Solar LLM
from io import BytesIO       # Tool for handling image data in memory

def extract_text_from_image(image, api_key):
    """
    Extract text from an image using the Upstage Document OCR API.
    """
    # Guard against an empty upload (the change event also fires when the image is cleared)
    if image is None:
        return ""

    # Upstage API endpoint
    url = "https://api.upstage.ai/v1/document-digitization"

    # Set up headers for API key authentication
    headers = {"Authorization": f"Bearer {api_key}"}

    # Save the image to a memory buffer (JPEG format);
    # convert to RGB first, since JPEG cannot store an alpha channel
    buffer = BytesIO()
    image.convert("RGB").save(buffer, format="JPEG")
    buffer.seek(0)

    # Prepare files and form data for the request
    files = {"document": ("image.jpg", buffer, "image/jpeg")}
    data = {"model": "ocr"}  # Model to use: OCR

    # Send POST request
    response = requests.post(url, headers=headers, files=files, data=data)

    # If the request is successful, extract the recognized text
    if response.status_code == 200:
        text = response.json().get("text", "")  # Extract text from the JSON response
        return text.strip()  # Remove leading/trailing whitespace and return
    else:
        # Return an error message on failure
        return f"OCR Failed: {response.status_code} - {response.text}"

def translate_text_with_solar(korean_text, api_key):
    """
    Translate Korean text into English using the Upstage Solar Pro API.
    """
    # Initialize an OpenAI-compatible client for calling the Solar LLM
    client = OpenAI(
        api_key=api_key,
        base_url="https://api.upstage.ai/v1"
    )

    # Construct the prompt for the model
    prompt = f"""
    Below is a handwritten letter in Korean.\n
    {korean_text} \n
    Please translate it into English.\n\n
    Translated letter in English:
    """

    # Call the Solar LLM to perform the translation
    response = client.chat.completions.create(
        model="solar-pro",                               # Model to use
        messages=[{"role": "user", "content": prompt}],  # User message
        temperature=0.5,                                 # Creativity level (0.0~1.0)
        max_tokens=2048                                  # Max response length
    )

    # Return the translated text
    return response.choices[0].message.content

# Gradio interface layout
with gr.Blocks() as demo:
    # Header description
    gr.Markdown("# Handwritten Letter Translator")
    gr.Markdown(
        "Upload a letter image to extract Korean text using Upstage Document OCR.\n"
        "Click the Translate button to translate it into English using Solar LLM!"
    )
    gr.Markdown("The example images are AI-generated. Click the Files button to view or download them.")

    # API key input
    api_key_input = gr.Textbox(label="Upstage API Key", type="password", placeholder="Paste your API key here")

    # Layout: two-column format
    with gr.Row():
        # Left column: image upload
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Upload Letter Image")

        # Right column: extracted text and translation
        with gr.Column(scale=2):
            korean_box = gr.Textbox(label="Extracted Korean Text", lines=10)
            translate_button = gr.Button("Translate")
            english_box = gr.Textbox(label="Translated English Text", lines=10)

    # Step 1: run OCR when an image is uploaded → display the extracted text
    image_input.change(fn=extract_text_from_image, inputs=[image_input, api_key_input], outputs=korean_box)

    # Step 2: run translation when the button is clicked → display the translated result
    translate_button.click(fn=translate_text_with_solar, inputs=[korean_box, api_key_input], outputs=english_box)
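
# --- Optional standalone check: a minimal sketch added for illustration, not part of
# the original app. It assumes a local test image ("sample_letter.jpg") and an
# UPSTAGE_API_KEY environment variable; both names are hypothetical. ---
def _smoke_test(image_path="sample_letter.jpg"):
    import os
    from PIL import Image

    api_key = os.environ.get("UPSTAGE_API_KEY", "")        # hypothetical env variable
    image = Image.open(image_path)                         # hypothetical test image
    korean = extract_text_from_image(image, api_key)       # Step 1: OCR
    print("Extracted Korean text:\n", korean)
    english = translate_text_with_solar(korean, api_key)   # Step 2: translation
    print("Translated English text:\n", english)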

# Run app
if __name__ == "__main__":
    demo.launch()