Spaces:

numind
/

NuMarkdown-8B-Thinking

Running on L40S

App Files Files Community

NuMarkdown-8B-Thinking / app.py

liamcripwell

Update app.py

3c2969a verified 21 days ago

raw

history blame contribute delete

7.14 kB

	import gradio as gr
	import requests
	import base64
	from PIL import Image
	from io import BytesIO

	print("=== DEBUG: Starting app.py ===")

	# Collect the bundled example images shown under the upload widget.
	import os

	# Examples are looked up under $HOME/app/example_images ('/home/user' is
	# used when HOME is unset).
	example_dir = os.path.join(os.environ.get('HOME', '/home/user'), 'app', 'example_images')
	example_images = []
	# isdir (not exists): a plain file at this path would make os.listdir raise.
	if os.path.isdir(example_dir):
	    # os.listdir returns entries in arbitrary order; sort so that any later
	    # slice of this list selects the same files on every start.
	    example_images = sorted(
	        os.path.join(example_dir, filename)
	        for filename in os.listdir(example_dir)
	        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp'))
	    )
	print(f"Found {len(example_images)} example images")

	def encode_image_to_base64(image: Image.Image) -> str:
	    """Encode a PIL image as a base64 JPEG data URL.

	    Args:
	        image: The image to encode. It is not modified; any mode
	            conversion works on a copy.

	    Returns:
	        A string of the form ``data:image/jpeg;base64,<payload>``.
	    """
	    # JPEG cannot store an alpha channel or a palette, so saving such an image
	    # directly raises. Flatten transparency onto white (a plain RGB conversion
	    # would leave transparent areas with whatever colour sits underneath,
	    # often black), and convert every other non-JPEG-friendly mode to RGB.
	    has_alpha = image.mode in ("RGBA", "LA") or (
	        image.mode == "P" and "transparency" in image.info
	    )
	    if has_alpha:
	        rgba = image.convert("RGBA")
	        flattened = Image.new("RGB", rgba.size, (255, 255, 255))
	        flattened.paste(rgba, mask=rgba.getchannel("A"))
	        image = flattened
	    elif image.mode not in ("RGB", "L"):
	        image = image.convert("RGB")

	    buffered = BytesIO()
	    image.save(buffered, format="JPEG")
	    img_str = base64.b64encode(buffered.getvalue()).decode()
	    return f"data:image/jpeg;base64,{img_str}"

	def _extract_tag(text, tag):
	    """Return the text between ``<tag>`` and ``</tag>`` in *text*.

	    Returns None when the opening tag is absent. When the closing tag is
	    missing, everything after the opening tag is returned.
	    """
	    _, opening, remainder = text.partition(f"<{tag}>")
	    if not opening:
	        return None
	    return remainder.partition(f"</{tag}>")[0]


	def query_vllm_api(image, temperature, max_tokens=12_000):
	    """Send *image* to the local vLLM chat-completions endpoint.

	    Args:
	        image: A PIL image, or None when nothing was uploaded.
	        temperature: Sampling temperature forwarded in the request payload.
	        max_tokens: Generation cap forwarded in the request payload.

	    Returns:
	        A 3-tuple of strings ``(reasoning, answer, answer)``. The answer is
	        returned twice because the click handler in this file feeds one copy
	        to the raw-text box and one to the rendered-Markdown view. If the
	        request or parsing fails, all three elements carry the error message
	        instead; exceptions are not propagated to the caller.
	    """
	    print(f"=== DEBUG: query_vllm_api called with image={image is not None}, temp={temperature} ===")

	    if image is None:
	        return "No image provided", "No image provided", "Please upload an image first."

	    try:
	        # Downscale large images so the request body stays reasonably small.
	        max_size = 2048
	        longest_side = max(image.size)
	        if longest_side > max_size:
	            ratio = max_size / longest_side
	            # Clamp to 1 px: with an extreme aspect ratio the short side
	            # would otherwise round down to 0 and make resize() fail.
	            new_size = tuple(max(1, int(dim * ratio)) for dim in image.size)
	            image = image.resize(new_size, Image.Resampling.LANCZOS)

	        image_b64 = encode_image_to_base64(image)
	        messages = [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "image_url", "image_url": {"url": image_b64}},
	                ],
	            },
	        ]

	        payload = {
	            "model": "numind/NuMarkdown-8B-Thinking",
	            "messages": messages,
	            "max_tokens": max_tokens,
	            "temperature": temperature,
	        }

	        print("=== DEBUG: About to make vLLM API request ===")
	        response = requests.post(
	            "http://localhost:8000/v1/chat/completions",
	            json=payload,
	            timeout=60,
	        )
	        response.raise_for_status()
	        data = response.json()

	        # Treat a null content field as an empty string instead of crashing.
	        result = data["choices"][0]["message"]["content"] or ""

	        # Parse the two sections independently so that a missing <answer>
	        # block does not throw away a thinking trace that is present, and
	        # vice versa.
	        reasoning = _extract_tag(result, "think")
	        answer = _extract_tag(result, "answer")
	        if reasoning is None:
	            reasoning = "No thinking trace found"
	        if answer is None:
	            # No answer tags: fall back to the full model output.
	            answer = result

	        return reasoning, answer, answer

	    except requests.exceptions.RequestException as e:
	        error_msg = f"API request failed: {e}"
	        print(f"=== DEBUG: Request error: {error_msg} ===")
	        return error_msg, error_msg, error_msg
	    except Exception as e:
	        # UI boundary: report the failure in the output widgets rather than
	        # letting the handler raise.
	        error_msg = f"Unexpected error: {e}"
	        print(f"=== DEBUG: Unexpected error: {error_msg} ===")
	        return error_msg, error_msg, error_msg

	print("=== DEBUG: Creating Gradio interface ===")


	# Build the UI: a header banner, a controls/input column and an outputs column.
	with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft(), css="""
	* {
	font-family: 'Inter', 'Segoe UI', 'Roboto', 'Helvetica Neue', Arial, sans-serif !important;
	}
	""") as demo:
	    # The link separators are plain "|" characters: "\|" is not a valid
	    # Python escape sequence and would show a stray backslash in the page.
	    gr.HTML("""
	<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
	<h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">👁️ NuMarkdown-8B-Thinking</h1>
	<p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
	<div style="margin-top: 15px;">
	<a href="https://nuextract.ai/" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
	<span style="color: rgba(255,255,255,0.7);">|</span>
	<a href="https://discord.gg/3tsEtJNCDe" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
	<span style="color: rgba(255,255,255,0.7);">|</span>
	<a href="https://github.com/numindai/NuMarkdown" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🔗 GitHub</a>
	<span style="color: rgba(255,255,255,0.7);">|</span>
	<a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
	</div>
	</div>
	<p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
	<p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
	""")

	    with gr.Row():
	        # First column: sampling control, trigger button and image input.
	        with gr.Column(scale=2):
	            temperature = gr.Slider(0.1, 1.5, value=0.4, step=0.1, label="Temperature")
	            btn = gr.Button("Generate Response", variant="primary", size="lg")
	            img_in = gr.Image(type="pil", label="Upload Image")

	        # Second column: model outputs in a collapsible accordion (open by
	        # default), one tab per view of the result.
	        with gr.Column(scale=2):
	            with gr.Accordion("🔍 Model Outputs", open=True):
	                with gr.Tabs():
	                    with gr.TabItem("🧠 Thinking Trace"):
	                        thinking = gr.Textbox(
	                            lines=15,
	                            max_lines=25,
	                            show_label=False,
	                            placeholder="The model's reasoning process will appear here...",
	                        )
	                    with gr.TabItem("📝 Rendered Markdown"):
	                        output = gr.Markdown(label="📝 Generated Markdown")
	                    with gr.TabItem("📄 Raw Markdown"):
	                        raw_answer = gr.Textbox(
	                            lines=15,
	                            max_lines=25,
	                            show_label=False,
	                            placeholder="The raw model output will appear here...",
	                        )

	    # The handler returns (reasoning, answer, answer), mapped in this order
	    # onto the thinking box, the raw-text box and the rendered Markdown.
	    btn.click(
	        query_vllm_api,
	        inputs=[img_in, temperature],
	        outputs=[thinking, raw_answer, output],
	    )

	    # Offer clickable examples only when some were found on disk.
	    if example_images:
	        gr.Examples(
	            examples=example_images[:5],  # Limit to 5 examples
	            inputs=img_in,
	            label="📸 Try these example images",
	        )

	print("=== DEBUG: Gradio interface created ===")

	if __name__ == "__main__":
	    print("=== DEBUG: About to launch Gradio ===")
	    # Serve on every network interface at port 7860.
	    # NOTE(review): share=True also requests a public Gradio share link;
	    # confirm that is wanted for a deployment that is already hosted.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	    }
	    demo.launch(**launch_options)
	    # NOTE(review): launch() presumably blocks when run as a script, so this
	    # line may only print once the server stops; verify.
	    print("=== DEBUG: Gradio launched ===")