Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from ultralytics import YOLO
|
3 |
+
from PIL import Image
|
4 |
+
import numpy as np
|
5 |
+
import os
|
6 |
+
|
7 |
+
# Load the YOLO model once at import time; the repo id below is handed to
# YOLO() verbatim and the resulting object is reused by every prediction.
# NOTE(review): the identifier looks like a Hugging Face repo id rather than a
# local weights file - confirm the installed ultralytics version resolves it.
model_repo = "ashen007/document-structure-detection"
model = YOLO(model_repo)
|
10 |
+
|
11 |
+
# Layout classes, one per line so additions and removals diff cleanly.
# NOTE(review): presumably this mirrors the label set of the loaded model -
# verify against the checkpoint before relying on the order.
class_names = [
    "Author",
    "Bigletter",
    "Bleeding",
    "Bold",
    "Caption",
    "Date",
    "Figure",
    "Footnote",
    "Header",
    "Italic",
    "List",
    "Map",
    "SubSubTitle",
    "SubTitle",
    "Table",
    "TextColumn",
    "Title",
    "Underline",
    "equations",
]
|
17 |
+
|
18 |
+
def predict(image):
    """
    Run layout detection on an image and return the annotated result.

    Args:
        image: A PIL image (what the Gradio input component is configured to
            supply), any other source object accepted by the model, or None.
            NumPy arrays are passed through unchanged, so they must already
            follow the Ultralytics convention of BGR channel order.

    Returns:
        A PIL image in RGB order with the detections drawn on it, or None
        when no image was supplied or prediction failed.
    """
    if image is None:
        return None

    try:
        if isinstance(image, Image.Image):
            # Give the model the PIL image itself instead of an RGB array:
            # Ultralytics reads raw arrays as BGR, so converting here would
            # feed it channel-swapped pixels. convert("RGB") also normalises
            # RGBA / grayscale / palette uploads to three channels.
            source = image.convert("RGB")
        else:
            source = image

        # Run prediction with the same confidence threshold as before
        results = model(source, conf=0.35)

        # plot() returns a BGR array; flip the channel axis so the PIL image
        # handed back to the UI is RGB. ascontiguousarray materialises the
        # reversed view before it is wrapped.
        annotated_bgr = results[0].plot(labels=True)
        annotated_rgb = np.ascontiguousarray(annotated_bgr[..., ::-1])
        return Image.fromarray(annotated_rgb)
    except Exception as e:
        # Deliberate best-effort: report the failure and show an empty output
        # rather than crashing the UI callback.
        print(f"Error during prediction: {e}")
        return None
|
40 |
+
|
41 |
+
# Make sure the examples folder exists. exist_ok=True makes this a no-op when
# the folder is already there and avoids the check-then-create race of a
# separate os.path.exists() test.
os.makedirs("examples", exist_ok=True)
|
44 |
+
|
45 |
+
# Markdown blurb passed to the interface as its description
_DESCRIPTION = """
## Document Layout Detection

This model identifies various elements in document layouts including:
- Text structures (TextColumns, Lists)
- Semantic elements (Titles, Headers)
- Typographical features (Bold, Italic)
- Visual components (Figures, Tables)

Upload an image of a document to analyze its layout structure.
"""

# Gradio interface: one PIL image in, one annotated PIL image out, with
# predict() as the callback.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="Document Layout Analysis with YOLOv8",
    description=_DESCRIPTION,
    # No bundled examples yet; list image paths here,
    # e.g. "examples/example1.jpg", "examples/example2.jpg"
    examples=[],
)
|
68 |
+
|
69 |
+
# Launch the app only when this file is run directly, not when it is imported
if __name__ == "__main__":
    demo.launch()
|