Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,37 +1,29 @@
|
|
1 |
import gradio as gr
|
2 |
import pdfplumber
|
3 |
-
|
4 |
|
5 |
-
|
6 |
-
extractor = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")
|
7 |
-
|
8 |
-
def extract_info(pdf_file):
|
9 |
with pdfplumber.open(pdf_file) as pdf:
|
10 |
text = "\n".join(page.extract_text() for page in pdf.pages if page.extract_text())
|
11 |
|
12 |
-
#
|
13 |
-
|
14 |
-
|
15 |
-
# Formatowanie wyników
|
16 |
-
extracted_data = {}
|
17 |
-
for entity in entities:
|
18 |
-
label = entity["entity_group"]
|
19 |
-
word = entity["word"]
|
20 |
-
|
21 |
-
if label not in extracted_data:
|
22 |
-
extracted_data[label] = []
|
23 |
|
24 |
-
|
|
|
|
|
|
|
25 |
|
26 |
-
return
|
27 |
|
28 |
-
# Interfejs użytkownika w Hugging Face
|
29 |
iface = gr.Interface(
|
30 |
-
fn=
|
31 |
inputs=gr.File(label="Wybierz plik PDF"),
|
32 |
outputs="json",
|
33 |
-
title="Ekstrakcja
|
34 |
-
description="Prze艣lij plik PDF
|
35 |
)
|
36 |
|
37 |
if __name__ == "__main__":
|
|
|
1 |
import gradio as gr
|
2 |
import pdfplumber
|
3 |
+
import re
|
4 |
|
5 |
+
def extract_seller(pdf_file):
    """Extract the seller name from an invoice PDF.

    The text of all pages is joined with newlines and searched,
    case-insensitively, for the first occurrence of the label "Sprzedawca"
    followed by one or more colons/whitespace characters. Whatever follows,
    up to the end of that line, is returned as the seller name. Because the
    separator may include a newline, the name can also be picked up from the
    line directly after the label.

    Args:
        pdf_file: Value handed to ``pdfplumber.open`` -- presumably the path
            or file object supplied by the Gradio ``File`` input (TODO:
            confirm against the installed Gradio version). ``None`` is
            accepted and treated as "nothing found".

    Returns:
        A dict with the single key ``"Sprzedawca"``. Its value is the
        stripped seller name, or ``"Nie znaleziono"`` when no file was
        given, no label matched, or the captured text was blank.
    """
    not_found = "Nie znaleziono"

    # Submitting the form without a file yields None; there is nothing to
    # open, so report "not found" instead of failing inside pdfplumber.
    if pdf_file is None:
        return {"Sprzedawca": not_found}

    with pdfplumber.open(pdf_file) as pdf:
        # Extract each page once (the extraction is the expensive step) and
        # drop pages that produced no text.
        page_texts = (page.extract_text() for page in pdf.pages)
        text = "\n".join(chunk for chunk in page_texts if chunk)

    # Look for the line containing the "Sprzedawca" label.
    pattern = r"(Sprzedawca[:\s]+)(.+)"
    match = re.search(pattern, text, re.IGNORECASE)

    # Group 2 is the text after "Sprzedawca:"; a whitespace-only capture is
    # treated the same as no match.
    seller_name = match.group(2).strip() if match else ""

    return {"Sprzedawca": seller_name or not_found}
|
19 |
|
20 |
+
# User interface for Hugging Face Spaces: a single PDF upload whose result
# (the dict returned by extract_seller) is rendered as JSON.
iface = gr.Interface(
    fn=extract_seller,
    inputs=gr.File(label="Wybierz plik PDF"),
    outputs="json",
    title="Ekstrakcja Sprzedawcy z Faktury",
    # The description is user-facing Polish text; its diacritics were
    # mis-encoded and are restored here.
    description="Prześlij plik PDF, aby wydobyć nazwę sprzedawcy.",
)
|
28 |
|
29 |
if __name__ == "__main__":
|