Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -31,18 +31,28 @@ doc_texts = []
|
|
31 |
# Extract text from PDF or TXT
|
32 |
def extract_text(file):
|
33 |
text = ""
|
34 |
-
|
35 |
-
if file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
pdf_stream = BytesIO(file_bytes)
|
37 |
doc = fitz.open(stream=pdf_stream, filetype="pdf")
|
38 |
for page in doc:
|
39 |
text += page.get_text()
|
40 |
-
elif
|
41 |
text = file_bytes.decode("utf-8")
|
42 |
else:
|
43 |
return "❌ Unsupported file type."
|
44 |
return text
|
45 |
|
|
|
46 |
# Process the file, build FAISS index
|
47 |
def process_file(file):
|
48 |
global index, doc_texts
|
|
|
31 |
# Extract text from PDF or TXT
|
32 |
def extract_text(file):
    """Return the text content of an uploaded PDF or TXT file.

    Args:
        file: Either a filesystem path given as a ``str`` (or ``str``
            subclass), or an object exposing ``read()`` that returns bytes
            and a ``name`` attribute holding the file name.

    Returns:
        The extracted text. PDF text is the concatenation of
        ``page.get_text()`` for every page; TXT content is decoded as UTF-8.
        For any other extension the literal message
        ``"❌ Unsupported file type."`` is returned rather than raising, so
        callers that simply display the result keep working.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
        OSError: If ``file`` is a path that cannot be opened.
    """
    # Handle if file is a NamedString (like on HF Spaces)
    if isinstance(file, str):
        with open(file, "rb") as f:
            file_bytes = f.read()
        filename = file
    else:
        file_bytes = file.read()
        filename = file.name

    # Match the extension case-insensitively so that "REPORT.PDF" or
    # "notes.TXT" are not rejected as unsupported.
    lowered_name = filename.lower()

    if lowered_name.endswith(".pdf"):
        doc = fitz.open(stream=BytesIO(file_bytes), filetype="pdf")
        try:
            # Join once instead of growing a string page by page.
            return "".join(page.get_text() for page in doc)
        finally:
            # Release the document even if text extraction fails part-way.
            doc.close()

    if lowered_name.endswith(".txt"):
        return file_bytes.decode("utf-8")

    return "❌ Unsupported file type."
|
54 |
|
55 |
+
|
56 |
# Process the file, build FAISS index
|
57 |
def process_file(file):
|
58 |
global index, doc_texts
|