pradeepsengarr committed on
Commit
9a8a050
·
verified ·
1 Parent(s): f1e12d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -31,18 +31,28 @@ doc_texts = []
31
  # Extract text from PDF or TXT
32
  def extract_text(file):
33
  text = ""
34
- file_bytes = file.read()
35
- if file.name.endswith(".pdf"):
 
 
 
 
 
 
 
 
 
36
  pdf_stream = BytesIO(file_bytes)
37
  doc = fitz.open(stream=pdf_stream, filetype="pdf")
38
  for page in doc:
39
  text += page.get_text()
40
- elif file.name.endswith(".txt"):
41
  text = file_bytes.decode("utf-8")
42
  else:
43
  return "❌ Unsupported file type."
44
  return text
45
 
 
46
  # Process the file, build FAISS index
47
  def process_file(file):
48
  global index, doc_texts
 
31
  # Extract text from PDF or TXT
32
  def extract_text(file):
33
  text = ""
34
+
35
+ # Handle if file is a NamedString (like on HF Spaces)
36
+ if isinstance(file, str):
37
+ with open(file, "rb") as f:
38
+ file_bytes = f.read()
39
+ filename = file
40
+ else:
41
+ file_bytes = file.read()
42
+ filename = file.name
43
+
44
+ if filename.endswith(".pdf"):
45
  pdf_stream = BytesIO(file_bytes)
46
  doc = fitz.open(stream=pdf_stream, filetype="pdf")
47
  for page in doc:
48
  text += page.get_text()
49
+ elif filename.endswith(".txt"):
50
  text = file_bytes.decode("utf-8")
51
  else:
52
  return "❌ Unsupported file type."
53
  return text
54
 
55
+
56
  # Process the file, build FAISS index
57
  def process_file(file):
58
  global index, doc_texts