yamanavijayavardhan committed on
Commit
84e4e9f
·
1 Parent(s): 8331040

printing extracted text9

Browse files
Files changed (1) hide show
  1. main.py +14 -15
main.py CHANGED
@@ -283,23 +283,10 @@ def compute_answers():
283
  log_print(error_msg, "ERROR")
284
  return jsonify({"error": error_msg}), 400
285
 
286
- # Read and validate query file
287
- try:
288
- queries = query_file.read().decode('utf-8').splitlines()
289
- queries = [q.strip() for q in queries if q.strip()]
290
- if not queries:
291
- error_msg = "Query file is empty"
292
- log_print(error_msg, "ERROR")
293
- return jsonify({"error": error_msg}), 400
294
- log_print(f"Successfully read {len(queries)} queries")
295
- except Exception as e:
296
- error_msg = f"Error reading query file: {str(e)}"
297
- log_print(error_msg, "ERROR")
298
- return jsonify({"error": error_msg}), 400
299
-
300
  file_type = request.form.get('file_type')
301
  log_print(f"Processing file type: {file_type}")
302
 
 
303
  if file_type == "csv":
304
  ans_csv_file = request.files.get('ans_csv_file')
305
  if not ans_csv_file:
@@ -308,7 +295,7 @@ def compute_answers():
308
  return jsonify({"error": error_msg}), 400
309
 
310
  try:
311
- # Read and process CSV answers
312
  log_print("Reading CSV answer file...")
313
  ans_csv_content = ans_csv_file.read().decode('utf-8').splitlines()
314
  c_answers = []
@@ -330,7 +317,11 @@ def compute_answers():
330
  log_print(error_msg, "ERROR")
331
  return jsonify({"error": error_msg}), 400
332
 
 
333
  elif file_type == 'pdf':
 
 
 
334
  # Create a temporary directory for PDF files
335
  pdf_dir = tempfile.mkdtemp()
336
  os.makedirs(pdf_dir, exist_ok=True)
@@ -354,6 +345,14 @@ def compute_answers():
354
  database_creation(pdf_file)
355
 
356
  # Generate answers
 
 
 
 
 
 
 
 
357
  for query in queries:
358
  ans = []
359
  for pdf_file in pdf_files:
 
283
  log_print(error_msg, "ERROR")
284
  return jsonify({"error": error_msg}), 400
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  file_type = request.form.get('file_type')
287
  log_print(f"Processing file type: {file_type}")
288
 
289
+ # For CSV files, skip model initialization
290
  if file_type == "csv":
291
  ans_csv_file = request.files.get('ans_csv_file')
292
  if not ans_csv_file:
 
295
  return jsonify({"error": error_msg}), 400
296
 
297
  try:
298
+ # Read and process CSV answers directly without loading models
299
  log_print("Reading CSV answer file...")
300
  ans_csv_content = ans_csv_file.read().decode('utf-8').splitlines()
301
  c_answers = []
 
317
  log_print(error_msg, "ERROR")
318
  return jsonify({"error": error_msg}), 400
319
 
320
+ # Only initialize models for PDF processing
321
  elif file_type == 'pdf':
322
+ # Wait for initialization only for PDF files
323
+ wait_for_initialization()
324
+
325
  # Create a temporary directory for PDF files
326
  pdf_dir = tempfile.mkdtemp()
327
  os.makedirs(pdf_dir, exist_ok=True)
 
345
  database_creation(pdf_file)
346
 
347
  # Generate answers
348
+ queries = query_file.read().decode('utf-8').splitlines()
349
+ queries = [q.strip() for q in queries if q.strip()]
350
+ if not queries:
351
+ error_msg = "Query file is empty"
352
+ log_print(error_msg, "ERROR")
353
+ return jsonify({"error": error_msg}), 400
354
+
355
+ c_answers = []
356
  for query in queries:
357
  ans = []
358
  for pdf_file in pdf_files: