yamanavijayavardhan committed on
Commit
e31bcf4
·
1 Parent(s): 84e4e9f

printing extracted text10

Browse files
Files changed (1) hide show
  1. main.py +22 -26
main.py CHANGED
@@ -277,47 +277,40 @@ def new_value(value, old_min, old_max, new_min, new_max):
277
  @app.route('/compute_answers', methods=['POST'])
278
  def compute_answers():
279
  try:
280
- query_file = request.files.get('query_file')
281
- if not query_file:
282
- error_msg = "Missing query file"
283
- log_print(error_msg, "ERROR")
284
- return jsonify({"error": error_msg}), 400
285
-
286
  file_type = request.form.get('file_type')
287
  log_print(f"Processing file type: {file_type}")
288
 
289
- # For CSV files, skip model initialization
290
  if file_type == "csv":
291
  ans_csv_file = request.files.get('ans_csv_file')
292
  if not ans_csv_file:
293
- error_msg = "Missing answer CSV file"
294
- log_print(error_msg, "ERROR")
295
- return jsonify({"error": error_msg}), 400
296
 
297
  try:
298
- # Read and process CSV answers directly without loading models
299
- log_print("Reading CSV answer file...")
300
- ans_csv_content = ans_csv_file.read().decode('utf-8').splitlines()
 
 
 
301
  c_answers = []
302
- for line in ans_csv_content:
303
- if line.strip(): # Skip empty lines
304
  answers = [ans.strip() for ans in line.split('\\n') if ans.strip()]
305
- c_answers.append(answers)
 
306
 
307
  if not c_answers:
308
- error_msg = "No valid answers found in CSV file"
309
- log_print(error_msg, "ERROR")
310
- return jsonify({"error": error_msg}), 400
311
 
312
  log_print(f"Successfully processed {len(c_answers)} answers from CSV")
313
  return jsonify({"answers": c_answers}), 200
314
 
315
  except Exception as e:
316
- error_msg = f"Error processing CSV file: {str(e)}"
317
- log_print(error_msg, "ERROR")
318
- return jsonify({"error": error_msg}), 400
319
 
320
- # Only initialize models for PDF processing
321
  elif file_type == 'pdf':
322
  # Wait for initialization only for PDF files
323
  wait_for_initialization()
@@ -345,7 +338,7 @@ def compute_answers():
345
  database_creation(pdf_file)
346
 
347
  # Generate answers
348
- queries = query_file.read().decode('utf-8').splitlines()
349
  queries = [q.strip() for q in queries if q.strip()]
350
  if not queries:
351
  error_msg = "Query file is empty"
@@ -810,12 +803,15 @@ def wait_for_initialization():
810
  initialization_complete.wait()
811
  return True
812
 
813
- # Add this to the compute_marks route before processing files
814
  @app.before_request
815
  def ensure_initialization():
816
  """Ensure all resources are initialized before processing requests"""
817
- if request.endpoint in ['compute_marks', 'compute_answers']:
818
  wait_for_initialization()
 
 
 
 
819
 
820
  def cleanup_temp_files():
821
  """Clean up temporary files with proper error handling"""
 
277
  @app.route('/compute_answers', methods=['POST'])
278
  def compute_answers():
279
  try:
 
 
 
 
 
 
280
  file_type = request.form.get('file_type')
281
  log_print(f"Processing file type: {file_type}")
282
 
283
+ # For CSV files, process directly without any model initialization
284
  if file_type == "csv":
285
  ans_csv_file = request.files.get('ans_csv_file')
286
  if not ans_csv_file:
287
+ return jsonify({"error": "Missing answer CSV file"}), 400
 
 
288
 
289
  try:
290
+ # Read CSV content directly without unnecessary processing
291
+ content = ans_csv_file.read().decode('utf-8')
292
+ if not content.strip():
293
+ return jsonify({"error": "CSV file is empty"}), 400
294
+
295
+ # Process answers more efficiently
296
  c_answers = []
297
+ for line in content.splitlines():
298
+ if line.strip():
299
  answers = [ans.strip() for ans in line.split('\\n') if ans.strip()]
300
+ if answers: # Only add if there are valid answers
301
+ c_answers.append(answers)
302
 
303
  if not c_answers:
304
+ return jsonify({"error": "No valid answers found in CSV file"}), 400
 
 
305
 
306
  log_print(f"Successfully processed {len(c_answers)} answers from CSV")
307
  return jsonify({"answers": c_answers}), 200
308
 
309
  except Exception as e:
310
+ log_print(f"Error processing CSV file: {str(e)}", "ERROR")
311
+ return jsonify({"error": f"Error processing CSV file: {str(e)}"}), 400
 
312
 
313
+ # For PDF files, continue with existing processing
314
  elif file_type == 'pdf':
315
  # Wait for initialization only for PDF files
316
  wait_for_initialization()
 
338
  database_creation(pdf_file)
339
 
340
  # Generate answers
341
+ queries = request.files.get('query_file').read().decode('utf-8').splitlines()
342
  queries = [q.strip() for q in queries if q.strip()]
343
  if not queries:
344
  error_msg = "Query file is empty"
 
803
  initialization_complete.wait()
804
  return True
805
 
 
806
  @app.before_request
807
  def ensure_initialization():
808
  """Ensure all resources are initialized before processing requests"""
809
+ if request.endpoint == 'compute_marks':
810
  wait_for_initialization()
811
+ elif request.endpoint == 'compute_answers':
812
+ # Only wait for initialization if processing PDF files
813
+ if request.form.get('file_type') == 'pdf':
814
+ wait_for_initialization()
815
 
816
  def cleanup_temp_files():
817
  """Clean up temporary files with proper error handling"""