Commit
·
e31bcf4
1
Parent(s):
84e4e9f
printing extracted text10
Browse files
main.py
CHANGED
@@ -277,47 +277,40 @@ def new_value(value, old_min, old_max, new_min, new_max):
|
|
277 |
@app.route('/compute_answers', methods=['POST'])
|
278 |
def compute_answers():
|
279 |
try:
|
280 |
-
query_file = request.files.get('query_file')
|
281 |
-
if not query_file:
|
282 |
-
error_msg = "Missing query file"
|
283 |
-
log_print(error_msg, "ERROR")
|
284 |
-
return jsonify({"error": error_msg}), 400
|
285 |
-
|
286 |
file_type = request.form.get('file_type')
|
287 |
log_print(f"Processing file type: {file_type}")
|
288 |
|
289 |
-
# For CSV files,
|
290 |
if file_type == "csv":
|
291 |
ans_csv_file = request.files.get('ans_csv_file')
|
292 |
if not ans_csv_file:
|
293 |
-
|
294 |
-
log_print(error_msg, "ERROR")
|
295 |
-
return jsonify({"error": error_msg}), 400
|
296 |
|
297 |
try:
|
298 |
-
# Read
|
299 |
-
|
300 |
-
|
|
|
|
|
|
|
301 |
c_answers = []
|
302 |
-
for line in
|
303 |
-
if line.strip():
|
304 |
answers = [ans.strip() for ans in line.split('\\n') if ans.strip()]
|
305 |
-
|
|
|
306 |
|
307 |
if not c_answers:
|
308 |
-
|
309 |
-
log_print(error_msg, "ERROR")
|
310 |
-
return jsonify({"error": error_msg}), 400
|
311 |
|
312 |
log_print(f"Successfully processed {len(c_answers)} answers from CSV")
|
313 |
return jsonify({"answers": c_answers}), 200
|
314 |
|
315 |
except Exception as e:
|
316 |
-
|
317 |
-
|
318 |
-
return jsonify({"error": error_msg}), 400
|
319 |
|
320 |
-
#
|
321 |
elif file_type == 'pdf':
|
322 |
# Wait for initialization only for PDF files
|
323 |
wait_for_initialization()
|
@@ -345,7 +338,7 @@ def compute_answers():
|
|
345 |
database_creation(pdf_file)
|
346 |
|
347 |
# Generate answers
|
348 |
-
queries = query_file.read().decode('utf-8').splitlines()
|
349 |
queries = [q.strip() for q in queries if q.strip()]
|
350 |
if not queries:
|
351 |
error_msg = "Query file is empty"
|
@@ -810,12 +803,15 @@ def wait_for_initialization():
|
|
810 |
initialization_complete.wait()
|
811 |
return True
|
812 |
|
813 |
-
# Add this to the compute_marks route before processing files
|
814 |
@app.before_request
|
815 |
def ensure_initialization():
|
816 |
"""Ensure all resources are initialized before processing requests"""
|
817 |
-
if request.endpoint
|
818 |
wait_for_initialization()
|
|
|
|
|
|
|
|
|
819 |
|
820 |
def cleanup_temp_files():
|
821 |
"""Clean up temporary files with proper error handling"""
|
|
|
277 |
@app.route('/compute_answers', methods=['POST'])
|
278 |
def compute_answers():
|
279 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
280 |
file_type = request.form.get('file_type')
|
281 |
log_print(f"Processing file type: {file_type}")
|
282 |
|
283 |
+
# For CSV files, process directly without any model initialization
|
284 |
if file_type == "csv":
|
285 |
ans_csv_file = request.files.get('ans_csv_file')
|
286 |
if not ans_csv_file:
|
287 |
+
return jsonify({"error": "Missing answer CSV file"}), 400
|
|
|
|
|
288 |
|
289 |
try:
|
290 |
+
# Read CSV content directly without unnecessary processing
|
291 |
+
content = ans_csv_file.read().decode('utf-8')
|
292 |
+
if not content.strip():
|
293 |
+
return jsonify({"error": "CSV file is empty"}), 400
|
294 |
+
|
295 |
+
# Process answers more efficiently
|
296 |
c_answers = []
|
297 |
+
for line in content.splitlines():
|
298 |
+
if line.strip():
|
299 |
answers = [ans.strip() for ans in line.split('\\n') if ans.strip()]
|
300 |
+
if answers: # Only add if there are valid answers
|
301 |
+
c_answers.append(answers)
|
302 |
|
303 |
if not c_answers:
|
304 |
+
return jsonify({"error": "No valid answers found in CSV file"}), 400
|
|
|
|
|
305 |
|
306 |
log_print(f"Successfully processed {len(c_answers)} answers from CSV")
|
307 |
return jsonify({"answers": c_answers}), 200
|
308 |
|
309 |
except Exception as e:
|
310 |
+
log_print(f"Error processing CSV file: {str(e)}", "ERROR")
|
311 |
+
return jsonify({"error": f"Error processing CSV file: {str(e)}"}), 400
|
|
|
312 |
|
313 |
+
# For PDF files, continue with existing processing
|
314 |
elif file_type == 'pdf':
|
315 |
# Wait for initialization only for PDF files
|
316 |
wait_for_initialization()
|
|
|
338 |
database_creation(pdf_file)
|
339 |
|
340 |
# Generate answers
|
341 |
+
queries = request.files.get('query_file').read().decode('utf-8').splitlines()
|
342 |
queries = [q.strip() for q in queries if q.strip()]
|
343 |
if not queries:
|
344 |
error_msg = "Query file is empty"
|
|
|
803 |
initialization_complete.wait()
|
804 |
return True
|
805 |
|
|
|
806 |
@app.before_request
|
807 |
def ensure_initialization():
|
808 |
"""Ensure all resources are initialized before processing requests"""
|
809 |
+
if request.endpoint == 'compute_marks':
|
810 |
wait_for_initialization()
|
811 |
+
elif request.endpoint == 'compute_answers':
|
812 |
+
# Only wait for initialization if processing PDF files
|
813 |
+
if request.form.get('file_type') == 'pdf':
|
814 |
+
wait_for_initialization()
|
815 |
|
816 |
def cleanup_temp_files():
|
817 |
"""Clean up temporary files with proper error handling"""
|