Commit
·
84e4e9f
1
Parent(s):
8331040
printing extracted text9
Browse files
main.py
CHANGED
@@ -283,23 +283,10 @@ def compute_answers():
|
|
283 |
log_print(error_msg, "ERROR")
|
284 |
return jsonify({"error": error_msg}), 400
|
285 |
|
286 |
-
# Read and validate query file
|
287 |
-
try:
|
288 |
-
queries = query_file.read().decode('utf-8').splitlines()
|
289 |
-
queries = [q.strip() for q in queries if q.strip()]
|
290 |
-
if not queries:
|
291 |
-
error_msg = "Query file is empty"
|
292 |
-
log_print(error_msg, "ERROR")
|
293 |
-
return jsonify({"error": error_msg}), 400
|
294 |
-
log_print(f"Successfully read {len(queries)} queries")
|
295 |
-
except Exception as e:
|
296 |
-
error_msg = f"Error reading query file: {str(e)}"
|
297 |
-
log_print(error_msg, "ERROR")
|
298 |
-
return jsonify({"error": error_msg}), 400
|
299 |
-
|
300 |
file_type = request.form.get('file_type')
|
301 |
log_print(f"Processing file type: {file_type}")
|
302 |
|
|
|
303 |
if file_type == "csv":
|
304 |
ans_csv_file = request.files.get('ans_csv_file')
|
305 |
if not ans_csv_file:
|
@@ -308,7 +295,7 @@ def compute_answers():
|
|
308 |
return jsonify({"error": error_msg}), 400
|
309 |
|
310 |
try:
|
311 |
-
# Read and process CSV answers
|
312 |
log_print("Reading CSV answer file...")
|
313 |
ans_csv_content = ans_csv_file.read().decode('utf-8').splitlines()
|
314 |
c_answers = []
|
@@ -330,7 +317,11 @@ def compute_answers():
|
|
330 |
log_print(error_msg, "ERROR")
|
331 |
return jsonify({"error": error_msg}), 400
|
332 |
|
|
|
333 |
elif file_type == 'pdf':
|
|
|
|
|
|
|
334 |
# Create a temporary directory for PDF files
|
335 |
pdf_dir = tempfile.mkdtemp()
|
336 |
os.makedirs(pdf_dir, exist_ok=True)
|
@@ -354,6 +345,14 @@ def compute_answers():
|
|
354 |
database_creation(pdf_file)
|
355 |
|
356 |
# Generate answers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
357 |
for query in queries:
|
358 |
ans = []
|
359 |
for pdf_file in pdf_files:
|
|
|
283 |
log_print(error_msg, "ERROR")
|
284 |
return jsonify({"error": error_msg}), 400
|
285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
file_type = request.form.get('file_type')
|
287 |
log_print(f"Processing file type: {file_type}")
|
288 |
|
289 |
+
# For CSV files, skip model initialization
|
290 |
if file_type == "csv":
|
291 |
ans_csv_file = request.files.get('ans_csv_file')
|
292 |
if not ans_csv_file:
|
|
|
295 |
return jsonify({"error": error_msg}), 400
|
296 |
|
297 |
try:
|
298 |
+
# Read and process CSV answers directly without loading models
|
299 |
log_print("Reading CSV answer file...")
|
300 |
ans_csv_content = ans_csv_file.read().decode('utf-8').splitlines()
|
301 |
c_answers = []
|
|
|
317 |
log_print(error_msg, "ERROR")
|
318 |
return jsonify({"error": error_msg}), 400
|
319 |
|
320 |
+
# Only initialize models for PDF processing
|
321 |
elif file_type == 'pdf':
|
322 |
+
# Wait for initialization only for PDF files
|
323 |
+
wait_for_initialization()
|
324 |
+
|
325 |
# Create a temporary directory for PDF files
|
326 |
pdf_dir = tempfile.mkdtemp()
|
327 |
os.makedirs(pdf_dir, exist_ok=True)
|
|
|
345 |
database_creation(pdf_file)
|
346 |
|
347 |
# Generate answers
|
348 |
+
queries = query_file.read().decode('utf-8').splitlines()
|
349 |
+
queries = [q.strip() for q in queries if q.strip()]
|
350 |
+
if not queries:
|
351 |
+
error_msg = "Query file is empty"
|
352 |
+
log_print(error_msg, "ERROR")
|
353 |
+
return jsonify({"error": error_msg}), 400
|
354 |
+
|
355 |
+
c_answers = []
|
356 |
for query in queries:
|
357 |
ans = []
|
358 |
for pdf_file in pdf_files:
|