Commit
·
8331040
1
Parent(s):
d8359af
printing extracted text8
Browse files- main.py +81 -65
- templates/2.html +19 -11
main.py
CHANGED
@@ -280,87 +280,103 @@ def compute_answers():
|
|
280 |
query_file = request.files.get('query_file')
|
281 |
if not query_file:
|
282 |
error_msg = "Missing query file"
|
283 |
-
|
284 |
-
"type": "error",
|
285 |
-
"message": error_msg
|
286 |
-
})
|
287 |
return jsonify({"error": error_msg}), 400
|
288 |
|
289 |
-
|
290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
291 |
|
292 |
file_type = request.form.get('file_type')
|
293 |
-
|
294 |
|
295 |
if file_type == "csv":
|
296 |
-
ans_csv_file =
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
return jsonify({"answers": c_answers}), 200
|
302 |
-
|
303 |
-
try:
|
304 |
-
c_answers = []
|
305 |
|
306 |
-
|
307 |
-
#
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
filename = secure_filename(file.filename)
|
316 |
-
filepath = os.path.join(pdf_dir, filename)
|
317 |
-
file.save(filepath)
|
318 |
-
pdf_files.append(filepath)
|
319 |
|
320 |
-
if not
|
321 |
-
|
|
|
|
|
322 |
|
323 |
-
|
|
|
324 |
|
325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
for pdf_file in pdf_files:
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
# Clean up PDF directory
|
337 |
-
try:
|
338 |
-
shutil.rmtree(pdf_dir)
|
339 |
-
except Exception as e:
|
340 |
-
logger.warning(f"Could not clean up PDF directory: {e}")
|
341 |
-
|
342 |
-
else:
|
343 |
-
return jsonify({"error": "Unsupported file type"}), 400
|
344 |
-
|
345 |
logger.info(f"Generated answers: {c_answers}")
|
346 |
return jsonify({"answers": c_answers}), 200
|
347 |
|
348 |
-
|
349 |
-
|
350 |
-
error_msg
|
351 |
-
|
352 |
-
"type": "error",
|
353 |
-
"message": error_msg
|
354 |
-
})
|
355 |
-
return jsonify({"error": error_msg}), 500
|
356 |
|
357 |
except Exception as e:
|
358 |
-
|
359 |
-
error_msg
|
360 |
-
notification_queue.put({
|
361 |
-
"type": "error",
|
362 |
-
"message": error_msg
|
363 |
-
})
|
364 |
return jsonify({"error": error_msg}), 500
|
365 |
|
366 |
def validate_folder_structure(files):
|
|
|
280 |
query_file = request.files.get('query_file')
|
281 |
if not query_file:
|
282 |
error_msg = "Missing query file"
|
283 |
+
log_print(error_msg, "ERROR")
|
|
|
|
|
|
|
284 |
return jsonify({"error": error_msg}), 400
|
285 |
|
286 |
+
# Read and validate query file
|
287 |
+
try:
|
288 |
+
queries = query_file.read().decode('utf-8').splitlines()
|
289 |
+
queries = [q.strip() for q in queries if q.strip()]
|
290 |
+
if not queries:
|
291 |
+
error_msg = "Query file is empty"
|
292 |
+
log_print(error_msg, "ERROR")
|
293 |
+
return jsonify({"error": error_msg}), 400
|
294 |
+
log_print(f"Successfully read {len(queries)} queries")
|
295 |
+
except Exception as e:
|
296 |
+
error_msg = f"Error reading query file: {str(e)}"
|
297 |
+
log_print(error_msg, "ERROR")
|
298 |
+
return jsonify({"error": error_msg}), 400
|
299 |
|
300 |
file_type = request.form.get('file_type')
|
301 |
+
log_print(f"Processing file type: {file_type}")
|
302 |
|
303 |
if file_type == "csv":
|
304 |
+
ans_csv_file = request.files.get('ans_csv_file')
|
305 |
+
if not ans_csv_file:
|
306 |
+
error_msg = "Missing answer CSV file"
|
307 |
+
log_print(error_msg, "ERROR")
|
308 |
+
return jsonify({"error": error_msg}), 400
|
|
|
|
|
|
|
|
|
309 |
|
310 |
+
try:
|
311 |
+
# Read and process CSV answers
|
312 |
+
log_print("Reading CSV answer file...")
|
313 |
+
ans_csv_content = ans_csv_file.read().decode('utf-8').splitlines()
|
314 |
+
c_answers = []
|
315 |
+
for line in ans_csv_content:
|
316 |
+
if line.strip(): # Skip empty lines
|
317 |
+
answers = [ans.strip() for ans in line.split('\\n') if ans.strip()]
|
318 |
+
c_answers.append(answers)
|
|
|
|
|
|
|
|
|
319 |
|
320 |
+
if not c_answers:
|
321 |
+
error_msg = "No valid answers found in CSV file"
|
322 |
+
log_print(error_msg, "ERROR")
|
323 |
+
return jsonify({"error": error_msg}), 400
|
324 |
|
325 |
+
log_print(f"Successfully processed {len(c_answers)} answers from CSV")
|
326 |
+
return jsonify({"answers": c_answers}), 200
|
327 |
|
328 |
+
except Exception as e:
|
329 |
+
error_msg = f"Error processing CSV file: {str(e)}"
|
330 |
+
log_print(error_msg, "ERROR")
|
331 |
+
return jsonify({"error": error_msg}), 400
|
332 |
+
|
333 |
+
elif file_type == 'pdf':
|
334 |
+
# Create a temporary directory for PDF files
|
335 |
+
pdf_dir = tempfile.mkdtemp()
|
336 |
+
os.makedirs(pdf_dir, exist_ok=True)
|
337 |
+
|
338 |
+
# Save uploaded PDF files
|
339 |
+
pdf_files = []
|
340 |
+
for file in request.files.getlist('pdf_files[]'):
|
341 |
+
if file.filename.endswith('.pdf'):
|
342 |
+
filename = secure_filename(file.filename)
|
343 |
+
filepath = os.path.join(pdf_dir, filename)
|
344 |
+
file.save(filepath)
|
345 |
+
pdf_files.append(filepath)
|
346 |
+
|
347 |
+
if not pdf_files:
|
348 |
+
return jsonify({"error": "No PDF files uploaded"}), 400
|
349 |
+
|
350 |
+
logger.info(f"Processing {len(pdf_files)} PDF files")
|
351 |
+
|
352 |
+
# Process PDFs
|
353 |
+
for pdf_file in pdf_files:
|
354 |
+
database_creation(pdf_file)
|
355 |
+
|
356 |
+
# Generate answers
|
357 |
+
for query in queries:
|
358 |
+
ans = []
|
359 |
for pdf_file in pdf_files:
|
360 |
+
ans.append(answer_generation(pdf_file, query))
|
361 |
+
c_answers.append(ans)
|
362 |
+
|
363 |
+
# Clean up PDF directory
|
364 |
+
try:
|
365 |
+
shutil.rmtree(pdf_dir)
|
366 |
+
except Exception as e:
|
367 |
+
logger.warning(f"Could not clean up PDF directory: {e}")
|
368 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
logger.info(f"Generated answers: {c_answers}")
|
370 |
return jsonify({"answers": c_answers}), 200
|
371 |
|
372 |
+
else:
|
373 |
+
error_msg = "Unsupported file type"
|
374 |
+
log_print(error_msg, "ERROR")
|
375 |
+
return jsonify({"error": error_msg}), 400
|
|
|
|
|
|
|
|
|
376 |
|
377 |
except Exception as e:
|
378 |
+
error_msg = f"Error in compute_answers: {str(e)}"
|
379 |
+
log_print(error_msg, "ERROR")
|
|
|
|
|
|
|
|
|
380 |
return jsonify({"error": error_msg}), 500
|
381 |
|
382 |
def validate_folder_structure(files):
|
templates/2.html
CHANGED
@@ -1052,46 +1052,49 @@
|
|
1052 |
|
1053 |
async function computeAnswers() {
|
1054 |
try {
|
1055 |
-
|
|
|
|
|
1056 |
const fileType = document.getElementById('file-type').value;
|
1057 |
const queryfile = document.getElementById('query-file').files[0];
|
1058 |
-
|
1059 |
-
const pdfFiles = document.getElementById('pdf-files').files;
|
1060 |
-
|
1061 |
if (!queryfile) {
|
|
|
1062 |
notificationSystem.error("Please upload a query file first!");
|
1063 |
-
hideLoading();
|
1064 |
return;
|
1065 |
}
|
1066 |
|
1067 |
-
|
1068 |
const formData = new FormData();
|
1069 |
formData.append('file_type', fileType);
|
1070 |
formData.append('query_file', queryfile);
|
1071 |
|
1072 |
if (fileType === 'csv') {
|
|
|
1073 |
if (!anscsvFile) {
|
|
|
1074 |
notificationSystem.error("Please upload a CSV file for answers!");
|
1075 |
-
hideLoading();
|
1076 |
return;
|
1077 |
}
|
1078 |
formData.append('ans_csv_file', anscsvFile);
|
1079 |
-
|
1080 |
} else if (fileType === 'pdf') {
|
|
|
1081 |
if (!pdfFiles || pdfFiles.length < 2) {
|
|
|
1082 |
notificationSystem.error("Please upload at least 2 PDF files!");
|
1083 |
-
hideLoading();
|
1084 |
return;
|
1085 |
}
|
1086 |
for (let file of pdfFiles) {
|
1087 |
formData.append('pdf_files[]', file);
|
1088 |
}
|
1089 |
-
|
1090 |
}
|
1091 |
|
1092 |
const computeBtn = document.getElementById('compute-btn');
|
1093 |
computeBtn.disabled = true;
|
1094 |
|
|
|
1095 |
const response = await fetch('/compute_answers', {
|
1096 |
method: 'POST',
|
1097 |
body: formData
|
@@ -1107,12 +1110,15 @@
|
|
1107 |
}
|
1108 |
|
1109 |
if (result.answers) {
|
|
|
|
|
1110 |
// Check for empty answers
|
1111 |
const emptyAnswers = result.answers.filter(answer =>
|
1112 |
!answer || (Array.isArray(answer) && answer.every(a => !a || a.trim() === ''))
|
1113 |
);
|
1114 |
|
1115 |
if (emptyAnswers.length > 0) {
|
|
|
1116 |
notificationSystem.warning(`Warning: ${emptyAnswers.length} empty answer(s) detected. Please check your input files.`);
|
1117 |
}
|
1118 |
|
@@ -1133,17 +1139,19 @@
|
|
1133 |
: [firstValidAnswer]
|
1134 |
});
|
1135 |
} else {
|
|
|
1136 |
notificationSystem.warning("No valid answers found in the input files.");
|
1137 |
}
|
1138 |
|
1139 |
displayAnswers(result.answers);
|
|
|
1140 |
notificationSystem.success("Successfully generated answers!");
|
1141 |
} else {
|
1142 |
throw new Error('No answers received from server');
|
1143 |
}
|
1144 |
|
1145 |
} catch (error) {
|
1146 |
-
|
1147 |
notificationSystem.error('Error: ' + error.message);
|
1148 |
} finally {
|
1149 |
hideLoading();
|
|
|
1052 |
|
1053 |
async function computeAnswers() {
|
1054 |
try {
|
1055 |
+
showLogModal();
|
1056 |
+
addLogMessage("Starting answer computation...", "info");
|
1057 |
+
|
1058 |
const fileType = document.getElementById('file-type').value;
|
1059 |
const queryfile = document.getElementById('query-file').files[0];
|
1060 |
+
|
|
|
|
|
1061 |
if (!queryfile) {
|
1062 |
+
addLogMessage("Error: Please upload a query file first!", "error");
|
1063 |
notificationSystem.error("Please upload a query file first!");
|
|
|
1064 |
return;
|
1065 |
}
|
1066 |
|
1067 |
+
addLogMessage("Processing files...", "info");
|
1068 |
const formData = new FormData();
|
1069 |
formData.append('file_type', fileType);
|
1070 |
formData.append('query_file', queryfile);
|
1071 |
|
1072 |
if (fileType === 'csv') {
|
1073 |
+
const anscsvFile = document.getElementById('csv-file').files[0];
|
1074 |
if (!anscsvFile) {
|
1075 |
+
addLogMessage("Error: Please upload a CSV file for answers!", "error");
|
1076 |
notificationSystem.error("Please upload a CSV file for answers!");
|
|
|
1077 |
return;
|
1078 |
}
|
1079 |
formData.append('ans_csv_file', anscsvFile);
|
1080 |
+
addLogMessage("Processing CSV file...", "info");
|
1081 |
} else if (fileType === 'pdf') {
|
1082 |
+
const pdfFiles = document.getElementById('pdf-files').files;
|
1083 |
if (!pdfFiles || pdfFiles.length < 2) {
|
1084 |
+
addLogMessage("Error: Please upload at least 2 PDF files!", "error");
|
1085 |
notificationSystem.error("Please upload at least 2 PDF files!");
|
|
|
1086 |
return;
|
1087 |
}
|
1088 |
for (let file of pdfFiles) {
|
1089 |
formData.append('pdf_files[]', file);
|
1090 |
}
|
1091 |
+
addLogMessage(`Processing ${pdfFiles.length} PDF files...`, "info");
|
1092 |
}
|
1093 |
|
1094 |
const computeBtn = document.getElementById('compute-btn');
|
1095 |
computeBtn.disabled = true;
|
1096 |
|
1097 |
+
addLogMessage("Sending request to server...", "info");
|
1098 |
const response = await fetch('/compute_answers', {
|
1099 |
method: 'POST',
|
1100 |
body: formData
|
|
|
1110 |
}
|
1111 |
|
1112 |
if (result.answers) {
|
1113 |
+
addLogMessage("Successfully received answers from server", "success");
|
1114 |
+
|
1115 |
// Check for empty answers
|
1116 |
const emptyAnswers = result.answers.filter(answer =>
|
1117 |
!answer || (Array.isArray(answer) && answer.every(a => !a || a.trim() === ''))
|
1118 |
);
|
1119 |
|
1120 |
if (emptyAnswers.length > 0) {
|
1121 |
+
addLogMessage(`Warning: ${emptyAnswers.length} empty answer(s) detected`, "warning");
|
1122 |
notificationSystem.warning(`Warning: ${emptyAnswers.length} empty answer(s) detected. Please check your input files.`);
|
1123 |
}
|
1124 |
|
|
|
1139 |
: [firstValidAnswer]
|
1140 |
});
|
1141 |
} else {
|
1142 |
+
addLogMessage("Warning: No valid answers found in the input files", "warning");
|
1143 |
notificationSystem.warning("No valid answers found in the input files.");
|
1144 |
}
|
1145 |
|
1146 |
displayAnswers(result.answers);
|
1147 |
+
addLogMessage("Successfully displayed answers!", "success");
|
1148 |
notificationSystem.success("Successfully generated answers!");
|
1149 |
} else {
|
1150 |
throw new Error('No answers received from server');
|
1151 |
}
|
1152 |
|
1153 |
} catch (error) {
|
1154 |
+
addLogMessage(`Error: ${error.message}`, "error");
|
1155 |
notificationSystem.error('Error: ' + error.message);
|
1156 |
} finally {
|
1157 |
hideLoading();
|