yamanavijayavardhan committed on
Commit
8331040
·
1 Parent(s): d8359af

printing extracted text8

Browse files
Files changed (2) hide show
  1. main.py +81 -65
  2. templates/2.html +19 -11
main.py CHANGED
@@ -280,87 +280,103 @@ def compute_answers():
280
  query_file = request.files.get('query_file')
281
  if not query_file:
282
  error_msg = "Missing query file"
283
- notification_queue.put({
284
- "type": "error",
285
- "message": error_msg
286
- })
287
  return jsonify({"error": error_msg}), 400
288
 
289
- queries = query_file.read().decode('utf-8').splitlines()
290
- logger.info(f"Received queries: {queries}")
 
 
 
 
 
 
 
 
 
 
 
291
 
292
  file_type = request.form.get('file_type')
293
- ans_csv_file = request.files.get('ans_csv_file')
294
 
295
  if file_type == "csv":
296
- ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
297
- c_answers = []
298
- for i in ans_csv_file:
299
- c_answers.append(i.split('\\n'))
300
- logger.info(f"Processed CSV answers: {c_answers}")
301
- return jsonify({"answers": c_answers}), 200
302
-
303
- try:
304
- c_answers = []
305
 
306
- if file_type == 'pdf':
307
- # Create a temporary directory for PDF files
308
- pdf_dir = tempfile.mkdtemp()
309
- os.makedirs(pdf_dir, exist_ok=True)
310
-
311
- # Save uploaded PDF files
312
- pdf_files = []
313
- for file in request.files.getlist('pdf_files[]'):
314
- if file.filename.endswith('.pdf'):
315
- filename = secure_filename(file.filename)
316
- filepath = os.path.join(pdf_dir, filename)
317
- file.save(filepath)
318
- pdf_files.append(filepath)
319
 
320
- if not pdf_files:
321
- return jsonify({"error": "No PDF files uploaded"}), 400
 
 
322
 
323
- logger.info(f"Processing {len(pdf_files)} PDF files")
 
324
 
325
- # Process PDFs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  for pdf_file in pdf_files:
327
- database_creation(pdf_file)
328
-
329
- # Generate answers
330
- for query in queries:
331
- ans = []
332
- for pdf_file in pdf_files:
333
- ans.append(answer_generation(pdf_file, query))
334
- c_answers.append(ans)
335
-
336
- # Clean up PDF directory
337
- try:
338
- shutil.rmtree(pdf_dir)
339
- except Exception as e:
340
- logger.warning(f"Could not clean up PDF directory: {e}")
341
-
342
- else:
343
- return jsonify({"error": "Unsupported file type"}), 400
344
-
345
  logger.info(f"Generated answers: {c_answers}")
346
  return jsonify({"answers": c_answers}), 200
347
 
348
- except Exception as e:
349
- logger.error(f"Error processing files: {str(e)}")
350
- error_msg = str(e)
351
- notification_queue.put({
352
- "type": "error",
353
- "message": error_msg
354
- })
355
- return jsonify({"error": error_msg}), 500
356
 
357
  except Exception as e:
358
- logger.error(f"Error in compute_answers: {str(e)}")
359
- error_msg = str(e)
360
- notification_queue.put({
361
- "type": "error",
362
- "message": error_msg
363
- })
364
  return jsonify({"error": error_msg}), 500
365
 
366
  def validate_folder_structure(files):
 
280
  query_file = request.files.get('query_file')
281
  if not query_file:
282
  error_msg = "Missing query file"
283
+ log_print(error_msg, "ERROR")
 
 
 
284
  return jsonify({"error": error_msg}), 400
285
 
286
+ # Read and validate query file
287
+ try:
288
+ queries = query_file.read().decode('utf-8').splitlines()
289
+ queries = [q.strip() for q in queries if q.strip()]
290
+ if not queries:
291
+ error_msg = "Query file is empty"
292
+ log_print(error_msg, "ERROR")
293
+ return jsonify({"error": error_msg}), 400
294
+ log_print(f"Successfully read {len(queries)} queries")
295
+ except Exception as e:
296
+ error_msg = f"Error reading query file: {str(e)}"
297
+ log_print(error_msg, "ERROR")
298
+ return jsonify({"error": error_msg}), 400
299
 
300
  file_type = request.form.get('file_type')
301
+ log_print(f"Processing file type: {file_type}")
302
 
303
  if file_type == "csv":
304
+ ans_csv_file = request.files.get('ans_csv_file')
305
+ if not ans_csv_file:
306
+ error_msg = "Missing answer CSV file"
307
+ log_print(error_msg, "ERROR")
308
+ return jsonify({"error": error_msg}), 400
 
 
 
 
309
 
310
+ try:
311
+ # Read and process CSV answers
312
+ log_print("Reading CSV answer file...")
313
+ ans_csv_content = ans_csv_file.read().decode('utf-8').splitlines()
314
+ c_answers = []
315
+ for line in ans_csv_content:
316
+ if line.strip(): # Skip empty lines
317
+ answers = [ans.strip() for ans in line.split('\\n') if ans.strip()]
318
+ c_answers.append(answers)
 
 
 
 
319
 
320
+ if not c_answers:
321
+ error_msg = "No valid answers found in CSV file"
322
+ log_print(error_msg, "ERROR")
323
+ return jsonify({"error": error_msg}), 400
324
 
325
+ log_print(f"Successfully processed {len(c_answers)} answers from CSV")
326
+ return jsonify({"answers": c_answers}), 200
327
 
328
+ except Exception as e:
329
+ error_msg = f"Error processing CSV file: {str(e)}"
330
+ log_print(error_msg, "ERROR")
331
+ return jsonify({"error": error_msg}), 400
332
+
333
+ elif file_type == 'pdf':
334
+ # Create a temporary directory for PDF files
335
+ pdf_dir = tempfile.mkdtemp()
336
+ os.makedirs(pdf_dir, exist_ok=True)
337
+
338
+ # Save uploaded PDF files
339
+ pdf_files = []
340
+ for file in request.files.getlist('pdf_files[]'):
341
+ if file.filename.endswith('.pdf'):
342
+ filename = secure_filename(file.filename)
343
+ filepath = os.path.join(pdf_dir, filename)
344
+ file.save(filepath)
345
+ pdf_files.append(filepath)
346
+
347
+ if not pdf_files:
348
+ return jsonify({"error": "No PDF files uploaded"}), 400
349
+
350
+ logger.info(f"Processing {len(pdf_files)} PDF files")
351
+
352
+ # Process PDFs
353
+ for pdf_file in pdf_files:
354
+ database_creation(pdf_file)
355
+
356
+ # Generate answers
357
+ for query in queries:
358
+ ans = []
359
  for pdf_file in pdf_files:
360
+ ans.append(answer_generation(pdf_file, query))
361
+ c_answers.append(ans)
362
+
363
+ # Clean up PDF directory
364
+ try:
365
+ shutil.rmtree(pdf_dir)
366
+ except Exception as e:
367
+ logger.warning(f"Could not clean up PDF directory: {e}")
368
+
 
 
 
 
 
 
 
 
 
369
  logger.info(f"Generated answers: {c_answers}")
370
  return jsonify({"answers": c_answers}), 200
371
 
372
+ else:
373
+ error_msg = "Unsupported file type"
374
+ log_print(error_msg, "ERROR")
375
+ return jsonify({"error": error_msg}), 400
 
 
 
 
376
 
377
  except Exception as e:
378
+ error_msg = f"Error in compute_answers: {str(e)}"
379
+ log_print(error_msg, "ERROR")
 
 
 
 
380
  return jsonify({"error": error_msg}), 500
381
 
382
  def validate_folder_structure(files):
templates/2.html CHANGED
@@ -1052,46 +1052,49 @@
1052
 
1053
  async function computeAnswers() {
1054
  try {
1055
- showLoading();
 
 
1056
  const fileType = document.getElementById('file-type').value;
1057
  const queryfile = document.getElementById('query-file').files[0];
1058
- const anscsvFile = document.getElementById('csv-file').files[0];
1059
- const pdfFiles = document.getElementById('pdf-files').files;
1060
-
1061
  if (!queryfile) {
 
1062
  notificationSystem.error("Please upload a query file first!");
1063
- hideLoading();
1064
  return;
1065
  }
1066
 
1067
- notificationSystem.info("Processing files...");
1068
  const formData = new FormData();
1069
  formData.append('file_type', fileType);
1070
  formData.append('query_file', queryfile);
1071
 
1072
  if (fileType === 'csv') {
 
1073
  if (!anscsvFile) {
 
1074
  notificationSystem.error("Please upload a CSV file for answers!");
1075
- hideLoading();
1076
  return;
1077
  }
1078
  formData.append('ans_csv_file', anscsvFile);
1079
- notificationSystem.info("Processing CSV file...");
1080
  } else if (fileType === 'pdf') {
 
1081
  if (!pdfFiles || pdfFiles.length < 2) {
 
1082
  notificationSystem.error("Please upload at least 2 PDF files!");
1083
- hideLoading();
1084
  return;
1085
  }
1086
  for (let file of pdfFiles) {
1087
  formData.append('pdf_files[]', file);
1088
  }
1089
- notificationSystem.info(`Processing ${pdfFiles.length} PDF files...`);
1090
  }
1091
 
1092
  const computeBtn = document.getElementById('compute-btn');
1093
  computeBtn.disabled = true;
1094
 
 
1095
  const response = await fetch('/compute_answers', {
1096
  method: 'POST',
1097
  body: formData
@@ -1107,12 +1110,15 @@
1107
  }
1108
 
1109
  if (result.answers) {
 
 
1110
  // Check for empty answers
1111
  const emptyAnswers = result.answers.filter(answer =>
1112
  !answer || (Array.isArray(answer) && answer.every(a => !a || a.trim() === ''))
1113
  );
1114
 
1115
  if (emptyAnswers.length > 0) {
 
1116
  notificationSystem.warning(`Warning: ${emptyAnswers.length} empty answer(s) detected. Please check your input files.`);
1117
  }
1118
 
@@ -1133,17 +1139,19 @@
1133
  : [firstValidAnswer]
1134
  });
1135
  } else {
 
1136
  notificationSystem.warning("No valid answers found in the input files.");
1137
  }
1138
 
1139
  displayAnswers(result.answers);
 
1140
  notificationSystem.success("Successfully generated answers!");
1141
  } else {
1142
  throw new Error('No answers received from server');
1143
  }
1144
 
1145
  } catch (error) {
1146
- console.error('Error:', error);
1147
  notificationSystem.error('Error: ' + error.message);
1148
  } finally {
1149
  hideLoading();
 
1052
 
1053
  async function computeAnswers() {
1054
  try {
1055
+ showLogModal();
1056
+ addLogMessage("Starting answer computation...", "info");
1057
+
1058
  const fileType = document.getElementById('file-type').value;
1059
  const queryfile = document.getElementById('query-file').files[0];
1060
+
 
 
1061
  if (!queryfile) {
1062
+ addLogMessage("Error: Please upload a query file first!", "error");
1063
  notificationSystem.error("Please upload a query file first!");
 
1064
  return;
1065
  }
1066
 
1067
+ addLogMessage("Processing files...", "info");
1068
  const formData = new FormData();
1069
  formData.append('file_type', fileType);
1070
  formData.append('query_file', queryfile);
1071
 
1072
  if (fileType === 'csv') {
1073
+ const anscsvFile = document.getElementById('csv-file').files[0];
1074
  if (!anscsvFile) {
1075
+ addLogMessage("Error: Please upload a CSV file for answers!", "error");
1076
  notificationSystem.error("Please upload a CSV file for answers!");
 
1077
  return;
1078
  }
1079
  formData.append('ans_csv_file', anscsvFile);
1080
+ addLogMessage("Processing CSV file...", "info");
1081
  } else if (fileType === 'pdf') {
1082
+ const pdfFiles = document.getElementById('pdf-files').files;
1083
  if (!pdfFiles || pdfFiles.length < 2) {
1084
+ addLogMessage("Error: Please upload at least 2 PDF files!", "error");
1085
  notificationSystem.error("Please upload at least 2 PDF files!");
 
1086
  return;
1087
  }
1088
  for (let file of pdfFiles) {
1089
  formData.append('pdf_files[]', file);
1090
  }
1091
+ addLogMessage(`Processing ${pdfFiles.length} PDF files...`, "info");
1092
  }
1093
 
1094
  const computeBtn = document.getElementById('compute-btn');
1095
  computeBtn.disabled = true;
1096
 
1097
+ addLogMessage("Sending request to server...", "info");
1098
  const response = await fetch('/compute_answers', {
1099
  method: 'POST',
1100
  body: formData
 
1110
  }
1111
 
1112
  if (result.answers) {
1113
+ addLogMessage("Successfully received answers from server", "success");
1114
+
1115
  // Check for empty answers
1116
  const emptyAnswers = result.answers.filter(answer =>
1117
  !answer || (Array.isArray(answer) && answer.every(a => !a || a.trim() === ''))
1118
  );
1119
 
1120
  if (emptyAnswers.length > 0) {
1121
+ addLogMessage(`Warning: ${emptyAnswers.length} empty answer(s) detected`, "warning");
1122
  notificationSystem.warning(`Warning: ${emptyAnswers.length} empty answer(s) detected. Please check your input files.`);
1123
  }
1124
 
 
1139
  : [firstValidAnswer]
1140
  });
1141
  } else {
1142
+ addLogMessage("Warning: No valid answers found in the input files", "warning");
1143
  notificationSystem.warning("No valid answers found in the input files.");
1144
  }
1145
 
1146
  displayAnswers(result.answers);
1147
+ addLogMessage("Successfully displayed answers!", "success");
1148
  notificationSystem.success("Successfully generated answers!");
1149
  } else {
1150
  throw new Error('No answers received from server');
1151
  }
1152
 
1153
  } catch (error) {
1154
+ addLogMessage(`Error: ${error.message}`, "error");
1155
  notificationSystem.error('Error: ' + error.message);
1156
  } finally {
1157
  hideLoading();