File size: 2,260 Bytes
b7484d7 58a1c58 a5177da b7484d7 8bed2ce f908316 b7484d7 2d019a6 b7484d7 eeebb29 b7484d7 071a451 b7484d7 071a451 b7484d7 eeebb29 c01b75e eeebb29 c01b75e 01f0b80 eeebb29 b7484d7 2d019a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
import uuid
import json
from flask import Flask, request, jsonify, Response
import pytesseract
from pdf2image import convert_from_bytes
from flask_cors import CORS
from lib import ocr_2 as ocr
from lib import llm_3_deepinfra as llm
# Tell Tesseract where its language data files are installed.
os.environ.update(TESSDATA_PREFIX='/usr/share/tesseract-ocr/5/tessdata')

# Flask application object, passed through flask_cors' CORS with its defaults.
app = Flask(__name__)
CORS(app)

# Directory that uploaded PDFs are written to while they are being processed.
UPLOAD_FOLDER = './tmp'
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
# Endpoint for uploading a PDF and running OCR on it
@app.route('/recognize', methods=['POST'])
def upload_file():
    """Accept a PDF upload, run OCR on it and return the result as JSON.

    The PDF is read from the ``file`` field of the request's uploaded files.
    It is written to ``app.config['UPLOAD_FOLDER']`` under a random
    UUID-based name, passed to ``ocr.processSingleFile`` and deleted again
    afterwards, whether processing succeeded or raised.

    Returns:
        A JSON response containing whatever ``ocr.processSingleFile``
        returned (key order preserved, non-ASCII characters not escaped),
        or a ``{"error": ...}`` JSON object when the request does not carry
        a usable PDF.
    """
    # NOTE(review): the error responses below are sent without an explicit
    # status code; this is kept as-is so existing clients are unaffected.

    # Check if the post request has the file part
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'})

    file = request.files['file']

    # Reject uploads that arrive with an empty filename
    if file.filename == '':
        return jsonify({'error': 'No selected file'})

    # Compare the extension case-insensitively so "SCAN.PDF" is accepted too
    if not file or not file.filename.lower().endswith('.pdf'):
        return jsonify({'error': 'File must be a PDF'})

    upload_dir = app.config['UPLOAD_FOLDER']
    # Saving into a directory that does not exist fails, so create it on demand
    os.makedirs(upload_dir, exist_ok=True)

    # Store the upload under a random name rather than the client-supplied one
    temp_path = os.path.join(upload_dir, str(uuid.uuid4()) + '.pdf')
    try:
        file.save(temp_path)
        docs_info = ocr.processSingleFile(temp_path)
    finally:
        # Always remove the temporary file, even when saving or OCR raised
        try:
            os.remove(temp_path)
        except FileNotFoundError:
            # Nothing was written (e.g. the save itself failed early)
            pass

    return Response(
        json.dumps(docs_info, sort_keys=False, ensure_ascii=False),
        content_type='application/json; charset=utf-8',
    )
# Endpoint for analyzing submitted text with the LLM helper
@app.route('/analize', methods=['POST'])
async def analize():
    """Pass the ``text`` field of the JSON request body to the LLM helper.

    The request body must be a JSON object with a ``text`` key. Its value is
    handed to ``llm.getApplicationInfo`` and the awaited result is returned
    under the ``application`` key, next to an empty ``debug`` object.

    Returns:
        A JSON response ``{"application": ..., "debug": {}}`` (key order
        preserved, non-ASCII characters not escaped), or a 400 response with
        a ``{"error": ...}`` JSON object when the body is not a JSON object
        or has no ``text`` value.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so every malformed request ends up in the same error branch below
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict) or payload.get('text') is None:
        return jsonify({'error': 'Request body must be a JSON object with a "text" field'}), 400

    # Get the text data from the request
    text_data = payload['text']
    app_info = await llm.getApplicationInfo(text_data)

    result = {
        "application": app_info,
        "debug": {},
    }
    return Response(
        json.dumps(result, sort_keys=False, ensure_ascii=False),
        content_type='application/json; charset=utf-8',
    )
# Script entry point: start the Flask server with debug mode explicitly off.
if __name__ == "__main__":
    app.run(debug=False)
|