|
import os |
|
import uuid |
|
import json |
|
from flask import Flask, request, jsonify, Response |
|
import pytesseract |
|
from pdf2image import convert_from_bytes |
|
from flask_cors import CORS |
|
from lib import ocr_2 as ocr |
|
from lib import llm_3_deepinfra as llm |
|
|
|
# Tell Tesseract where its trained language data lives (system install path).
os.environ.update(TESSDATA_PREFIX='/usr/share/tesseract-ocr/5/tessdata')

# Directory where uploaded PDFs are stored while they are being processed.
UPLOAD_FOLDER = './tmp'

# Flask application with cross-origin requests enabled via flask_cors.
app = Flask(__name__)
CORS(app)
app.config.update(UPLOAD_FOLDER=UPLOAD_FOLDER)
|
|
|
|
|
@app.route('/recognize', methods=['POST'])
def upload_file():
    """Run OCR on an uploaded PDF and return the result as JSON.

    Expects a multipart upload with the document in the ``file`` field.
    The upload is written to ``app.config['UPLOAD_FOLDER']`` under a random
    UUID-based name, passed to ``ocr.processSingleFile`` and deleted
    afterwards, whether or not processing succeeded.

    Returns:
        A JSON response containing whatever ``ocr.processSingleFile``
        returned, or a JSON object with an ``error`` key when the ``file``
        field is missing, has an empty filename, or is not a ``.pdf`` file.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'})

    upload = request.files['file']
    original_name = upload.filename

    # An empty filename is sent when the form is submitted without
    # selecting a file.
    if original_name == '':
        return jsonify({'error': 'No selected file'})

    # Compare case-insensitively so that "SCAN.PDF" is accepted as well.
    if not original_name or not original_name.lower().endswith('.pdf'):
        return jsonify({'error': 'File must be a PDF'})

    upload_dir = app.config['UPLOAD_FOLDER']
    # The folder may not exist yet on a fresh deployment.
    os.makedirs(upload_dir, exist_ok=True)

    # Never reuse the client-supplied name on disk: a random name avoids
    # collisions between concurrent requests and path-traversal tricks.
    temp_path = os.path.join(upload_dir, str(uuid.uuid4()) + '.pdf')

    try:
        upload.save(temp_path)
        docs_info = ocr.processSingleFile(temp_path)
    finally:
        # Always clean up, even when saving or OCR raised, so failed
        # requests do not leave files behind in the upload folder.
        if os.path.exists(temp_path):
            os.remove(temp_path)

    # json.dumps is used directly (instead of jsonify) to keep key order and
    # emit non-ASCII characters unescaped.
    return Response(
        json.dumps(docs_info, sort_keys=False, ensure_ascii=False),
        content_type='application/json; charset=utf-8',
    )
|
|
|
|
|
@app.route('/analize', methods=['POST'])
async def analize():
    """Extract application info from recognized text via the LLM helper.

    Reads the ``text`` field of the JSON request body, awaits
    ``llm.getApplicationInfo`` on it, and returns the result under the
    ``application`` key together with an empty ``debug`` object.
    """
    payload = request.json
    extracted = await llm.getApplicationInfo(payload.get('text'))

    # Serialized by hand to keep key order and unescaped non-ASCII output.
    body = json.dumps(
        {"application": extracted, "debug": {}},
        sort_keys=False,
        ensure_ascii=False,
    )
    return Response(body, content_type='application/json; charset=utf-8')
|
|
|
if __name__ == "__main__":
    # Script entry point: serve the app with Flask's built-in server,
    # with debug mode explicitly turned off.
    app.run(debug=False)
|
|