File size: 19,340 Bytes
8397f09
fca1742
8397f09
718633d
 
 
 
 
 
 
8397f09
 
 
718633d
 
 
 
8397f09
 
718633d
 
89a2809
fca1742
 
 
 
 
 
 
 
8397f09
 
 
 
 
 
 
 
fca1742
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89a2809
 
 
 
 
 
 
 
8397f09
 
 
 
 
 
 
89a2809
8397f09
 
 
 
fca1742
8397f09
89a2809
 
8397f09
 
 
89a2809
8397f09
 
 
 
fca1742
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fca1742
 
 
8397f09
fca1742
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fca1742
8397f09
fca1742
8397f09
fca1742
 
 
 
 
 
 
 
 
 
 
8397f09
fca1742
8397f09
 
fca1742
8397f09
fca1742
8397f09
fca1742
 
 
 
 
 
 
 
 
 
 
8397f09
fca1742
8397f09
 
 
 
 
 
fca1742
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89a2809
8397f09
89a2809
 
 
8397f09
89a2809
 
8397f09
 
89a2809
8397f09
 
 
89a2809
8397f09
 
 
 
 
 
 
 
 
 
89a2809
8397f09
89a2809
 
 
 
8397f09
 
 
 
 
 
89a2809
8397f09
 
 
 
 
fca1742
 
 
 
8397f09
fca1742
 
 
 
 
8397f09
fca1742
8397f09
 
 
 
 
 
fca1742
 
 
 
 
 
 
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
fca1742
 
8397f09
fca1742
8397f09
fca1742
8397f09
fca1742
8397f09
fca1742
 
8397f09
fca1742
 
 
 
 
 
 
8397f09
fca1742
8397f09
89a2809
fca1742
 
89a2809
fca1742
 
 
 
 
 
8397f09
 
fca1742
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89a2809
 
 
8397f09
89a2809
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89a2809
 
 
8397f09
89a2809
 
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
89a2809
8397f09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89a2809
8397f09
 
 
 
89a2809
 
 
fca1742
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8397f09
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
import os
from flask import Blueprint, request, jsonify, send_file
from werkzeug.utils import secure_filename
from ..utils.extract_text import extract_text_from_pdf
from ..utils.summarizer import generate_summary
from ..utils.clause_detector import detect_clauses
from ..database import save_document, delete_document, Document
from ..database import get_all_documents, get_document_by_id
from ..database import search_documents, save_question_answer, search_questions_answers
from ..nlp.qa import answer_question
from flask_jwt_extended import create_access_token, jwt_required, get_jwt_identity, exceptions as jwt_exceptions
from flask_jwt_extended.exceptions import JWTDecodeError as JWTError
from werkzeug.security import generate_password_hash, check_password_hash
from ..utils.error_handler import handle_errors
from ..utils.enhanced_legal_processor import EnhancedLegalProcessor
from ..utils.legal_domain_features import LegalDomainFeatures
from ..utils.context_understanding import ContextUnderstanding
import logging
import textract
from ..database import get_user_profile, update_user_profile, change_user_password
from ..database import SessionLocal, User
from sqlalchemy.exc import IntegrityError
from sqlalchemy import or_, Index
import io
from datetime import datetime, timedelta, timezone
from sqlalchemy import Column, Integer, String, Text, DateTime, LargeBinary, func
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import create_engine
from sqlalchemy.pool import NullPool

# Blueprint that every route in this module is registered on.
main = Blueprint("main", __name__)

# Module-level processor instances, constructed once when this module is imported.
# NOTE(review): enhanced_legal_processor is not referenced by any route visible
# in this module - confirm whether it is still needed.
enhanced_legal_processor = EnhancedLegalProcessor()
legal_domain_processor = LegalDomainFeatures()
context_processor = ContextUnderstanding()

# Uploaded files are passed to save_document() as raw bytes (file_data) rather
# than written to disk, so no UPLOAD_FOLDER / local file path is configured here.

# Extensions accepted by allowed_file().
# NOTE(review): upload_file() itself only accepts names ending in ".pdf".
ALLOWED_EXTENSIONS = {'pdf', 'doc', 'docx'}

def allowed_file(filename):
    """Tell whether *filename* carries an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last '.', compared case-insensitively.
    A name without any '.' is never allowed.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[-1]
    return extension.lower() in ALLOWED_EXTENSIONS

def extract_text_from_file(file_path):
    """Extract plain text from a PDF, DOC or DOCX file.

    The file type is taken from the text after the last '.' in *file_path*
    (case-insensitive).

    Args:
        file_path: Path string of the file to read.

    Returns:
        The extracted text. PDFs are delegated to ``extract_text_from_pdf``;
        DOC/DOCX output from ``textract`` is decoded as UTF-8.

    Raises:
        Exception: If the path has no extension or an unsupported one, or if
            DOC/DOCX extraction fails (the original error is chained).
    """
    # rpartition never raises, so a path without '.' falls through to the
    # "unsupported" error below instead of an IndexError.
    _, dot, suffix = file_path.rpartition('.')
    ext = suffix.lower() if dot else ''

    if ext == 'pdf':
        return extract_text_from_pdf(file_path)

    if ext in ('doc', 'docx'):
        try:
            return textract.process(file_path).decode('utf-8')
        except Exception as e:
            # Keep the original traceback attached for debugging.
            raise Exception(f"Failed to extract text from {ext.upper()} file: {str(e)}") from e

    raise Exception("Unsupported file type for text extraction.")

def get_user_id_by_username(username):
    """Resolve *username* to the id of the matching User row.

    Opens its own session for the lookup and always closes it.

    Returns:
        The ``id`` of the first User whose ``username`` equals *username*,
        or None when there is no such row.
    """
    db = SessionLocal()
    try:
        record = db.query(User).filter(User.username == username).first()
        if not record:
            return None
        return record.id
    finally:
        db.close()

@main.route('/upload', methods=['POST'])
@jwt_required()
def upload_file():
    """Accept a PDF upload and store its bytes as a new, not-yet-processed document.

    Expects a multipart form part named ``file``. The record is created with
    placeholder analysis fields (summary ``"Processing..."``); no text
    extraction is performed here.

    Returns:
        200 with the new document id, title and status ``'processing'``;
        400 when the part is missing, has no filename, or is not a ``.pdf``;
        401 when the JWT identity does not resolve to a user id;
        500 with the exception text on any other failure.
    """
    try:
        if 'file' not in request.files:
            return jsonify({'error': 'No file part'}), 400

        upload = request.files['file']
        if not upload or upload.filename == '':
            return jsonify({'error': 'No selected file'}), 400

        is_pdf = upload.filename.lower().endswith('.pdf')
        if not is_pdf:
            return jsonify({'error': 'File type not allowed. Only PDF files are supported.'}), 400

        safe_title = secure_filename(upload.filename)
        raw_bytes = upload.read()  # whole upload held in memory as bytes

        owner_id = get_user_id_by_username(get_jwt_identity())
        if not owner_id:
            return jsonify({"success": False, "error": "User not found"}), 401

        # Analysis columns start out as placeholders; the file itself is
        # handed to save_document as file_data.
        new_id = save_document(
            title=safe_title,
            full_text="",
            summary="Processing...",
            clauses="[]",
            features="{}",
            context_analysis="{}",
            file_data=raw_bytes,
            user_id=owner_id
        )

        response_body = {
            'message': 'File uploaded successfully',
            'document_id': new_id,
            'title': safe_title,
            'status': 'processing'
        }
        return jsonify(response_body), 200
    except Exception as exc:
        logging.error(f"Error during file upload: {str(exc)}")
        return jsonify({'error': str(exc)}), 500

@main.route('/documents', methods=['GET'])
@jwt_required()
def list_documents():
    """Return one page of the current user's documents, newest upload first.

    Query parameters:
        page: 1-based page number (default 1).
        limit: Page size (default 20).

    Returns:
        200 with a JSON list of ``id``, ``title``, ``summary``, ``file_size``,
        ``upload_time`` (ISO string or null) and ``type`` (upper-cased text
        after the last '.' of the title, or ``'UNKNOWN'``);
        400 when ``page``/``limit`` are not integers or are out of range;
        401 when the JWT identity does not resolve to a user id;
        500 with the exception text on any other failure.
    """
    # Validate paging input up front so bad values yield a 400 rather than an
    # unhandled ValueError or a negative OFFSET/LIMIT reaching the database.
    try:
        page = int(request.args.get('page', 1))
        limit = int(request.args.get('limit', 20))
    except (TypeError, ValueError):
        return jsonify({"error": "page and limit must be integers"}), 400
    if page < 1 or limit < 0:
        return jsonify({"error": "page must be >= 1 and limit must be >= 0"}), 400
    offset = (page - 1) * limit

    session = None
    try:
        identity = get_jwt_identity()
        user_id = get_user_id_by_username(identity)
        if not user_id:
            # Without this guard a None id would turn the filter below into
            # "user_id IS NULL" and list ownerless documents.
            return jsonify({"error": "User not found"}), 401

        session = SessionLocal()
        documents = (
            session.query(Document)
            .filter(Document.user_id == user_id)
            .order_by(Document.upload_time.desc())
            .offset(offset)
            .limit(limit)
            .all()
        )
        result = [
            {
                'id': doc.id,
                'title': doc.title,
                'summary': doc.summary,
                'file_size': doc.file_size,
                'upload_time': doc.upload_time.isoformat() if doc.upload_time else None,
                'type': doc.title.split('.')[-1].upper() if '.' in doc.title else 'UNKNOWN',
            }
            for doc in documents
        ]
        return jsonify(result), 200
    except Exception as e:
        logging.error(f"Error listing documents: {str(e)}", exc_info=True)
        return jsonify({"error": str(e)}), 500
    finally:
        # Close the session on every path, including failures.
        if session is not None:
            session.close()

@main.route('/get_document/<int:doc_id>', methods=['GET'])
@jwt_required()
def get_document(doc_id):
    """Return the stored record for one document owned by the current user.

    Args:
        doc_id: Document id taken from the URL.

    Returns:
        200 with whatever ``get_document_by_id`` returns, serialised as JSON;
        401 when the JWT identity does not resolve to a user id;
        404 when the lookup returns nothing for this user;
        500 with the exception text on failure.
    """
    try:
        user_id = get_user_id_by_username(get_jwt_identity())
        if not user_id:
            return jsonify({"error": "User not found"}), 401

        # Scope the lookup to the caller, as the question-answering route
        # does, so one user cannot read another user's document by id.
        doc = get_document_by_id(doc_id, user_id=user_id)
        if not doc:
            return jsonify({"error": "Document not found"}), 404
        return jsonify(doc), 200
    except Exception as e:
        logging.error(f"Error getting document {doc_id}: {str(e)}", exc_info=True)
        return jsonify({"error": str(e)}), 500

@main.route('/documents/download/<int:doc_id>', methods=['GET'])
@jwt_required()
def download_document(doc_id):
    """Send a stored file to the current user as an attachment.

    Args:
        doc_id: Document id taken from the URL.

    Returns:
        The file bytes as an ``application/pdf`` attachment named after the
        document title;
        401 when the JWT identity does not resolve to a user id;
        404 when the user has no such document or it has no file data;
        500 with the exception text on failure.
    """
    session = None
    try:
        user_id = get_user_id_by_username(get_jwt_identity())
        if not user_id:
            return jsonify({"error": "User not found"}), 401

        session = SessionLocal()
        # Filter on the owner as well as the id so a user cannot fetch
        # another user's file by guessing ids.
        doc = (
            session.query(Document)
            .filter(Document.id == doc_id, Document.user_id == user_id)
            .first()
        )
        if not doc or not doc.file_data:
            return jsonify({"error": "File not found"}), 404

        # Copy what we need while the session is still open.
        file_bytes = doc.file_data
        title = doc.title
        return send_file(
            io.BytesIO(file_bytes),
            as_attachment=True,
            download_name=title,
            mimetype='application/pdf'
        )
    except Exception as e:
        logging.error(f"Error downloading file: {str(e)}", exc_info=True)
        return jsonify({"error": f"Error downloading file: {str(e)}"}), 500
    finally:
        # Close the session on every path, including failures.
        if session is not None:
            session.close()

@main.route('/documents/view/<int:doc_id>', methods=['GET'])
@jwt_required()
def view_document(doc_id):
    """Send a stored file to the current user for inline display.

    Differs from the download route only in ``as_attachment=False``.

    Args:
        doc_id: Document id taken from the URL.

    Returns:
        The file bytes as ``application/pdf``;
        401 when the JWT identity does not resolve to a user id;
        404 when the user has no such document or it has no file data;
        500 with the exception text on failure.
    """
    session = None
    try:
        user_id = get_user_id_by_username(get_jwt_identity())
        if not user_id:
            return jsonify({"error": "User not found"}), 401

        session = SessionLocal()
        # Filter on the owner as well as the id so a user cannot view
        # another user's file by guessing ids.
        doc = (
            session.query(Document)
            .filter(Document.id == doc_id, Document.user_id == user_id)
            .first()
        )
        if not doc or not doc.file_data:
            return jsonify({"error": "File not found"}), 404

        # Copy what we need while the session is still open.
        file_bytes = doc.file_data
        title = doc.title
        return send_file(
            io.BytesIO(file_bytes),
            as_attachment=False,
            download_name=title,
            mimetype='application/pdf'
        )
    except Exception as e:
        logging.error(f"Error viewing file: {str(e)}", exc_info=True)
        return jsonify({"error": f"Error viewing file: {str(e)}"}), 500
    finally:
        # Close the session on every path, including failures.
        if session is not None:
            session.close()

@main.route('/documents/<int:doc_id>', methods=['DELETE'])
@jwt_required()
def delete_document_route(doc_id):
    """Delete one of the current user's documents.

    Args:
        doc_id: Document id taken from the URL.

    Returns:
        200 with a success message once ``delete_document`` has run;
        401 when the JWT identity does not resolve to a user id;
        404 when the user has no document with this id;
        500 with the exception text on failure.
    """
    try:
        user_id = get_user_id_by_username(get_jwt_identity())
        if not user_id:
            return jsonify({"success": False, "error": "User not found"}), 401

        # delete_document() is called with the id only, so confirm ownership
        # first; otherwise any authenticated user could delete any document.
        if not get_document_by_id(doc_id, user_id=user_id):
            return jsonify({"success": False, "error": "Document not found"}), 404

        delete_document(doc_id)
        return jsonify({"success": True, "message": "Document deleted successfully"}), 200
    except Exception as e:
        logging.error(f"Error deleting document {doc_id}: {str(e)}", exc_info=True)
        return jsonify({"success": False, "error": f"Error deleting document: {str(e)}"}), 500

@main.route('/register', methods=['POST'])
@handle_errors
def register():
    """Create a new user account from a JSON body.

    Expected JSON keys: ``username`` and ``password`` (required), ``email``
    (optional). Only a hash of the password is stored.

    Returns:
        201 with the new username and email;
        400 when username or password is missing (including when the body is
        absent or is not a JSON object);
        409 when the insert violates a database integrity constraint;
        500 with the exception text on any other database failure.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body; anything that is not a JSON object is treated as empty so the
    # .get() calls below cannot fail.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    username = data.get("username")
    password = data.get("password")
    email = data.get("email")
    if not username or not password:
        logging.warning("Registration attempt with missing username or password.")
        return jsonify({"error": "Username and password are required"}), 400

    hashed_pw = generate_password_hash(password)
    session = SessionLocal()
    try:
        user = User(username=username, password_hash=hashed_pw, email=email)
        session.add(user)
        session.commit()
        return jsonify({"message": "User registered successfully", "username": username, "email": email}), 201
    except IntegrityError:
        session.rollback()
        # NOTE(review): any integrity violation is reported as a duplicate
        # username; if email is also unique the message may be misleading.
        return jsonify({"error": "Username already exists"}), 409
    except Exception as e:
        session.rollback()
        logging.error(f"Database error during registration: {str(e)}", exc_info=True)
        return jsonify({"error": f"Database error: {str(e)}"}), 500
    finally:
        session.close()

@main.route('/login', methods=['POST'])
@handle_errors
def login():
    """Authenticate a user and issue a JWT access token.

    Expected JSON keys: ``username`` and ``password``. The ``username`` value
    is matched against either the stored username or the stored email.

    Returns:
        200 with ``access_token``, ``username`` and ``email``; the token's
        identity is the stored username;
        400 when username or password is missing (including when the body is
        absent or is not a JSON object);
        401 when no user matches or the password check fails;
        500 with the exception text on a database failure.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body; anything that is not a JSON object is treated as empty so the
    # .get() calls below cannot fail.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    username = data.get("username")
    password = data.get("password")
    if not username or not password:
        logging.warning("Login attempt with missing username or password.")
        return jsonify({"error": "Username and password are required"}), 400

    session = SessionLocal()
    try:
        user = session.query(User).filter(or_(User.username == username, User.email == username)).first()
        if user and check_password_hash(user.password_hash, password):
            access_token = create_access_token(identity=user.username)
            return jsonify(access_token=access_token, username=user.username, email=user.email), 200
        # Same response for "no such user" and "wrong password".
        return jsonify({"error": "Bad username or password"}), 401
    except Exception as e:
        logging.error(f"Database error during login: {str(e)}", exc_info=True)
        return jsonify({"error": f"Database error: {str(e)}"}), 500
    finally:
        session.close()

@main.route('/process-document/<int:doc_id>', methods=['POST'])
@jwt_required()
def process_document(doc_id):
    """Run text extraction and analysis on a stored document and save the results.

    Extracts text from the stored file bytes, then fills in ``full_text``,
    ``summary``, ``clauses``, ``features`` and ``context_analysis`` on the
    document row.

    Args:
        doc_id: Document id taken from the URL.

    Returns:
        200 with status ``'completed'`` once the row is committed;
        400 when no text could be extracted;
        401 when the JWT identity does not resolve to a user id;
        404 when the user has no such document or it has no file data;
        500 with the exception text on failure.
    """
    session = None
    try:
        user_id = get_user_id_by_username(get_jwt_identity())
        if not user_id:
            return jsonify({'error': 'User not found'}), 401

        session = SessionLocal()
        # Only the owner may trigger (re)processing of a document.
        doc = (
            session.query(Document)
            .filter(Document.id == doc_id, Document.user_id == user_id)
            .first()
        )
        if not doc:
            return jsonify({'error': 'Document not found'}), 404
        if not doc.file_data:
            return jsonify({'error': 'File not found for this document'}), 404

        # Extract text from file_data
        text = extract_text_from_pdf(io.BytesIO(doc.file_data))
        if not text:
            return jsonify({'error': 'Could not extract text from file'}), 400

        summary = generate_summary(text)
        clauses = detect_clauses(text)
        features = legal_domain_processor.process_legal_document(text)
        context_analysis = context_processor.analyze_context(text)

        # Update the document with processed content.
        # NOTE(review): str() stores the Python repr of these results, while
        # upload_file seeds the same columns with "[]" / "{}"; confirm the
        # format readers of these columns expect.
        doc.full_text = text
        doc.summary = summary
        doc.clauses = str(clauses)
        doc.features = str(features)
        doc.context_analysis = str(context_analysis)
        session.commit()

        return jsonify({
            'message': 'Document processed successfully',
            'document_id': doc_id,
            'status': 'completed'
        }), 200
    except Exception as e:
        logging.error(f"Error processing document: {str(e)}", exc_info=True)
        return jsonify({'error': str(e)}), 500
    finally:
        # Single cleanup point: runs on every return path and on failures,
        # so the session is never left open.
        if session is not None:
            session.close()

@main.route('/documents/summary/<int:doc_id>', methods=['POST'])
@jwt_required()
def generate_document_summary(doc_id):
    """Return a document's summary, generating and storing it when missing.

    A stored summary is returned as is unless it is blank or still the
    ``'Processing...'`` placeholder; otherwise text is extracted from the
    stored file bytes, summarised, and saved on the row.

    Args:
        doc_id: Document id taken from the URL.

    Returns:
        200 with ``{"summary": ...}``;
        400 when the extracted text is empty;
        401 when the JWT identity does not resolve to a user id;
        404 when the user has no such document or it has no file data;
        500 with the exception text when extraction, summarisation or the
        database fails.
    """
    session = None
    try:
        user_id = get_user_id_by_username(get_jwt_identity())
        if not user_id:
            return jsonify({"error": "User not found"}), 401

        session = SessionLocal()
        # Only the owner may read or generate the summary.
        doc = (
            session.query(Document)
            .filter(Document.id == doc_id, Document.user_id == user_id)
            .first()
        )
        if not doc:
            return jsonify({"error": "Document not found"}), 404

        summary = doc.summary
        if summary and summary.strip() and summary != 'Processing...':
            return jsonify({"summary": summary}), 200

        if not doc.file_data:
            return jsonify({"error": "File not found for this document"}), 404

        # Extract text from file_data
        try:
            text = extract_text_from_pdf(io.BytesIO(doc.file_data))
        except Exception as e:
            logging.error(f"Error extracting text from PDF: {e}")
            return jsonify({"error": f"Error extracting text from PDF: {e}"}), 500

        # Guard against None as well as whitespace-only output.
        if not text or not text.strip():
            return jsonify({"error": "No text available for summarization"}), 400

        try:
            summary = generate_summary(text)
        except Exception as e:
            logging.error(f"Error generating summary: {e}")
            return jsonify({"error": f"Error generating summary: {e}"}), 500

        # Save the summary to the database
        doc.summary = summary
        session.commit()
        return jsonify({"summary": summary}), 200
    except Exception as e:
        logging.error(f"Error in generate_document_summary: {e}", exc_info=True)
        return jsonify({"error": f"Error generating summary: {str(e)}"}), 500
    finally:
        # Single cleanup point replacing the per-branch close() calls; it
        # also covers the outer except path.
        if session is not None:
            session.close()

@main.route('/ask-question', methods=['POST', 'OPTIONS'])
def ask_question():
    """Entry point for question answering.

    OPTIONS requests get an empty 204 without authentication; every other
    request is handed to the JWT-protected ``_ask_question_impl``.
    """
    if request.method != 'OPTIONS':
        return _ask_question_impl()
    return '', 204

@jwt_required()
def _ask_question_impl():
    """Answer a question about one of the current user's documents.

    Expected JSON keys: ``document_id`` and ``question``. The answer is
    computed from the document's stored summary, and the question/answer pair
    is saved.

    Returns:
        200 with ``success``, ``answer`` and ``score``;
        400 when ``document_id`` or ``question`` is missing or blank
        (including when the body is absent or is not a JSON object), or when
        the document has no summary;
        401 when the JWT identity does not resolve to a user id;
        404 when the user has no such document;
        500 with the exception text when answering or saving fails.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # body; anything that is not a JSON object is treated as empty.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    document_id = data.get('document_id')
    raw_question = data.get('question')
    # A null or non-string question counts as missing rather than crashing
    # on .strip().
    question = raw_question.strip() if isinstance(raw_question, str) else ''
    if not document_id or not question:
        return jsonify({"success": False, "error": "document_id and question are required"}), 400

    identity = get_jwt_identity()
    user_id = get_user_id_by_username(identity)
    if not user_id:
        # Never pass user_id=None on to the ownership-scoped lookup.
        return jsonify({"success": False, "error": "User not found"}), 401

    doc = get_document_by_id(document_id, user_id=user_id)
    if not doc:
        return jsonify({"success": False, "error": "Document not found or not owned by user"}), 404

    summary = doc.get('summary', '')
    if not summary or not summary.strip():
        return jsonify({"success": False, "error": "Summary not available for this document"}), 400

    try:
        result = answer_question(question, summary)
        answer = result.get('answer', '')
        score = result.get('score', 0.0)
        save_question_answer(document_id, user_id, question, answer, score)
        return jsonify({"success": True, "answer": answer, "score": score}), 200
    except Exception as e:
        logging.error(f"Error answering question: {str(e)}", exc_info=True)
        return jsonify({"success": False, "error": f"Error answering question: {str(e)}"}), 500

@main.route('/previous-questions/<int:doc_id>', methods=['GET'])
@jwt_required()
def get_previous_questions(doc_id):
    """List the stored question/answer entries for one of the user's documents.

    All of the user's entries are fetched with an empty search string and
    then narrowed to those whose ``document_id`` equals *doc_id*.

    Returns:
        200 with ``success`` and the matching ``questions``;
        404 when the ownership-scoped document lookup returns nothing;
        500 with the exception text on failure.
    """
    try:
        owner_id = get_user_id_by_username(get_jwt_identity())
        if not get_document_by_id(doc_id, user_id=owner_id):
            return jsonify({"success": False, "error": "Document not found or not owned by user"}), 404

        history = []
        for entry in search_questions_answers('', user_id=owner_id):
            if entry['document_id'] == doc_id:
                history.append(entry)
        return jsonify({"success": True, "questions": history}), 200
    except Exception as exc:
        logging.error(f"Error fetching previous questions: {str(exc)}")
        return jsonify({"success": False, "error": f"Error fetching previous questions: {str(exc)}"}), 500

@main.route('/search', methods=['GET'])
@jwt_required()
def search_all():
    """Search documents and stored question/answer entries for a query string.

    Query parameters:
        q: Search text (required, surrounding whitespace is stripped).

    Returns:
        200 with ``documents`` and ``qa`` result lists;
        400 when ``q`` is missing or blank;
        500 with the exception text on failure.
    """
    try:
        query = request.args.get('q', '').strip()
        if not query:
            return jsonify({'error': 'Query parameter "q" is required.'}), 400
        identity = get_jwt_identity()
        user_id = get_user_id_by_username(identity)
        # NOTE(review): the document search is not passed a user id, unlike
        # the Q&A search below - confirm search_documents does not return
        # other users' documents.
        doc_results = search_documents(query)
        qa_results = search_questions_answers(query, user_id=user_id)
        return jsonify({
            'documents': doc_results,
            'qa': qa_results
        }), 200
    except Exception as e:
        # Log with traceback like the other routes instead of only echoing
        # the message to the client.
        logging.error(f"Error during search: {str(e)}", exc_info=True)
        return jsonify({'error': f'Error during search: {str(e)}'}), 500

@main.route('/user/profile', methods=['GET'])
@jwt_required()
def get_profile():
    """Return the profile stored for the identity in the JWT.

    Returns:
        200 with the value from ``get_user_profile`` serialised as JSON,
        or 404 when that lookup returns nothing.
    """
    profile = get_user_profile(get_jwt_identity())
    if not profile:
        return jsonify({'error': 'User not found'}), 404
    return jsonify(profile), 200

@main.route('/user/profile', methods=['POST'])
@jwt_required()
def update_profile():
    """Update the email, phone and company stored for the current user.

    Expected JSON keys: ``email`` (required), ``phone`` and ``company``
    (optional).

    Returns:
        200 when ``update_user_profile`` reports success;
        400 when ``email`` is missing (including when the body is absent or
        is not a JSON object) or when the update reports failure.
    """
    identity = get_jwt_identity()
    # silent=True yields None instead of raising on a missing or malformed
    # body; anything that is not a JSON object is treated as empty so the
    # .get() calls below cannot fail.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    email = data.get('email')
    phone = data.get('phone')
    company = data.get('company')
    if not email:
        return jsonify({'error': 'Email is required'}), 400

    updated = update_user_profile(identity, email, phone, company)
    if updated:
        return jsonify({'message': 'Profile updated successfully'}), 200
    return jsonify({'error': 'Failed to update profile'}), 400

@main.route('/user/change-password', methods=['POST'])
@jwt_required()
def change_password():
    """Change the current user's password.

    Expected JSON keys: ``current_password``, ``new_password`` and
    ``confirm_password``; the last two must be equal.

    Returns:
        200 with the message from ``change_user_password`` on success;
        400 when a field is missing (including when the body is absent or is
        not a JSON object), the new passwords differ, or the change is
        rejected (with the returned message as the error).
    """
    identity = get_jwt_identity()
    # silent=True yields None instead of raising on a missing or malformed
    # body; anything that is not a JSON object is treated as empty so the
    # .get() calls below cannot fail.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    current_password = data.get('current_password')
    new_password = data.get('new_password')
    confirm_password = data.get('confirm_password')
    if not current_password or not new_password or not confirm_password:
        return jsonify({'error': 'All password fields are required'}), 400
    if new_password != confirm_password:
        return jsonify({'error': 'New passwords do not match'}), 400

    success, msg = change_user_password(identity, current_password, new_password)
    if success:
        return jsonify({'message': msg}), 200
    return jsonify({'error': msg}), 400

def _to_naive_utc(val):
    """Convert a datetime or ISO-format string to a naive UTC datetime, or None.

    Timezone-aware values are converted to UTC and stripped of tzinfo; naive
    values are returned unchanged. Unparseable strings and any other type
    yield None.
    """
    if isinstance(val, str):
        try:
            val = datetime.fromisoformat(val)
        except ValueError:
            return None
    if not isinstance(val, datetime):
        return None
    if val.tzinfo is not None:
        return val.astimezone(timezone.utc).replace(tzinfo=None)
    return val


@main.route('/dashboard-stats', methods=['GET'])
@jwt_required()
def dashboard_stats():
    """Return document and question counts for the current user's dashboard.

    Returns:
        200 with ``total_documents``, ``processed_documents`` (documents whose
        summary is set and is not the ``'Processing...'`` placeholder),
        ``pending_analysis`` (the remainder) and ``recent_questions`` (stored
        questions created within the last 30 days);
        401 when the JWT identity does not resolve to a user id;
        500 with the exception text on failure.
    """
    try:
        identity = get_jwt_identity()
        user_id = get_user_id_by_username(identity)
        if not user_id:
            # Never pass user_id=None on to the user-scoped queries below.
            return jsonify({'error': 'User not found'}), 401

        documents = get_all_documents(user_id=user_id)
        total_documents = len(documents)
        processed_documents = sum(
            1 for doc in documents
            if doc.get('summary') and doc.get('summary') != 'Processing...'
        )
        pending_analysis = total_documents - processed_documents

        qa_results = search_questions_answers('', user_id=user_id)

        # Naive UTC "now", matching what _to_naive_utc produces, obtained
        # without the deprecated datetime.utcnow().
        now = datetime.now(timezone.utc).replace(tzinfo=None)
        last_30_days = now - timedelta(days=30)

        recent_questions = 0
        for q in qa_results:
            # Parse each timestamp once; missing or unparseable values are skipped.
            created = _to_naive_utc(q['created_at'])
            if created is not None and created >= last_30_days:
                recent_questions += 1

        return jsonify({
            'total_documents': total_documents,
            'processed_documents': processed_documents,
            'pending_analysis': pending_analysis,
            'recent_questions': recent_questions
        }), 200
    except Exception as e:
        logging.error(f"Error fetching dashboard stats: {str(e)}", exc_info=True)
        return jsonify({'error': f'Error fetching dashboard stats: {str(e)}'}), 500