# RagGV1 / app.py
# ramysaidagieb's picture
# Update app.py
# 2d575e5 verified
import gradio as gr
import os
from rag_pipeline import RAGPipeline
from PyPDF2 import PdfReader
import docx
# Rolling buffer of log messages displayed in the UI log boxes.
logs = []


def logger(message):
    """Record *message* in the shared ``logs`` buffer.

    The buffer is capped at 100 entries: whenever an append pushes it past
    that size, the oldest entry is discarded.
    """
    logs.append(message)
    if len(logs) <= 100:
        return
    # Over capacity: drop the oldest entry.
    del logs[0]
# Module-level pipeline instance shared by the callbacks below
# (`process_documents` and `answer_question`); it is handed `logger` as its
# logging callback.
rag = RAGPipeline(logger=logger)
def extract_text_from_file(file):
    """Return the plain text contained in an uploaded document.

    Args:
        file: Either an object exposing the on-disk path as ``.name`` or the
            path itself as a string.

    Returns:
        The extracted text. Every PDF page and every DOCX paragraph is
        followed by a newline; TXT files are returned as read. A file with
        an unsupported extension is reported through ``logger`` and yields
        an empty string.
    """
    # Accept both upload wrappers (``.name``) and bare path strings.
    path = getattr(file, "name", file)
    # Compare case-insensitively so "REPORT.PDF" / "notes.TXT" are recognised.
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        reader = PdfReader(path)
        # Defensively treat a missing extraction result (None) as empty text
        # instead of failing on ``None + "\n"``.
        return "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)

    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return "".join(f"{para.text}\n" for para in document.paragraphs)

    if lowered.endswith(".txt"):
        # "utf-8-sig" reads plain UTF-8 unchanged but strips a leading BOM,
        # which would otherwise end up inside the first indexed chunk.
        with open(path, "r", encoding="utf-8-sig") as f:
            return f.read()

    logger(f"[RAG] تنسيق الملف غير مدعوم: {path}")
    return ""
def process_documents(files):
    """Extract text from the uploaded files and build the RAG index.

    Args:
        files: Iterable of uploaded file objects coming from the ``gr.File``
            input, or ``None`` / empty when nothing was selected.

    Returns:
        A ``(status_message, log_text)`` tuple: the message returned by
        ``rag.build_index`` (or a warning when no files were supplied) and
        the current log buffer joined with newlines.
    """
    if not files:
        # The build button can be clicked before any file is selected, in
        # which case there is nothing to iterate; report it instead of
        # raising a TypeError on ``None``.
        msg = "⚠️ لم يتم رفع أي ملفات."
        logger(f"[RAG] {msg}")
        return msg, "\n".join(logs)

    # One chunk per non-blank line, whitespace-trimmed, in upload order.
    chunks = []
    for file in files:
        for line in extract_text_from_file(file).split("\n"):
            line = line.strip()
            if line:
                chunks.append(line)

    msg = rag.build_index(chunks)
    return msg, "\n".join(logs)
def answer_question(question):
    """Answer *question* through the shared RAG pipeline.

    Returns a 3-tuple of the answer, the sources joined one per line, and
    the current log buffer joined one entry per line.
    """
    result = rag.generate_answer(question)
    answer, sources = result
    sources_text = "\n".join(sources)
    log_text = "\n".join(logs)
    return answer, sources_text, log_text
# Two-tab Gradio interface: one tab to upload documents and build the index,
# one tab to ask questions against it.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 نظام RAG باللغة العربية")

    # --- Tab 1: document upload and index construction ---
    with gr.Tab("📄 رفع المستندات وبناء الفهرس"):
        file_input = gr.File(
            label="اختر ملفات المستندات",
            file_types=[".pdf", ".docx", ".txt"],
            file_count="multiple",
        )
        build_btn = gr.Button("🔨 بناء الفهرس")
        build_status = gr.Textbox(label="حالة بناء الفهرس", lines=2)
        logs_box = gr.Textbox(label="📜 سجلات التشغيل", lines=10)
        build_btn.click(
            fn=process_documents,
            inputs=file_input,
            outputs=[build_status, logs_box],
        )

    # --- Tab 2: question answering ---
    with gr.Tab("❓ طرح الأسئلة"):
        question_input = gr.Textbox(label="اكتب سؤالك هنا", lines=2)
        ask_btn = gr.Button("📩 إرسال السؤال")
        answer_output = gr.Textbox(label="الإجابة", lines=5)
        sources_output = gr.Textbox(label="المراجع المستخدمة", lines=5)
        logs_box2 = gr.Textbox(label="📜 سجلات التشغيل", lines=10)
        ask_btn.click(
            fn=answer_question,
            inputs=question_input,
            outputs=[answer_output, sources_output, logs_box2],
        )

# Start the web server once the layout has been declared.
demo.launch()