pradeepsengarr committed
Commit 841e2b8 · verified · 1 Parent(s): c33b536

Update app.py

Files changed (1):
  1. app.py +36 -35
app.py CHANGED
@@ -1,14 +1,15 @@
 import os
 import gradio as gr
-import fitz
+import fitz  # PyMuPDF
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
-from transformers import AutoTokenizer, pipeline
-from transformers import BitsAndBytesConfig, AutoModelForCausalLM
+from transformers import AutoTokenizer
+from auto_gptq import AutoGPTQForCausalLM
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from huggingface_hub import login
 
+# Authenticate
 hf_token = os.environ.get("HUGGINGFACE_TOKEN")
 if not hf_token:
     raise ValueError("Hugging Face token not found.")
@@ -17,28 +18,21 @@ login(token=hf_token)
 # Load embedding model
 embed_model = SentenceTransformer("BAAI/bge-base-en-v1.5")
 
-# Load quantized Mistral with 8-bit
-model_id = "mistralai/Mistral-7B-Instruct-v0.1"
-bnb_config = BitsAndBytesConfig(
-    load_in_8bit=True,
-    llm_int8_threshold=6.0,
-    llm_int8_skip_modules=None,
-)
-
-tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
-model = AutoModelForCausalLM.from_pretrained(
+# Load 4-bit quantized Mistral model
+model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
+tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+model = AutoGPTQForCausalLM.from_quantized(
     model_id,
-    quantization_config=bnb_config,
-    device_map="auto",
-    token=hf_token
+    use_safetensors=True,
+    trust_remote_code=True,
+    device_map="auto"
 )
-llm = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
-# State
+# Internal state
 index = None
 doc_texts = []
 
-# Extract text
+# PDF/TXT text extraction
 def extract_text(file):
     try:
         text = ""
@@ -56,7 +50,7 @@ def extract_text(file):
     except Exception as e:
         return f"❌ Error extracting text: {e}"
 
-# Process file
+# Preprocess and embed
 def process_file(file):
     global index, doc_texts
     try:
@@ -64,25 +58,23 @@ def process_file(file):
         if text.startswith("❌"):
             return text
 
-        # Trim large documents
-        text = text[:15000]
-
+        text = text[:15000]  # Limit size
         splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
         doc_texts = splitter.split_text(text)
 
         if not doc_texts:
-            return "❌ Could not split document."
+            return "❌ Document could not be split."
 
         embeddings = embed_model.encode(doc_texts, convert_to_numpy=True)
         dim = embeddings.shape[1]
         index = faiss.IndexFlatL2(dim)
         index.add(embeddings)
 
-        return "✅ Document processed. You may ask your question below."
+        return "✅ Document processed. Ask your question below."
     except Exception as e:
         return f"❌ Error processing file: {e}"
 
-# Answer generator
+# Generate answer using context
 def generate_answer(question):
     global index, doc_texts
     try:
@@ -94,31 +86,40 @@ def generate_answer(question):
         context = "\n".join([doc_texts[i] for i in I[0]])
 
         prompt = (
-            f"You are a helpful assistant. Use the context below to answer the question clearly.\n\n"
+            f"You are a helpful assistant. Use the context below to answer clearly.\n\n"
             f"Context:\n{context}\n\n"
             f"Question: {question}\n\n"
             f"Answer:"
         )
 
-        result = llm(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)
-        return result[0]["generated_text"].split("Answer:")[-1].strip()
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        output = model.generate(
+            **inputs,
+            max_new_tokens=150,
+            do_sample=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.95
+        )
+        answer = tokenizer.decode(output[0], skip_special_tokens=True)
+        return answer.split("Answer:")[-1].strip()
     except Exception as e:
         return f"❌ Error generating answer: {e}"
 
 # Gradio UI
-with gr.Blocks(title="📄 Document Q&A Assistant") as demo:
-    gr.Markdown("<h1 style='text-align: center;'>📄 Document AI Assistant</h1>")
-    gr.Markdown("Upload a PDF or TXT file, and ask questions about its content. The assistant uses Mistral 7B (quantized) for reasoning.")
+with gr.Blocks(title="📄 Document Q&A (Mistral 4-bit)") as demo:
+    gr.Markdown("<h1 style='text-align: center;'>📄 Document Q&A with Mistral 4-bit</h1>")
+    gr.Markdown("Upload a PDF or TXT and ask questions. Powered by Mistral-7B GPTQ.")
 
     with gr.Row():
-        file_input = gr.File(label="Upload PDF or TXT", file_types=[".pdf", ".txt"])
+        file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt"])
         upload_output = gr.Textbox(label="Upload Status")
 
     with gr.Row():
-        question_input = gr.Textbox(label="Ask a Question", placeholder="e.g. What is the summary?")
+        question_input = gr.Textbox(label="Ask a Question", placeholder="e.g. What is this document about?")
        answer_output = gr.Textbox(label="Answer")
 
     file_input.change(fn=process_file, inputs=file_input, outputs=upload_output)
     question_input.submit(fn=generate_answer, inputs=question_input, outputs=answer_output)
 
-demo.launch(show_error=True, share=False)
+demo.launch(show_error=True)
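The substance of this commit is the swap from an 8-bit bitsandbytes load of mistralai/Mistral-7B-Instruct-v0.1 to the prequantized 4-bit GPTQ checkpoint TheBloke/Mistral-7B-Instruct-v0.1-GPTQ loaded through auto_gptq. A standalone sketch of the new load path, mirroring the calls in app.py above, is shown below; it assumes auto-gptq and transformers are installed and a CUDA device is available, and the smoke-test prompt is illustrative rather than taken from the commit.

# Standalone sketch of the new model path (assumes: pip install auto-gptq transformers,
# plus a CUDA device). Mirrors the calls used in app.py above.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(
    model_id,
    use_safetensors=True,
    trust_remote_code=True,
    device_map="auto",
)

# Quick smoke test; the prompt text is illustrative, not from the commit.
prompt = "Question: What is retrieval-augmented generation?\n\nAnswer:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64, do_sample=False)
print(tokenizer.decode(output[0], skip_special_tokens=True))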
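The extract_text body itself is untouched by this commit (only the comment above it changes), so the diff shows just its opening lines. For readers reconstructing the flow, a minimal PyMuPDF/TXT extractor with the same signature and the same ❌-prefixed error convention might look like the sketch below; this is an assumption for illustration, not the committed implementation.

import fitz  # PyMuPDF

# Illustrative only -- the committed extract_text body is not visible in this diff.
def extract_text(file):
    try:
        text = ""
        if file.name.lower().endswith(".pdf"):
            doc = fitz.open(file.name)     # Gradio's File component exposes a temp path via .name
            for page in doc:
                text += page.get_text()    # plain-text extraction, page by page
            doc.close()
        else:
            with open(file.name, "r", encoding="utf-8", errors="ignore") as f:
                text = f.read()
        return text
    except Exception as e:
        return f"❌ Error extracting text: {e}"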
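Similarly, the FAISS lookup that produces the indices I used in generate_answer sits outside the changed hunk, so only the context join is visible. A typical top-k search against the IndexFlatL2 built in process_file, using the module-level embed_model, index, and doc_texts and an assumed k of 3, could read:

import numpy as np

# Illustrative retrieval step; the actual search call falls outside the diff hunk.
# Relies on the module-level embed_model, index, and doc_texts defined in app.py.
def retrieve_context(question, k=3):
    q_emb = embed_model.encode([question], convert_to_numpy=True).astype(np.float32)
    D, I = index.search(q_emb, k)          # L2 distances and indices of the k nearest chunks
    return "\n".join(doc_texts[i] for i in I[0])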