pradeepsengarr committed on
Commit 6705397 · verified · 1 Parent(s): f9cbbf2

Update app.py

Files changed (1)
  1. app.py +69 -57
app.py CHANGED
@@ -1,71 +1,83 @@
  import os
  import torch
  import gradio as gr
- import faiss
- from transformers import AutoTokenizer, pipeline
- from langchain_community.vectorstores import FAISS
- from langchain_community.document_loaders import PyPDFLoader
- from langchain_community.embeddings import HuggingFaceEmbeddings
- from langchain.text_splitter import RecursiveCharacterTextSplitter
  from huggingface_hub import login

- # 🔐 Authenticate with Hugging Face using token stored in Secrets
- hf_token = os.getenv("HUGGINGFACE_TOKEN")
  if not hf_token:
-     raise ValueError("❌ HUGGINGFACE_TOKEN not set in environment variables.")
- login(token=hf_token)
-
- # 🔁 Load model and tokenizer
- model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
- tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=True)
- pipe = pipeline("text-generation", model=model_id, tokenizer=tokenizer,
-                 torch_dtype=torch.float16, device_map="auto", use_auth_token=True)
-
- # 🔎 Sentence transformer for embeddings
- embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-
- # Global store for vector DB
- db = None
-
- def process_pdf(pdf_path):
-     """Load, chunk, embed and index PDF into FAISS."""
-     loader = PyPDFLoader(pdf_path)
-     pages = loader.load()
-     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-     docs = text_splitter.split_documents(pages)
-
-     global db
-     db = FAISS.from_documents(docs, embed_model)
-     return "✅ PDF processed successfully. Ask your questions now."
-
- def query_answer(question):
-     if not db:
-         return "⚠️ Please upload and process a PDF first."
-
-     docs = db.similarity_search(question, k=3)
-     context = "\n".join([doc.page_content for doc in docs])
-     prompt = f"[INST] You are a helpful assistant. Use the context below to answer the question:\n\nContext:\n{context}\n\nQuestion: {question}\n\nAnswer: [/INST]"
-
-     result = pipe(prompt, max_new_tokens=256, do_sample=True, top_k=5)[0]["generated_text"]
-     return result.replace(prompt, "").strip()
-
- # 🔧 Gradio UI
  with gr.Blocks() as demo:
-     gr.Markdown("# 📄 Document Q&A using Mistral-GPTQ")
-
-     with gr.Row():
-         pdf_file = gr.File(label="Upload PDF", type="filepath")
-         upload_btn = gr.Button("Process PDF")
-
-     status = gr.Textbox(label="Status", interactive=False)
-
-     with gr.Row():
-         user_question = gr.Textbox(label="Ask a Question")
-         ask_btn = gr.Button("Get Answer")
-
-     answer = gr.Textbox(label="Answer", lines=10)
-
-     upload_btn.click(process_pdf, inputs=pdf_file, outputs=status)
-     ask_btn.click(query_answer, inputs=user_question, outputs=answer)
-
- demo.launch()

  import os
+ import time
  import torch
  import gradio as gr
  from huggingface_hub import login
+ from transformers import AutoTokenizer
+ from auto_gptq import AutoGPTQForCausalLM
+ from sentence_transformers import SentenceTransformer
+ from langchain_community.vectorstores import FAISS
+
+ # Load HF token and login
+ hf_token = os.environ.get("HUGGINGFACE_TOKEN")
  if not hf_token:
+     raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable")
+
+ login(token=hf_token)
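+ # (On a Hugging Face Space, the token can be stored under Settings -> Variables
+ # and secrets; secrets are exposed to the app as environment variables.)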
+
+ # Load tokenizer and quantized model (repo id as used in the previous version;
+ # "TheBloke/mistral-7B-GPTQ" is not a Hub repo)
+ model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ print("Loading tokenizer...")
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+
+ print("Loading quantized model...")
+ start = time.time()
+ model = AutoGPTQForCausalLM.from_quantized(
+     model_id,
+     use_safetensors=True,
+     device=device,
+     use_triton=False,  # Triton kernels require a CUDA GPU with triton installed
+     quantize_config=None,
+ )
+ print(f"Model loaded in {time.time() - start:.2f} seconds on {device}")
+
+ # Load embedding model for FAISS vector store
+ embedder = SentenceTransformer("all-MiniLM-L6-v2")
+
+ # Sample documents to build vector index (can replace with your own)
+ texts = [
+     "Hello world",
+     "Mistral 7B is a powerful language model",
+     "LangChain and FAISS make vector search easy",
+     "This is a test document for vector search",
+ ]
+ embeddings = embedder.encode(texts)
+
+ # FAISS.from_embeddings expects (text, embedding) pairs plus an embedding
+ # function it can call for plain-text queries
+ faiss_index = FAISS.from_embeddings(list(zip(texts, embeddings)), embedder.encode)
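+ # e.g. faiss_index.similarity_search("vector search", k=2) would embed the
+ # query via embedder.encode and return the two closest Documents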
+
+ # Generate text from prompt
+ def generate_text(prompt, max_new_tokens=128):
+     inputs = tokenizer(prompt, return_tensors="pt").to(device)
+     with torch.no_grad():
+         outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
+     decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return decoded
+
+ # Search docs with vector similarity; FAISS returns LangChain Documents
+ def search_docs(query):
+     query_emb = embedder.encode([query])
+     results = faiss_index.similarity_search_by_vector(query_emb[0], k=3)
+     return "\n\n".join(doc.page_content for doc in results)
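+
+ # Quick smoke test (uncomment to run at startup):
+ # print(generate_text("Hello, my name is"))
+ # print(search_docs("powerful language model"))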
+
+ # Gradio UI
  with gr.Blocks() as demo:
+     gr.Markdown("# Mistral GPTQ + FAISS Vector Search Demo")
+
+     with gr.Tab("Text Generation"):
+         prompt_input = gr.Textbox(label="Enter prompt", lines=3)
+         generate_btn = gr.Button("Generate")
+         output_text = gr.Textbox(label="Output", lines=6)
+
+         generate_btn.click(fn=generate_text, inputs=prompt_input, outputs=output_text)
+
+     with gr.Tab("Vector Search"):
+         query_input = gr.Textbox(label="Enter search query", lines=2)
+         search_btn = gr.Button("Search")
+         search_output = gr.Textbox(label="Search Results", lines=6)
+
+         search_btn.click(fn=search_docs, inputs=query_input, outputs=search_output)
+
+ if __name__ == "__main__":
+     demo.launch()
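
The previous version combined retrieval and generation into a single document-Q&A flow; the new UI exposes them as separate tabs. A minimal sketch of how the two new helpers could be recombined into that retrieval-augmented pattern, reusing the [INST] prompt format from the removed query_answer (illustrative only, with a made-up question; not part of the commit):

# Retrieve context from the FAISS index, then condition generation on it
context = search_docs("What is Mistral 7B?")
prompt = (
    "[INST] You are a helpful assistant. Use the context below to answer the question:\n\n"
    f"Context:\n{context}\n\n"
    "Question: What is Mistral 7B?\n\nAnswer: [/INST]"
)
print(generate_text(prompt, max_new_tokens=256))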