khalednabawi11 committed on
Commit
2505e32
·
verified ·
1 Parent(s): 4dd7aa1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -9
app.py CHANGED
@@ -75,25 +75,28 @@ from qdrant_client.models import VectorParams, Distance
75
  from langchain.llms import HuggingFacePipeline
76
  from langchain.chains import RetrievalQA
77
  from langchain.vectorstores import Qdrant
78
- from transformers import GenerationConfig, FastLanguageModel
79
  from langchain.embeddings import HuggingFaceEmbeddings
 
 
 
 
 
 
 
80
 
81
  # Define model path
82
  model_name = "FreedomIntelligence/Apollo-7B"
83
 
84
- # Load model with Unsloth (4-bit QLoRA)
85
- model, tokenizer = FastLanguageModel.from_pretrained(
86
- model_name=model_name,
87
- max_seq_length=2048,
88
- dtype=torch.float16,
89
- load_in_4bit=True
90
- )
91
 
92
  # Enable padding token if missing
93
  tokenizer.pad_token = tokenizer.eos_token
94
 
95
  # Set up Qdrant vector store
96
- qdrant_client = QdrantClient(url="https://your-qdrant-instance.com")
97
  vector_size = 768
98
  embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
99
 
@@ -164,6 +167,22 @@ iface = gr.Interface(
164
  theme="compact"
165
  )
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  # Launch Gradio interface
168
  if __name__ == "__main__":
169
  iface.launch()
 
75
  from langchain.llms import HuggingFacePipeline
76
  from langchain.chains import RetrievalQA
77
  from langchain.vectorstores import Qdrant
78
+ from transformers import GenerationConfig, AutoTokenizer, AutoModelForCausalLM
79
  from langchain.embeddings import HuggingFaceEmbeddings
80
+ import os
81
+
82
+ QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
83
+ QDRANT_URL = os.getenv("QDRANT_URL")
84
+
85
+
86
+
87
 
88
  # Define model path
89
  model_name = "FreedomIntelligence/Apollo-7B"
90
 
91
+ # Load model directly
92
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
93
+ model = AutoModelForCausalLM.from_pretrained(model_name)
 
 
 
 
94
 
95
  # Enable padding token if missing
96
  tokenizer.pad_token = tokenizer.eos_token
97
 
98
  # Set up Qdrant vector store
99
+ qdrant_client = QdrantClient(url=QDRANT_URL, api_key = QDRANT_API_KEY)
100
  vector_size = 768
101
  embedding = HuggingFaceEmbeddings(model_name="Omartificial-Intelligence-Space/GATE-AraBert-v1")
102
 
 
167
  theme="compact"
168
  )
169
 
170
+ # demo = gr.ChatInterface(
171
+ # respond,
172
+ # additional_inputs=[
173
+ # gr.Textbox(value="You are a Medical Chatbot.", label="System message"),
174
+ # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
175
+ # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
176
+ # gr.Slider(
177
+ # minimum=0.1,
178
+ # maximum=1.0,
179
+ # value=0.95,
180
+ # step=0.05,
181
+ # label="Top-p (nucleus sampling)",
182
+ # ),
183
+ # ],
184
+ # )
185
+
186
  # Launch Gradio interface
187
  if __name__ == "__main__":
188
  iface.launch()