Update app.py
app.py CHANGED
```diff
@@ -15,20 +15,17 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnableLambda
 from datetime import date
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from setup import download_olmo_model, OLMO_MODEL
+# from setup import download_olmo_model, OLMO_MODEL
 
 # Ensure model is downloaded before proceeding
 @st.cache_resource
-def …
-    …
-    except Exception as e:
-        st.error(f"Failed to download or locate the model: {str(e)}")
-        st.stop()
+def load_model():
+    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
+    return model, tokenizer
 
+model, tokenizer = load_model()
 
 # # Define the path to your bash script
 # script_path = "./start.sh"
```
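The first hunk drops the OLMo setup import and its download guard (the removed try/except that called `st.error` and `st.stop`) in favor of a cached TinyLlama loader. One caveat: passing the bare `load_in_8bit=True` kwarg to `from_pretrained` is deprecated in recent transformers releases, and 8-bit loading requires `bitsandbytes` plus a CUDA device either way. A minimal sketch of the same loader with the explicit quantization config, under those assumptions (on CPU-only Space hardware you would drop the quantization entirely):

```python
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

@st.cache_resource  # cache across Streamlit reruns so the weights load only once
def load_model():
    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",  # requires the accelerate package
        # explicit config instead of the deprecated bare load_in_8bit=True;
        # needs bitsandbytes and a CUDA GPU
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    )
    return model, tokenizer
```

`@st.cache_resource` is what makes the module-level `model, tokenizer = load_model()` call cheap: Streamlit reruns the whole script on every interaction, and without the cache each rerun would reload the weights.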
```diff
@@ -101,18 +98,17 @@ def get_chain(temperature):
     retriever = load_retriever(docstore_path,chroma_path,embeddings,child_splitter,parent_splitter)
 
     # Replace the local OLMOLLM with the Hugging Face model
-    …
-    st.stop()
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_length=4000,
+        temperature=temperature,
+        top_p=0.95,
+        repetition_penalty=1.15
+    )
+
+    llm = HuggingFacePipeline(pipeline=pipe)
```
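The second hunk builds the generation pipeline inside `get_chain(temperature)` and wraps it for LangChain. Neither `pipeline` nor `HuggingFacePipeline` is imported in the hunks shown, so both presumably come from elsewhere in app.py. Note also that `max_length=4000` bounds prompt and completion together, so a long retrieved context can crowd out the answer; `max_new_tokens` caps only the generation. A hedged sketch of how the wrapped LLM would compose with the LCEL imports visible at the top of the file; the prompt template and the `temperature` value here are placeholders, not the app's real ones:

```python
from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline  # or langchain_community.llms in older LangChain
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

model, tokenizer = load_model()  # cached loader from the first hunk
temperature = 0.2                # get_chain() receives this as an argument

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,      # bounds only the completion, unlike max_length
    do_sample=True,          # temperature/top_p only take effect when sampling
    temperature=temperature,
    top_p=0.95,
    repetition_penalty=1.15,
    return_full_text=False,  # don't echo the formatted prompt back into the chain
)
llm = HuggingFacePipeline(pipeline=pipe)

# placeholder prompt; app.py's real chain feeds the retriever output into {context}
prompt = PromptTemplate.from_template(
    "Use the context to answer.\n\nContext: {context}\n\nQuestion: {question}"
)
chain = prompt | llm | StrOutputParser()
```

`return_full_text=False` matters in a chain like this: without it, a text-generation pipeline returns the prompt plus the completion, and the parser would pass the whole thing downstream.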