Update app.py
app.py (CHANGED)
```diff
@@ -1,13 +1,6 @@
 import gradio as gr
 import os
-
-
-from dotenv import load_dotenv
-
-from langchain.llms.base import LLM
-from transformers import AutoTokenizer
-from huggingface_hub import HfApi
-import requests
+api_token = os.getenv("HF_TOKEN")
 
 
 from langchain_community.vectorstores import FAISS
@@ -19,67 +12,12 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.llms import HuggingFacePipeline
 from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
-from langchain_community.llms import
+from langchain_community.llms import HuggingFaceEndpoint
 import torch
 
-
-list_llm = ["meta-llama/Llama-3.1-8B-Instruct"] # , "HuggingFaceH4/zephyr-7b-beta"] # "mistralai/Mistral-7B-Instruct-v0.2" # meta-llama/Meta-Llama-3-8B-Instruct
+list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
 list_llm_simple = [os.path.basename(llm) for llm in list_llm]
 
-def retrieve_api():
-    """Retrieve HuggingFace API Key"""
-    _ = load_dotenv()
-    global huggingfacehub_api_token
-    api_token = os.environ.get("HUGGINGFACE_API_KEY")
-
-# class ZephyrLLM(LLM):
-#     def __init__(self, repo_id, huggingfacehub_api_token, max_new_tokens=512, temperature=0.7, **kwargs):
-#         super().__init__(**kwargs)
-#         self.repo_id = repo_id
-#         self.api_token = huggingfacehub_api_token
-#         self.api_url = f"https://api-inference.huggingface.co/models/{repo_id}"
-#         self.headers = {"Authorization": f"Bearer {huggingfacehub_api_token}"}
-#         self.tokenizer = AutoTokenizer.from_pretrained(repo_id)
-#         self.max_new_tokens = max_new_tokens
-#         self.temperature = temperature
-
-#     def _call(self, prompt, stop=None):
-#         # Format as chat message
-#         messages = [{"role": "user", "content": prompt}]
-
-#         # Apply Zephyr's chat template
-#         formatted_prompt = self.tokenizer.apply_chat_template(
-#             messages, tokenize=False, add_generation_prompt=True
-#         )
-#         # Send request to Hugging Face Inference API
-#         payload = {
-#             "inputs": formatted_prompt,
-#             "parameters": {
-#                 "max_new_tokens": self.max_new_tokens,
-#                 "temperature": self.temperature
-#             }
-#         }
-#         response = requests.post(self.api_url, headers=self.headers, json=payload)
-
-#         if response.status_code == 200:
-#             full_response = response.json()[0]["generated_text"]
-
-#             # Extract the assistant reply from the full response
-#             # After <|assistant|>\n, everything is the model's answer
-#             if "<|assistant|>" in full_response:
-#                 return full_response.split("<|assistant|>")[-1].strip()
-#             else:
-#                 return full_response.strip()
-
-#         else:
-#             raise Exception(f"Failed call [{response.status_code}]: {response.text}")
-
-
-#     @property
-#     def _llm_type(self) -> str:
-#         return "zephyr-custom"
-
-
 # Load and split PDF document
 def load_doc(list_file_path):
     # Processing for one document only
@@ -105,34 +43,18 @@ def create_db(splits):
 
 # Initialize langchain LLM chain
 def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
-
-    # llm = ZephyrLLM(
-    #     repo_id=llm_model,
-    #     huggingfacehub_api_token=api_token,
-    #     temperature=temperature,
-    #     max_new_tokens=max_tokens,
-    # )
-    if llm_model == "meta-llama/Llama-3.1-8B-Instruct":
+    if llm_model == "meta-llama/Meta-Llama-3-8B-Instruct":
         llm = HuggingFaceEndpoint(
            repo_id=llm_model,
-            task="text-generation",
            huggingfacehub_api_token = api_token,
            temperature = temperature,
            max_new_tokens = max_tokens,
            top_k = top_k,
        )
-
-        # llm = HuggingFaceHub(
-        #     repo_id="mistralai/Mistral-7B-Instruct-v0.2",
-        #     huggingfacehub_api_token=api_token,
-        #     model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens}
-        # )
-
    else:
        llm = HuggingFaceEndpoint(
            huggingfacehub_api_token = api_token,
-            repo_id=llm_model,
-            task="text-generation",
+            repo_id=llm_model,
            temperature = temperature,
            max_new_tokens = max_tokens,
            top_k = top_k,
@@ -291,5 +213,4 @@ def demo():
 
 
 if __name__ == "__main__":
-
-    demo()
+    demo()
```
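Net effect of the commit: the dotenv-based `retrieve_api` helper and the commented-out `ZephyrLLM` class are dropped, the truncated `from langchain_community.llms import` line is completed with `HuggingFaceEndpoint`, the model list moves to Meta-Llama-3-8B-Instruct and Mistral-7B-Instruct-v0.2, and the API token is read once from the `HF_TOKEN` environment variable. A minimal sketch of the resulting setup is below, assuming `langchain-community` is installed and `HF_TOKEN` is set (e.g. as a Space secret); `build_llm` is a hypothetical wrapper for illustration, not a function in app.py, and its default values are placeholders.

```python
import os

from langchain_community.llms import HuggingFaceEndpoint

# Token is read once at import time, as in the updated app.py.
# Assumes HF_TOKEN is set in the environment (e.g. a Space secret).
api_token = os.getenv("HF_TOKEN")


def build_llm(llm_model, temperature=0.7, max_tokens=1024, top_k=3):
    """Hypothetical wrapper mirroring the HuggingFaceEndpoint call in the diff."""
    return HuggingFaceEndpoint(
        repo_id=llm_model,
        huggingfacehub_api_token=api_token,
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_k=top_k,
    )


if __name__ == "__main__":
    # Either entry of list_llm works as repo_id.
    llm = build_llm("meta-llama/Meta-Llama-3-8B-Instruct")
    print(llm.invoke("What is retrieval-augmented generation?"))
```

In the Space itself, `temperature`, `max_tokens`, and `top_k` are passed into `initialize_llmchain`, presumably from the Gradio UI controls; the defaults above are only for standalone experimentation.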