fyerfyer committed
Commit c9531de · Parent: d8e9d44

Initial deploy
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.sqlite filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,70 +1,165 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-
-
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-    messages = [{"role": "system", "content": system_message}]
-
-    messages.extend(history)
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
-
 
 if __name__ == "__main__":
-    demo.launch()
+import os
+import httpx
 import gradio as gr
+from openai import OpenAI
+from qdrant_client import QdrantClient
+from sentence_transformers import SentenceTransformer
+
+API_KEY = os.environ.get('DEEPSEEK_API_KEY')
+BASE_URL = "https://api.deepseek.com"
+
+QDRANT_PATH = "./qdrant_db"
+COLLECTION_NAME = "huggingface_transformers_docs"
+EMBEDDING_MODEL_ID = "fyerfyer/finetune-jina-transformers-v1"
+
+class HFRAG:
+    def __init__(self):
+        self.embed_model = SentenceTransformer(EMBEDDING_MODEL_ID, trust_remote_code=True)
+
+        # Remove a stale lock file left behind by a previous run of local-mode Qdrant.
+        lock_file = os.path.join(QDRANT_PATH, ".lock")
+        if os.path.exists(lock_file):
+            try:
+                os.remove(lock_file)
+                print("Cleaned up stale lock file.")
+            except OSError:
+                pass
+
+        if not os.path.exists(QDRANT_PATH):
+            raise ValueError(f"Qdrant path not found: {QDRANT_PATH}.")
+
+        self.db_client = QdrantClient(path=QDRANT_PATH)
+
+        if not self.db_client.collection_exists(COLLECTION_NAME):
+            raise ValueError(f"Collection '{COLLECTION_NAME}' not found in Qdrant DB.")
+
+        print("Connected to Qdrant.")
+
+        self.llm_client = OpenAI(
+            api_key=API_KEY,
+            base_url=BASE_URL,
+            http_client=httpx.Client(proxy=None, trust_env=False)
+        )
+
+    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.40):
+        query_vector = self.embed_model.encode(query).tolist()
+
+        # Older qdrant-client releases expose `search`; newer ones replaced it with `query_points`.
+        if hasattr(self.db_client, 'search'):
+            results = self.db_client.search(
+                collection_name=COLLECTION_NAME,
+                query_vector=query_vector,
+                limit=top_k,
+                score_threshold=score_threshold
+            )
+        else:
+            results = self.db_client.query_points(
+                collection_name=COLLECTION_NAME,
+                query=query_vector,
+                limit=top_k,
+                with_payload=True,
+                score_threshold=score_threshold
+            ).points
+        return results
+
+    def format_context(self, search_results):
+        context_pieces = []
+        sources_summary = []
+
+        for idx, hit in enumerate(search_results, 1):
+            raw_source = hit.payload['metadata']['source']
+            filename = raw_source.split('/')[-1]
+            text = hit.payload['text']
+            score = hit.score
+
+            sources_summary.append(f"`{filename}` (Score: {score:.2f})")
+
+            piece = f"""<doc id="{idx}" source="{filename}">\n{text}\n</doc>"""
+            context_pieces.append(piece)
+
+        return "\n\n".join(context_pieces), sources_summary
+
+rag_system = None
+
+def initialize_system():
+    global rag_system
+    if rag_system is None:
+        try:
+            rag_system = HFRAG()
+        except Exception as e:
+            print(f"Error initializing: {e}")
+            return None
+    return rag_system
+
+# ================= Gradio Logic =================
+def predict(message, history):
+    rag = initialize_system()
+
+    if not rag:
+        yield "❌ System initialization failed. Check logs."
+        return
+
+    if not API_KEY:
+        yield "❌ Error: `DEEPSEEK_API_KEY` not set in Space secrets."
+        return
+
+    # 1. Retrieve
+    yield "🔍 Retrieving relevant documents..."
+    results = rag.retrieve(message)
+
+    if not results:
+        yield "⚠️ No relevant documents found in the knowledge base."
+        return
+
+    # 2. Format context
+    context_str, sources_list = rag.format_context(results)
+
+    # 3. Build prompt
+    system_prompt = """You are an expert AI assistant specializing in the Hugging Face Transformers library.
+Your goal is to answer the user's question based ONLY on the provided "Retrieved Context".
+
+GUIDELINES:
+1. **Code First**: Prioritize showing Python code examples.
+2. **Citation**: Cite source filenames like `[model_doc.md]`.
+3. **Honesty**: If the answer isn't in the context, say you don't know.
+4. **Format**: Use Markdown."""
+
+    user_prompt = f"""### User Query\n{message}\n\n### Retrieved Context\n{context_str}"""
+
+    header = "**📚 Found relevant documents:**\n" + "\n".join([f"- {s}" for s in sources_list]) + "\n\n---\n\n"
+    current_response = header
+    yield current_response
+
+    try:
+        response = rag.llm_client.chat.completions.create(
+            model="deepseek-chat",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=0.1,
+            stream=True
+        )
+
+        # Append each streamed delta and re-yield the growing response for live updates.
+        for chunk in response:
+            if chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                current_response += content
+                yield current_response
+
+    except Exception as e:
+        yield current_response + f"\n\n❌ LLM API Error: {str(e)}"
+
+demo = gr.ChatInterface(
+    fn=predict,
+    title="🤗 Hugging Face RAG Expert",
+    description="Ask me anything about Transformers! Powered by DeepSeek-V3 & Finetuned Embeddings.",
+    examples=[
+        "How to implement padding?",
+        "How to use BERT pipeline?",
+        "How to fine-tune a model using Trainer?",
+        "What is the difference between padding and truncation?"
+    ],
+    theme="soft"
+)
 
 if __name__ == "__main__":
+    demo.launch()
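
Note on the data side: this commit ships a pre-built Qdrant DB under qdrant_db/ (added below), and app.py only ever reads from it. For orientation, a minimal ingestion sketch that would produce the payload shape retrieve() and format_context() rely on (a `text` field plus `metadata.source`); the document list and IDs are hypothetical, while the collection settings mirror qdrant_db/meta.json (768-dim vectors, cosine distance):

import uuid

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("fyerfyer/finetune-jina-transformers-v1", trust_remote_code=True)
client = QdrantClient(path="./qdrant_db")

# Same settings as recorded in qdrant_db/meta.json.
if not client.collection_exists("huggingface_transformers_docs"):
    client.create_collection(
        collection_name="huggingface_transformers_docs",
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )

# Hypothetical (source path, chunk text) pairs -- illustrative only.
docs = [
    ("docs/source/en/pad_truncation.md", "Padding and truncation strategies ..."),
]

client.upsert(
    collection_name="huggingface_transformers_docs",
    points=[
        PointStruct(
            id=str(uuid.uuid4()),
            vector=model.encode(text).tolist(),
            # Payload shape assumed by retrieve()/format_context() in app.py.
            payload={"text": text, "metadata": {"source": source}},
        )
        for source, text in docs
    ],
)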
qdrant_db/.lock ADDED
@@ -0,0 +1 @@
+tmp lock file
qdrant_db/collection/huggingface_transformers_docs/storage.sqlite ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88a55f2d047299d73d59f44f05d0ef0bf03ca865ae5dbd5523eed72269cb0f98
+size 56549376
qdrant_db/meta.json ADDED
@@ -0,0 +1 @@
+{"collections": {"huggingface_transformers_docs": {"vectors": {"size": 768, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null, "datatype": null, "multivector_config": null}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "quantization_config": null, "sparse_vectors": null, "strict_mode_config": null, "metadata": null}}, "aliases": {}}
requirements.txt ADDED
@@ -0,0 +1,7 @@
+gradio
+openai
+qdrant-client
+sentence-transformers
+httpx
+torch
+python-dotenv