fyerfyer committed
Commit c9531de · Parent: d8e9d44

Initial deploy
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.sqlite filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,70 +1,165 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-
-
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-    messages = [{"role": "system", "content": system_message}]
-
-    messages.extend(history)
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
-
 
 if __name__ == "__main__":
-    demo.launch()
+import os
+import httpx
 import gradio as gr
+from openai import OpenAI
+from qdrant_client import QdrantClient
+from sentence_transformers import SentenceTransformer
+
+API_KEY = os.environ.get('DEEPSEEK_API_KEY')
+BASE_URL = "https://api.deepseek.com"
+
+QDRANT_PATH = "./qdrant_db"
+COLLECTION_NAME = "huggingface_transformers_docs"
+EMBEDDING_MODEL_ID = "fyerfyer/finetune-jina-transformers-v1"
+
+class HFRAG:
+    def __init__(self):
+        self.embed_model = SentenceTransformer(EMBEDDING_MODEL_ID, trust_remote_code=True)
+
+        # Remove a stale lock file left behind by a previous run of local-mode Qdrant.
+        lock_file = os.path.join(QDRANT_PATH, ".lock")
+        if os.path.exists(lock_file):
+            try:
+                os.remove(lock_file)
+                print("Cleaned up stale lock file.")
+            except OSError:
+                pass
+
+        if not os.path.exists(QDRANT_PATH):
+            raise ValueError(f"Qdrant path not found: {QDRANT_PATH}.")
+
+        self.db_client = QdrantClient(path=QDRANT_PATH)
+
+        if not self.db_client.collection_exists(COLLECTION_NAME):
+            raise ValueError(f"Collection '{COLLECTION_NAME}' not found in Qdrant DB.")
+
+        print("Connected to Qdrant.")
+
+        self.llm_client = OpenAI(
+            api_key=API_KEY,
+            base_url=BASE_URL,
+            http_client=httpx.Client(proxy=None, trust_env=False)
+        )
+
+    def retrieve(self, query: str, top_k: int = 5, score_threshold: float = 0.40):
+        query_vector = self.embed_model.encode(query).tolist()
+
+        # Older qdrant-client releases expose `search`; newer ones replaced it with `query_points`.
+        if hasattr(self.db_client, 'search'):
+            results = self.db_client.search(
+                collection_name=COLLECTION_NAME,
+                query_vector=query_vector,
+                limit=top_k,
+                score_threshold=score_threshold
+            )
+        else:
+            results = self.db_client.query_points(
+                collection_name=COLLECTION_NAME,
+                query=query_vector,
+                limit=top_k,
+                with_payload=True,
+                score_threshold=score_threshold
+            ).points
+        return results
+
+    def format_context(self, search_results):
+        context_pieces = []
+        sources_summary = []
+
+        for idx, hit in enumerate(search_results, 1):
+            raw_source = hit.payload['metadata']['source']
+            filename = raw_source.split('/')[-1]
+            text = hit.payload['text']
+            score = hit.score
+
+            sources_summary.append(f"`{filename}` (Score: {score:.2f})")
+
+            piece = f"""<doc id="{idx}" source="{filename}">\n{text}\n</doc>"""
+            context_pieces.append(piece)
+
+        return "\n\n".join(context_pieces), sources_summary
+
+rag_system = None
+
+def initialize_system():
+    global rag_system
+    if rag_system is None:
+        try:
+            rag_system = HFRAG()
+        except Exception as e:
+            print(f"Error initializing: {e}")
+            return None
+    return rag_system
+
+# ================= Gradio Logic =================
+def predict(message, history):
+    rag = initialize_system()
+
+    if not rag:
+        yield "❌ System initialization failed. Check logs."
+        return
+
+    if not API_KEY:
+        yield "❌ Error: `DEEPSEEK_API_KEY` not set in Space secrets."
+        return
+
+    # 1. Retrieve
+    yield "🔍 Retrieving relevant documents..."
+    results = rag.retrieve(message)
+
+    if not results:
+        yield "⚠️ No relevant documents found in the knowledge base."
+        return
+
+    # 2. Format context
+    context_str, sources_list = rag.format_context(results)
+
+    # 3. Build prompt
+    system_prompt = """You are an expert AI assistant specializing in the Hugging Face Transformers library.
+Your goal is to answer the user's question based ONLY on the provided "Retrieved Context".
+
+GUIDELINES:
+1. **Code First**: Prioritize showing Python code examples.
+2. **Citation**: Cite source filenames like `[model_doc.md]`.
+3. **Honesty**: If the answer isn't in the context, say you don't know.
+4. **Format**: Use Markdown."""
+
+    user_prompt = f"""### User Query\n{message}\n\n### Retrieved Context\n{context_str}"""
+
+    header = "**📚 Found relevant documents:**\n" + "\n".join([f"- {s}" for s in sources_list]) + "\n\n---\n\n"
+    current_response = header
+    yield current_response
+
+    try:
+        response = rag.llm_client.chat.completions.create(
+            model="deepseek-chat",
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+            temperature=0.1,
+            stream=True
+        )
+
+        # Append each streamed delta and re-yield the growing response for live updates.
+        for chunk in response:
+            if chunk.choices[0].delta.content:
+                content = chunk.choices[0].delta.content
+                current_response += content
+                yield current_response
+
+    except Exception as e:
+        yield current_response + f"\n\n❌ LLM API Error: {str(e)}"
+
+demo = gr.ChatInterface(
+    fn=predict,
+    title="🤗 Hugging Face RAG Expert",
+    description="Ask me anything about Transformers! Powered by DeepSeek-V3 & Finetuned Embeddings.",
+    examples=[
+        "How to implement padding?",
+        "How to use BERT pipeline?",
+        "How to fine-tune a model using Trainer?",
+        "What is the difference between padding and truncation?"
+    ],
+    theme="soft"
+)
 
 if __name__ == "__main__":
+    demo.launch()
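
Note on the data side: this commit ships a pre-built Qdrant DB under qdrant_db/ (added below), and app.py only ever reads from it. For orientation, a minimal ingestion sketch that would produce the payload shape retrieve() and format_context() rely on (a `text` field plus `metadata.source`); the document list and IDs are hypothetical, while the collection settings mirror qdrant_db/meta.json (768-dim vectors, cosine distance):

import uuid

from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("fyerfyer/finetune-jina-transformers-v1", trust_remote_code=True)
client = QdrantClient(path="./qdrant_db")

# Same settings as recorded in qdrant_db/meta.json.
if not client.collection_exists("huggingface_transformers_docs"):
    client.create_collection(
        collection_name="huggingface_transformers_docs",
        vectors_config=VectorParams(size=768, distance=Distance.COSINE),
    )

# Hypothetical (source path, chunk text) pairs -- illustrative only.
docs = [
    ("docs/source/en/pad_truncation.md", "Padding and truncation strategies ..."),
]

client.upsert(
    collection_name="huggingface_transformers_docs",
    points=[
        PointStruct(
            id=str(uuid.uuid4()),
            vector=model.encode(text).tolist(),
            # Payload shape assumed by retrieve()/format_context() in app.py.
            payload={"text": text, "metadata": {"source": source}},
        )
        for source, text in docs
    ],
)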
qdrant_db/.lock ADDED
@@ -0,0 +1 @@
+tmp lock file
qdrant_db/collection/huggingface_transformers_docs/storage.sqlite ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88a55f2d047299d73d59f44f05d0ef0bf03ca865ae5dbd5523eed72269cb0f98
+size 56549376
qdrant_db/meta.json ADDED
@@ -0,0 +1 @@
+{"collections": {"huggingface_transformers_docs": {"vectors": {"size": 768, "distance": "Cosine", "hnsw_config": null, "quantization_config": null, "on_disk": null, "datatype": null, "multivector_config": null}, "shard_number": null, "sharding_method": null, "replication_factor": null, "write_consistency_factor": null, "on_disk_payload": null, "hnsw_config": null, "wal_config": null, "optimizers_config": null, "quantization_config": null, "sparse_vectors": null, "strict_mode_config": null, "metadata": null}}, "aliases": {}}
requirements.txt ADDED
@@ -0,0 +1,7 @@
+gradio
+openai
+qdrant-client
+sentence-transformers
+httpx
+torch
+python-dotenv