from retrive_docs import load_faiss_index_and_metadata, retrieve_relevant_chunks, print_results

# --- CONFIGURATION ---
INDEX_PATH = "code_faiss.index"
METADATA_PATH = "code_metadata.json"
CHUNKS_JSON_PATH = "code_chunks.json"
MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"  # Must match the model used in create_faiss.py
TOP_K = 5  # Number of results to retrieve

# --- EXECUTION ---

# Load FAISS index, metadata, and the chunk store
index, metadata, chunks_dict = load_faiss_index_and_metadata(
    index_path=INDEX_PATH,
    metadata_path=METADATA_PATH,
    chunks_json_path=CHUNKS_JSON_PATH
)

if index is None or metadata is None or chunks_dict is None:
    print("Failed to load index, metadata, or chunks. Exiting.")
    raise SystemExit(1)

# Get user query (interactive input is disabled; a sample GitHub issue is used instead)
print("\nEnter your query (e.g., 'function to process text data'):")
# query = input("> ")
query = '''
Bug
When I add (cache=True) in classification training, RAM usage increases every epoch
until it crashes the training: it goes from about 3 GB to 6, 11, 15, ... up to 50 GB.
If I don't add it, RAM usage stays fine at around 4 GB and the whole training runs.
I work on Colab.

!yolo task=classify mode=train cache=True model=yolov8n-cls.pt data='/content/Classification-1' epochs=5 batch=265 imgsz=128

Environment
No response

Minimal Reproducible Example
No response

Additional
No response
'''

# Retrieve and display the most relevant chunks for the query
results = retrieve_relevant_chunks(
    query=query,
    model_name=MODEL_NAME,
    index=index,
    metadata=metadata,
    chunks_dict=chunks_dict,
    top_k=TOP_K
)

print(print_results(results))

# --- CALL LLM (currently disabled) ---
# import requests
# import json
# import time
# import os

# sys_prompt = "You are a helpful assistant."

# # API key and API base for the custom API server, read from the environment
# api_key = os.getenv("API_KEY")
# api_base_url = os.getenv("API_BASE_URL")

# # Set up headers for the request
# headers = {
#     "Authorization": f"Bearer {api_key}",
#     "Content-Type": "application/json"
# }

# # Prepare the data payload for the POST request
# data = json.dumps({
#     "model": "Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
#     "messages": [
#         {"role": "system", "content": sys_prompt},
#         {"role": "user", "content": query}
#     ],
#     "temperature": 0.2
# })

# # Measure request execution time
# t1 = time.time()

# # Perform the POST request
# response = requests.post(f"{api_base_url}/chat/completions", headers=headers, data=data)
# print("Request time:", time.time() - t1)

# # Check the response and handle errors
# if response.status_code == 200:
#     # Parse the response if the request was successful
#     chat_response = response.json()
#     print("Chat response:", chat_response['choices'][0]['message']['content'])
# else:
#     # Print error information if something went wrong
#     print("Failed to fetch response:", response.status_code, response.text)

# print("This output is based on this query:", query)
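
# A minimal sketch of wiring the retrieved chunks into the chat request above, so the
# model answers from the retrieval context rather than from the raw query alone.
# Assumptions (not confirmed by the original script): `print_results(results)` returns
# the formatted chunks as a string (as the `print(print_results(results))` call above
# suggests), the server exposes an OpenAI-compatible /chat/completions endpoint, and
# API_KEY / API_BASE_URL are set in the environment. The function name and prompt
# wording here are illustrative placeholders.

import os
import requests


def answer_with_context(query: str, context: str) -> str:
    """Send the query plus retrieved context to the chat endpoint; return the reply text."""
    headers = {
        "Authorization": f"Bearer {os.getenv('API_KEY')}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": "Meta-Llama-3.1-8B-Instruct-AWQ-INT4",
        "messages": [
            {"role": "system", "content": "Answer using only the provided code context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{query}"},
        ],
        "temperature": 0.2,
    }
    response = requests.post(
        f"{os.getenv('API_BASE_URL')}/chat/completions",
        headers=headers,
        json=payload,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


# Example usage (disabled, like the LLM block above):
# print(answer_with_context(query, print_results(results)))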