from fuzzywuzzy import fuzz
from google.genai import Client, types
from datasets import load_dataset
import json
import os


def search_and_retrieve(user_input, config):
    """Look up the stored entries closest to *user_input*.

    The input is encoded with ``config["model"].encode`` and the resulting
    vector is passed to ``config["dataset"].get_nearest_examples`` on the
    ``'embeddings'`` column with ``k=5``.

    Args:
        user_input: Value handed to the model's ``encode`` method.
        config: Mapping providing the ``"dataset"`` and ``"model"`` objects.

    Returns:
        A dict with the ``"title"`` (the ``name`` field), ``"purpose"`` and
        ``"score"`` of the first hit, plus ``"top5"``: the list of all hits,
        each one a dict of every returned column except ``"embeddings"``
        with a float ``"score"`` added.

    Raises:
        IndexError: If the search returns no examples, since the first hit
            is read unconditionally.
    """
    index = config["dataset"]
    encoder = config["model"]

    query_vector = encoder.encode(user_input)
    hits = index.get_nearest_examples('embeddings', query_vector, k=5)

    scores = hits.scores
    columns = hits.examples
    # The examples come back column-oriented; the "name" column gives the
    # number of hits.
    hit_count = len(columns['name'])

    # Pivot the columns into one dict per hit, leaving the raw vectors out.
    rows = [
        {
            field: values[pos]
            for field, values in columns.items()
            if field != "embeddings"
        }
        for pos in range(hit_count)
    ]

    for pos, row in enumerate(rows):
        row["score"] = float(scores[pos])

    best = rows[0]
    final_output = {
        "title": best["name"],
        "purpose": best["purpose"],
        "score": best["score"],
        "top5": rows,
    }
    print(final_output)

    return final_output


def _extract_json_object(text):
    """Return the first JSON object embedded in *text* as a dict.

    Newlines are stripped first so that raw line breaks inside string values
    do not make the payload invalid. Each ``{`` is then tried as the start of
    an object with ``JSONDecoder.raw_decode``, which copes with nested objects
    and with ``}`` characters inside string values.

    Raises:
        json.JSONDecodeError: If no ``{`` in *text* starts a valid object.
    """
    flattened = text.replace('\n', '')
    decoder = json.JSONDecoder()
    first_error = None

    start = flattened.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(flattened, start)
        except json.JSONDecodeError as exc:
            # Keep the error for the earliest candidate; it is the most
            # useful one to report if nothing parses.
            if first_error is None:
                first_error = exc
        else:
            return parsed
        start = flattened.find("{", start + 1)

    if first_error is not None:
        raise first_error
    raise json.JSONDecodeError("No JSON object found in model response", flattened, 0)


def generate_tech(user_input: str, user_instructions: str) -> dict:
    """Ask Gemini to describe the technology discussed in *user_input*.

    Builds a prompt from the user input and the optional extra instructions,
    sends it to the ``gemini-2.5-flash`` model with the Google Search tool
    enabled, and parses the JSON object found in the text of the response.

    Args:
        user_input: Text describing or discussing the technology.
        user_instructions: Additional guidance inserted into the prompt.

    Returns:
        The parsed JSON object produced by the model.

    Raises:
        ValueError: If the response carries no text.
        json.JSONDecodeError: If the response text contains no valid JSON
            object (a subclass of ``ValueError``).
    """
    # The prompt asks for exactly what the parser below expects: one JSON
    # object, shown through a syntactically valid example.
    prompt = f"""
    # ROLE

    You are a meticulous senior technical analyst and technology scout. Your task is to generate a technology into a structured JSON object.

    # OBJECTIVE

    Analyze the provided `<USER_INPUT>`. Identify what is technology discussed, focus on the highest level of the technology. 
    Create a complete JSON object according to the schema below. 
    Your final output must be a single, valid JSON document containing a technology you created. 
    The technology should be described with sentences.

    # INSTRUCTIONS & RULES

    1.  **JSON Object Output**: Your entire response MUST be a single JSON object starting with `{{` and ending with `}}`. 
    Do not include any explanatory text before or after the JSON.
    2.  **Discover and Iterate**: Your primary task is to understand the technology and create a JSON entry for it.
    3.  **Descriptive Sentences**: You MUST write clear, full sentences that describe the technology's abilities and the issues it resolves. 
    Do not use single keywords.
    4.  **Infer Where Necessary**: The source material may not contain all details. Infer plausible information based on the context.

    # JSON SCHEMA & EXAMPLE

    Your output must be a single JSON object matching this structure. Note how `purpose` and `problem_types_solved` contain full sentences.

    {{"name": "Generative Watermarking",
      "purpose": "Add an invisible, machine-readable tags to content generated by AI models and enables the tracing and authentication of digital media to its source.",
      "problem_types_solved": "Helps to combat digital misinformation by providing a method to verify content authenticity and addresses the erosion of trust in digital media caused by the proliferation of deepfakes.",
      "advantages": "Way faster to generate by an AI",
      "limitations": "Takes a lot of computational time to generate",
      "domain_tags": "Present in the domains of : AI ethics, cybersecurity, digital media, content moderation"
    }}

    Take into account those additionnal informations if there is any:
    {user_instructions}
    ---
    ***NOW, BEGIN THE TASK.***

    <USER_INPUT>
    {user_input}
    </USER_INPUT>
    """

    client = Client(api_key=os.getenv("GEMINI_API_KEY"))

    # Define the grounding tool
    grounding_tool = types.Tool(
        google_search=types.GoogleSearch()
    )

    # Configure generation settings
    config = types.GenerateContentConfig(
        tools=[grounding_tool]
    )

    response = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=prompt,
        config=config,
    )

    raw_text = response.text
    if not raw_text:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("Gemini returned no text content to parse")

    return _extract_json_object(raw_text)


def send_to_dataset(data, model):
    """Embed *data* and append it to the shared technologies dataset.

    The string form of *data* is encoded with ``model.encode`` and stored
    under the ``"embeddings"`` key of *data* itself, so the caller's mapping
    is modified in place. The ``train`` split of
    ``OrganizedProgrammers/Technologies`` is then loaded, the entry is added
    with ``add_item``, and the dataset returned by that call is pushed back
    to the same repository.

    Args:
        data: Mutable mapping describing the technology; gains an
            ``"embeddings"`` entry.
        model: Object exposing an ``encode`` method.
    """
    repo_id = "OrganizedProgrammers/Technologies"

    # The embedding is computed from the entry before the vector is attached.
    data["embeddings"] = model.encode(str(data))

    current = load_dataset(repo_id, split="train")
    current.add_item(data).push_to_hub(repo_id)