|
|
|
import sentence_transformers |
|
import pandas as pd |
|
import os |
|
|
|
from fastapi import FastAPI, HTTPException |
|
from huggingface_hub import hf_hub_download, login |
|
from src.processor import send_to_dataset,search_and_retrieve,generate_tech |
|
from typing import List, Dict |
|
from pydantic import BaseModel |
|
from datasets import load_dataset |
|
from dotenv import load_dotenv |
|
|
|
# Load environment variables via python-dotenv before anything below reads them.
load_dotenv()

# Authenticate against the Hugging Face Hub with the HF_TOKEN variable.
# NOTE(review): os.getenv returns None when HF_TOKEN is unset, so login() is
# then called with token=None — confirm that is acceptable for a
# non-interactive deployment.
_hf_token = os.getenv("HF_TOKEN")
login(token=_hf_token)

# The FastAPI application object on which the endpoints below are registered.
app = FastAPI(
    title="My Standalone API",
    description="An API hosted on Hugging Face Spaces",
    version="1.0.0",
)

# Sentence-embedding model shared by every request handler.
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = sentence_transformers.SentenceTransformer(_EMBEDDING_MODEL_NAME)

# "train" split of the technologies dataset, loaded once at import time, with
# a FAISS index built over its "embeddings" column.
_DATASET_REPO = "OrganizedProgrammers/Technologies"
dataset = load_dataset(_DATASET_REPO, split="train")
dataset.add_faiss_index(column="embeddings")
|
|
|
class SearchInput(BaseModel):
    """
    Request body accepted by the /search-technologies endpoint.
    """

    # Text handed to search_and_retrieve as the search query.
    title: str
|
|
|
class SearchOutput(BaseModel):
    """
    Response body returned by the /search-technologies endpoint.
    """

    # Title of the matched technology.
    title: str
    # Purpose text of the matched technology.
    purpose: str
    # Match score; /generate-technology treats values >= 0.7 as a strong match.
    score: float
    # List of dicts produced by search_and_retrieve.
    # NOTE(review): presumably the five best matches — confirm against
    # src.processor.search_and_retrieve.
    top5: List[Dict]
|
|
|
class GenerateInput(BaseModel):
    """
    Request body accepted by the /generate-technology endpoint.
    """

    # Title of the technology to generate; also used for the similarity check.
    title: str
    # Free-text instructions forwarded to generate_tech together with the title.
    instructions: str
    # When True, generation proceeds even if a high-scoring match exists.
    force: bool = False
|
|
|
class GenerateOutput(BaseModel):
    """
    Response body returned by the /generate-technology endpoint.

    Every field is a plain string.
    """

    name: str
    purpose: str
    problem_types_solved: str
    advantages: str
    limitations: str
    domain_tags: str
|
|
|
@app.post("/search-technologies", response_model=SearchOutput)
def post_search(payload: SearchInput):
    """
    Endpoint that returns a search result.

    The submitted title is passed to ``search_and_retrieve`` together with the
    module-level dataset and embedding model, and whatever that helper returns
    is sent back as the response.
    """
    return search_and_retrieve(
        payload.title,
        {"dataset": dataset, "model": model},
    )
|
|
|
# Search score at or above which an existing technology is considered a match
# for the requested title, which blocks generation unless it is forced.
_DUPLICATE_SCORE_THRESHOLD = 0.7


@app.post("/generate-technology", response_model=GenerateOutput)
def post_generate_and_push(payload: GenerateInput):
    """
    Endpoint to generate a technology and push it to the dataset.

    Unless ``payload.force`` is set, the title is first looked up with
    ``search_and_retrieve``; when the best match scores at or above the
    duplicate threshold the request is rejected. Otherwise the technology is
    produced by ``generate_tech``, handed to ``send_to_dataset`` and returned.

    Args:
        payload: Title and instructions for the technology, plus the ``force``
            flag that bypasses the duplicate check.

    Returns:
        The value produced by ``generate_tech``.

    Raises:
        HTTPException: 409 (Conflict) when a high-scoring match already exists
            and ``force`` is false. The caller can resolve this by changing
            the title or resending with ``force=true``.
    """
    # The search result is only used for the duplicate check, so the lookup is
    # skipped entirely when the caller forces generation.
    if not payload.force:
        config = {"dataset": dataset, "model": model}
        res = search_and_retrieve(payload.title, config)
        if res["score"] >= _DUPLICATE_SCORE_THRESHOLD:
            # A duplicate is a conflict with existing data that the client can
            # act on, not an internal server failure, hence 409 and not 500.
            raise HTTPException(
                status_code=409,
                detail=f"Cannot generate the technology a high score of {res['score']} have been found for the technology : {res['title']}",
            )

    json_response = generate_tech(payload.title, payload.instructions)

    # NOTE(review): the module-level `dataset` is not reloaded here, so the
    # new entry is presumably not visible to later searches in this process —
    # confirm against send_to_dataset.
    send_to_dataset(json_response, model)

    return json_response