import os
import json

from transformers import pipeline
from llama_index import (
    SimpleDirectoryReader,
    GPTVectorStoreIndex,
    LLMPredictor,
    OpenAI,
)

# Module-level caches so the index (expensive embedding work) and the LLM
# predictor (expensive model load in the local-GPU path) are each built at
# most once per process.
_index = None
_predictor = None


def query_symptoms_tool(prompt_json: str):
    """Tool-style wrapper: parse a JSON payload and delegate to query_symptoms().

    Args:
        prompt_json: JSON string whose "raw_input" key holds the symptom text.

    Returns:
        Whatever query_symptoms() returns for the extracted text.

    Raises:
        json.JSONDecodeError: If prompt_json is not valid JSON.
        KeyError: If the payload lacks a "raw_input" key.
    """
    data = json.loads(prompt_json)
    return query_symptoms(data["raw_input"])


def get_llm_predictor():
    """Return a cached LLMPredictor, choosing the backend from the environment.

    Uses a local transformers text-generation pipeline (GPT-2, GPU device 0)
    when the USE_LOCAL_GPU environment variable is "1"; otherwise uses the
    OpenAI provider with temperature 0. The predictor is cached module-wide so
    the local model is loaded at most once per process instead of once per
    query.
    """
    global _predictor
    if _predictor is None:
        if os.getenv("USE_LOCAL_GPU") == "1":
            # Local GPU inference using GPT-2 as an example model.
            # NOTE(review): legacy llama_index's LLMPredictor expects a
            # LangChain-style LLM object, not a raw transformers pipeline —
            # confirm this works against the installed llama_index version.
            local_pipe = pipeline("text-generation", model="gpt2", device=0)
            _predictor = LLMPredictor(llm=local_pipe)
        else:
            # Default to the OpenAI provider.
            _predictor = LLMPredictor(llm=OpenAI(temperature=0))
    return _predictor


def build_index(data_path="data/icd10cm_tabular_2025"):  # noqa: C901
    """Build (or return the cached) GPTVectorStoreIndex from ICD documents.

    Args:
        data_path: Directory containing the ICD-10-CM source documents.
            Only consulted on the first call; later calls return the cache.

    Returns:
        The module-wide GPTVectorStoreIndex instance.
    """
    global _index
    if _index is None:
        # Load documents from the ICD data directory.
        docs = SimpleDirectoryReader(data_path).load_data()
        # Initialize the index with the (cached) LLM predictor.
        _index = GPTVectorStoreIndex.from_documents(
            docs, llm_predictor=get_llm_predictor()
        )
    return _index


def query_symptoms(prompt: str, top_k: int = 5):
    """Query the ICD index for the given symptom prompt.

    Args:
        prompt: Free-text symptom description.
        top_k: Number of most-similar documents to retrieve.

    Returns:
        The query engine's response object for the prompt.
    """
    idx = build_index()
    # Reuse the cached predictor rather than constructing a new one per query.
    query_engine = idx.as_query_engine(
        similarity_top_k=top_k, llm_predictor=get_llm_predictor()
    )
    return query_engine.query(prompt)