|
import json

import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import pipeline
|
|
|
|
|
# Load the retrieval corpus once, at import time.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default.
# NOTE(review): `corpus` is later passed to TfidfVectorizer.fit_transform and
# indexed by integer position, so the file is presumably a JSON array of
# passage strings -- confirm against the data file.
with open("electricity_corpus.json", "r", encoding="utf-8") as f:
    corpus = json.load(f)
|
|
|
|
|
# Fit a TF-IDF model on the corpus. `tfidf_matrix` is the document-term
# matrix that incoming questions are compared against.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(corpus)
|
|
|
|
|
# Question-answering pipeline backed by the
# `distilbert-base-cased-distilled-squad` checkpoint. It is built once at
# import time so that each call to `answer_question` reuses the same model.
qa_pipeline = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
)
|
|
|
|
|
def get_top_contexts(question, top_k=3):
    """Return the corpus entries most similar to *question*.

    The question is transformed with the fitted TF-IDF vectorizer and scored
    against every row of ``tfidf_matrix`` by cosine similarity.

    Args:
        question: Query text to match against the corpus.
        top_k: Maximum number of corpus entries to return.

    Returns:
        Up to ``top_k`` entries of ``corpus``, most similar first. An empty
        list when ``top_k`` is not positive.
    """
    # Guard: with top_k == 0 the slice ``[-0:]`` below would select *every*
    # index, and a negative top_k would drop only the least similar entries.
    if top_k <= 0:
        return []

    question_vec = vectorizer.transform([question])
    similarities = cosine_similarity(question_vec, tfidf_matrix).flatten()
    # argsort is ascending: keep the last top_k indices, then reverse them so
    # the best match comes first.
    top_indices = similarities.argsort()[-top_k:][::-1]
    return [corpus[i] for i in top_indices]
|
|
|
|
|
# Upper bound, in characters, on the context text handed to the QA pipeline.
_MAX_CONTEXT_CHARS = 4096


def answer_question(question, top_k=3):
    """Answer *question* from the most relevant corpus passages.

    The ``top_k`` best-matching passages are joined with spaces, truncated to
    ``_MAX_CONTEXT_CHARS`` characters, and passed to the question-answering
    pipeline as context.

    Args:
        question: The user's question. ``None``, empty and whitespace-only
            values are rejected with a prompt message.
        top_k: Number of passages to retrieve as context.

    Returns:
        The answer string extracted by the pipeline, or a short message when
        the question is blank or no context text is available.
    """
    # `question` can be None (not just ""), in which case calling .strip()
    # directly would raise AttributeError.
    if not question or not question.strip():
        return "Please enter a valid question."

    contexts = get_top_contexts(question, top_k)
    combined_context = " ".join(contexts)[:_MAX_CONTEXT_CHARS]

    # The QA pipeline rejects an empty context, so answer gracefully instead
    # of letting the error surface in the UI.
    if not combined_context.strip():
        return "No relevant context was found for this question."

    result = qa_pipeline(question=question, context=combined_context)
    return result["answer"]
|
|
|
|
|
# Gradio UI: one text box in, one text box out, wired to `answer_question`.
# NOTE(review): the leading "π" in the title looks like a mis-decoded emoji.
# It is left untouched here -- confirm the intended character before
# changing it.
iface = gr.Interface(
    fn=answer_question,
    inputs=gr.Textbox(label="Ask your question about electricity usage..."),
    outputs=gr.Textbox(label="Answer"),
    title="π Electricity Data Q&A",
    description="Ask questions like 'What was the price for residential in Texas in Jan 2001?' or 'Which state had highest revenue in Jan 2001?'",
)
|
|
|
|
|
# Start the web UI only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    iface.launch()
|
|
|
|