import torch
import gradio as gr
import pandas as pd
import numpy as np
import json
import os
import uuid
import spaces
import re
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, AutoPeftModelForCausalLM
import io
from openpyxl import load_workbook
from typing import List, Dict, Any, Tuple
from utils import *
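
# utils is assumed to provide create_prompt, xlsx_to_json, json_to_jsonl, and
# json_to_markdown, which are used below.

# The module-level model loading below is kept for reference but commented out,
# presumably so the model is only loaded inside the @spaces.GPU-decorated
# function while a ZeroGPU allocation is active.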
# base_model_id = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"
# lora_path = "tat-llm-final-e4"
# base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16)
# model = PeftModel.from_pretrained(base_model, lora_path)
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = model.to(device)
# model.eval()
# tokenizer = AutoTokenizer.from_pretrained(lora_path)
@spaces.GPU(duration=60)
def generate_answer(json_data: Dict[str, Any], question: str) -> str:
"""
Generate answer using the fine-tuned model.
"""
base_model_id = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"
lora_path = "tat-llm-final-e4"
# Load base model and LoRA adapter
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(base_model, lora_path)
tokenizer = AutoTokenizer.from_pretrained(lora_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()
prompt = create_prompt(json_data, question)
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
# Move to GPU if available
device = next(model.parameters()).device
inputs = {k: v.to(device) for k, v in inputs.items()}
input_length = inputs["input_ids"].shape[1]
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=1024,
do_sample=False,
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.eos_token_id
)
generated_tokens = outputs[0][input_length:]
answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)
return answer
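
# Hypothetical usage (the exact json_data schema depends on utils.xlsx_to_json):
#   sample = {"paragraphs": ["Revenue grew from 100 in 2022 to 120 in 2023."],
#             "table": [["Year", "Revenue"], ["2022", "100"], ["2023", "120"]]}
#   print(generate_answer(sample, "By what percentage did revenue grow?"))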
# Gradio interface functions
def process_xlsx(file):
"""
Process uploaded XLSX file and return JSON, JSONL, and Markdown.
"""
if file is None:
return None, "", "", ""
try:
json_data = xlsx_to_json(file.name)
json_str = json.dumps(json_data, indent=2, ensure_ascii=False)
jsonl_str = json_to_jsonl(json_data)
markdown_str = json_to_markdown(json_data)
return json_data, json_str, jsonl_str, markdown_str
except Exception as e:
return None, f"Error: {str(e)}", "", ""
def chat_interface(json_data, question, history):
"""
Chat interface for Q&A.
"""
if json_data is None:
return history + [[question, "Please upload an XLSX file first."]]
if not question.strip():
return history + [[question, "Please enter a question."]]
try:
answer = generate_answer(json_data, question)
return history + [[question, answer]]
except Exception as e:
return history + [[question, f"Error generating answer: {str(e)}"]]
# Gradio UI
with gr.Blocks(title="terTATa-LLM: Dari Tabel dan Teks Menjadi Langkah Bisnis Strategis", theme=gr.themes.Soft()) as demo:
gr.HTML("""
<style>
body, .gradio-container {
font-family: 'Poppins', sans-serif;
}
h1, h2, h3, h4, h5 {
font-family: 'Poppins', sans-serif;
}
</style>
<link href="https://fonts.googleapis.com/css2?family=Poppins&display=swap" rel="stylesheet">
""")
gr.Markdown("""
# terTATa-LLM: Dari Tabel dan Teks Menjadi Langkah Bisnis Strategis
Unggah berkas XLSX berisi tabel dan paragraf, lalu ajukan pertanyaan tentang data tersebut.
Sistem akan mengonversi berkas Anda ke format JSON dan menggunakan model terTATa-LLM untuk menjawab pertanyaan.
""")
    json_data_state = gr.State()

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(
                label="Upload XLSX File",
                file_types=[".xlsx"],
                type="filepath"
            )
            process_btn = gr.Button("Process File", variant="primary")

            with gr.Tabs():
                with gr.Tab("Markdown Preview"):
                    markdown_output = gr.Markdown(label="Markdown Preview")
                with gr.Tab("JSON Output"):
                    json_output = gr.Code(
                        label="JSON Format",
                        language="json",
                        lines=15
                    )
                with gr.Tab("JSONL Output"):
                    jsonl_output = gr.Code(
                        label="JSONL Format",
                        language="json",
                        lines=5
                    )

        with gr.Column(scale=1):
            gr.Markdown("### Ajukan Pertanyaan Mengenai Data Anda")
            chatbot = gr.Chatbot(height=400)
            msg = gr.Textbox(
                label="Prompt",
                placeholder="Ajukan pertanyaan tentang data tabel...",
                lines=2
            )
            with gr.Row():
                submit_btn = gr.Button("Submit", variant="primary")
                clear_btn = gr.Button("Clear Chat")

            gr.Examples(
                examples=[
                    "Apa saja wawasan yang bisa kita ambil dari data ini?",
                    "Bagaimana perubahan dari tahun ke tahun?",
                    "Apa saja tren utama yang terlihat dalam data?",
                    "Hitung persentase perubahan antar tahun!",
                    "Rekomendasi apa yang dapat diberikan berdasarkan data ini?"
                ],
                inputs=msg
            )
    # Event wiring: process the uploaded file, answer questions (via Enter key
    # or the Submit button), and clear the chat history.
    process_btn.click(
        fn=process_xlsx,
        inputs=[file_input],
        outputs=[json_data_state, json_output, jsonl_output, markdown_output]
    )

    msg.submit(
        fn=chat_interface,
        inputs=[json_data_state, msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",  # clear the textbox after the answer is posted
        outputs=[msg]
    )

    submit_btn.click(
        fn=chat_interface,
        inputs=[json_data_state, msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",  # clear the textbox after the answer is posted
        outputs=[msg]
    )

    clear_btn.click(
        lambda: [],  # reset the chat history
        outputs=[chatbot]
    )
if __name__ == "__main__":
    demo.queue().launch(share=True)