import gradio as gr
import gspread
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from oauth2client.service_account import ServiceAccountCredentials
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.schema import Document
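# Overview: the app reads five HRD worksheets from Google Sheets, flattens each
# row into a natural-language entry, routes each question to the matching sheet
# via keyword-based intent detection, retrieves the top chunks with a LlamaIndex
# vector index, and feeds them as context to a Llama-2 chat model behind a Gradio UI.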
# =============== 1. Cache and Google Sheets Index Initialization ===============
cached_index = None
cached_data = {}
def read_google_sheets_separated():
    try:
        scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
        client = gspread.authorize(creds)
        SPREADSHEET_ID = "1ZLmz1onvPEX4TbgPJbR4LxVZjIluf6BpISTiGS5_5Rg"
        sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]
        spreadsheet = client.open_by_key(SPREADSHEET_ID)
        data_map = {}
        for sheet_name in sheet_names:
            try:
                sheet = spreadsheet.worksheet(sheet_name)
                data = sheet.get_all_values()
                if not data:
                    # Guard against an empty worksheet, which would otherwise raise IndexError below
                    data_map[sheet_name] = []
                    continue
                rows = data[1:]  # skip the header row
                entries = []
                if sheet_name == "datatarget":
                    for row in rows:
                        if len(row) >= 4:
                            try:
                                jumlah = int(row[3])
                                status = "KURANG" if jumlah < 0 else "LEBIH"
                                entries.append(
                                    f"[SELISIH] Mesin: {row[0]} | Kategori: {row[1]} | Bulan: {row[2]} | Selisih: {abs(jumlah)} pcs ({status})"
                                )
                            except ValueError:
                                # Handle rows whose difference column is not a valid integer
                                entries.append(
                                    f"[WARNING] Data tidak valid: {' | '.join(row)}"
                                )
                elif sheet_name == "datacuti":
                    for row in rows:
                        if len(row) >= 3:
                            entries.append(f"{row[0]} memiliki sisa cuti {row[1]} hari pada tahun {row[2]}")
                elif sheet_name == "dataabsen":
                    for row in rows:
                        if len(row) >= 3:
                            entries.append(f"Kehadiran {row[0]} adalah {row[1]} hari pada bulan {row[2]}")
                elif sheet_name == "datalembur":
                    for row in rows:
                        if len(row) >= 3:
                            entries.append(f"{row[0]} telah lembur sebanyak {row[1]} jam pada bulan {row[2]}")
                elif sheet_name == "pkb":
                    for row in rows:
                        if len(row) >= 4:
                            bab, poin, kategori, isi = row[0], row[1], row[2], row[3]
                            entries.append(f"Bab {bab}, Poin {poin} - Kategori: {kategori}\nIsi: {isi}")
                data_map[sheet_name] = entries
            except gspread.exceptions.WorksheetNotFound:
                data_map[sheet_name] = [f"❌ ERROR: Worksheet {sheet_name} tidak ditemukan."]
        return data_map
    except Exception as e:
        return {"error": str(e)}
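# Intent detection: count how many of each intent's keywords occur in the message
# and pick the highest-scoring intent. For example, "berapa sisa cuti saya tahun ini?"
# matches both "cuti" and "sisa cuti", so it routes to the "cuti" sheet.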
def detect_intent(message):
    msg = message.lower()
    intent_keywords = {
        "pkb": ["ketentuan", "aturan", "kompensasi", "hak", "berlaku", "diperbolehkan", "pkb", "perusahaan", "pekerja",
                "tenaga kerja asing", "jam kerja", "kerja lembur", "perjalanan dinas", "pengupahan",
                "pemutusan hubungan kerja", "jaminan sosial", "kesejahteraan", "fasilitas kerja",
                "alih tugas", "kewajiban", "disiplin kerja", "larangan", "sanksi", "mogok",
                "pesangon", "penghargaan masa kerja", "uang pisah"],
        "cuti": ["cuti", "sisa cuti", "jumlah cuti", "berapa hari cuti", "libur"],
        "target": ["target", "aktual", "selisih", "produksi", "mesin", "pcs"],
        "lembur": ["lembur", "jam lembur", "berapa jam", "jam kerja tambahan"],
        "absensi": ["absensi", "hadir", "tidak hadir", "izin", "masuk", "alpha", "berapa hari masuk", "kehadiran"]
    }
    scores = {}
    for intent, keywords in intent_keywords.items():
        scores[intent] = sum(1 for k in keywords if k in msg)
    best_intent = max(scores, key=scores.get)
    # If no keyword matches, fall back to "all" (search every sheet)
    return best_intent if scores[best_intent] > 0 else "all"
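# Builds the global index once at startup: all sheet entries are joined into a
# single Document, split into 256-token chunks with a 20-token overlap, and
# embedded with a multilingual MiniLM model so Indonesian queries match the
# Indonesian entries.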
def initialize_index():
    global cached_index, cached_data
    cached_data = read_google_sheets_separated()
    if "error" in cached_data:
        # A read failure returns {"error": ...}; concatenating that below would crash
        raise RuntimeError(f"Failed to read Google Sheets: {cached_data['error']}")
    all_text = sum(cached_data.values(), [])
    document = Document(text="\n".join(all_text))
    parser = SentenceSplitter(chunk_size=256, chunk_overlap=20)
    nodes = parser.get_nodes_from_documents([document])
    embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    Settings.embed_model = embedding
    cached_index = VectorStoreIndex(nodes)
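# Per-query retrieval: rather than querying the global index, this builds a small
# temporary index over only the sheet selected by detect_intent, which keeps
# retrieval from mixing, say, leave balances into a production-target answer.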
def search_google_sheets_vector(query):
    if not cached_data:
        initialize_index()
    intent = detect_intent(query)
    # Map each detected intent to its worksheet; unknown intents search everything
    intent_to_sheet = {
        "pkb": "pkb",
        "cuti": "datacuti",
        "target": "datatarget",
        "absensi": "dataabsen",
        "lembur": "datalembur",
    }
    if intent in intent_to_sheet:
        selected_data = cached_data.get(intent_to_sheet[intent], [])
    else:
        selected_data = sum(cached_data.values(), [])
    document = Document(text="\n".join(selected_data))
    parser = SentenceSplitter(chunk_size=256, chunk_overlap=30)
    nodes = parser.get_nodes_from_documents([document])
    embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    Settings.embed_model = embedding
    temp_index = VectorStoreIndex(nodes)
    retriever = temp_index.as_retriever(similarity_top_k=3)
    retrieved_nodes = retriever.retrieve(query)
    results = [node.text for node in retrieved_nodes]
    return "\n".join(results) if results else "Maaf, saya tidak menemukan informasi yang relevan."
# =============== 2. Load Model Transformers ===============
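# Note: Llama-2-7b in float16 needs roughly 14 GB of accelerator memory
# (7B parameters x 2 bytes); device_map="auto" lets Accelerate spill layers
# to CPU when the GPU is smaller, at a large speed cost.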
def load_model():
    model_id = "NousResearch/Llama-2-7b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        repetition_penalty=1.2,
        do_sample=True,
    )
    return pipe
# =============== 3. Prompt Generator ===============
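# The prompt uses labeled ### sections (SISTEM / DATA / PERTANYAAN / JAWABAN);
# generate_response later splits the model output on "### JAWABAN:" to keep only
# the completion, so the marker names must stay in sync between the two functions.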
def generate_prompt(user_message, context_data):
    prompt = f"""
### SISTEM:
Anda adalah chatbot HRD yang membantu karyawan memahami administrasi perusahaan.
Jangan menjawab menggunakan Bahasa Inggris.
Gunakan Bahasa Indonesia dengan gaya profesional dan ramah.
Jika informasi tidak tersedia dalam dokumen, katakan dengan sopan bahwa Anda tidak tahu.
Jawaban harus singkat, jelas, dan sesuai konteks.
Jangan memberikan jawaban untuk pertanyaan yang tidak diajukan oleh pengguna.
Jangan menyertakan rekomendasi pertanyaan lain.
### DATA:
{context_data}
### PERTANYAAN:
{user_message}
### JAWABAN:
"""
    return prompt.strip()
# =============== 4. Generate Response ===============
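# Heuristic follow-up detection: if the message contains a connective such as
# "tadi" ("earlier") or "kalau begitu" ("in that case"), the previous user turn
# is prepended before retrieval so the vector search sees the full context.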
def should_use_history(message):
    keywords = ["jika", "tadi", "sebelumnya", "kalau begitu", "gimana kalau", "lanjutkan", "terus", "bagaimana dengan", "berarti", "jadi", "oke lalu"]
    return any(kata in message.lower() for kata in keywords)

def generate_response(message, history, pipe):
    if should_use_history(message) and history:
        # Assumes Gradio's tuple-style history: a list of (user, bot) pairs
        previous_message = history[-1][0]
        combined_message = previous_message + " " + message
    else:
        combined_message = message
    context = search_google_sheets_vector(combined_message)
    if "❌ ERROR" in context or context.strip() == "" or "tidak ditemukan" in context.lower():
        return "Maaf, saya tidak menemukan informasi yang relevan untuk pertanyaan tersebut."
    full_prompt = generate_prompt(message, context)
    response = pipe(full_prompt)[0]["generated_text"]
    # The pipeline returns prompt + completion; keep only the text after the answer marker
    cleaned = response.split("### JAWABAN:")[-1].strip()
    return cleaned
# =============== 5. Run Gradio ===============
def main():
    pipe = load_model()
    initialize_index()
    def chatbot_fn(message, history):
        return generate_response(message, history, pipe)
    gr.ChatInterface(
        fn=chatbot_fn,
        title="Chatbot HRD - Transformers",
        theme="compact"
    ).launch(share=True)

if __name__ == "__main__":
    main()
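# A minimal sketch of how to run this Space locally (package names are inferred
# from the imports above and not pinned by the source):
#   pip install gradio gspread oauth2client torch transformers accelerate \
#       llama-index llama-index-embeddings-huggingface
# Place a Google service-account key as credentials.json next to this file,
# share the spreadsheet with that account (read access), then run:
#   python app.py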