import gradio as gr
import gspread
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from oauth2client.service_account import ServiceAccountCredentials
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.schema import Document

# =============== 1. Cache and Google Sheets Index Initialization ===============
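# Module-level cache: the sheet data and vector index are built once at
# startup (or lazily on the first query) and reused across requests.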
cached_index = None
cached_data = {}

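# Reads every worksheet once and flattens each row into a short Indonesian
# sentence, so the embedding model can later retrieve rows as free text.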
def read_google_sheets_separated():
    try:
        scope = ["https://www.googleapis.com/auth/spreadsheets", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_name("credentials.json", scope)
        client = gspread.authorize(creds)

        SPREADSHEET_ID = "1ZLmz1onvPEX4TbgPJbR4LxVZjIluf6BpISTiGS5_5Rg"
        sheet_names = ["datatarget", "datacuti", "dataabsen", "datalembur", "pkb"]
        spreadsheet = client.open_by_key(SPREADSHEET_ID)

        data_map = {}

        for sheet_name in sheet_names:
            try:
                sheet = spreadsheet.worksheet(sheet_name)
                data = sheet.get_all_values()
                rows = data[1:]  # skip the header row (also avoids an IndexError on empty sheets)
                entries = []

                if sheet_name == "datatarget":
                    for row in rows:
                        if len(row) >= 4:
                            try:
                                jumlah = int(row[3])
                                status = "KURANG" if jumlah < 0 else "LEBIH"
                                entries.append(
                                    f"[SELISIH] Mesin: {row[0]} | Kategori: {row[1]} | Bulan: {row[2]} | Selisih: {abs(jumlah)} pcs ({status})"
                                )
                            except ValueError:
                                # Handle rows whose difference column is not a valid integer
                                entries.append(
                                    f"[WARNING] Data tidak valid: {' | '.join(row)}"
                                )

                elif sheet_name == "datacuti":
                    for row in rows:
                        if len(row) >= 3:
                            entries.append(f"{row[0]} memiliki sisa cuti {row[1]} hari pada tahun {row[2]}")

                elif sheet_name == "dataabsen":
                    for row in rows:
                        if len(row) >= 3:
                            entries.append(f"Kehadiran {row[0]} adalah {row[1]} hari pada bulan {row[2]}")

                elif sheet_name == "datalembur":
                    for row in rows:
                        if len(row) >= 3:
                            entries.append(f"{row[0]} telah lembur sebanyak {row[1]} jam pada bulan {row[2]}")

                elif sheet_name == "pkb":
                    for row in rows:
                        if len(row) >= 4:
                            bab, poin, kategori, isi = row[0], row[1], row[2], row[3]
                            entries.append(f"Bab {bab}, Poin {poin} - Kategori: {kategori}\nIsi: {isi}")

                data_map[sheet_name] = entries
            except gspread.exceptions.WorksheetNotFound:
                data_map[sheet_name] = [f"❌ ERROR: Worksheet {sheet_name} tidak ditemukan."]

        return data_map
    except Exception as e:
        return {"error": str(e)}

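# Keyword-based intent routing: the intent whose keyword list scores the most
# hits in the lowercased message wins; ties resolve to the first intent in
# dict order, and zero hits fall back to searching all sheets ("all").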
def detect_intent(message):
    msg = message.lower()

    intent_keywords = {
        "pkb": ["ketentuan", "aturan", "kompensasi", "hak", "berlaku", "diperbolehkan", "pkb", "perusahaan", "pekerja", 
                "tenaga kerja asing", "jam kerja", "kerja lembur", "perjalanan dinas", "pengupahan", 
                "pemutusan hubungan kerja", "jaminan sosial", "kesejahteraan", "fasilitas kerja", 
                "alih tugas", "kewajiban", "disiplin kerja", "larangan", "sanksi", "mogok", 
                "pesangon", "penghargaan masa kerja", "uang pisah"],

        "cuti": ["cuti", "sisa cuti", "jumlah cuti", "berapa hari cuti", "libur"],

        "target": ["target", "aktual", "selisih", "produksi", "mesin", "pcs"],

        "lembur": ["lembur", "jam lembur", "berapa jam", "jam kerja tambahan"],

        "absensi": ["absensi", "hadir", "tidak hadir", "izin", "masuk", "alpha", "berapa hari masuk", "kehadiran"]
    }

    scores = {}
    for intent, keywords in intent_keywords.items():
        scores[intent] = sum(1 for k in keywords if k in msg)

    best_intent = max(scores, key=scores.get)
    
    # No keyword matched: fall back to searching all sheets
    return best_intent if scores[best_intent] > 0 else "all"

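# Builds one vector index over the concatenated text of every sheet. Note that
# the embedding model is instantiated again per query in
# search_google_sheets_vector; caching a single instance would avoid reloads.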
def initialize_index():
    global cached_index, cached_data
    cached_data = read_google_sheets_separated()
    if "error" in cached_data:
        # A failed read returns {"error": str}; summing a str into a list would raise.
        raise RuntimeError(f"Failed to read Google Sheets: {cached_data['error']}")
    all_text = sum(cached_data.values(), [])
    document = Document(text="\n".join(all_text))
    parser = SentenceSplitter(chunk_size=256, chunk_overlap=20)
    nodes = parser.get_nodes_from_documents([document])

    embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    Settings.embed_model = embedding
    cached_index = VectorStoreIndex(nodes)

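# Routes the query to the sheet matching its detected intent, then builds a
# throwaway index over just that sheet and retrieves the top-3 chunks.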
def search_google_sheets_vector(query):
    if not cached_data:
        initialize_index()

    intent = detect_intent(query)

    sheet_for_intent = {
        "pkb": "pkb",
        "cuti": "datacuti",
        "target": "datatarget",
        "absensi": "dataabsen",
        "lembur": "datalembur",
    }
    if intent in sheet_for_intent:
        selected_data = cached_data.get(sheet_for_intent[intent], [])
    else:
        selected_data = sum(cached_data.values(), [])

    document = Document(text="\n".join(selected_data))
    parser = SentenceSplitter(chunk_size=256, chunk_overlap=30)
    nodes = parser.get_nodes_from_documents([document])

    embedding = HuggingFaceEmbedding("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    Settings.embed_model = embedding
    temp_index = VectorStoreIndex(nodes)

    retriever = temp_index.as_retriever(similarity_top_k=3)
    # The original code set `retriever.similarity_cutoff = 1.0`, but the
    # retriever never reads that attribute (and a 1.0 cutoff would discard
    # every node if it were applied), so the line is dropped.
    retrieved_nodes = retriever.retrieve(query)
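    # A minimal sketch of applying a real score threshold, assuming
    # llama-index's SimilarityPostprocessor (the 0.5 cutoff is illustrative,
    # not tuned):
    #
    #   from llama_index.core.postprocessor import SimilarityPostprocessor
    #   postprocessor = SimilarityPostprocessor(similarity_cutoff=0.5)
    #   retrieved_nodes = postprocessor.postprocess_nodes(retrieved_nodes)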

    results = [node.text for node in retrieved_nodes]
    return "\n".join(results) if results else "Maaf, saya tidak menemukan informasi yang relevan."

# =============== 2. Load Model Transformers ===============
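# Loads the Llama-2 chat model once at startup. device_map="auto" lets
# accelerate spread layers across the available devices; float16 halves
# memory versus float32 (a 7B model still needs roughly 14 GB in fp16).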
def load_model():
    model_id = "NousResearch/Llama-2-7b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        torch_dtype=torch.float16
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        repetition_penalty=1.2,
        do_sample=True,
    )
    return pipe

# =============== 3. Prompt Generator ===============
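# Builds an instruction-style prompt with delimited SISTEM / DATA /
# PERTANYAAN blocks; generate_response later splits the model output on
# "### JAWABAN:" to keep only the answer.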
def generate_prompt(user_message, context_data):
    prompt = f"""
### SISTEM:
Anda adalah chatbot HRD yang membantu karyawan memahami administrasi perusahaan. 
Jangan menjawab menggunakan Bahasa Inggris. 
Gunakan Bahasa Indonesia dengan gaya profesional dan ramah. 
Jika informasi tidak tersedia dalam dokumen, katakan dengan sopan bahwa Anda tidak tahu.
Jawaban harus singkat, jelas, dan sesuai konteks. 
Jangan memberikan jawaban untuk pertanyaan yang tidak diajukan oleh pengguna. 
Jangan menyertakan rekomendasi pertanyaan lain.
### DATA:
{context_data}
### PERTANYAAN:
{user_message}
### JAWABAN:
"""
    return prompt.strip()

# =============== 4. Generate Response ===============
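# Simple follow-up detection: if the message contains an Indonesian
# continuation cue ("tadi", "sebelumnya", ...), the previous user turn is
# prepended so the retrieval query carries the earlier context.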
def should_use_history(message):
    keywords = ["jika", "tadi", "sebelumnya","kalau begitu", "gimana kalau", "lanjutkan", "terus", "bagaimana dengan", "berarti", "jadi", "oke lalu"]
    return any(kata in message.lower() for kata in keywords)

def generate_response(message, history, pipe):
    if should_use_history(message) and history:
        previous_message = history[-1][0]
        combined_message = previous_message + " " + message
    else:
        combined_message = message

    context = search_google_sheets_vector(combined_message)

    if "❌ ERROR" in context or context.strip() == "" or "tidak ditemukan" in context.lower():
        return "Maaf, saya tidak menemukan informasi yang relevan untuk pertanyaan tersebut."

    full_prompt = generate_prompt(message, context)
    response = pipe(full_prompt)[0]["generated_text"]
    cleaned = response.split("### JAWABAN:")[-1].strip()

    # gr.ChatInterface manages the conversation history itself; appending to
    # the local list here (as the original code did) had no visible effect.
    return cleaned

# =============== 5. Run Gradio ===============
def main():
    pipe = load_model()
    initialize_index()

    def chatbot_fn(message, history):
        return generate_response(message, history, pipe)

    gr.ChatInterface(
        fn=chatbot_fn,
        title="Chatbot HRD - Transformers",
        theme="compact"
    ).launch(share=True)

if __name__ == "__main__":
    main()