# File size: 5,205 Bytes
# 4fe7b26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import csv
import torch
import argparse
import numpy as np
import pandas as pd
import huggingface_hub
from transformers import pipeline
from metrics import compute_scores, save_scores

# Absolute directory of this script; dataset paths are resolved against it so
# the script does not depend on the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# SECURITY: a Hub access token must never be embedded in source control
# (splitting it across string fragments does not protect it). It is read from
# the environment instead. Any token that was previously committed to this
# file must be treated as leaked and revoked in the Hugging Face settings.
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
if token:
    huggingface_hub.login(token=token)
# NOTE(review): when no token is exported, login is skipped and model loading
# relies on credentials already cached on this machine (e.g. from
# `huggingface-cli login`); confirm that account has access to MODEL_ID.

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"

# Shared text-generation pipeline used by generate_summary(). It is built at
# import time, so importing this module loads the model.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Experiment names accepted by the --experiment command-line option.
EXPERIMENTS = ["zero-shot", "1-shot", "2-shot"]


def generate_summary(texts):
    """Generate one model reply per prompt using the module-level ``pipe``.

    Each prompt is wrapped in its own single-turn conversation. Passing a flat
    list of message dicts would make the text-generation pipeline treat the
    whole batch as ONE conversation with several user turns and return a
    single output, which breaks the per-prompt indexing done by the callers.

    Args:
        texts: Iterable of prompt strings.

    Returns:
        A list with one entry per prompt, in input order. Each entry is the
        pipeline result for that prompt: a list of dicts whose
        ``"generated_text"`` is the newly generated reply as a string
        (``return_full_text=False`` drops the echoed prompt messages), so
        ``output[0]["generated_text"]`` is the summary text. Returns an empty
        list when ``texts`` is empty.
    """
    texts = list(texts)
    if not texts:
        # The pipeline inspects the first element of its input, so an empty
        # batch is answered here without calling the model.
        return []
    conversations = [[{"role": "user", "content": text}] for text in texts]
    return pipe(conversations, max_new_tokens=128, return_full_text=False)

def run_experiment(experiment_type, num_examples):
    """Load the evaluation data and dispatch to the requested experiment.

    Args:
        experiment_type: One of ``"zero-shot"``, ``"1-shot"`` or ``"2-shot"``.
        num_examples: Number of rows to randomly sample from each test set.
            A falsy value (``None`` or ``0``) evaluates the full test sets.
            Values larger than a test set are clamped to its size.

    Raises:
        ValueError: If ``experiment_type`` is not a known experiment. This is
            checked before any dataset is read from disk.
    """
    shots_by_experiment = {"zero-shot": 0, "1-shot": 1, "2-shot": 2}
    if experiment_type not in shots_by_experiment:
        raise ValueError("Invalid experiment type.")
    shots = shots_by_experiment[experiment_type]

    print(f"Starting {experiment_type} Experiment with Llama-3.2-1B-Instruct")

    def load(stem):
        # All CSVs live in the "datasets" folder next to this script.
        return pd.read_csv(os.path.join(BASE_DIR, f"datasets/{stem}.csv"))

    def subsample(frame):
        if not num_examples:
            return frame
        # Clamp so that asking for more rows than exist evaluates the whole
        # set instead of raising inside DataFrame.sample.
        return frame.sample(min(num_examples, len(frame)))

    test = subsample(load("test"))
    test_fr = subsample(load("test_fr"))
    test_cross = subsample(load("test_cross"))

    if shots == 0:
        run_zero_shot(test, test_fr, test_cross)
        return

    # Few-shot runs additionally need the training sets to draw examples from.
    train = load("train")
    train_fr = load("train_fr")
    train_cross = load("train_cross")
    run_n_shot(test, test_fr, test_cross, train, train_fr, train_cross, shots=shots)


def run_zero_shot(test, test_fr, test_cross, batch_size=16):
    """Evaluate zero-shot summarization on the three test sets.

    For each dataset a prompt is built from every ``source`` value, summaries
    are generated in batches via ``generate_summary``, and the results are
    scored against the ``target`` column with ``compute_scores`` and persisted
    with ``save_scores``.

    Args:
        test: English test DataFrame with ``source`` and ``target`` columns.
        test_fr: French test DataFrame with the same columns.
        test_cross: Cross-lingual test DataFrame with the same columns.
        batch_size: Number of prompts handed to ``generate_summary`` per call.
    """
    print("Running Zero-Shot Evaluation...")
    for dataset, name in [(test, "English"), (test_fr, "French"), (test_cross, "Cross-lingual")]:
        # The cross-lingual split asks for an English summary explicitly.
        prefix = "Summarize in English: " if name == "Cross-lingual" else "Summarize the text: "
        texts = [f"{prefix}{source}\n Summary: " for source in dataset["source"]]
        reference_summaries = dataset["target"].tolist()

        generated_summaries = []
        for start in range(0, len(texts), batch_size):
            batch_outputs = generate_summary(texts[start:start + batch_size])
            # Each output is a list of candidates; keep the first one's text.
            generated_summaries.extend(
                output[0]["generated_text"] for output in batch_outputs
            )

        scores = compute_scores(generated_summaries, reference_summaries)
        save_scores(scores, "Llama-3.2-1B-Instruct", "zero-shot", name)
        print(f"{name} Scores:", scores)


def run_n_shot(test, test_fr, test_cross, train, train_fr, train_cross, shots, batch_size=16):
    """Evaluate few-shot summarization on the three test sets.

    For every test row, ``shots`` rows are sampled at random from the matching
    training set and rendered as solved examples ahead of the actual query.
    Summaries are generated in batches via ``generate_summary``, scored against
    the ``target`` column with ``compute_scores`` and persisted with
    ``save_scores``.

    Args:
        test: English test DataFrame with ``source`` and ``target`` columns.
        test_fr: French test DataFrame with the same columns.
        test_cross: Cross-lingual test DataFrame with the same columns.
        train: English training DataFrame the examples are drawn from.
        train_fr: French training DataFrame.
        train_cross: Cross-lingual training DataFrame.
        shots: Number of in-context examples per prompt.
        batch_size: Number of prompts handed to ``generate_summary`` per call.
    """
    print(f"Running {shots}-Shot Evaluation...")
    splits = [
        (test, train, "English"),
        (test_fr, train_fr, "French"),
        (test_cross, train_cross, "Cross-lingual"),
    ]
    for dataset, train_data, name in splits:
        # One instruction per split, shared by the demonstrations and the
        # query so the in-context examples show the same task that is asked
        # (the cross-lingual split requests an English summary).
        prefix = "Summarize in English: " if name == "Cross-lingual" else "Summarize the text: "
        reference_summaries = dataset["target"].tolist()

        texts = []
        for source in dataset["source"]:
            # Fresh random examples are drawn for every test row.
            shot_examples = train_data.sample(shots)
            shot_prompt = "\n\n".join(
                f"{prefix}{example_source}\n Summary: {example_target}"
                for example_source, example_target in zip(
                    shot_examples["source"], shot_examples["target"]
                )
            )
            texts.append(f"{shot_prompt}\n\n{prefix}{source}\n Summary: ")

        generated_summaries = []
        for start in range(0, len(texts), batch_size):
            batch_outputs = generate_summary(texts[start:start + batch_size])
            # Each output is a list of candidates; keep the first one's text.
            generated_summaries.extend(
                output[0]["generated_text"] for output in batch_outputs
            )

        scores = compute_scores(generated_summaries, reference_summaries)
        save_scores(scores, "Llama-3.2-1B-Instruct", f"{shots}-shot", name)
        print(f"{name} Scores:", scores)

def main():
    """Parse the command-line options and launch the selected experiment."""
    cli = argparse.ArgumentParser(
        description="Run experiments with Llama-3.2-1B-Instruct."
    )
    cli.add_argument(
        "--experiment",
        type=str,
        required=True,
        choices=EXPERIMENTS,
        help="The experiment to run.",
    )
    cli.add_argument(
        "--num_examples",
        type=int,
        default=None,
        help="Number of examples to generate summaries on (optional).",
    )
    options = cli.parse_args()

    run_experiment(options.experiment, options.num_examples)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()