"""Zero-shot and few-shot summarization experiments with Llama-3.2-1B-Instruct.

The script loads English, French and cross-lingual test sets from the
``datasets`` directory next to this file, prompts the model for a summary of
every ``source`` text, and scores the generations against the ``target``
column with ``metrics.compute_scores`` / ``metrics.save_scores``.

Usage::

    python <this script> --experiment {zero-shot,1-shot,2-shot} [--num_examples N]
"""
import argparse
import csv
import os

import huggingface_hub
import numpy as np
import pandas as pd
import torch
from transformers import pipeline

from metrics import compute_scores, save_scores

# Get the absolute path of the current script
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# SECURITY: an access token used to be assembled from string fragments that
# were committed in this file. Credentials must never live in source control;
# that token should be considered leaked and be revoked. The token is now read
# from the environment instead.
token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
if token:
    huggingface_hub.login(token=token)

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
# Short model name used when reporting/saving scores ("Llama-3.2-1B-Instruct").
MODEL_NAME = MODEL_ID.split("/")[-1]

pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

EXPERIMENTS = ["zero-shot", "1-shot", "2-shot"]

# Number of in-context examples used by each experiment type.
_SHOTS_BY_EXPERIMENT = {"zero-shot": 0, "1-shot": 1, "2-shot": 2}


def generate_summary(texts):
    """Run the generation pipeline on a batch of prompts.

    Args:
        texts: List of prompt strings.

    Returns:
        The raw pipeline output: one entry per prompt, in the same order.
        Returns an empty list when ``texts`` is empty.
    """
    if not texts:
        return []
    # Each prompt must be wrapped in its OWN single-message chat. A flat list
    # of message dicts is interpreted by the pipeline as one multi-turn
    # conversation, which would yield a single reply for the whole batch.
    chats = [[{"role": "user", "content": text}] for text in texts]
    return pipe(chats, max_new_tokens=128)


def _extract_summary(output):
    """Return the generated text from the pipeline result for one prompt."""
    generated = output[0]["generated_text"]
    if isinstance(generated, str):
        return generated
    # For chat-style inputs the pipeline returns the whole conversation as a
    # list of messages; the last message is the assistant's reply.
    return generated[-1]["content"]


def _summarize_in_batches(texts, batch_size):
    """Generate one summary string per prompt, ``batch_size`` prompts at a time."""
    summaries = []
    for start in range(0, len(texts), batch_size):
        outputs = generate_summary(texts[start:start + batch_size])
        summaries.extend(_extract_summary(output) for output in outputs)
    return summaries


def _instruction_prefix(name):
    """Return the instruction that precedes the source text for a dataset name."""
    if name == "Cross-lingual":
        return "Summarize in English: "
    return "Summarize the text: "


def _load_split(split):
    """Load the English, French and cross-lingual CSVs of ``split`` ("test"/"train")."""
    return tuple(
        pd.read_csv(os.path.join(BASE_DIR, f"datasets/{split}{suffix}.csv"))
        for suffix in ("", "_fr", "_cross")
    )


def _subsample(frame, num_examples):
    """Return a random sample of ``frame``, or ``frame`` itself if no size is given.

    The sample size is capped at the number of available rows so that asking
    for more examples than exist uses the full dataset instead of raising.
    """
    if not num_examples:
        return frame
    return frame.sample(min(num_examples, len(frame)))


def run_experiment(experiment_type, num_examples):
    """Load the data and run one experiment.

    Args:
        experiment_type: One of ``EXPERIMENTS``.
        num_examples: Number of test rows to sample from each dataset, or
            ``None``/``0`` to evaluate on every row. Each dataset is sampled
            independently.

    Raises:
        ValueError: If ``experiment_type`` is not a known experiment.
    """
    # Validate first so that a typo fails before any CSV is read.
    if experiment_type not in _SHOTS_BY_EXPERIMENT:
        raise ValueError("Invalid experiment type.")

    print(f"Starting {experiment_type} Experiment with {MODEL_NAME}")

    test, test_fr, test_cross = (
        _subsample(frame, num_examples) for frame in _load_split("test")
    )

    shots = _SHOTS_BY_EXPERIMENT[experiment_type]
    if shots == 0:
        run_zero_shot(test, test_fr, test_cross)
    else:
        train, train_fr, train_cross = _load_split("train")
        run_n_shot(test, test_fr, test_cross, train, train_fr, train_cross, shots=shots)


def run_zero_shot(test, test_fr, test_cross, batch_size=16):
    """Evaluate the model without in-context examples on the three test sets.

    Args:
        test: English test DataFrame with ``source`` and ``target`` columns.
        test_fr: French test DataFrame with the same columns.
        test_cross: Cross-lingual test DataFrame with the same columns.
        batch_size: Number of prompts handed to ``generate_summary`` per call.
    """
    print("Running Zero-Shot Evaluation...")

    for dataset, name in [(test, "English"), (test_fr, "French"), (test_cross, "Cross-lingual")]:
        prefix = _instruction_prefix(name)
        texts = [f"{prefix}{source}\n Summary: " for source in dataset["source"]]
        reference_summaries = dataset["target"].tolist()

        generated_summaries = _summarize_in_batches(texts, batch_size)

        scores = compute_scores(generated_summaries, reference_summaries)
        save_scores(scores, MODEL_NAME, "zero-shot", name)
        print(f"{name} Scores:", scores)


def run_n_shot(test, test_fr, test_cross, train, train_fr, train_cross, shots, batch_size=16):
    """Evaluate the model with ``shots`` in-context examples per test prompt.

    For every test row, ``shots`` rows are drawn at random from the matching
    training set and prepended to the prompt as solved examples.

    Args:
        test: English test DataFrame with ``source`` and ``target`` columns.
        test_fr: French test DataFrame with the same columns.
        test_cross: Cross-lingual test DataFrame with the same columns.
        train: English training DataFrame the examples are drawn from.
        train_fr: French training DataFrame the examples are drawn from.
        train_cross: Cross-lingual training DataFrame the examples are drawn from.
        shots: Number of in-context examples per prompt.
        batch_size: Number of prompts handed to ``generate_summary`` per call.
    """
    print(f"Running {shots}-Shot Evaluation...")

    for dataset, train_data, name in [
        (test, train, "English"),
        (test_fr, train_fr, "French"),
        (test_cross, train_cross, "Cross-lingual"),
    ]:
        # The demonstrations use the same instruction as the final query so
        # the cross-lingual examples also ask for an English summary.
        prefix = _instruction_prefix(name)

        texts = []
        for source in dataset["source"]:
            shot_examples = train_data.sample(shots)
            shot_prompt = "\n\n".join(
                f"{prefix}{shot_source}\n Summary: {shot_target}"
                for shot_source, shot_target in zip(
                    shot_examples["source"], shot_examples["target"]
                )
            )
            texts.append(f"{shot_prompt}\n\n{prefix}{source}\n Summary: ")
        reference_summaries = dataset["target"].tolist()

        generated_summaries = _summarize_in_batches(texts, batch_size)

        scores = compute_scores(generated_summaries, reference_summaries)
        save_scores(scores, MODEL_NAME, f"{shots}-shot", name)
        print(f"{name} Scores:", scores)


def main():
    """Parse the command line and run the requested experiment."""
    parser = argparse.ArgumentParser(description="Run experiments with Llama-3.2-1B-Instruct.")
    parser.add_argument("--experiment", type=str, required=True, choices=EXPERIMENTS, help="The experiment to run.")
    parser.add_argument("--num_examples", type=int, default=None, help="Number of examples to generate summaries on (optional).")
    args = parser.parse_args()

    run_experiment(args.experiment, args.num_examples)


if __name__ == "__main__":
    main()