Spaces:
Sleeping
Sleeping
File size: 1,112 Bytes
2904d0e 7c398ad 2904d0e 7c398ad 2904d0e 7c398ad 2904d0e 7c398ad 2904d0e 7c398ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# Category codes; a code's position in this list is its integer class id.
labels = ["CV", "AI", "ML", "NE", "CL"]

# Lookups in both directions, derived from the order of `labels`.
id2label = dict(enumerate(labels))
label2id = {code: index for index, code in id2label.items()}

# Human-readable name for each category code.
category2human = dict(
    CV="Computer Vision",
    AI="Artificial Intelligence",
    ML="Machine Learning",
    NE="Neural and Evolutionary Computing",
    CL="Computation and Language",
)
def load_arxiv_dataset():
    """
    Download the arXiv abstracts CSV from Kaggle and load it as a dataset.

    The file "arxiv_data.csv" is fetched through ``kagglehub`` and read with
    the ``datasets`` CSV loader (``train`` split). The "terms" column is
    stored in the CSV as the string form of a Python list, so every row is
    passed through ``ast.literal_eval`` to turn it into a real list.

    Returns:
        The loaded dataset with "terms" converted from strings to lists.
    """
    # Imports are kept local so that importing this module stays cheap.
    import ast
    import os

    import kagglehub
    from datasets import load_dataset

    def _terms_to_list(row):
        # literal_eval only accepts Python literals; it never runs code.
        row["terms"] = ast.literal_eval(row["terms"])
        return row

    # Fetch the latest published version of the Kaggle dataset.
    download_dir = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")
    csv_file = os.path.join(download_dir, "arxiv_data.csv")

    raw_rows = load_dataset(
        "csv",
        data_files=csv_file,
        encoding="utf-8",
        split="train",
    )
    return raw_rows.map(_terms_to_list)
def create_prompt(title, summary):
    """
    Build the model input text for one paper.

    The result is a "# title:" header line followed by the title, then a
    "# abstract:" header line followed by the summary.
    """
    layout = "# title:\n{}\n# abstract:\n{}"
    return layout.format(title, summary)
|