Spaces:
Sleeping
Sleeping
File size: 2,568 Bytes
8d0095f 3b301b8 8d0095f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
import pandas as pd
# Load the book catalogue. Only the "title", "author", "year" and "summary"
# columns of books.csv are read below; any other columns are ignored.
df = pd.read_csv("books.csv")

# Build the question-answer pairs. Each book contributes 21 entries
# (6 author, 6 year, 6 summary and 3 combined questions), and every entry is
# a {"question": ..., "answer": ...} dict.
qa_pairs = []

# Walk the four columns in lockstep instead of calling df.iterrows():
# iterrows() constructs a whole Series for every row just so four values can
# be pulled back out of it by label, which is needlessly slow on large files.
# Looking the columns up once also means a missing column fails immediately
# with a KeyError rather than on the first row.
for title, author, year, summary in zip(
    df["title"], df["author"], df["year"], df["summary"]
):
    # Six phrasings that ask for the author. Some quote the title and some do
    # not -- presumably deliberate surface variation, so kept verbatim.
    qa_pairs.extend([
        {"question": f"Who wrote '{title}'?", "answer": author},
        {"question": f"Can you tell me the author of {title}?", "answer": author},
        {"question": f"Who is the author of {title}?", "answer": author},
        {"question": f"Who is the writer of {title}?", "answer": author},
        {"question": f"Please tell me the author of '{title}'.", "answer": author},
        {"question": f"Who is the person behind '{title}'?", "answer": author},
    ])

    # Six phrasings that ask for the publication year. The answer is the raw
    # value from the "year" column, not a formatted string.
    qa_pairs.extend([
        {"question": f"What year was {title} published?", "answer": year},
        {"question": f"When was {title} written?", "answer": year},
        {"question": f"In which year was {title} published?", "answer": year},
        {"question": f"Can you tell me when {title} was released?", "answer": year},
        {"question": f"Please tell me the year of {title}.", "answer": year},
        {"question": f"What is the publication year of {title}?", "answer": year},
    ])

    # Six phrasings that ask what the book is about; the answer is the
    # summary text unchanged.
    qa_pairs.extend([
        {"question": f"What is '{title}' about?", "answer": summary},
        {"question": f"Can you summarize {title}?", "answer": summary},
        {"question": f"Tell me the plot of {title}.", "answer": summary},
        {"question": f"Give me a summary of {title}.", "answer": summary},
        {"question": f"What is the main idea of {title}?", "answer": summary},
        {"question": f"Explain what '{title}' is about.", "answer": summary},
    ])

    # Three combined questions whose answers are full sentences stitched
    # together from two fields: author + year, year + summary, and
    # author + summary.
    qa_pairs.extend([
        {
            "question": f"Who is the author and what is the year of '{title}'?",
            "answer": f"The author of {title} is {author} and it was published in {year}.",
        },
        {
            "question": f"What is the year and summary of {title}?",
            "answer": f"{title} was published in {year}. {summary}",
        },
        {
            "question": f"Can you tell me the author and plot of {title}?",
            "answer": f"The author of {title} is {author}. The plot revolves around {summary}.",
        },
    ])
# Write the generated pairs to disk and report how many were produced.
# The column order is pinned explicitly so the CSV header is always
# "question,answer"; index=False keeps the DataFrame row index out of the file.
qa_df = pd.DataFrame(data=qa_pairs, columns=["question", "answer"])
qa_df.to_csv(path_or_buf="qa_dataset.csv", index=False)

pair_total = len(qa_pairs)
print(f"Generated {pair_total} question-answer pairs.")
|