File size: 2,568 Bytes
8d0095f
 
 
 
 
 
 
 
 
 
 
 
 
3b301b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d0095f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import pandas as pd

# Columns each row must provide; a row missing any of them is skipped because
# it would otherwise produce pairs such as "Who wrote 'nan'?".
_REQUIRED_COLUMNS = ["title", "author", "year", "summary"]


def _build_qa_pairs(title, author, year, summary) -> list:
    """Return the question/answer dicts generated for a single book.

    Produces 21 dicts, each with the keys "question" and "answer": six
    author questions, six publication-year questions, six summary questions
    and three combined questions.

    Args:
        title: Value of the row's "title" column, interpolated into every
            question.
        author: Value of the row's "author" column.
        year: Value of the row's "year" column. An integer-valued float
            (e.g. 1925.0) is converted to int before use.
        summary: Value of the row's "summary" column.

    Returns:
        A list of {"question": ..., "answer": ...} dicts.
    """
    # pandas loads an integer column as float when any cell in it is blank;
    # without this the answers would read "1925.0" instead of "1925".
    if isinstance(year, float) and year.is_integer():
        year = int(year)

    # Variants of questions about the author
    author_pairs = [
        {"question": f"Who wrote '{title}'?", "answer": author},
        {"question": f"Can you tell me the author of {title}?", "answer": author},
        {"question": f"Who is the author of {title}?", "answer": author},
        {"question": f"Who is the writer of {title}?", "answer": author},
        {"question": f"Please tell me the author of '{title}'.", "answer": author},
        {"question": f"Who is the person behind '{title}'?", "answer": author},
    ]

    # Variants of questions about the publication year
    year_pairs = [
        {"question": f"What year was {title} published?", "answer": year},
        {"question": f"When was {title} written?", "answer": year},
        {"question": f"In which year was {title} published?", "answer": year},
        {"question": f"Can you tell me when {title} was released?", "answer": year},
        {"question": f"Please tell me the year of {title}.", "answer": year},
        {"question": f"What is the publication year of {title}?", "answer": year},
    ]

    # Variants of questions about the summary
    summary_pairs = [
        {"question": f"What is '{title}' about?", "answer": summary},
        {"question": f"Can you summarize {title}?", "answer": summary},
        {"question": f"Tell me the plot of {title}.", "answer": summary},
        {"question": f"Give me a summary of {title}.", "answer": summary},
        {"question": f"What is the main idea of {title}?", "answer": summary},
        {"question": f"Explain what '{title}' is about.", "answer": summary},
    ]

    # Combined questions whose answers are full sentences
    combined_pairs = [
        {"question": f"Who is the author and what is the year of '{title}'?", "answer": f"The author of {title} is {author} and it was published in {year}."},
        {"question": f"What is the year and summary of {title}?", "answer": f"{title} was published in {year}. {summary}"},
        {"question": f"Can you tell me the author and plot of {title}?", "answer": f"The author of {title} is {author}. The plot revolves around {summary}."},
    ]

    return author_pairs + year_pairs + summary_pairs + combined_pairs


# Load the dataset
df = pd.read_csv("books.csv")

# Keep only rows where every required field is present.
complete_rows = df.dropna(subset=_REQUIRED_COLUMNS)
skipped_rows = len(df) - len(complete_rows)
if skipped_rows:
    print(f"Skipped {skipped_rows} row(s) with missing values.")

# Generate question-answer pairs
qa_pairs = []

# itertuples over just the needed columns avoids building a Series per row,
# which is what iterrows() does.
for title, author, year, summary in complete_rows[_REQUIRED_COLUMNS].itertuples(index=False, name=None):
    qa_pairs.extend(_build_qa_pairs(title, author, year, summary))

# Save pairs to a CSV file
qa_df = pd.DataFrame(qa_pairs, columns=["question", "answer"])
qa_df.to_csv("qa_dataset.csv", index=False)
print(f"Generated {len(qa_pairs)} question-answer pairs.")