# InterroGen / populate_db.py
# Source page header: yasserrmd · "Upload 5 files" · commit 4164d84 (verified)
import json
import random
from datetime import datetime, timedelta

from app import create_app, db
from app.models import (
    Case,
    CaseStatus,
    Country,
    GeneratedQuestion,
    InterrogationResponse,
    InterrogationSession,
    Report,
)
# Application instance; populate_database() opens its app context so the
# `db` session is bound while the sample data is written.
app = create_app()
# --- Helper function to generate random names ---
def generate_random_name():
    """Return a random "<first name> <last name>" string.

    Both parts are drawn independently from fixed pools. Some last names
    contain spaces or hyphens, so the result may hold more than one space.
    """
    given_pool = (
        "Aaliyah", "Aarav", "Beatrix", "Bodhi", "Cassian", "Clara",
        "Declan", "Elara", "Felix", "Fiona", "Gideon", "Hazel",
        "Jasper", "Juniper", "Kai", "Luna", "Milo", "Nora",
        "Orion", "Ophelia", "Phoenix", "Quinn", "Rowan", "Seraphina",
        "Silas", "Stella", "Theodore", "Violet", "Xavier", "Zara",
    )
    family_pool = (
        "Abe", "Chen", "Da Silva", "El-Sayed", "Fernandez", "Garcia",
        "Ivanov", "Jones", "Kim", "Kowalski", "Li", "Martinez",
        "Müller", "Nguyen", "Okafor", "Patel", "Popescu", "Rossi",
        "Santos", "Schmidt", "Singh", "Smith", "Tanaka", "Tremblay",
        "Van Der Berg", "Williams", "Wilson", "Yamamoto", "Zhang", "Zimmerman",
    )
    # The first name is drawn before the last name, so a seeded `random`
    # yields a reproducible sequence of names.
    given = random.choice(given_pool)
    family = random.choice(family_pool)
    return " ".join((given, family))
# --- Helper function to generate random text ---
def generate_random_text(length=100):
    """Return a pseudo-Latin filler sentence.

    `length` is only a rough size hint: the sentence contains
    ``length // 5`` randomly chosen words, not `length` characters.
    The words are joined with single spaces, the first letter is
    upper-cased (the rest lower-cased) and a trailing "." is appended.
    A `length` below 5 therefore yields just ".".
    """
    vocabulary = (
        "lorem", "ipsum", "dolor", "sit", "amet", "consectetur",
        "adipiscing", "elit", "sed", "do", "eiusmod", "tempor",
        "incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua",
    )
    word_count = length // 5
    picked = [random.choice(vocabulary) for _ in range(word_count)]
    sentence = " ".join(picked).capitalize()
    return f"{sentence}."
# --- Function to populate the database ---

# (name, region) pairs inserted into the Country table.
_COUNTRIES = [
    # GCC
    ("Saudi Arabia", "GCC"),
    ("UAE", "GCC"),
    ("Qatar", "GCC"),
    ("Bahrain", "GCC"),
    ("Oman", "GCC"),
    ("Kuwait", "GCC"),
    # EU
    ("Germany", "EU"),
    ("France", "EU"),
    ("Italy", "EU"),
    ("Spain", "EU"),
    # African Countries
    ("South Africa", "Africa"),
    ("Nigeria", "Africa"),
    ("Kenya", "Africa"),
    # North America
    ("USA", "North America"),
    ("Canada", "North America"),
    # Indian Subcontinent
    ("India", "Indian Subcontinent"),
    ("Pakistan", "Indian Subcontinent"),
    ("Bangladesh", "Indian Subcontinent"),
    ("Sri Lanka", "Indian Subcontinent"),
    # East Asia
    ("China", "East Asia"),
    ("Japan", "East Asia"),
]

# Value pools the sample records are drawn from.
_CASE_TYPES = [
    "Fraud", "Theft", "Cybercrime", "Assault", "Homicide", "Drug Trafficking",
]
_QUESTION_CATEGORIES = [
    "Identity & Timeline",
    "Evidence Confrontation",
    "Alibi Verification",
    "Psychological Pressure",
]
_RESPONSE_TAGS = [
    "evasiveness", "contradiction", "alibi failure", "confession hint", "cooperative",
]


def _seed_countries():
    """Insert every entry of _COUNTRIES and return the new Country objects."""
    countries = [Country(name=name, region=region) for name, region in _COUNTRIES]
    db.session.add_all(countries)
    db.session.commit()
    return countries


def _seed_cases(countries, count=10):
    """Insert `count` random cases, each tied to a random country, and return them."""
    statuses = list(CaseStatus)
    cases = [
        Case(
            # Display ids run C-20240001, C-20240002, ...
            case_id_display=f"C-2024{number:04d}",
            case_type=random.choice(_CASE_TYPES),
            suspect_name=generate_random_name(),
            profile_details=generate_random_text(150),
            evidence_summary=generate_random_text(200),
            status=random.choice(statuses),
            country_context=random.choice(countries),
        )
        for number in range(1, count + 1)
    ]
    db.session.add_all(cases)
    db.session.commit()
    return cases


def _seed_interrogation_data(case_obj):
    """Stage one session (with questions and responses) and one report for a case.

    Nothing is committed here; the caller commits once after all cases are
    processed. flush() is used wherever a freshly inserted row's id is needed
    as a foreign key, which avoids ending the transaction for every row.
    """
    interrogation = InterrogationSession(
        case_id=case_obj.id,
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12. It is
        # kept because the column presumably stores naive UTC values — confirm
        # against the model before switching to an aware datetime.
        session_date=datetime.utcnow() - timedelta(days=random.randint(1, 30)),
        summary_notes=generate_random_text(100),
    )
    db.session.add(interrogation)
    db.session.flush()  # populate interrogation.id

    for _ in range(random.randint(5, 12)):  # 5 to 12 questions per session
        question = GeneratedQuestion(
            interrogation_session_id=interrogation.id,
            question_text=generate_random_text(50) + "?",
            category=random.choice(_QUESTION_CATEGORIES),
        )
        db.session.add(question)
        db.session.flush()  # populate question.id

        # Each question gets exactly one response carrying 0-2 distinct tags,
        # stored as a comma-separated string (empty when no tag is drawn).
        response = InterrogationResponse(
            generated_question_id=question.id,
            response_text=generate_random_text(80),
            tags=",".join(random.sample(_RESPONSE_TAGS, random.randint(0, 2))),
        )
        db.session.add(response)

    report = Report(
        case_id=case_obj.id,
        # json.dumps guarantees valid JSON even if the generated text ever
        # contains quotes or backslashes.
        llm_json_output=json.dumps(
            {
                "summary": generate_random_text(100),
                "findings": generate_random_text(150),
            }
        ),
        report_content_summary=generate_random_text(250),
        recommendations=generate_random_text(120),
        # The report uses the same country context as its case.
        report_country_context=case_obj.country_context,
    )
    db.session.add(report)


def populate_database():
    """Rebuild the schema and fill it with randomly generated sample data.

    WARNING: this is destructive. All tables are dropped and recreated before
    seeding, so any existing data is lost.

    Seeding runs in three phases, each committed once:
      1. the reference countries,
      2. ten sample cases,
      3. per case: one interrogation session with 5-12 questions (one response
         each) and one report.
    """
    with app.app_context():
        db.drop_all()  # Clear existing data
        db.create_all()  # Create tables

        # 1. Populate Countries
        countries = _seed_countries()
        print("Countries populated.")

        # 2. Populate Sample Cases
        cases = _seed_cases(countries)
        print(f"{len(cases)} Cases populated.")

        # 3. Populate Interrogation Sessions, Questions, Responses, and Reports
        for case_obj in cases:
            _seed_interrogation_data(case_obj)
        db.session.commit()
        print("Interrogation Sessions, Questions, Responses, and Reports populated.")

        print("Database populated successfully!")
# Seed the database only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    populate_database()