Spaces:

yasserrmd
/

InterroGen

Running

App Files Files Community

InterroGen / populate_db.py

yasserrmd

Upload 5 files

4164d84 verified 3 months ago

raw

history blame contribute delete

6.65 kB

	import json
	import random
	from datetime import datetime, timedelta

	from app import create_app, db
	from app.models import Country, Case, InterrogationSession, GeneratedQuestion, InterrogationResponse, Report, CaseStatus

	# Application instance built by the project's create_app() factory at import
	# time; populate_database() enters its app_context() for all database work.
	app = create_app()

	# --- Helper function to generate random names ---
	def generate_random_name():
	    """Return a random full name in the form "<first name> <last name>".

	    The first name is drawn before the last name, each uniformly from a
	    fixed pool via the module-level ``random`` generator.
	    """
	    given_names = [
	        "Aaliyah", "Aarav", "Beatrix", "Bodhi", "Cassian", "Clara",
	        "Declan", "Elara", "Felix", "Fiona", "Gideon", "Hazel",
	        "Jasper", "Juniper", "Kai", "Luna", "Milo", "Nora",
	        "Orion", "Ophelia", "Phoenix", "Quinn", "Rowan", "Seraphina",
	        "Silas", "Stella", "Theodore", "Violet", "Xavier", "Zara",
	    ]
	    family_names = [
	        "Abe", "Chen", "Da Silva", "El-Sayed", "Fernandez", "Garcia",
	        "Ivanov", "Jones", "Kim", "Kowalski", "Li", "Martinez",
	        "Müller", "Nguyen", "Okafor", "Patel", "Popescu", "Rossi",
	        "Santos", "Schmidt", "Singh", "Smith", "Tanaka", "Tremblay",
	        "Van Der Berg", "Williams", "Wilson", "Yamamoto", "Zhang", "Zimmerman",
	    ]
	    given = random.choice(given_names)
	    family = random.choice(family_names)
	    return given + " " + family

	# --- Helper function to generate random text ---
	def generate_random_text(length=100):
	    """Return a capitalized, period-terminated string of random filler words.

	    ``length`` is only a rough size hint: ``length // 5`` words are drawn,
	    so the result is not an exact character count. When that word count is
	    zero or negative the result is just ".".
	    """
	    vocabulary = [
	        "lorem", "ipsum", "dolor", "sit", "amet", "consectetur",
	        "adipiscing", "elit", "sed", "do", "eiusmod", "tempor",
	        "incididunt", "ut", "labore", "et", "dolore", "magna", "aliqua",
	    ]
	    word_count = length // 5
	    picked = []
	    for _ in range(word_count):
	        picked.append(random.choice(vocabulary))
	    sentence = " ".join(picked)
	    return sentence.capitalize() + "."

	# --- Function to populate the database ---
	def populate_database():
	    """Reset the database and fill it with randomly generated sample data.

	    Inside an application context this drops and recreates every table, then
	    inserts the fixed list of countries, ten sample cases and, for each case,
	    one interrogation session with 5-12 question/response pairs plus one
	    report. Progress messages are printed to stdout.

	    WARNING: ``db.drop_all()`` deletes all existing data before seeding.
	    """
	    with app.app_context():
	        db.drop_all()  # Clear existing data
	        db.create_all()  # Create tables

	        # 1. Populate Countries
	        _populate_countries()
	        print("Countries populated.")

	        # 2. Populate Sample Cases
	        all_countries = Country.query.all()
	        if not all_countries:
	            print("No countries found, cannot create cases.")
	            return
	        cases = _populate_cases(all_countries)
	        print(f"{len(cases)} Cases populated.")

	        # 3. Populate Interrogation Sessions, Questions, Responses, and Reports for each case
	        for case_obj in cases:
	            _populate_case_activity(case_obj)
	        print("Interrogation Sessions, Questions, Responses, and Reports populated.")

	        print("Database populated successfully!")


	def _populate_countries():
	    """Insert the fixed list of sample countries and commit them."""
	    countries_data = [
	        # GCC
	        {"name": "Saudi Arabia", "region": "GCC"},
	        {"name": "UAE", "region": "GCC"},
	        {"name": "Qatar", "region": "GCC"},
	        {"name": "Bahrain", "region": "GCC"},
	        {"name": "Oman", "region": "GCC"},
	        {"name": "Kuwait", "region": "GCC"},
	        # EU
	        {"name": "Germany", "region": "EU"},
	        {"name": "France", "region": "EU"},
	        {"name": "Italy", "region": "EU"},
	        {"name": "Spain", "region": "EU"},
	        # African Countries
	        {"name": "South Africa", "region": "Africa"},
	        {"name": "Nigeria", "region": "Africa"},
	        {"name": "Kenya", "region": "Africa"},
	        # North America
	        {"name": "USA", "region": "North America"},
	        {"name": "Canada", "region": "North America"},
	        # Indian Subcontinent
	        {"name": "India", "region": "Indian Subcontinent"},
	        {"name": "Pakistan", "region": "Indian Subcontinent"},
	        {"name": "Bangladesh", "region": "Indian Subcontinent"},
	        {"name": "Sri Lanka", "region": "Indian Subcontinent"},
	        # East Asia
	        {"name": "China", "region": "East Asia"},
	        {"name": "Japan", "region": "East Asia"},
	    ]
	    db.session.add_all(
	        [Country(name=entry["name"], region=entry["region"]) for entry in countries_data]
	    )
	    db.session.commit()


	def _populate_cases(all_countries, count=10):
	    """Create ``count`` random cases, commit them, and return them as a list.

	    Each case gets a sequential display id ("C-20240001", ...), a random
	    type, suspect name, filler texts, status, and one of ``all_countries``
	    as its country context.
	    """
	    case_types = ["Fraud", "Theft", "Cybercrime", "Assault", "Homicide", "Drug Trafficking"]
	    statuses = list(CaseStatus)  # Built once instead of once per case
	    cases = []
	    for number in range(1, count + 1):
	        case = Case(
	            case_id_display=f"C-2024{number:04d}",
	            case_type=random.choice(case_types),
	            suspect_name=generate_random_name(),
	            profile_details=generate_random_text(150),
	            evidence_summary=generate_random_text(200),
	            status=random.choice(statuses),
	            country_context=random.choice(all_countries),  # Random country for context
	        )
	        db.session.add(case)
	        cases.append(case)
	    db.session.commit()
	    return cases


	def _populate_case_activity(case_obj):
	    """Add one session with question/response pairs and one report for a case.

	    Everything created for the case is committed together at the end;
	    ``flush()`` is used in between only to obtain the generated ids needed
	    for the foreign keys (assumes the models use database-generated primary
	    keys, as the per-row commits this replaces did).
	    """
	    question_categories = ["Identity & Timeline", "Evidence Confrontation", "Alibi Verification", "Psychological Pressure"]
	    response_tags = ["evasiveness", "contradiction", "alibi failure", "confession hint", "cooperative"]

	    # Create an interrogation session dated 1-30 days in the past (naive UTC).
	    session = InterrogationSession(
	        case_id=case_obj.id,
	        session_date=datetime.utcnow() - timedelta(days=random.randint(1, 30)),
	        summary_notes=generate_random_text(100),
	    )
	    db.session.add(session)
	    db.session.flush()  # Emit the INSERT so session.id is available

	    # Create 5 to 12 generated questions, each with exactly one response.
	    for _ in range(random.randint(5, 12)):
	        question = GeneratedQuestion(
	            interrogation_session_id=session.id,
	            question_text=generate_random_text(50) + "?",
	            category=random.choice(question_categories),
	        )
	        db.session.add(question)
	        db.session.flush()  # Emit the INSERT so question.id is available

	        response = InterrogationResponse(
	            generated_question_id=question.id,
	            response_text=generate_random_text(80),
	            # Zero to two distinct tags, comma-separated ("" when none are drawn).
	            tags=",".join(random.sample(response_tags, random.randint(0, 2))),
	        )
	        db.session.add(response)

	    # Create a report for the case, using the same country as the case.
	    report = Report(
	        case_id=case_obj.id,
	        # json.dumps guarantees valid JSON even if the text ever contains quotes.
	        llm_json_output=json.dumps(
	            {
	                "summary": generate_random_text(100),
	                "findings": generate_random_text(150),
	            }
	        ),
	        report_content_summary=generate_random_text(250),
	        recommendations=generate_random_text(120),
	        report_country_context=case_obj.country_context,
	    )
	    db.session.add(report)
	    db.session.commit()  # Single commit per case

	# Seed the database only when this file is executed directly as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    populate_database()