Promptaid-VIsion / py_backend /alembic /versions /b8fc40bfe3c7_initial_schema_seed.py
SCGR's picture
admin login
ba5edb0
"""initial schema + full dynamic country seed (with json schemas & validation)
Revision ID: 0001_initial_schema_and_seed
Revises:
Create Date: 2025-08-01 20:00:00.000000
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
import pycountry
revision = 'b8fc40bfe3c7'
down_revision = None
branch_labels = None
depends_on = None
def _guess_region(alpha2: str) -> str:
AFR = {'DZ','AO','BJ','BW','BF','BI','CM','CV','CF','TD','KM','CG','CD','CI','DJ','EG',
'GQ','ER','SZ','ET','GA','GM','GH','GN','GW','KE','LS','LR','LY','MG','MW','ML',
'MR','MU','YT','MA','MZ','NA','NE','NG','RE','RW','SH','ST','SN','SC','SL','SO',
'ZA','SS','SD','TZ','TG','TN','UG','EH','ZM','ZW'}
AMR = {'US','CA','MX','BR','AR','CO','PE','VE','CL','EC','GT','CU','BO','DO','HT','HN',
'PY','NI','SV','CR','PA','UY','JM','TT','GY','SR','BZ','KY','AG','BS','BB','BM',
'DM','GD','GP','MQ','MS','PR','KN','LC','VC','SX','TC','VI'}
EUR = {'AL','AD','AT','BY','BE','BA','BG','HR','CY','CZ','DK','EE','FI','FR','DE','GI',
'GR','HU','IS','IE','IT','XK','LV','LI','LT','LU','MT','MD','MC','ME','NL','MK',
'NO','PL','PT','RO','RU','SM','RS','SK','SI','ES','SE','CH','TR','UA','GB','VA'}
MENA = {'DZ','BH','EG','IR','IQ','IL','JO','KW','LB','LY','MA','OM','QA','SA','SY','TN',
'AE','YE','PS','SD','EH'}
if alpha2 in MENA:
return 'MENA'
if alpha2 in (AFR - MENA):
return 'AFR'
if alpha2 in AMR:
return 'AMR'
if alpha2 in EUR:
return 'EUR'
return 'APA'
def upgrade():
op.execute('CREATE EXTENSION IF NOT EXISTS pgcrypto;')
op.execute("DROP TABLE IF EXISTS captions CASCADE;")
op.execute("DROP TABLE IF EXISTS image_countries CASCADE;")
op.execute("DROP TABLE IF EXISTS images CASCADE;")
op.execute("DROP TABLE IF EXISTS json_schemas CASCADE;")
op.execute("DROP TABLE IF EXISTS models CASCADE;")
op.execute("DROP TABLE IF EXISTS image_types CASCADE;")
op.execute("DROP TABLE IF EXISTS spatial_references CASCADE;")
op.execute("DROP TABLE IF EXISTS countries CASCADE;")
op.execute("DROP TABLE IF EXISTS event_types CASCADE;")
op.execute("DROP TABLE IF EXISTS regions CASCADE;")
op.execute("DROP TABLE IF EXISTS sources CASCADE;")
op.create_table(
'sources',
sa.Column('s_code', sa.String(), primary_key=True),
sa.Column('label', sa.String(), nullable=False),
)
op.create_table(
'regions',
sa.Column('r_code', sa.String(), primary_key=True),
sa.Column('label', sa.String(), nullable=False),
)
op.create_table(
'event_types',
sa.Column('t_code', sa.String(), primary_key=True),
sa.Column('label', sa.String(), nullable=False),
)
op.create_table(
'countries',
sa.Column('c_code', sa.CHAR(length=2), primary_key=True),
sa.Column('label', sa.String(), nullable=False),
sa.Column('r_code', sa.String(), sa.ForeignKey('regions.r_code'), nullable=False),
)
op.create_table(
'spatial_references',
sa.Column('epsg', sa.String(), primary_key=True),
sa.Column('srid', sa.String(), nullable=False),
sa.Column('proj4', sa.String(), nullable=False),
sa.Column('wkt', sa.String(), nullable=False),
)
op.create_table(
'image_types',
sa.Column('image_type', sa.String(), primary_key=True),
sa.Column('label', sa.String(), nullable=False),
)
op.create_table(
'prompts',
sa.Column('p_code', sa.String(), primary_key=True),
sa.Column('label', sa.Text(), nullable=False),
sa.Column('metadata_instructions', sa.Text(), nullable=True),
)
op.create_table(
'models',
sa.Column('m_code', sa.String(), primary_key=True),
sa.Column('label', sa.String(), nullable=False),
sa.Column('model_type', sa.String(), nullable=False),
sa.Column('is_available', sa.Boolean(), server_default=sa.text('true')),
sa.Column('config', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
)
op.create_table(
'json_schemas',
sa.Column('schema_id', sa.String(), primary_key=True),
sa.Column('title', sa.String(), nullable=False),
sa.Column('schema', postgresql.JSONB(astext_type=sa.Text()), nullable=False),
sa.Column('version', sa.String(), nullable=False),
sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('NOW()'), nullable=False),
)
op.execute("""
INSERT INTO sources (s_code,label) VALUES
('PDC','PDC'),
('GDACS','GDACS'),
('WFP','WFP ADAM'),
('GFH','Google Flood Hub'),
('GGC','Google GenCast'),
('USGS','USGS'),
('OTHER','Other')
""")
op.execute("""
INSERT INTO regions (r_code,label) VALUES
('AFR','Africa'),
('AMR','Americas'),
('APA','Asia-Pacific'),
('EUR','Europe'),
('MENA','Middle East & N Africa'),
('OTHER','Other')
""")
op.execute("""
INSERT INTO event_types (t_code,label) VALUES
('BIOLOGICAL_EMERGENCY','Biological Emergency'),
('CHEMICAL_EMERGENCY','Chemical Emergency'),
('CIVIL_UNREST','Civil Unrest'),
('COLD_WAVE','Cold Wave'),
('COMPLEX_EMERGENCY','Complex Emergency'),
('CYCLONE','Cyclone'),
('DROUGHT','Drought'),
('EARTHQUAKE','Earthquake'),
('EPIDEMIC','Epidemic'),
('FIRE','Fire'),
('FLOOD','Flood'),
('FLOOD_INSECURITY','Flood Insecurity'),
('HEAT_WAVE','Heat Wave'),
('INSECT_INFESTATION','Insect Infestation'),
('LANDSLIDE','Landslide'),
('OTHER','Other'),
('PLUVIAL','Pluvial'),
('POPULATION_MOVEMENT','Population Movement'),
('RADIOLOGICAL_EMERGENCY','Radiological Emergency'),
('STORM','Storm'),
('TRANSPORTATION_EMERGENCY','Transportation Emergency'),
('TSUNAMI','Tsunami'),
('VOLCANIC_ERUPTION','Volcanic Eruption')
""")
op.execute("""
INSERT INTO spatial_references (epsg, srid, proj4, wkt) VALUES
('4326','4326',
'+proj=longlat +datum=WGS84 +no_defs',
'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
),
('3857','3857',
'+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs',
'PROJCS["WGS 84 / Pseudo-Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]]'
),
('32633','32633',
'+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs',
'PROJCS["WGS 84 / UTM zone 33N",GEOGCS["WGS 84",DATUM["WGS 84",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
),
('32634','32634',
'+proj=utm +zone=34 +datum=WGS84 +units=m +no_defs',
'PROJCS["WGS 84 / UTM zone 34N",GEOGCS["WGS 84",DATUM["WGS 84",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
),
('32617','32617',
'+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs',
'PROJCS["WGS 84 / UTM zone 17N",GEOGCS["WGS 84",DATUM["WGS 84",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-81],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1]]'
),
('OTHER','OTHER','','OTHER')
""")
op.execute("""
INSERT INTO image_types (image_type,label) VALUES
('crisis_map','Crisis Map'),
('drone_image','Drone Image')
""")
op.execute("""
INSERT INTO prompts (p_code,label,metadata_instructions) VALUES
('DEFAULT_CRISIS_MAP','Analyze this crisis map and provide a detailed description of the emergency situation, affected areas, and key information shown in the map.','Additionally, extract the following metadata in JSON format. Choose exactly ONE option from each category:
- title: Create a concise title (less than 10 words) for the crisis/event
- source: Choose ONE from: PDC, GDACS, WFP, GFH, GGC, USGS, OTHER
- type: Choose ONE from: BIOLOGICAL_EMERGENCY, CHEMICAL_EMERGENCY, CIVIL_UNREST, COLD_WAVE, COMPLEX_EMERGENCY, CYCLONE, DROUGHT, EARTHQUAKE, EPIDEMIC, FIRE, FLOOD, FLOOD_INSECURITY, HEAT_WAVE, INSECT_INFESTATION, LANDSLIDE, OTHER, PLUVIAL, POPULATION_MOVEMENT, RADIOLOGICAL_EMERGENCY, STORM, TRANSPORTATION_EMERGENCY, TSUNAMI, VOLCANIC_ERUPTION
- countries: List of affected country codes (ISO 2-letter codes like PA, US, etc.)
- epsg: Choose ONE from: 4326, 3857, 32617, 32633, 32634, OTHER. If the map shows a different EPSG code, use "OTHER"
If you cannot find a match, use "OTHER". Return ONLY the JSON object (no markdown formatting) in this exact format:
{
"analysis": "detailed description...",
"metadata": {
"title": "...",
"source": "...",
"type": "...",
"countries": ["..."],
"epsg": "..."
}
}')
""")
op.execute("""
INSERT INTO models (m_code,label,model_type,is_available,config) VALUES
('GPT-4O','GPT-4O','gpt4o',true,'{"provider":"openai","model":"gpt-4o"}'),
('GEMINI15','Gemini 1.5','gemini_pro_vision',true,'{}'),
('CLAUDE3','Claude 3','claude_3_5_sonnet',false,'{}'),
('STUB_MODEL','Stub Model','custom',true,'{"stub": true}'),
('LLAVA_1_5_7B','LLaVA 1.5 7B','custom',true,'{"provider":"huggingface","model_id":"llava-hf/llava-1.5-7b-hf"}'),
('BLIP2_OPT_2_7B','BLIP Image Captioning','custom',true,'{"provider":"huggingface","model_id":"Salesforce/blip-image-captioning-base"}'),
('VIT_GPT2','Vit gpt2 image captioning','custom',true,'{"provider":"huggingface","model_id":"nlpconnect/vit-gpt2-image-captioning"}')
""")
op.execute("""
INSERT INTO json_schemas (schema_id,title,schema,version) VALUES
('default_caption@1.0.0','Default Caption Schema',
'{"type":"object","properties":{"analysis":{"type":"string"},"metadata":{"type":"object","properties":{"title":{"type":"string"},"source":{"type":"string"},"type":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}},"epsg":{"type":"string"}}}},"required":["analysis","metadata"]}',
'1.0.0')
""")
for c in pycountry.countries:
code = c.alpha_2
name = c.name.replace("'", "''")
region = _guess_region(code)
op.execute(
f"INSERT INTO countries (c_code,label,r_code) VALUES ('{code}','{name}','{region}')"
)
op.execute("INSERT INTO countries (c_code,label,r_code) VALUES ('XX','Not Applicable','OTHER')")
op.create_table(
'images',
sa.Column('image_id', postgresql.UUID(as_uuid=True),
server_default=sa.text('gen_random_uuid()'),
primary_key=True),
sa.Column('file_key', sa.String(), nullable=False),
sa.Column('sha256', sa.String(), nullable=False),
sa.Column('source', sa.String(), sa.ForeignKey('sources.s_code'), nullable=False),
sa.Column('event_type', sa.String(), sa.ForeignKey('event_types.t_code'), nullable=False),
sa.Column('epsg', sa.String(), sa.ForeignKey('spatial_references.epsg'), nullable=False),
sa.Column('image_type', sa.String(), sa.ForeignKey('image_types.image_type'), nullable=False),
sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('NOW()'), nullable=False),
sa.Column('captured_at', sa.TIMESTAMP(timezone=True), nullable=True),
sa.Column('title', sa.String(), nullable=True),
sa.Column('prompt', sa.String(), sa.ForeignKey('prompts.p_code'), nullable=True),
sa.Column('model', sa.String(), sa.ForeignKey('models.m_code'), nullable=True),
sa.Column('schema_id', sa.String(), sa.ForeignKey('json_schemas.schema_id'), nullable=True),
sa.Column('raw_json', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
sa.Column('generated', sa.Text(), nullable=True),
sa.Column('edited', sa.Text(), nullable=True),
sa.Column('accuracy', sa.SmallInteger()),
sa.Column('context', sa.SmallInteger()),
sa.Column('usability', sa.SmallInteger()),
sa.Column('starred', sa.Boolean(), server_default=sa.text('false')),
sa.Column('updated_at', sa.TIMESTAMP(timezone=True), nullable=True),
sa.CheckConstraint('accuracy IS NULL OR (accuracy BETWEEN 0 AND 100)', name='chk_images_accuracy'),
sa.CheckConstraint('context IS NULL OR (context BETWEEN 0 AND 100)', name='chk_images_context'),
sa.CheckConstraint('usability IS NULL OR (usability BETWEEN 0 AND 100)', name='chk_images_usability')
)
op.create_table(
'image_countries',
sa.Column('image_id', postgresql.UUID(as_uuid=True), nullable=False),
sa.Column('c_code', sa.CHAR(length=2), nullable=False),
sa.PrimaryKeyConstraint('image_id', 'c_code', name='pk_image_countries'),
sa.ForeignKeyConstraint(['image_id'], ['images.image_id'], ondelete='CASCADE'),
sa.ForeignKeyConstraint(['c_code'], ['countries.c_code'])
)
op.create_index('ix_images_created_at', 'images', ['created_at'])
def downgrade():
op.drop_index('ix_images_created_at', table_name='images')
op.drop_table('image_countries')
op.drop_table('images')
op.drop_table('json_schemas')
op.drop_table('models')
op.drop_table('image_types')
op.drop_table('spatial_references')
op.drop_table('countries')
op.drop_table('event_types')
op.drop_table('regions')
op.drop_table('sources')