Spaces:
Running
Running
"""initial schema + full dynamic country seed (with json schemas & validation) | |
Revision ID: 0001_initial_schema_and_seed | |
Revises: | |
Create Date: 2025-08-01 20:00:00.000000 | |
""" | |
from alembic import op | |
import sqlalchemy as sa | |
from sqlalchemy.dialects import postgresql | |
import pycountry | |
revision = 'b8fc40bfe3c7' | |
down_revision = None | |
branch_labels = None | |
depends_on = None | |
def _guess_region(alpha2: str) -> str: | |
AFR = {'DZ','AO','BJ','BW','BF','BI','CM','CV','CF','TD','KM','CG','CD','CI','DJ','EG', | |
'GQ','ER','SZ','ET','GA','GM','GH','GN','GW','KE','LS','LR','LY','MG','MW','ML', | |
'MR','MU','YT','MA','MZ','NA','NE','NG','RE','RW','SH','ST','SN','SC','SL','SO', | |
'ZA','SS','SD','TZ','TG','TN','UG','EH','ZM','ZW'} | |
AMR = {'US','CA','MX','BR','AR','CO','PE','VE','CL','EC','GT','CU','BO','DO','HT','HN', | |
'PY','NI','SV','CR','PA','UY','JM','TT','GY','SR','BZ','KY','AG','BS','BB','BM', | |
'DM','GD','GP','MQ','MS','PR','KN','LC','VC','SX','TC','VI'} | |
EUR = {'AL','AD','AT','BY','BE','BA','BG','HR','CY','CZ','DK','EE','FI','FR','DE','GI', | |
'GR','HU','IS','IE','IT','XK','LV','LI','LT','LU','MT','MD','MC','ME','NL','MK', | |
'NO','PL','PT','RO','RU','SM','RS','SK','SI','ES','SE','CH','TR','UA','GB','VA'} | |
MENA = {'DZ','BH','EG','IR','IQ','IL','JO','KW','LB','LY','MA','OM','QA','SA','SY','TN', | |
'AE','YE','PS','SD','EH'} | |
if alpha2 in MENA: | |
return 'MENA' | |
if alpha2 in (AFR - MENA): | |
return 'AFR' | |
if alpha2 in AMR: | |
return 'AMR' | |
if alpha2 in EUR: | |
return 'EUR' | |
return 'APA' | |
def upgrade(): | |
op.execute('CREATE EXTENSION IF NOT EXISTS pgcrypto;') | |
op.execute("DROP TABLE IF EXISTS captions CASCADE;") | |
op.execute("DROP TABLE IF EXISTS image_countries CASCADE;") | |
op.execute("DROP TABLE IF EXISTS images CASCADE;") | |
op.execute("DROP TABLE IF EXISTS json_schemas CASCADE;") | |
op.execute("DROP TABLE IF EXISTS models CASCADE;") | |
op.execute("DROP TABLE IF EXISTS image_types CASCADE;") | |
op.execute("DROP TABLE IF EXISTS spatial_references CASCADE;") | |
op.execute("DROP TABLE IF EXISTS countries CASCADE;") | |
op.execute("DROP TABLE IF EXISTS event_types CASCADE;") | |
op.execute("DROP TABLE IF EXISTS regions CASCADE;") | |
op.execute("DROP TABLE IF EXISTS sources CASCADE;") | |
op.create_table( | |
'sources', | |
sa.Column('s_code', sa.String(), primary_key=True), | |
sa.Column('label', sa.String(), nullable=False), | |
) | |
op.create_table( | |
'regions', | |
sa.Column('r_code', sa.String(), primary_key=True), | |
sa.Column('label', sa.String(), nullable=False), | |
) | |
op.create_table( | |
'event_types', | |
sa.Column('t_code', sa.String(), primary_key=True), | |
sa.Column('label', sa.String(), nullable=False), | |
) | |
op.create_table( | |
'countries', | |
sa.Column('c_code', sa.CHAR(length=2), primary_key=True), | |
sa.Column('label', sa.String(), nullable=False), | |
sa.Column('r_code', sa.String(), sa.ForeignKey('regions.r_code'), nullable=False), | |
) | |
op.create_table( | |
'spatial_references', | |
sa.Column('epsg', sa.String(), primary_key=True), | |
sa.Column('srid', sa.String(), nullable=False), | |
sa.Column('proj4', sa.String(), nullable=False), | |
sa.Column('wkt', sa.String(), nullable=False), | |
) | |
op.create_table( | |
'image_types', | |
sa.Column('image_type', sa.String(), primary_key=True), | |
sa.Column('label', sa.String(), nullable=False), | |
) | |
op.create_table( | |
'prompts', | |
sa.Column('p_code', sa.String(), primary_key=True), | |
sa.Column('label', sa.Text(), nullable=False), | |
sa.Column('metadata_instructions', sa.Text(), nullable=True), | |
) | |
op.create_table( | |
'models', | |
sa.Column('m_code', sa.String(), primary_key=True), | |
sa.Column('label', sa.String(), nullable=False), | |
sa.Column('model_type', sa.String(), nullable=False), | |
sa.Column('is_available', sa.Boolean(), server_default=sa.text('true')), | |
sa.Column('config', postgresql.JSONB(astext_type=sa.Text()), nullable=True), | |
) | |
op.create_table( | |
'json_schemas', | |
sa.Column('schema_id', sa.String(), primary_key=True), | |
sa.Column('title', sa.String(), nullable=False), | |
sa.Column('schema', postgresql.JSONB(astext_type=sa.Text()), nullable=False), | |
sa.Column('version', sa.String(), nullable=False), | |
sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('NOW()'), nullable=False), | |
) | |
op.execute(""" | |
INSERT INTO sources (s_code,label) VALUES | |
('PDC','PDC'), | |
('GDACS','GDACS'), | |
('WFP','WFP ADAM'), | |
('GFH','Google Flood Hub'), | |
('GGC','Google GenCast'), | |
('USGS','USGS'), | |
('OTHER','Other') | |
""") | |
op.execute(""" | |
INSERT INTO regions (r_code,label) VALUES | |
('AFR','Africa'), | |
('AMR','Americas'), | |
('APA','Asia-Pacific'), | |
('EUR','Europe'), | |
('MENA','Middle East & N Africa'), | |
('OTHER','Other') | |
""") | |
op.execute(""" | |
INSERT INTO event_types (t_code,label) VALUES | |
('BIOLOGICAL_EMERGENCY','Biological Emergency'), | |
('CHEMICAL_EMERGENCY','Chemical Emergency'), | |
('CIVIL_UNREST','Civil Unrest'), | |
('COLD_WAVE','Cold Wave'), | |
('COMPLEX_EMERGENCY','Complex Emergency'), | |
('CYCLONE','Cyclone'), | |
('DROUGHT','Drought'), | |
('EARTHQUAKE','Earthquake'), | |
('EPIDEMIC','Epidemic'), | |
('FIRE','Fire'), | |
('FLOOD','Flood'), | |
('FLOOD_INSECURITY','Flood Insecurity'), | |
('HEAT_WAVE','Heat Wave'), | |
('INSECT_INFESTATION','Insect Infestation'), | |
('LANDSLIDE','Landslide'), | |
('OTHER','Other'), | |
('PLUVIAL','Pluvial'), | |
('POPULATION_MOVEMENT','Population Movement'), | |
('RADIOLOGICAL_EMERGENCY','Radiological Emergency'), | |
('STORM','Storm'), | |
('TRANSPORTATION_EMERGENCY','Transportation Emergency'), | |
('TSUNAMI','Tsunami'), | |
('VOLCANIC_ERUPTION','Volcanic Eruption') | |
""") | |
op.execute(""" | |
INSERT INTO spatial_references (epsg, srid, proj4, wkt) VALUES | |
('4326','4326', | |
'+proj=longlat +datum=WGS84 +no_defs', | |
'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]' | |
), | |
('3857','3857', | |
'+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs', | |
'PROJCS["WGS 84 / Pseudo-Mercator",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]]' | |
), | |
('32633','32633', | |
'+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs', | |
'PROJCS["WGS 84 / UTM zone 33N",GEOGCS["WGS 84",DATUM["WGS 84",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]' | |
), | |
('32634','32634', | |
'+proj=utm +zone=34 +datum=WGS84 +units=m +no_defs', | |
'PROJCS["WGS 84 / UTM zone 34N",GEOGCS["WGS 84",DATUM["WGS 84",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]' | |
), | |
('32617','32617', | |
'+proj=utm +zone=17 +datum=WGS84 +units=m +no_defs', | |
'PROJCS["WGS 84 / UTM zone 17N",GEOGCS["WGS 84",DATUM["WGS 84",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-81],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1]]' | |
), | |
('OTHER','OTHER','','OTHER') | |
""") | |
op.execute(""" | |
INSERT INTO image_types (image_type,label) VALUES | |
('crisis_map','Crisis Map'), | |
('drone_image','Drone Image') | |
""") | |
op.execute(""" | |
INSERT INTO prompts (p_code,label,metadata_instructions) VALUES | |
('DEFAULT_CRISIS_MAP','Analyze this crisis map and provide a detailed description of the emergency situation, affected areas, and key information shown in the map.','Additionally, extract the following metadata in JSON format. Choose exactly ONE option from each category: | |
- title: Create a concise title (less than 10 words) for the crisis/event | |
- source: Choose ONE from: PDC, GDACS, WFP, GFH, GGC, USGS, OTHER | |
- type: Choose ONE from: BIOLOGICAL_EMERGENCY, CHEMICAL_EMERGENCY, CIVIL_UNREST, COLD_WAVE, COMPLEX_EMERGENCY, CYCLONE, DROUGHT, EARTHQUAKE, EPIDEMIC, FIRE, FLOOD, FLOOD_INSECURITY, HEAT_WAVE, INSECT_INFESTATION, LANDSLIDE, OTHER, PLUVIAL, POPULATION_MOVEMENT, RADIOLOGICAL_EMERGENCY, STORM, TRANSPORTATION_EMERGENCY, TSUNAMI, VOLCANIC_ERUPTION | |
- countries: List of affected country codes (ISO 2-letter codes like PA, US, etc.) | |
- epsg: Choose ONE from: 4326, 3857, 32617, 32633, 32634, OTHER. If the map shows a different EPSG code, use "OTHER" | |
If you cannot find a match, use "OTHER". Return ONLY the JSON object (no markdown formatting) in this exact format: | |
{ | |
"analysis": "detailed description...", | |
"metadata": { | |
"title": "...", | |
"source": "...", | |
"type": "...", | |
"countries": ["..."], | |
"epsg": "..." | |
} | |
}') | |
""") | |
op.execute(""" | |
INSERT INTO models (m_code,label,model_type,is_available,config) VALUES | |
('GPT-4O','GPT-4O','gpt4o',true,'{"provider":"openai","model":"gpt-4o"}'), | |
('GEMINI15','Gemini 1.5','gemini_pro_vision',true,'{}'), | |
('CLAUDE3','Claude 3','claude_3_5_sonnet',false,'{}'), | |
('STUB_MODEL','Stub Model','custom',true,'{"stub": true}'), | |
('LLAVA_1_5_7B','LLaVA 1.5 7B','custom',true,'{"provider":"huggingface","model_id":"llava-hf/llava-1.5-7b-hf"}'), | |
('BLIP2_OPT_2_7B','BLIP Image Captioning','custom',true,'{"provider":"huggingface","model_id":"Salesforce/blip-image-captioning-base"}'), | |
('VIT_GPT2','Vit gpt2 image captioning','custom',true,'{"provider":"huggingface","model_id":"nlpconnect/vit-gpt2-image-captioning"}') | |
""") | |
op.execute(""" | |
INSERT INTO json_schemas (schema_id,title,schema,version) VALUES | |
('default_caption@1.0.0','Default Caption Schema', | |
'{"type":"object","properties":{"analysis":{"type":"string"},"metadata":{"type":"object","properties":{"title":{"type":"string"},"source":{"type":"string"},"type":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}},"epsg":{"type":"string"}}}},"required":["analysis","metadata"]}', | |
'1.0.0') | |
""") | |
for c in pycountry.countries: | |
code = c.alpha_2 | |
name = c.name.replace("'", "''") | |
region = _guess_region(code) | |
op.execute( | |
f"INSERT INTO countries (c_code,label,r_code) VALUES ('{code}','{name}','{region}')" | |
) | |
op.execute("INSERT INTO countries (c_code,label,r_code) VALUES ('XX','Not Applicable','OTHER')") | |
op.create_table( | |
'images', | |
sa.Column('image_id', postgresql.UUID(as_uuid=True), | |
server_default=sa.text('gen_random_uuid()'), | |
primary_key=True), | |
sa.Column('file_key', sa.String(), nullable=False), | |
sa.Column('sha256', sa.String(), nullable=False), | |
sa.Column('source', sa.String(), sa.ForeignKey('sources.s_code'), nullable=False), | |
sa.Column('event_type', sa.String(), sa.ForeignKey('event_types.t_code'), nullable=False), | |
sa.Column('epsg', sa.String(), sa.ForeignKey('spatial_references.epsg'), nullable=False), | |
sa.Column('image_type', sa.String(), sa.ForeignKey('image_types.image_type'), nullable=False), | |
sa.Column('created_at', sa.TIMESTAMP(timezone=True), server_default=sa.text('NOW()'), nullable=False), | |
sa.Column('captured_at', sa.TIMESTAMP(timezone=True), nullable=True), | |
sa.Column('title', sa.String(), nullable=True), | |
sa.Column('prompt', sa.String(), sa.ForeignKey('prompts.p_code'), nullable=True), | |
sa.Column('model', sa.String(), sa.ForeignKey('models.m_code'), nullable=True), | |
sa.Column('schema_id', sa.String(), sa.ForeignKey('json_schemas.schema_id'), nullable=True), | |
sa.Column('raw_json', postgresql.JSONB(astext_type=sa.Text()), nullable=True), | |
sa.Column('generated', sa.Text(), nullable=True), | |
sa.Column('edited', sa.Text(), nullable=True), | |
sa.Column('accuracy', sa.SmallInteger()), | |
sa.Column('context', sa.SmallInteger()), | |
sa.Column('usability', sa.SmallInteger()), | |
sa.Column('starred', sa.Boolean(), server_default=sa.text('false')), | |
sa.Column('updated_at', sa.TIMESTAMP(timezone=True), nullable=True), | |
sa.CheckConstraint('accuracy IS NULL OR (accuracy BETWEEN 0 AND 100)', name='chk_images_accuracy'), | |
sa.CheckConstraint('context IS NULL OR (context BETWEEN 0 AND 100)', name='chk_images_context'), | |
sa.CheckConstraint('usability IS NULL OR (usability BETWEEN 0 AND 100)', name='chk_images_usability') | |
) | |
op.create_table( | |
'image_countries', | |
sa.Column('image_id', postgresql.UUID(as_uuid=True), nullable=False), | |
sa.Column('c_code', sa.CHAR(length=2), nullable=False), | |
sa.PrimaryKeyConstraint('image_id', 'c_code', name='pk_image_countries'), | |
sa.ForeignKeyConstraint(['image_id'], ['images.image_id'], ondelete='CASCADE'), | |
sa.ForeignKeyConstraint(['c_code'], ['countries.c_code']) | |
) | |
op.create_index('ix_images_created_at', 'images', ['created_at']) | |
def downgrade(): | |
op.drop_index('ix_images_created_at', table_name='images') | |
op.drop_table('image_countries') | |
op.drop_table('images') | |
op.drop_table('json_schemas') | |
op.drop_table('models') | |
op.drop_table('image_types') | |
op.drop_table('spatial_references') | |
op.drop_table('countries') | |
op.drop_table('event_types') | |
op.drop_table('regions') | |
op.drop_table('sources') | |