"""FastAPI service returning hail values around a lat/lon point.

Values are read from yearly HDF5 files: a square window centred on the
requested point is read from the ``hail`` dataset, masked to a disk, and
returned as JSON.
"""

import logging
import pickle

from fastapi import FastAPI
import uvicorn
import pandas as pd
import numpy as np
import rasterio
import h5py
from skimage.morphology import disk
from joblib import Parallel, delayed
from tqdm import tqdm

logger = logging.getLogger(__name__)

# Pickled transform used to map lon/lat to grid row/col.
_TRANSFORM_PATH = 'Data/transform_mrms.pkl'

# Yearly HDF5 files that are scanned on every request.
_HAIL_FILES = [
    # "Data/2024_yearly_partial.h5",
    "Data/mritchey/2020_yearly.h5",
    # "Data/mritchey/2023_yearly.h5",
    "Data/mritchey/2021_yearly.h5",
    "Data/mritchey/2022_yearly.h5",
]

# Number of rows along the first axis read per task.
_CHUNK_ROWS = 500
# Upper bound on rows per yearly file (hours in a leap year).
_MAX_ROWS_PER_FILE = 366 * 24
# Scale factor applied by the "Convert to Inches" step
# (presumably the source values are millimetres -- TODO confirm).
_TO_INCHES = 0.0393701


def read_chunk(h5_file_path, slice_value, r, c, radius_miles=0):
    """Read one window of the ``hail`` dataset and its matching timestamps.

    Args:
        h5_file_path: Path of the HDF5 file to open read-only.
        slice_value: Slice applied to the first axis of ``hail`` and to
            ``date_time_hr``.
        r: Centre row of the window.
        c: Centre column of the window.
        radius_miles: Half-width of the window in grid cells; the window is
            ``2 * radius_miles + 1`` cells on each side. Defaults to 0
            (a single cell).

    Returns:
        Tuple ``(data, date)`` holding the ``hail`` window and the
        ``date_time_hr`` entries for ``slice_value``.
    """
    with h5py.File(h5_file_path, 'r') as f:
        data = f['hail'][
            slice_value,
            r - radius_miles:r + radius_miles + 1,
            c - radius_miles:c + radius_miles + 1,
        ]
        date = f['date_time_hr'][slice_value]
    return data, date


app = FastAPI()


# Endpoints
# Root endpoint
@app.get("/")
def root():
    """Return a small payload identifying this API."""
    return {"API": "HRRR_GUST_hourly"}


def get_hail_data(lat, lon, start_date, end_date, radius_miles, get_max):
    """Build a per-date DataFrame of hail values around a point.

    Args:
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.
        start_date: First date of the output range (anything
            ``pd.Timestamp(str(...))`` accepts).
        end_date: Last date of the output range (same parsing).
        radius_miles: Radius of the disk, in grid cells, kept around the
            point.
        get_max: If truthy, return the maximum inside the disk per
            timestamp (column ``Hail_max``); otherwise return the full
            masked window per timestamp (column ``Hail_all``).

    Returns:
        DataFrame with a ``Date`` column formatted ``YYYY-MM-DD`` covering
        every day from ``start_date`` to ``end_date``; days without data
        are filled with 0.

    Raises:
        ValueError: If the requested window would start before the first
            row or column of the grid.
    """
    start_date = pd.Timestamp(str(start_date)).strftime('%Y%m%d')
    end_date = pd.Timestamp(str(end_date)).strftime('%Y%m%d')
    date_range_days = pd.date_range(start_date, end_date)

    # Convert Lat Lon to row & col on Array.
    # NOTE: pickle executes arbitrary code on load; this file must stay a
    # trusted, locally shipped artifact.
    with open(_TRANSFORM_PATH, 'rb') as fh:
        transform = pickle.load(fh)
    row, col = rasterio.transform.rowcol(transform, lon, lat)

    # A negative slice start would silently select the wrong cells.
    if row < radius_miles or col < radius_miles:
        raise ValueError(
            'requested window extends beyond the edge of the grid')

    # Step through the whole file, including the final partial chunk;
    # slices past the end of a dataset simply come back shorter or empty.
    chunk_slices = [
        slice(start, start + _CHUNK_ROWS)
        for start in range(0, _MAX_ROWS_PER_FILE, _CHUNK_ROWS)
    ]
    tasks = [(path, rows) for path in _HAIL_FILES for rows in chunk_slices]

    results = Parallel(n_jobs=len(chunk_slices))(
        delayed(read_chunk)(path, rows, row, col, radius_miles)
        for path, rows in tqdm(tasks)
    )
    data_all = np.vstack([data for data, _ in results])
    dates_all = np.concatenate([dates for _, dates in results])

    # Convert to Inches (negative values are treated as 0).
    data_mat = np.where(data_all < 0, 0, data_all) * _TO_INCHES

    # Get Radius of Data: cells outside the disk are set to -1.
    disk_mask = disk(radius_miles).astype(bool)
    data_mat = np.where(disk_mask, data_mat, -1).round(3)

    # Process to DataFrame
    if get_max:
        # Find Max of Data
        data_max = np.max(data_mat, axis=(1, 2))
        df_data = pd.DataFrame({'Date': dates_all, 'Hail_max': data_max})
    else:
        # Get all Data
        df_data = pd.DataFrame({'Date': dates_all, 'Hail_all': list(data_mat)})

    # NOTE(review): this parses 'date_time_hr' as YYYYMMDD and reindexes on
    # unique days; confirm that matches the stored format (an hourly stamp
    # would need a different format and an aggregation step first).
    df_data['Date'] = pd.to_datetime(df_data['Date'], format='%Y%m%d')
    df_data = df_data.set_index('Date')

    df_data = (
        df_data.reindex(date_range_days, fill_value=0)
        .reset_index()
        .rename(columns={'index': 'Date'})
    )
    df_data['Date'] = df_data['Date'].dt.strftime('%Y-%m-%d')

    return df_data


@app.get('/HRRR_GUST_hourly')
async def predict(lat: float, lon: float, start_date: str, end_date: str,
                  radius_miles: int, get_max: bool):
    """Serve hail data for a point and date range as a JSON string.

    On any failure the error is logged and a one-row frame whose ``Date``
    and ``Hail_max`` are both ``'error'`` is returned instead, so the
    endpoint always answers with JSON.
    """
    try:
        results = get_hail_data(
            lat, lon, start_date, end_date, radius_miles, get_max)
    except Exception:
        # Best-effort endpoint: keep the sentinel payload, but record why.
        logger.exception('get_hail_data failed')
        results = pd.DataFrame({'Date': ['error'], 'Hail_max': ['error']})

    return results.to_json()