import copy
import gc
import os
import platform
import socket
import time
import uuid
from io import BytesIO
from multiprocessing import Pool, Process
from threading import Thread

import dask
import fsspec
import geopandas as gpd
import gradio as gr
import numpy as np
import pandas as pd
import planetary_computer
import psutil
import pytz
import rioxarray
import s3fs
import shapely
import torch
import xarray as xr
import zarr
from mapminer import miner
from numba import njit, prange
from odc.stac import load
from PIL import Image
from pystac import (Asset, Catalog, Collection, Extent, Item, SpatialExtent,
                    StacIO, TemporalExtent)
from pystac.stac_io import DefaultStacIO
from pystac_client import Client
from scipy.stats import entropy
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from shapely.geometry import Point, Polygon, box
from skimage import exposure
from torch import nn
planetary_computer.settings.set_subscription_key('1d7ae9ea9d3843749757036a903ddb6c')
os.environ['AWS_ACCESS_KEY_ID'] = 'AKIA4XSFKWWE4JRSPNED'
os.environ['AWS_SECRET_ACCESS_KEY'] = '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'


class FsspecStacIO(DefaultStacIO):
    """STAC IO adapter that routes reads and writes through fsspec (local, S3, etc.)."""

    def read_text(self, href: str) -> str:
        with fsspec.open(href, mode='r') as f:
            return f.read()

    def write_text(self, href: str, txt: str) -> None:
        with fsspec.open(href, mode='w') as f:
            f.write(txt)


StacIO.set_default(FsspecStacIO)


def convert_to_serializable(obj):
    """Recursively convert numpy scalars/arrays into plain Python types for JSON storage."""
    if isinstance(obj, dict):
        return {str(k): convert_to_serializable(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(v) for v in obj]
    elif isinstance(obj, (np.integer, np.floating)):
        return obj.item()
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    return obj


def get_system_dump():
    """Snapshot of host hardware/OS details, attached to each datacube's metadata."""
    return {
        'os': platform.system(),
        'os_version': platform.version(),
        'os_release': platform.release(),
        'architecture': platform.architecture()[0],
        'processor': platform.processor(),
        'cpu_cores_physical': psutil.cpu_count(logical=False),
        'cpu_cores_logical': psutil.cpu_count(logical=True),
        'ram': round(psutil.virtual_memory().total / 1024 ** 3, 2),
        'hostname': socket.gethostname(),
        'ip_address': socket.gethostbyname(socket.gethostname()),
        'python_version': platform.python_version(),
        'machine': platform.machine(),
        'boot_time': psutil.boot_time(),
        'disk_total_gb': round(psutil.disk_usage('/').total / 1024 ** 3, 2),
        'disk_used_gb': round(psutil.disk_usage('/').used / 1024 ** 3, 2),
        'disk_free_gb': round(psutil.disk_usage('/').free / 1024 ** 3, 2),
    }
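
# Illustrative example (documentation only, not executed): convert_to_serializable
# makes numpy-laden metadata JSON-safe before it is written into zarr attributes.
#
#   >>> convert_to_serializable({'kl_loss': np.float64(0.12), 'hist': np.array([1, 2])})
#   {'kl_loss': 0.12, 'hist': [1, 2]}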

class DatacubeMiner:
    """Mines co-registered base-image / Sentinel-2 / Sentinel-1 / MODIS / LULC datacubes."""

    def __init__(self, google=True):
        self.google = google
        # Coarse continental outlines used to sample random mining locations.
        self.usa = 'POLYGON ((-124.453125 48.180655, -124.057615 46.920084, -124.628905 42.843568, -123.35449 38.822395, -121.992186 36.668218, -120.366209 34.488241, -119.124756 34.111779, -118.707275 34.04353, -118.256836 33.756289, -117.784424 33.523053, -117.388916 33.206494, -117.114256 32.805533, -114.653318 32.620658, -110.03906 31.690568, -106.743161 31.989229, -105.029294 30.902009, -103.403318 28.998312, -102.832028 29.878537, -101.425778 29.878537, -99.755856 27.916544, -97.426755 26.155212, -96.987301 28.071758, -94.6582 29.420241, -88.989254 30.1069, -84.067379 30.14491, -81.079097 25.085371, -80.156246 26.273488, -82.265621 31.24077, -77.124019 34.741406, -75.585933 37.822604, -74.091792 40.780352, -70.883784 41.836641, -69.960932 43.96101, -67.060542 44.24502, -68.027338 47.010055, -69.301753 47.279059, -70.883784 45.088859, -75.805659 44.276492, -79.101558 42.617607, -83.540035 41.705541, -83.627925 45.521569, -89.78027 47.812987, -95.185544 48.980135, -122.475585 48.893533, -122.849121 47.945703, -124.453125 48.180655))'
        self.usa = shapely.from_wkt(self.usa)
        self.india = 'POLYGON ((75.585953 36.597085, 67.675796 24.3662, 71.894546 20.960503, 76.464859 7.884153, 80.332047 13.580946, 81.914078 17.475476, 87.71486 21.778974, 92.285173 21.452135, 97.734392 27.993516, 92.285173 28.766781, 81.562516 31.202548, 75.585953 36.597085))'
        self.india = shapely.from_wkt(self.india)
        if self.google:
            self.google_miner = miner.GoogleBaseMapMiner(install_chrome=False)
        else:
            self.naip_miner = miner.NAIPMiner()
        self.s2_miner = miner.Sentinel2Miner()
        self.s1_miner = miner.Sentinel1Miner()
        self.landsat_miner = miner.LandsatMiner()
        self.modis_miner = miner.MODISMiner()
        self.lulc_miner = miner.ESRILULCMiner()

    def mine(self, lat=None, lon=None, radius=500, duration=75):
        """Fetch a base image plus coincident satellite stacks around (lat, lon)."""
        google = self.google
        if google:
            polygon = self.india
            base_miner = self.google_miner
        else:
            polygon = self.usa
            base_miner = self.naip_miner
        if lat is None:
            # Rejection-sample a random point inside the polygon.
            point = next((Point(p) for p in zip(
                [np.random.uniform(*polygon.bounds[::2]) for _ in range(1000)],
                [np.random.uniform(*polygon.bounds[1::2]) for _ in range(1000)])
                if Point(p).within(polygon)))
            lat, lon = (point.y, point.x)
        print(lat, lon)
        if google:
            ds = base_miner.fetch(lat=lat, lon=lon, radius=radius, reproject=True)
        else:
            ds = base_miner.fetch(lat=lat, lon=lon, radius=radius, daterange='2020-01-01/2024-12-31')
        print(f"Fetched Time : {ds.attrs['metadata']['date']['value']}")
        ds.coords['time'] = ds.attrs['metadata']['date']['value']
        ds = ds.transpose('band', 'y', 'x')
        # Search window: `duration` days before the base image to 3 days after it.
        base_date = pd.to_datetime(ds.attrs['metadata']['date']['value'])
        daterange = f"{(base_date - pd.Timedelta(value=duration, unit='d')).date()}/{(base_date + pd.Timedelta(value=3, unit='d')).date()}"
        ds_sentinel2 = self.s2_miner.fetch(lat, lon, radius, daterange=daterange).sortby('y').sortby('x')
        ds_modis = self.modis_miner.fetch(lat, lon, radius, daterange=daterange).sortby('y').sortby('x')
        ds_sentinel1 = self.s1_miner.fetch(lat, lon, radius, daterange=daterange).sortby('y').sortby('x')
        ds_lulc = self.lulc_miner.fetch(lat, lon, radius, daterange='2024-01-01/2024-12-31').sortby('y').sortby('x')
        ds_modis, ds_sentinel2, ds_sentinel1, ds_lulc = dask.compute(ds_modis, ds_sentinel2, ds_sentinel1, ds_lulc)
        # Snap the base image onto a 16x-densified Sentinel-2 grid.
        ys = np.linspace(ds_sentinel2.y.values[0], ds_sentinel2.y.values[-1], num=16 * len(ds_sentinel2.y.values))
        xs = np.linspace(ds_sentinel2.x.values[0], ds_sentinel2.x.values[-1], num=16 * len(ds_sentinel2.x.values))
        ds = ds.sel(x=xs, y=ys, method='nearest')
        ds['y'], ds['x'] = (ys, xs)
        # Stack each product's bands into a single (time, band, y, x) DataArray.
        bands = ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A', 'SCL']
        ds_sentinel2 = xr.concat(objs=[ds_sentinel2[band] for band in bands], dim='band').transpose('time', 'band', 'y', 'x')
        ds_sentinel2['band'] = bands
        ds_sentinel2.name = 'Sentinel-2'
        bands = ['vv', 'vh']
        ds_sentinel1 = xr.concat(objs=[ds_sentinel1[band] for band in bands], dim='band').transpose('time', 'band', 'y', 'x')
        ds_sentinel1['band'] = bands
        ds_sentinel1.name = 'Sentinel-1'
        bands = ['sur_refl_b01', 'sur_refl_b02', 'sur_refl_b03', 'sur_refl_b04', 'sur_refl_b05', 'sur_refl_b06', 'sur_refl_b07']
        ds_modis = xr.concat(objs=[ds_modis[band] for band in bands], dim='band').transpose('time', 'band', 'y', 'x')
        ds_modis['band'] = bands
        ds_modis.name = 'MODIS'
        ds, index = self.equalize(ds_sentinel2, ds)
        ds = self.align(ds_sentinel2.isel(time=index), ds)
        datacube = {'ds': ds, 'ds_sentinel2': ds_sentinel2, 'ds_sentinel1': ds_sentinel1,
                    'ds_modis': ds_modis, 'ds_lulc': ds_lulc['data'].isel(time=0), 'index': index}
        datacube['metadata'] = self.get_metadata(datacube)
        datacube['metadata']['source'] = 'google' if google else 'naip'
        return datacube
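    # Shape summary of the dictionary returned by mine() (keys as defined above):
    #   'ds'           - histogram-matched, shift-aligned base image (band, y, x)
    #   'ds_sentinel2' - Sentinel-2 stack, 13 bands incl. SCL (time, band, y, x)
    #   'ds_sentinel1' - Sentinel-1 stack, vv/vh (time, band, y, x)
    #   'ds_modis'     - MODIS surface-reflectance stack (time, band, y, x)
    #   'ds_lulc'      - single ESRI LULC snapshot (y, x)
    #   'index'        - (negative) time index of the Sentinel-2 scene used for matching
    #   'metadata'     - quality/provenance dict produced by get_metadata()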
    def equalize(self, ds_sentinel2, ds_google):
        """Histogram-match the base image to the most recent cloud-free Sentinel-2 scene."""
        n_bands = len(ds_google.band)
        ds_sentinel2 = ds_sentinel2.sel(band=['B04', 'B03', 'B02', 'B08', 'SCL'])
        ds_google = ds_google.astype('float32')
        for index in range(-1, -4, -1):
            # SCL classes 8-11 are cloud/cirrus; a very bright B02 is also treated as cloud.
            cloud_mask = (ds_sentinel2.sel(band='SCL').isel(time=index).isin([8, 9, 10, 11])
                          | (ds_sentinel2.sel(band='B02').isel(time=index) >= 5000))
            cloud_fraction = float(cloud_mask.data.mean())
            if cloud_fraction < 0.03:
                ds_placeholder = copy.deepcopy(ds_sentinel2.isel(time=index))
                # Clip the brightest 0.1% of each band to the median to stabilise matching.
                for band_index in range(len(ds_placeholder.band)):
                    ds_placeholder.data[band_index] = np.where(
                        ds_placeholder.data[band_index] >= np.percentile(ds_placeholder.data[band_index], 99.9),
                        np.median(ds_placeholder.data[band_index]),
                        ds_placeholder.data[band_index])
                ds_google.data = exposure.match_histograms(
                    ds_google.data[:n_bands], ds_placeholder.data[:n_bands, :, :], channel_axis=0)
                break
            if cloud_fraction >= 0.05:
                raise Exception('Entire Data is Cloudy')
        return (ds_google, index)

    def align(self, ds_sentinel2, ds_google):
        """Iteratively remove the integer pixel shift between the base image and Sentinel-2."""
        n_bands = len(ds_google.band)
        ds_sentinel2 = ds_sentinel2.sel(band=['B04', 'B03', 'B02', 'B08'][:n_bands])
        ds_google = copy.deepcopy(ds_google)
        n = 6
        reference = ds_sentinel2.sel(x=ds_google.x.values, y=ds_google.y.values, method='nearest').data[:n_bands]
        min_l1 = np.median(np.abs(reference - ds_google.data[:n_bands]))
        while n > 0:
            n -= 1
            reference_image, target_image = DatacubeMiner.correct_shift(
                reference_image=reference, target_image=ds_google.data[:n_bands])
            # Stretch the shift-cropped result back to the original raster size.
            target_image = nn.Upsample(size=ds_google.shape[1:])(torch.tensor(target_image[np.newaxis])).data.cpu().numpy()[0, :]
            l1_loss = np.median(np.abs(reference - target_image[:n_bands]))
            if l1_loss < min_l1:
                min_l1 = l1_loss
                ds_google.data[:n_bands] = target_image[:n_bands]
            else:
                break
        return ds_google
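    # align() alternates two steps (at most 6 rounds) until the median L1 error
    # against the resampled Sentinel-2 reference stops improving:
    #   1. correct_shift(): grid-search an integer (y, x) offset in [-20, 20) px
    #      (step 2) that minimizes the mean absolute difference.
    #   2. nn.Upsample(): stretch the shift-cropped result back to the original
    #      raster size so the next round compares like with like.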
    @staticmethod
    @njit(parallel=True, cache=False)
    def correct_shift(reference_image, target_image):
        """
        Predict and remove the integer pixel shift in histogram-mapped satellite imagery.

        Arguments:
            reference_image : numpy array (C, H, W)
            target_image    : numpy array (C, H, W)

        Returns both images cropped to their overlapping region at the
        L1-optimal (shift_y, shift_x) offset.
        """
        shift_limits = np.array([-20, 20])
        shift_range = np.arange(shift_limits[0], shift_limits[1], 2)
        num_shifts = len(shift_range)
        min_l1 = 100000
        min_shift_y, min_shift_x = (0, 0)
        for shift_y_id in prange(num_shifts):
            shift_y = shift_range[shift_y_id]
            for shift_x_id in range(num_shifts):
                shift_x = shift_range[shift_x_id]
                if shift_x > 0:
                    sentinel_shifted = reference_image[:, :, shift_x:]
                    naip_shifted = target_image[:, :, :-shift_x]
                elif shift_x < 0:
                    sentinel_shifted = reference_image[:, :, :shift_x]
                    naip_shifted = target_image[:, :, -shift_x:]
                else:
                    # Zero shift: compare the full images. (The original code left the
                    # previous iteration's slices in place here, which was a bug.)
                    sentinel_shifted = reference_image
                    naip_shifted = target_image
                if shift_y > 0:
                    sentinel_shifted = sentinel_shifted[:, shift_y:, :]
                    naip_shifted = naip_shifted[:, :-shift_y, :]
                elif shift_y < 0:
                    sentinel_shifted = sentinel_shifted[:, :shift_y, :]
                    naip_shifted = naip_shifted[:, -shift_y:, :]
                l1_error = np.mean(np.abs(sentinel_shifted - naip_shifted))
                if l1_error < min_l1:
                    min_l1 = l1_error
                    min_shift_y, min_shift_x = (shift_y, shift_x)
        # Re-crop both images at the best offset found.
        shift_x, shift_y = (int(min_shift_x), int(min_shift_y))
        if shift_x > 0:
            sentinel_shifted = reference_image[:, :, shift_x:]
            naip_shifted = target_image[:, :, :-shift_x]
        elif shift_x < 0:
            sentinel_shifted = reference_image[:, :, :shift_x]
            naip_shifted = target_image[:, :, -shift_x:]
        else:
            sentinel_shifted = reference_image
            naip_shifted = target_image
        if shift_y > 0:
            sentinel_shifted = sentinel_shifted[:, shift_y:, :]
            naip_shifted = naip_shifted[:, :-shift_y, :]
        elif shift_y < 0:
            sentinel_shifted = sentinel_shifted[:, :shift_y, :]
            naip_shifted = naip_shifted[:, -shift_y:, :]
        return (sentinel_shifted, naip_shifted)
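    # Illustrative call (assumption: synthetic arrays, not part of the pipeline):
    #
    #   ref = np.random.rand(3, 256, 256)
    #   tgt = np.roll(ref, shift=(4, -6), axis=(1, 2))
    #   ref_crop, tgt_crop = DatacubeMiner.correct_shift(ref, tgt)
    #
    # Both returned arrays are cropped to the overlapping region under the
    # L1-optimal (shift_y, shift_x) offset, so ref_crop.shape == tgt_crop.shape.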
    def get_metadata(self, datacube):
        """Compute quality metrics, per-band statistics, and the LULC distribution."""
        datacube['ds'].name = 'ds'
        gt_bands = ['B04', 'B03', 'B02', 'B08'][:len(datacube['ds'].band)]
        # KL divergence between the histograms of the base image and the matched S2 scene.
        hist1, _ = np.histogram(datacube['ds'].data.ravel(), bins=10, density=True)
        hist2, _ = np.histogram(datacube['ds_sentinel2'].sel(band=gt_bands).isel(time=datacube['index']).data.ravel(), bins=10, density=True)
        kl_div = entropy(hist1 + 1e-10, hist2 + 1e-10)
        l1_loss = np.abs(
            datacube['ds_sentinel2'].sel(band=gt_bands).isel(time=datacube['index'])
            .sel(x=datacube['ds'].x.values, y=datacube['ds'].y.values, method='nearest').data
            - datacube['ds'].data).mean()
        df_lulc = datacube['ds_lulc'].to_dataframe()
        df_lulc_value_counts = df_lulc.data.value_counts()
        lulc_mapping = {0: 'no_data', 1: 'water', 2: 'trees', 4: 'flooded_vegetation', 5: 'crops',
                        7: 'built_area', 8: 'bare_ground', 9: 'snow_ice', 10: 'clouds', 11: 'rangeland'}
        metadata = {
            'date': pd.to_datetime(datacube['ds'].attrs['metadata']['date']['value']),
            'source': 'google',
            'closest_index': datacube['index'],
            'cloud_cover': {f'{time_index}': datacube['ds_sentinel2'].sel(band='SCL').isel(time=time_index).isin([8, 9, 10, 11]).data.mean()
                            for time_index in range(-1, -(len(datacube['ds_sentinel2'].time) - 1), -1)},
            'delta': {'sentinel2': (pd.to_datetime(datacube['ds'].attrs['metadata']['date']['value']) - pd.to_datetime(pd.to_datetime(datacube['ds_sentinel2'].time[-1].data).date())).days,
                      'sentinel1': (pd.to_datetime(datacube['ds'].attrs['metadata']['date']['value']) - pd.to_datetime(pd.to_datetime(datacube['ds_sentinel1'].time[-1].data).date())).days,
                      'modis': (pd.to_datetime(datacube['ds'].attrs['metadata']['date']['value']) - pd.to_datetime(pd.to_datetime(datacube['ds_modis'].time[-1].data).date())).days},
            'lulc_distribution': {lulc_mapping[lulc_class]: (df_lulc_value_counts.loc[lulc_class] / len(df_lulc)
                                                             if lulc_class in df_lulc_value_counts.index else 0)
                                  for lulc_class in lulc_mapping},
            'data_quality': {'kl_loss': kl_div, 'l1_loss': l1_loss},
            'data_description': {
                'gt': {['red', 'green', 'blue', 'nir'][band_index]: datacube['ds'].isel(band=band_index).to_dataframe().describe(include='all').iloc[4:, -1].to_dict()
                       for band_index in range(len(datacube['ds'].band))},
                'sentinel2': {time_index: {band: datacube['ds_sentinel2'].sel(band=band).isel(time=time_index).to_dataframe().describe(include='all').iloc[4:, -1].to_dict()
                                           for band in datacube['ds_sentinel2'].band.values}
                              for time_index in range(-1, -(len(datacube['ds_sentinel2'].time) + 1), -1)},
                'sentinel1': {time_index: {band: datacube['ds_sentinel1'].sel(band=band).isel(time=time_index).to_dataframe().describe(include='all').iloc[4:, -1].to_dict()
                                           for band in datacube['ds_sentinel1'].band.values}
                              for time_index in range(-1, -(len(datacube['ds_sentinel1'].time) + 1), -1)},
                'modis': {time_index: {band: datacube['ds_modis'].sel(band=band).isel(time=time_index).to_dataframe().describe(include='all').iloc[4:, -1].to_dict()
                                       for band in datacube['ds_modis'].band.values}
                          for time_index in range(-1, -(len(datacube['ds_modis'].time) + 1), -1)}}}
        return metadata

    @staticmethod
    def store(google=False):
        """Run the mining loop forever, dumping every datacube to the S3 zarr store."""
        store_path = 's3://general-dump/super-resolution-4.0/database/store.zarr'
        datacube_miner = DatacubeMiner(google=google)
        print('Miner initialized')
        while True:
            try:
                print('...........................Mining................................')
                mining_start_time = time.time()
                datacube = datacube_miner.mine()
                mining_end_time = time.time()
                print(f'................Mined ({mining_end_time - mining_start_time} sec)..............')
            except KeyboardInterrupt:
                break
            except Exception as e:
                # Rebuild the miner from scratch: GoogleBaseMapMiner's Chrome driver
                # can become unusable after a failed fetch.
                print(f'Exception occurred: {e}')
                if datacube_miner.google:
                    datacube_miner.google_miner.driver.quit()
                del datacube_miner
                gc.collect()
                datacube_miner = DatacubeMiner(google=google)
                continue
            group_id = str(uuid.uuid4())
            print(f'Uploading to : {group_id}..............')
            uploading_start_time = time.time()
            datacube['ds'].to_dataset(name='gt').to_zarr(store_path, group=f'{group_id}/gt', consolidated=False)
            print(f' {group_id} : ds dumped to s3')
            datacube['ds_sentinel2'].to_dataset(name='sentinel2').to_zarr(store_path, group=f'{group_id}/sentinel2', consolidated=False)
            print(f' {group_id} : ds_sentinel2 dumped to s3')
            datacube['ds_sentinel1'].to_dataset(name='sentinel1').to_zarr(store_path, group=f'{group_id}/sentinel1', consolidated=False)
            print(f' {group_id} : ds_sentinel1 dumped to s3')
            datacube['ds_modis'].to_dataset(name='modis').to_zarr(store_path, group=f'{group_id}/modis', consolidated=False)
            print(f' {group_id} : ds_modis dumped to s3')
            datacube['ds_lulc'].to_dataset(name='lulc').to_zarr(store_path, group=f'{group_id}/lulc', consolidated=False)
            print(f' {group_id} : ds_lulc dumped to s3')
            print(f'Uploading Metadata to {group_id}.............')
            metadata = datacube['metadata']
            metadata['date'] = str(metadata['date'].date())
            metadata['created_date'] = str(pd.Timestamp.now(tz=pytz.timezone('Asia/Kolkata')).date())
            metadata['system'] = get_system_dump()
            # Numpy scalars are not JSON-serializable; convert before writing zarr attrs
            # (assumption: this is what convert_to_serializable was defined for).
            zarr.open_group(store_path, path=group_id, mode='a').attrs.update(convert_to_serializable(metadata))
            print(f' {group_id} : metadata dumped to s3')
            uploading_end_time = time.time()
            print(f'----------- {group_id} S3 Dumping Finished ({uploading_end_time - uploading_start_time} sec)-------------')
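
# Minimal usage sketch (illustrative; the coordinates are hypothetical). store()
# already wraps the full loop, so calling mine() directly is mainly for inspection:
#
#   miner_ = DatacubeMiner(google=True)
#   cube = miner_.mine(lat=28.61, lon=77.21, radius=500)
#   print(cube['metadata']['data_quality'])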

class DashBoard:
    """Gradio dashboard that live-tracks how many datacubes have been mined to S3."""

    def __init__(self):
        self.fs = s3fs.S3FileSystem(anon=True)
        self.datacube_count = 0
        self.update_thread = Thread(target=self.update_datacube_count)
        self.update_thread.daemon = True
        self.update_thread.start()

    def update_datacube_count(self):
        # Poll S3 every 5 seconds; each top-level zarr group is one datacube.
        while True:
            try:
                self.fs.invalidate_cache()
                self.datacube_count = len(self.fs.ls('s3://general-dump/super-resolution-4.0/database/store.zarr/', refresh=True))
            except Exception as e:
                print(f'Error reading from S3: {e}')
            time.sleep(5)

    def display_datacube_count(self):
        # The original HTML body of this f-string was lost in transit; this is a
        # minimal reconstruction that keeps the surviving tagline and reuses the
        # .live-counter style defined in the CSS below.
        return f"""
        <div class="live-counter">🛰️ Datacubes Mined: {self.datacube_count}</div>
        <p>💡 'Mining Insights from Space, One Datacube at a Time'</p>
        """

    def launch_dashboard(self):
        css = """
        @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;700&family=Space+Mono:wght@700&display=swap');

        body {
            font-family: 'Roboto', sans-serif;
            background: linear-gradient(180deg, #0f2027, #203a43, #2c5364);
            color: white;
            margin: 0;
            padding: 0;
            overflow-x: hidden;
        }

        #header {
            font-family: 'Space Mono', monospace;
            text-align: center;
            font-size: 3.5rem;
            color: #FFD700;
            text-shadow: 0 0 20px #FFD700, 0 0 30px #FFD700;
            margin: 20px 0;
        }

        #datacube-section {
            background: rgba(255, 255, 255, 0.1);
            padding: 20px;
            border-radius: 15px;
            box-shadow: 0px 4px 15px rgba(0, 0, 0, 0.2);
            transition: transform 0.3s, box-shadow 0.3s;
        }

        #datacube-section:hover {
            transform: translateY(-10px);
            box-shadow: 0px 10px 25px rgba(0, 0, 0, 0.5);
        }

        .live-counter {
            display: flex;
            align-items: center;
            justify-content: center;
            font-size: 1.8rem;
            color: #00ff99;
            font-family: 'Space Mono', monospace;
            background: rgba(0, 255, 153, 0.1);
            padding: 15px;
            border-radius: 10px;
            border: 2px solid #00ff99;
            box-shadow: 0px 4px 10px rgba(0, 255, 153, 0.5);
        }

        footer {
            margin-top: 50px;
            text-align: center;
            color: rgba(255, 255, 255, 0.7);
            font-size: 1rem;
        }

        footer a {
            color: #FFD700;
            text-decoration: none;
        }

        footer a:hover {
            text-decoration: underline;
        }
        """
        with gr.Blocks(css=css) as dashboard:
            gr.Markdown('Keep track of the datacubes mined in real time with our cutting-edge dynamic tracker.')
            # The source is truncated at this point; the lines below are a minimal,
            # assumed completion that wires the live counter into the page
            # (`every=5` re-polls the callable value every 5 seconds).
            gr.HTML(value=self.display_datacube_count, every=5)
        dashboard.launch()
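
# Hypothetical entry point (assumption: not present in the original source).
# Runs the mining loop in a background process and serves the dashboard in
# the foreground.
if __name__ == '__main__':
    mining_process = Process(target=DatacubeMiner.store, kwargs={'google': True})
    mining_process.start()
    DashBoard().launch_dashboard()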