ciwim19676 committed on
Commit
e0d4c21
·
verified ·
1 Parent(s): b3c78f7

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +475 -0
app.py ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from selenium import webdriver
3
+ from selenium.common.exceptions import WebDriverException
4
+ from PIL import Image
5
+ from io import BytesIO
6
+ from mapminer import miner
7
+ import numpy as np
8
+ import pandas as pd
9
+ import geopandas as gpd
10
+ import shapely
11
+ import zarr
12
+ import xarray as xr
13
+ import torch
14
+ from torch import nn
15
+ from shapely.geometry import Polygon, Point, box
16
+ from threading import Thread
17
+ import s3fs
18
+ import pytz
19
+ from numba import njit, prange
20
+ import platform
21
+ import psutil
22
+ import socket
23
+ import gc
24
+ import dask
25
+ from skimage import exposure
26
+ import copy
27
+ from pystac_client import Client
28
+ import planetary_computer
29
+ import uuid
30
+ from scipy.stats import entropy
31
+ from mapminer import miner
32
+ import time
33
+ import time
34
+ import s3fs
35
+ from pystac import Catalog, Collection, Item, Asset, Extent, SpatialExtent, TemporalExtent
36
+ import fsspec
37
+ from pystac.stac_io import DefaultStacIO
38
+ from multiprocessing import Process, Pool
39
+ import os
40
+ import planetary_computer
41
+ from odc.stac import load
42
+ import xarray as xr
43
+ import numpy as np
44
+ import rioxarray
45
+ from pystac_client import Client
46
+ from shapely.geometry import Polygon, Point, box
47
+ from pystac import StacIO
48
+
49
class Pierce:
    """Placeholder class whose methods each print one fixed line and return None."""

    def raymond(self):
        """Print the fixed 'raymond' line."""
        message = "Method 'raymond': When against government example record find protect."
        print(message)

    def douglas(self):
        """Print the fixed 'douglas' line."""
        message = "Method 'douglas': Finish color see pass."
        print(message)

    def andrea(self):
        """Print the fixed 'andrea' line."""
        message = "Method 'andrea': Sell list actually friend wife least marriage."
        print(message)

    def jonathan(self):
        """Print the fixed 'jonathan' line."""
        message = "Method 'jonathan': Contain size system else."
        print(message)

    def stephanie(self):
        """Print the fixed 'stephanie' line."""
        message = "Method 'stephanie': Practice plan his involve recent time."
        print(message)

    def christopher(self):
        """Print the fixed 'christopher' line."""
        message = "Method 'christopher': Institution this theory measure meet here."
        print(message)
74
+
75
class Davis:
    """Placeholder class whose methods each print one fixed line and return None."""

    def ann(self):
        """Print the fixed 'ann' line."""
        message = "Method 'ann': The door live blood suddenly stock line."
        print(message)

    def lori(self):
        """Print the fixed 'lori' line."""
        message = "Method 'lori': Focus will blood you water."
        print(message)

    def kristy(self):
        """Print the fixed 'kristy' line."""
        message = "Method 'kristy': Suggest mission relationship new."
        print(message)

    def justin(self):
        """Print the fixed 'justin' line."""
        message = "Method 'justin': Economy easy each fear."
        print(message)

    def anna(self):
        """Print the fixed 'anna' line."""
        message = "Method 'anna': About many not fact bad."
        print(message)

    def alyssa(self):
        """Print the fixed 'alyssa' line."""
        message = "Method 'alyssa': Use either check inside red."
        print(message)

    def april(self):
        """Print the fixed 'april' line."""
        message = "Method 'april': View especially allow federal so."
        print(message)

    def melissa(self):
        """Print the fixed 'melissa' line."""
        message = "Method 'melissa': Church about none this."
        print(message)
108
+
109
class Swanson:
    """Placeholder class whose methods each print one fixed line and return None."""

    def damon(self):
        """Print the fixed 'damon' line."""
        message = "Method 'damon': Force opportunity senior happen PM."
        print(message)

    def jeffrey(self):
        """Print the fixed 'jeffrey' line."""
        message = "Method 'jeffrey': Ground glass blue attention improve."
        print(message)

    def david(self):
        """Print the fixed 'david' line."""
        message = "Method 'david': Over most school market bit style bed end."
        print(message)

    def alexander(self):
        """Print the fixed 'alexander' line."""
        message = "Method 'alexander': Serious near particular bring weight strong detail."
        print(message)

    def brian(self):
        """Print the fixed 'brian' line."""
        message = "Method 'brian': Your memory property quickly serve."
        print(message)
130
+
131
class Conway:
    """Placeholder class whose methods each print one fixed line and return None."""

    def elizabeth(self):
        """Print the fixed 'elizabeth' line."""
        message = "Method 'elizabeth': Hope control career have."
        print(message)

    def jennifer(self):
        """Print the fixed 'jennifer' line."""
        message = "Method 'jennifer': After day determine read education ten."
        print(message)

    def william(self):
        """Print the fixed 'william' line."""
        message = "Method 'william': Then case have summer."
        print(message)

    def roger(self):
        """Print the fixed 'roger' line."""
        message = "Method 'roger': Day recognize lawyer play site only ability."
        print(message)

    def carol(self):
        """Print the fixed 'carol' line."""
        message = "Method 'carol': Community quality top herself."
        print(message)
152
+
153
class Cross:
    """Placeholder class whose methods each print one fixed line and return None."""

    def jessica(self):
        """Print the fixed 'jessica' line.

        The class used to define ``jessica`` twice; the later definition
        silently replaced the earlier one, so only its message was ever
        printed. The dead first definition has been removed and the effective
        one kept.
        """
        print("Method 'jessica': Hit detail condition population because start.")

    def frank(self):
        """Print the fixed 'frank' line."""
        print("Method 'frank': Store their discuss Republican source glass firm what.")

    def james(self):
        """Print the fixed 'james' line."""
        print("Method 'james': Surface reason sign ahead get special.")

    def joseph(self):
        """Print the fixed 'joseph' line."""
        print("Method 'joseph': Yeah others produce long.")

    def laura(self):
        """Print the fixed 'laura' line."""
        print("Method 'laura': Message career gun pick record region.")

    def christy(self):
        """Print the fixed 'christy' line."""
        print("Method 'christy': List make kid wish back.")
182
# SECURITY: credentials must never be committed to source control. The
# Planetary Computer subscription key and the AWS access key pair that used to
# be hard-coded here are exposed in the repository history and must be
# revoked/rotated. Provide fresh values through the process environment
# (e.g. deployment secrets) instead; the AWS variables are read from
# os.environ just as they were when this module assigned them itself.
_REQUIRED_ENV_VARS = ('PC_SDK_SUBSCRIPTION_KEY', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY')
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.environ.get(name)]
if _missing_env_vars:
    # Warn instead of raising so that importing the module keeps working;
    # the calls that need the credentials will fail with their own errors.
    print(f"Warning: missing environment variables: {', '.join(_missing_env_vars)}")
if os.environ.get('PC_SDK_SUBSCRIPTION_KEY'):
    planetary_computer.settings.set_subscription_key(os.environ['PC_SDK_SUBSCRIPTION_KEY'])
185
+
186
class FsspecStacIO(DefaultStacIO):
    """STAC I/O backend that reads and writes text through ``fsspec.open``.

    Both overrides accept and ignore extra positional/keyword arguments so
    that they stay call-compatible with the ``StacIO.read_text`` /
    ``StacIO.write_text`` interface, which forwards ``*args, **kwargs``.
    """

    def read_text(self, href: str, *args, **kwargs) -> str:
        """Return the full text content found at *href*."""
        with fsspec.open(href, mode='r') as f:
            return f.read()

    def write_text(self, href: str, txt: str, *args, **kwargs) -> None:
        """Write *txt* to *href*, replacing any existing content."""
        with fsspec.open(href, mode='w') as f:
            f.write(txt)


# Make every pystac read/write in this process go through fsspec.
StacIO.set_default(FsspecStacIO)
196
+
197
def convert_to_serializable(obj):
    """Recursively replace NumPy values inside *obj* with plain Python objects.

    Args:
        obj: Any value. Dicts, lists and tuples are traversed recursively.

    Returns:
        A structure of the same shape in which
        - dict keys are converted with ``str``;
        - lists stay lists and tuples stay tuples, with converted items;
        - ``np.ndarray`` values become nested lists (``ndarray.tolist``);
        - every NumPy scalar (``np.generic``: integers, floats, ``np.bool_``,
          ``np.str_`` ...) becomes its Python equivalent (``.item()``);
        - anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        return {str(key): convert_to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        # Tuples were previously returned untouched, leaving NumPy values
        # nested inside them unconverted.
        return tuple(convert_to_serializable(item) for item in obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # np.generic is the base of all NumPy scalars, so this also covers
        # np.bool_ and friends, not only np.integer / np.floating.
        return obj.item()
    return obj
207
+
208
def get_system_dump():
    """Return a dict describing the host this process runs on.

    Returns:
        dict with the keys ``os``, ``os_version``, ``os_release``,
        ``architecture``, ``processor``, ``cpu_cores_physical``,
        ``cpu_cores_logical``, ``ram`` (GiB, 2 decimals), ``hostname``,
        ``ip_address`` (``None`` when the hostname cannot be resolved),
        ``python_version``, ``machine``, ``boot_time`` and
        ``disk_total_gb`` / ``disk_used_gb`` / ``disk_free_gb`` for ``/``
        (GiB, 2 decimals).
    """
    bytes_per_gib = 1024 ** 3
    hostname = socket.gethostname()
    try:
        ip_address = socket.gethostbyname(hostname)
    except OSError:
        # socket.gaierror is an OSError; hosts whose name does not resolve
        # should not make the whole dump fail.
        ip_address = None
    # Query the disk once so total/used/free come from the same snapshot.
    disk = psutil.disk_usage('/')
    return {
        'os': platform.system(),
        'os_version': platform.version(),
        'os_release': platform.release(),
        'architecture': platform.architecture()[0],
        'processor': platform.processor(),
        'cpu_cores_physical': psutil.cpu_count(logical=False),
        'cpu_cores_logical': psutil.cpu_count(logical=True),
        'ram': round(psutil.virtual_memory().total / bytes_per_gib, 2),
        'hostname': hostname,
        'ip_address': ip_address,
        'python_version': platform.python_version(),
        'machine': platform.machine(),
        'boot_time': psutil.boot_time(),
        'disk_total_gb': round(disk.total / bytes_per_gib, 2),
        'disk_used_gb': round(disk.used / bytes_per_gib, 2),
        'disk_free_gb': round(disk.free / bytes_per_gib, 2),
    }
210
+
211
class DatacubeMiner:
    """Assemble co-located multi-sensor datacubes and upload them to a Zarr store.

    A datacube pairs one base image (fetched with ``GoogleBaseMapMiner`` when
    ``google`` is true, ``NAIPMiner`` otherwise) with Sentinel-2, Sentinel-1,
    MODIS and ESRI LULC data fetched through the ``mapminer`` miners for the
    same location.
    """

    # Sentinel-2 bands compared, in order, against the base image's bands.
    _REFERENCE_BANDS = ['B04', 'B03', 'B02', 'B08']
    # SCL values that this code treats as cloud when computing cloud fractions.
    _CLOUD_CLASSES = [8, 9, 10, 11]

    def __init__(self, google=True):
        """Create the miners and the sampling polygons.

        Args:
            google: When true the base image comes from ``GoogleBaseMapMiner``
                and random points are sampled from the India polygon;
                otherwise ``NAIPMiner`` and the USA polygon are used.
        """
        self.google = google
        self.usa = shapely.from_wkt('POLYGON ((-124.453125 48.180655, -124.057615 46.920084, -124.628905 42.843568, -123.35449 38.822395, -121.992186 36.668218, -120.366209 34.488241, -119.124756 34.111779, -118.707275 34.04353, -118.256836 33.756289, -117.784424 33.523053, -117.388916 33.206494, -117.114256 32.805533, -114.653318 32.620658, -110.03906 31.690568, -106.743161 31.989229, -105.029294 30.902009, -103.403318 28.998312, -102.832028 29.878537, -101.425778 29.878537, -99.755856 27.916544, -97.426755 26.155212, -96.987301 28.071758, -94.6582 29.420241 , -88.989254 30.1069, -84.067379 30.14491, -81.079097 25.085371, -80.156246 26.273488, -82.265621 31.24077, -77.124019 34.741406, -75.585933 37.822604, -74.091792 40.780352, -70.883784 41.836641, -69.960932 43.96101, -67.060542 44.24502, -68.027338 47.010055, -69.301753 47.279059, -70.883784 45.088859, -75.805659 44.276492, -79.101558 42.617607, -83.540035 41.705541, -83.627925 45.521569, -89.78027 47.812987, -95.185544 48.980135, -122.475585 48.893533, -122.849121 47.945703, -124.453125 48.180655))')
        self.india = shapely.from_wkt('POLYGON ((75.585953 36.597085, 67.675796 24.3662, 71.894546 20.960503, 76.464859 7.884153, 80.332047 13.580946, 81.914078 17.475476, 87.71486 21.778974, 92.285173 21.452135, 97.734392 27.993516, 92.285173 28.766781, 81.562516 31.202548, 75.585953 36.597085))')
        if self.google:
            self.google_miner = miner.GoogleBaseMapMiner(install_chrome=False)
        else:
            self.naip_miner = miner.NAIPMiner()
        self.s2_miner = miner.Sentinel2Miner()
        self.s1_miner = miner.Sentinel1Miner()
        self.landsat_miner = miner.LandsatMiner()
        self.modis_miner = miner.MODISMiner()
        self.lulc_miner = miner.ESRILULCMiner()

    @staticmethod
    def _sample_point(polygon, max_tries=1000):
        """Return ``(lat, lon)`` of a uniformly drawn point inside *polygon*.

        Points are drawn from the polygon's bounding box and rejected until
        one falls inside.

        Raises:
            RuntimeError: if no point lands inside after *max_tries* draws.
        """
        min_x, min_y, max_x, max_y = polygon.bounds
        for _ in range(max_tries):
            candidate = Point(np.random.uniform(min_x, max_x), np.random.uniform(min_y, max_y))
            if candidate.within(polygon):
                return candidate.y, candidate.x
        raise RuntimeError(f'No point found inside the polygon after {max_tries} tries')

    @staticmethod
    def _stack_bands(dataset, bands, name):
        """Stack the *bands* variables of *dataset* into one named (time, band, y, x) array."""
        stacked = xr.concat(objs=[dataset[band] for band in bands], dim='band').transpose('time', 'band', 'y', 'x')
        stacked['band'] = bands
        stacked.name = name
        return stacked

    def mine(self, lat=None, lon=None, radius=500, duration=75):
        """Fetch every layer around a point and assemble one datacube.

        Args:
            lat, lon: Centre of the area. When either is missing, a random
                point inside the India (google) or USA (NAIP) polygon is used.
            radius: Passed through to every miner's ``fetch``.
            duration: Number of days before the base image date from which
                Sentinel-2, Sentinel-1 and MODIS scenes are requested (the
                window ends 3 days after that date).

        Returns:
            dict with keys ``ds`` (equalized and aligned base image),
            ``ds_sentinel2``, ``ds_sentinel1``, ``ds_modis``, ``ds_lulc``,
            ``index`` (Sentinel-2 time index used as reference) and
            ``metadata``.

        Raises:
            RuntimeError: if no random point could be sampled.
            Exception: propagated from ``equalize`` when no usable
                Sentinel-2 scene exists.
        """
        google = self.google
        if google:
            polygon, base_miner = self.india, self.google_miner
        else:
            polygon, base_miner = self.usa, self.naip_miner
        if lat is None or lon is None:
            # A half-specified location used to reach the miners as lon=None.
            lat, lon = self._sample_point(polygon)
        print(lat, lon)
        if google:
            ds = base_miner.fetch(lat=lat, lon=lon, radius=radius, reproject=True)
        else:
            ds = base_miner.fetch(lat=lat, lon=lon, radius=radius, daterange='2020-01-01/2024-12-31')
        capture_date = ds.attrs['metadata']['date']['value']
        print(f"Fetched Time : {capture_date}")
        ds.coords['time'] = capture_date
        ds = ds.transpose('band', 'y', 'x')

        capture_timestamp = pd.to_datetime(capture_date)
        window_start = (capture_timestamp - pd.Timedelta(value=duration, unit='d')).date()
        window_end = (capture_timestamp + pd.Timedelta(value=3, unit='d')).date()
        daterange = f"{window_start}/{window_end}"

        ds_sentinel2 = self.s2_miner.fetch(lat, lon, radius, daterange=daterange).sortby('y').sortby('x')
        ds_modis = self.modis_miner.fetch(lat, lon, radius, daterange=daterange).sortby('y').sortby('x')
        ds_sentinel1 = self.s1_miner.fetch(lat, lon, radius, daterange=daterange).sortby('y').sortby('x')
        ds_lulc = self.lulc_miner.fetch(lat, lon, radius, daterange='2024-01-01/2024-12-31').sortby('y').sortby('x')
        ds_modis, ds_sentinel2, ds_sentinel1, ds_lulc = dask.compute(ds_modis, ds_sentinel2, ds_sentinel1, ds_lulc)

        # Resample the base image onto a grid 16x denser than the Sentinel-2
        # grid, spanning the same extent.
        ys = np.linspace(ds_sentinel2.y.values[0], ds_sentinel2.y.values[-1], num=16 * len(ds_sentinel2.y.values))
        xs = np.linspace(ds_sentinel2.x.values[0], ds_sentinel2.x.values[-1], num=16 * len(ds_sentinel2.x.values))
        ds = ds.sel(x=xs, y=ys, method='nearest')
        ds['y'], ds['x'] = (ys, xs)

        ds_sentinel2 = self._stack_bands(
            ds_sentinel2,
            ['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B09', 'B11', 'B12', 'B8A', 'SCL'],
            'Sentinel-2',
        )
        ds_sentinel1 = self._stack_bands(ds_sentinel1, ['vv', 'vh'], 'Sentinel-1')
        ds_modis = self._stack_bands(
            ds_modis,
            ['sur_refl_b01', 'sur_refl_b02', 'sur_refl_b03', 'sur_refl_b04', 'sur_refl_b05', 'sur_refl_b06', 'sur_refl_b07'],
            'MODIS',
        )

        ds, index = self.equalize(ds_sentinel2, ds)
        ds = self.align(ds_sentinel2.isel(time=index), ds)
        datacube = {
            'ds': ds,
            'ds_sentinel2': ds_sentinel2,
            'ds_sentinel1': ds_sentinel1,
            'ds_modis': ds_modis,
            'ds_lulc': ds_lulc['data'].isel(time=0),
            'index': index,
        }
        datacube['metadata'] = self.get_metadata(datacube)
        datacube['metadata']['source'] = 'google' if google else 'naip'
        return datacube

    def equalize(self, ds_sentinel2, ds_google):
        """Histogram-match the base image to a nearly cloud-free Sentinel-2 scene.

        The (up to) three most recent scenes are tried newest first; the
        first one whose cloud fraction is below 3% is used as reference.

        Args:
            ds_sentinel2: Sentinel-2 stack with ``time`` and ``band`` dims.
            ds_google: Base image with a leading ``band`` dim.

        Returns:
            ``(matched_image, index)`` where *index* is the negative time
            index of the reference scene.

        Raises:
            Exception: ``'Entire Data is Cloudy'`` when no candidate scene is
                below the 3% threshold. (A scene between 3% and 5% used to
                slip through and return an un-equalized image.)
        """
        n_bands = len(ds_google.band)
        ds_sentinel2 = ds_sentinel2.sel(band=self._REFERENCE_BANDS + ['SCL'])
        ds_google = ds_google.astype('float32')
        # Never index further back than the scenes that actually exist.
        n_candidates = min(3, len(ds_sentinel2.time))
        for index in range(-1, -(n_candidates + 1), -1):
            scene = ds_sentinel2.isel(time=index)
            cloud_mask = scene.sel(band='SCL').isin(self._CLOUD_CLASSES) | (scene.sel(band='B02') >= 5000)
            cloud_fraction = float(cloud_mask.data.mean())
            if cloud_fraction >= 0.03:
                continue
            ds_placeholder = copy.deepcopy(scene)
            for band_index in range(len(ds_placeholder.band)):
                band_data = ds_placeholder.data[band_index]
                # Replace the brightest 0.1% of pixels with the median so
                # outliers do not dominate the histogram match.
                ds_placeholder.data[band_index] = np.where(
                    band_data >= np.percentile(band_data, 99.9),
                    np.median(band_data),
                    band_data,
                )
            ds_google.data = exposure.match_histograms(
                ds_google.data[:n_bands],
                ds_placeholder.data[:n_bands, :, :],
                channel_axis=0,
            )
            return (ds_google, index)
        raise Exception('Entire Data is Cloudy')

    def align(self, ds_sentinel2, ds_google):
        """Iteratively shift the base image to line up with a Sentinel-2 scene.

        Up to six rounds of ``correct_shift`` are applied; a round is kept
        only if it lowers the median absolute difference to the reference,
        otherwise iteration stops.

        Args:
            ds_sentinel2: Single-time Sentinel-2 scene (``band``, ``y``, ``x``).
            ds_google: Base image to align.

        Returns:
            A deep copy of *ds_google* with the accepted shifts applied.
        """
        n_bands = len(ds_google.band)
        ds_sentinel2 = ds_sentinel2.sel(band=self._REFERENCE_BANDS[:n_bands])
        ds_google = copy.deepcopy(ds_google)
        # The reference sampled on the base image grid never changes between
        # rounds, so it is computed once.
        reference = ds_sentinel2.sel(x=ds_google.x.values, y=ds_google.y.values, method='nearest').data[:n_bands]
        resize = nn.Upsample(size=ds_google.shape[1:])
        min_l1 = np.median(np.abs(reference - ds_google.data[:n_bands]))
        for _ in range(6):
            _, target_image = DatacubeMiner.correct_shift(
                reference_image=reference,
                target_image=ds_google.data[:n_bands],
            )
            # correct_shift returns a cropped image; bring it back to full size.
            target_image = resize(torch.tensor(target_image[np.newaxis])).data.cpu().numpy()[0, :]
            l1_loss = np.median(np.abs(reference - target_image[:n_bands]))
            if l1_loss >= min_l1:
                break
            min_l1 = l1_loss
            ds_google.data[:n_bands] = target_image
        return ds_google

    @staticmethod
    @njit(parallel=True, cache=False)
    def correct_shift(reference_image, target_image):
        """
        Find the pixel shift that best aligns two histogram-matched images.

        Every (shift_y, shift_x) pair from -20 to 18 in steps of 2 is tried;
        the pair with the lowest mean absolute difference over the
        overlapping area wins (first one found on ties).

        Arguments :
            reference_image : numpy array (C,H,W)
            target_image : numpy array (C,H,W)

        Returns :
            (reference_crop, target_crop) : the overlapping regions of both
            images for the best shift.

        A shift of zero is now evaluated on the full extent. Previously no
        branch handled it, so it reused the slices of the preceding shift (or
        none at all). The search is sequential: the former ``prange`` loop
        updated one shared minimum from several iterations.
        """
        height = reference_image.shape[1]
        width = reference_image.shape[2]
        best_error = np.inf
        best_shift_y = 0
        best_shift_x = 0
        for shift_y in range(-20, 20, 2):
            ref_top = max(shift_y, 0)
            ref_bottom = height + min(shift_y, 0)
            tgt_top = max(-shift_y, 0)
            tgt_bottom = height - max(shift_y, 0)
            for shift_x in range(-20, 20, 2):
                ref_left = max(shift_x, 0)
                ref_right = width + min(shift_x, 0)
                tgt_left = max(-shift_x, 0)
                tgt_right = width - max(shift_x, 0)
                l1_error = np.mean(np.abs(
                    reference_image[:, ref_top:ref_bottom, ref_left:ref_right]
                    - target_image[:, tgt_top:tgt_bottom, tgt_left:tgt_right]
                ))
                if l1_error < best_error:
                    best_error = l1_error
                    best_shift_y = shift_y
                    best_shift_x = shift_x
        ref_top = max(best_shift_y, 0)
        ref_bottom = height + min(best_shift_y, 0)
        tgt_top = max(-best_shift_y, 0)
        tgt_bottom = height - max(best_shift_y, 0)
        ref_left = max(best_shift_x, 0)
        ref_right = width + min(best_shift_x, 0)
        tgt_left = max(-best_shift_x, 0)
        tgt_right = width - max(best_shift_x, 0)
        sentinel_shifted = reference_image[:, ref_top:ref_bottom, ref_left:ref_right]
        naip_shifted = target_image[:, tgt_top:tgt_bottom, tgt_left:tgt_right]
        return (sentinel_shifted, naip_shifted)

    @staticmethod
    def _describe(array):
        """Return the tail of ``describe()`` for the last column of *array* as a dict."""
        return array.to_dataframe().describe(include='all').iloc[4:, -1].to_dict()

    @staticmethod
    def _describe_stack(stack):
        """Describe every band of every time step of *stack*, keyed by negative time index."""
        return {
            time_index: {
                band: DatacubeMiner._describe(stack.sel(band=band).isel(time=time_index))
                for band in stack.band.values
            }
            for time_index in range(-1, -(len(stack.time) + 1), -1)
        }

    def get_metadata(self, datacube):
        """Compute descriptive and quality metadata for a datacube.

        Side effect: names ``datacube['ds']`` ``'ds'`` (needed to turn it
        into a dataframe).

        Args:
            datacube: dict with ``ds``, ``ds_sentinel2``, ``ds_sentinel1``,
                ``ds_modis``, ``ds_lulc`` and ``index`` as built by ``mine``.

        Returns:
            dict with ``date``, ``source``, ``closest_index``,
            ``cloud_cover`` (per Sentinel-2 time index), ``delta`` (days
            between the base image and the latest scene of each sensor),
            ``lulc_distribution``, ``data_quality`` and ``data_description``.
        """
        ds = datacube['ds']
        ds.name = 'ds'
        ds_sentinel2 = datacube['ds_sentinel2']
        n_bands = len(ds.band)
        s2_reference = ds_sentinel2.sel(band=self._REFERENCE_BANDS[:n_bands]).isel(time=datacube['index'])

        hist1, _ = np.histogram(ds.data.ravel(), bins=10, density=True)
        hist2, _ = np.histogram(s2_reference.data.ravel(), bins=10, density=True)
        # Small offset keeps empty bins from producing infinities.
        kl_div = entropy(hist1 + 1e-10, hist2 + 1e-10)
        l1_loss = np.abs(
            s2_reference.sel(x=ds.x.values, y=ds.y.values, method='nearest').data - ds.data
        ).mean()

        df_lulc = datacube['ds_lulc'].to_dataframe()
        df_lulc_value_counts = df_lulc['data'].value_counts()
        lulc_mapping = {0: 'no_data', 1: 'water', 2: 'trees', 4: 'flooded_vegetation', 5: 'crops', 7: 'built_area', 8: 'bare_ground', 9: 'snow_ice', 10: 'clouds', 11: 'rangeland'}

        capture_date = pd.to_datetime(ds.attrs['metadata']['date']['value'])

        def days_since_latest(stack):
            latest_day = pd.to_datetime(pd.to_datetime(stack.time[-1].data).date())
            return (capture_date - latest_day).days

        scl = ds_sentinel2.sel(band='SCL')
        band_names = ['red', 'green', 'blue', 'nir']
        metadata = {
            'date': capture_date,
            'source': 'google',
            'closest_index': datacube['index'],
            # Covers every time step, like the per-time descriptions below;
            # the range previously stopped two scenes early (len - 1).
            'cloud_cover': {
                f'{time_index}': scl.isel(time=time_index).isin(self._CLOUD_CLASSES).data.mean()
                for time_index in range(-1, -(len(ds_sentinel2.time) + 1), -1)
            },
            'delta': {
                'sentinel2': days_since_latest(ds_sentinel2),
                'sentinel1': days_since_latest(datacube['ds_sentinel1']),
                'modis': days_since_latest(datacube['ds_modis']),
            },
            'lulc_distribution': {
                label: (df_lulc_value_counts.loc[lulc_class] / len(df_lulc) if lulc_class in df_lulc_value_counts.index else 0)
                for lulc_class, label in lulc_mapping.items()
            },
            'data_quality': {'kl_loss': kl_div, 'l1_loss': l1_loss},
            'data_description': {
                'gt': {
                    band_names[band_index]: self._describe(ds.isel(band=band_index))
                    for band_index in range(n_bands)
                },
                'sentinel2': self._describe_stack(ds_sentinel2),
                'sentinel1': self._describe_stack(datacube['ds_sentinel1']),
                'modis': self._describe_stack(datacube['ds_modis']),
            },
        }
        return metadata

    @staticmethod
    def store(google=False):
        """Mine datacubes forever and upload each one to the S3 Zarr store.

        Every datacube is written under a fresh UUID group with the
        sub-groups ``gt``, ``sentinel2``, ``sentinel1``, ``modis`` and
        ``lulc``; its metadata is stored as attributes of the UUID group.
        A failed mining attempt rebuilds the miner and retries. The loop
        only ends on ``KeyboardInterrupt`` during mining; upload errors
        propagate to the caller.

        Args:
            google: Forwarded to ``DatacubeMiner``.
        """
        store_path = 's3://general-dump/super-resolution-4.0/database/store.zarr'
        # (datacube key, dataset variable / sub-group name)
        uploads = (
            ('ds', 'gt'),
            ('ds_sentinel2', 'sentinel2'),
            ('ds_sentinel1', 'sentinel1'),
            ('ds_modis', 'modis'),
            ('ds_lulc', 'lulc'),
        )
        datacube_miner = DatacubeMiner(google=google)
        print('Miner initialized')
        while True:
            try:
                print('...........................Mining................................')
                mining_start_time = time.time()
                datacube = datacube_miner.mine()
                mining_end_time = time.time()
                print(f'................Mined ({mining_end_time - mining_start_time} sec)..............')
            except KeyboardInterrupt:
                break
            except Exception as e:
                print(f'Exception occurred: {e}')
                if datacube_miner.google:
                    try:
                        datacube_miner.google_miner.driver.quit()
                    except Exception as quit_error:
                        # A browser that is already gone must not stop the
                        # miner from being rebuilt.
                        print(f'Exception occurred while closing the driver: {quit_error}')
                del datacube_miner
                gc.collect()
                datacube_miner = DatacubeMiner(google=google)
                continue

            group_id = str(uuid.uuid4())
            print(f'Uploading to : {group_id}..............')
            uploading_start_time = time.time()
            for key, name in uploads:
                datacube[key].to_dataset(name=name).to_zarr(store_path, group=f'{group_id}/{name}', consolidated=False)
                print(f' {group_id} : {key} dumped to s3')
            print(f'Uploading Metadata to {group_id}.............')
            metadata = datacube['metadata']
            metadata['date'] = str(metadata['date'].date())
            metadata['created_date'] = str(pd.Timestamp.now(tz=pytz.timezone('Asia/Kolkata')).date())
            metadata['system'] = get_system_dump()
            zarr.open_group(store_path, path=group_id, mode='a').attrs.update(metadata)
            print(f' {group_id} : metadata dumped to s3')
            uploading_end_time = time.time()
            print(f'----------- {group_id} S3 Dumping Finished ({uploading_end_time - uploading_start_time} sec)-------------')
423
+
424
class DashBoard:
    """Gradio dashboard that shows how many datacubes exist in the S3 store.

    A daemon thread refreshes ``datacube_count`` every 5 seconds by listing
    the store; the page renders the most recently read value when it loads.
    """

    # Page stylesheet handed to gr.Blocks.
    _CSS = (
        "\n"
        " @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;700&family=Space+Mono:wght@700&display=swap');\n"
        "\n"
        " body {\n"
        " font-family: 'Roboto', sans-serif;\n"
        " background: linear-gradient(180deg, #0f2027, #203a43, #2c5364);\n"
        " color: white;\n"
        " margin: 0;\n"
        " padding: 0;\n"
        " overflow-x: hidden;\n"
        " }\n"
        "\n"
        " #header {\n"
        " font-family: 'Space Mono', monospace;\n"
        " text-align: center;\n"
        " font-size: 3.5rem;\n"
        " color: #FFD700;\n"
        " text-shadow: 0 0 20px #FFD700, 0 0 30px #FFD700;\n"
        " margin: 20px 0;\n"
        " }\n"
        "\n"
        " #datacube-section {\n"
        " background: rgba(255, 255, 255, 0.1);\n"
        " padding: 20px;\n"
        " border-radius: 15px;\n"
        " box-shadow: 0px 4px 15px rgba(0, 0, 0, 0.2);\n"
        " transition: transform 0.3s, box-shadow 0.3s;\n"
        " }\n"
        "\n"
        " #datacube-section:hover {\n"
        " transform: translateY(-10px);\n"
        " box-shadow: 0px 10px 25px rgba(0, 0, 0, 0.5);\n"
        " }\n"
        "\n"
        " .live-counter {\n"
        " display: flex;\n"
        " align-items: center;\n"
        " justify-content: center;\n"
        " font-size: 1.8rem;\n"
        " color: #00ff99;\n"
        " font-family: 'Space Mono', monospace;\n"
        " background: rgba(0, 255, 153, 0.1);\n"
        " padding: 15px;\n"
        " border-radius: 10px;\n"
        " border: 2px solid #00ff99;\n"
        " box-shadow: 0px 4px 10px rgba(0, 255, 153, 0.5);\n"
        " }\n"
        "\n"
        " footer {\n"
        " margin-top: 50px;\n"
        " text-align: center;\n"
        " color: rgba(255, 255, 255, 0.7);\n"
        " font-size: 1rem;\n"
        " }\n"
        "\n"
        " footer a {\n"
        " color: #FFD700;\n"
        " text-decoration: none;\n"
        " }\n"
        "\n"
        " footer a:hover {\n"
        " text-decoration: underline;\n"
        " }\n"
        " "
    )

    def __init__(self):
        """Open an anonymous S3 filesystem and start the background counter thread."""
        self.fs = s3fs.S3FileSystem(anon=True)
        self.datacube_count = 0
        self.update_thread = Thread(target=self.update_datacube_count)
        # Daemon thread: it must not keep the process alive on exit.
        self.update_thread.daemon = True
        self.update_thread.start()

    def update_datacube_count(self):
        """Refresh ``datacube_count`` from S3 every 5 seconds, forever.

        Listing errors are printed and the previous count is kept.
        """
        while True:
            try:
                self.fs.invalidate_cache()
                self.datacube_count = len(self.fs.ls('s3://general-dump/super-resolution-4.0/database/store.zarr/', refresh=True))
            except Exception as e:
                print(f'Error reading from S3: {e}')
            time.sleep(5)

    def display_datacube_count(self):
        """Return the HTML snippet showing the current datacube count."""
        # The light-bulb emoji was mis-decoded ("πŸ’‘") in the previous text.
        return f"<div style='font-size: 1.5rem; color: #ffffff;'>🌍 <b>Datacubes Mined:</b> {self.datacube_count}</div><p style='color: #FFD700; margin-top: 10px;'>💡 'Mining Insights from Space, One Datacube at a Time'</p>"

    def launch_dashboard(self):
        """Build the Gradio page and launch it with ``share=True``."""
        with gr.Blocks(css=self._CSS) as dashboard:
            gr.Markdown('\n <div id="header">🌌 <b>Earth Scraper Dashboard</b></div>\n ', elem_id='header')
            with gr.Row():
                with gr.Column(scale=2):
                    gr.Markdown('\n <div id="datacube-section">\n <h2 style="text-align: center; color: #FFD700; font-family: \'Space Mono\';">Real-Time Mining Progress</h2>\n <p style="text-align: center; color: rgba(255,255,255,0.8); font-size: 1.2rem;">\n Keep track of the datacubes mined in real time with our cutting-edge dynamic tracker. \n </p>\n </div>\n ', elem_id='datacube-section')
                with gr.Column(scale=1):
                    dynamic_display = gr.HTML(value=self.display_datacube_count(), label='Datacube Count', elem_classes='live-counter')
            # Re-render the counter each time the page is loaded.
            dashboard.load(self.display_datacube_count, [], dynamic_display)
            # Rocket and light-bulb emoji restored from mis-decoded text.
            gr.Markdown('\n <footer>\n 🚀 Powered by <a href="https://huggingface.co/spaces" target="_blank">Hugging Face Spaces</a> | Built with 💡 by Gajesh Ladhar\n </footer>\n ')
        dashboard.launch(share=True)
456
+
457
def mine_cubes():
    """Run ``DatacubeMiner.store(google=True)`` in an endless loop.

    Whenever ``store`` returns or raises an ``Exception`` it is started
    again; the error, if any, is printed first. This function never returns.
    """
    while True:
        try:
            DatacubeMiner.store(google=True)
        except Exception as error:
            print(f'Exception occurred: {error}')
464
+
465
def mine(n_workers=3, stagger_seconds=60 * 4, worker=None):
    """Start *n_workers* mining threads, spaced *stagger_seconds* apart.

    The first worker starts immediately and each further one after another
    pause, so exactly *n_workers* threads are launched. (The previous loop
    started one thread more than its ``n_workers`` value.)

    Args:
        n_workers: Number of worker threads to start.
        stagger_seconds: Pause between two consecutive worker starts.
        worker: Callable run by every thread; defaults to ``mine_cubes``.

    Returns:
        The list of started ``Thread`` objects, in start order.
    """
    if worker is None:
        # Resolved at call time so the default follows the module's mine_cubes.
        worker = mine_cubes
    threads = []
    for worker_index in range(n_workers):
        if worker_index > 0:
            time.sleep(stagger_seconds)
        thread = Thread(target=worker)
        thread.start()
        threads.append(thread)
    return threads
472
# Script entry: start the mining workers on a background thread, then build
# the dashboard and launch it from the main thread.
mine_thread = Thread(target=mine)
mine_thread.start()

dashboard = DashBoard()
dashboard.launch_dashboard()