adamantix committed
Commit aa99e27 · verified · 1 Parent(s): e03910e

Update app.py

Files changed (1)
  1. app.py +80 -36
app.py CHANGED
@@ -11,21 +11,23 @@ import re
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
+# step 1: load the models
 TEXT_MODEL_NAME = "indobenchmark/indobert-large-p1"
 tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL_NAME)
 text_model = AutoModel.from_pretrained(TEXT_MODEL_NAME).to(device)
 text_model.eval()
 
-clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
-    "EVA01-g-14-plus",
-    pretrained="merged2b_s11b_b114k"
-)
+clip_model, _, clip_preprocess = open_clip.create_model_and_transforms("EVA01-g-14-plus", pretrained="merged2b_s11b_b114k")
 clip_model.to(device)
 clip_model.eval()
 
 with open("xgb_full.pkl", "rb") as f:
     xgb_model = pickle.load(f)
 
+with open("k-means.pkl", "rb") as f:
+    kmeans = pickle.load(f)
+
+# step 2: preprocessing
 def preprocess_text(text: str) -> str:
     text = str(text).lower()
     text = re.sub(r'http\S+|www\.\S+', '', text)
@@ -34,13 +36,43 @@ def preprocess_text(text: str) -> str:
     text = re.sub(r'\s+', ' ', text).strip()
     return " ".join(text.split())
 
+# step 3: feature encoding (text and image)
+def encode_text(text: str):
+    # step 3.1 preprocess text
+    processed = preprocess_text(text)
+    # step 3.2 tokenize text
+    tokens = tokenizer(
+        processed,
+        return_tensors="pt",
+        padding="max_length",
+        truncation=True,
+        max_length=128,
+    )
+    tokens = {k: v.to(device) for k, v in tokens.items()}
+
+    with torch.no_grad():
+        # take the [CLS] token
+        out = text_model(**tokens).last_hidden_state[:, 0, :]
+    return out.cpu().numpy()
+
+def encode_image(image_bytes):
+    # step 4.1 load the image
+    img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+
+    # step 4.2 encode the image into a tensor (image embedding)
+    tensor = clip_preprocess(img).unsqueeze(0).to(device)
+
+    with torch.no_grad():
+        emb = clip_model.encode_image(tensor)
+    return emb.cpu().numpy()
+
 app = FastAPI(
     title="Multimodal Water Pollution Risk API",
     description=(
         "Input: text + image + geospatial + time\n"
-        "Model: IndoBERT + EVA-CLIP (HF Hub) + XGBoost (xgb.pkl)\n"
+        "Model: IndoBERT + EVA-CLIP + XGBoost\n"
     ),
-    version="1.0.0",
+    version="1.0.3",
 )
 
 app.add_middleware(
@@ -63,57 +95,69 @@ async def predict(
     text: str = Form(...),
     longitude: float = Form(...),
     latitude: float = Form(...),
-    location_cluster: int = Form(...),
     hour: int = Form(...),
     dayofweek: int = Form(...),
     month: int = Form(...),
     image: UploadFile = File(...),
 ):
-    # 1. preprocess text
-    cleaned_text = preprocess_text(text)
 
-    # 2. encode text (take the CLS token)
-    text_inputs = tokenizer(
-        cleaned_text,
-        return_tensors="pt",
-        padding="max_length",
-        truncation=True,
-        max_length=128,
-    )
-    text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
-    with torch.no_grad():
-        text_emb = text_model(**text_inputs).last_hidden_state[:, 0, :]  # take the CLS token only
-    text_emb = text_emb.cpu().numpy()
+    # 1. Encode text
+    text_emb = encode_text(text)
 
-    # 3. encode image (EVA-CLIP image embedding)
+    # 2. Encode image
     img_bytes = await image.read()
-    pil_img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-    img_tensor = clip_preprocess(pil_img).unsqueeze(0).to(device)
+    img_emb = encode_image(img_bytes)
 
-    with torch.no_grad():
-        img_emb = clip_model.encode_image(img_tensor)
-    img_emb = img_emb.cpu().numpy()
+    # 3. Generate the location cluster
+    location_cluster = int(kmeans.predict([[latitude, longitude]])[0])
 
-    # 4. additional numeric features (longitude, latitude, location_cluster, hour, dayofweek, month)
-    add_feats = np.array(
-        [[longitude, latitude, location_cluster, hour, dayofweek, month]],
-        dtype=np.float32,
-    )
+    # 4. Create feature vector
+    add_feats = np.array([[longitude, latitude, location_cluster, hour, dayofweek, month]], dtype=np.float32)
 
-    # 5. concatenate (early fusion): [image_emb, text_emb, add_feats]
+    # 5. Early Fusion
    fused = np.concatenate([img_emb, text_emb, add_feats], axis=1)
 
-    # 6. predict
+    # 6. Predict
     proba = xgb_model.predict_proba(fused)[0]
     pred_idx = int(np.argmax(proba))
+
     label = "KRITIS" if pred_idx == 1 else "WASPADA"
 
     return {
         "prediction": label,
+        "cluster_used": location_cluster,
         "probabilities": {
             "WASPADA": float(proba[0]),
-            "KRITIS": float(proba[1]),
-        },
+            "KRITIS": float(proba[1])
+        }
+    }
+
+@app.post("/predict_proba")
+async def predict_proba(
+    text: str = Form(...),
+    longitude: float = Form(...),
+    latitude: float = Form(...),
+    hour: int = Form(...),
+    dayofweek: int = Form(...),
+    month: int = Form(...),
+    image: UploadFile = File(...),
+):
+    text_emb = encode_text(text)
+    img_bytes = await image.read()
+    img_emb = encode_image(img_bytes)
+
+    location_cluster = int(kmeans.predict([[latitude, longitude]])[0])
+
+    add_feats = np.array([[longitude, latitude, location_cluster, hour, dayofweek, month]], dtype=np.float32)
+
+    fused = np.concatenate([img_emb, text_emb, add_feats], axis=1)
+
+    proba = xgb_model.predict_proba(fused)[0]
+
+    return {
+        "WASPADA": float(proba[0]),
+        "KRITIS": float(proba[1]),
+        "cluster_used": location_cluster,
    }
 
 if __name__ == "__main__":
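
For reference, a minimal client sketch for the updated /predict endpoint. Note that location_cluster is no longer sent by the client; it is now derived server-side from the k-means model. The base URL, image path, and sample values below are placeholders, not taken from this commit:

import requests

# Hypothetical call to /predict; field names match the Form(...)/File(...) parameters in app.py.
resp = requests.post(
    "http://localhost:7860/predict",  # assumed local URL for the Space
    data={
        "text": "example report: the river water is murky and smells bad",
        "longitude": 106.8456,  # placeholder coordinates
        "latitude": -6.2088,
        "hour": 14,
        "dayofweek": 2,
        "month": 6,
    },
    files={"image": ("river.jpg", open("river.jpg", "rb"), "image/jpeg")},
)
print(resp.json())  # {"prediction": ..., "cluster_used": ..., "probabilities": {...}}

The new /predict_proba endpoint accepts the same form fields and returns only the class probabilities and the cluster that was used.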
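
The commit also assumes a pre-fitted k-means.pkl artifact, since both endpoints call kmeans.predict([[latitude, longitude]]). A minimal sketch of how such an artifact could be produced with scikit-learn; the training CSV, column names, and n_clusters are assumptions, not part of this commit:

import pickle

import pandas as pd
from sklearn.cluster import KMeans

# Assumed training data containing latitude/longitude columns.
df = pd.read_csv("train.csv")
coords = df[["latitude", "longitude"]].values  # same [latitude, longitude] order the API uses at predict time

kmeans = KMeans(n_clusters=8, random_state=42, n_init=10).fit(coords)

with open("k-means.pkl", "wb") as f:
    pickle.dump(kmeans, f)

Keeping the [latitude, longitude] column order identical between fitting and the kmeans.predict call in app.py is what keeps the saved clusters consistent with the location_cluster feature the XGBoost model was trained on.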