adamantix committed on
Commit
8e13d87
·
verified ·
1 Parent(s): 474823a

initial commit

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -0
  2. app.py +119 -0
  3. requirements.txt +13 -0
  4. xgb_full.pkl +3 -0
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: CPython 3.10 on Debian.
FROM python:3.10

# NOTE(review): port 7860 suggests this image targets a Hugging Face Docker
# Space, where the container is run as UID 1000. Create that user up front so
# the process has a real, writable home directory for the model caches that
# app.py populates at startup (transformers / open_clip downloads).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR /code

# Install dependencies first so this layer is cached across code-only changes.
# As a non-root user pip installs into ~/.local, which PATH above exposes.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application code and the pickled XGBoost model.
COPY --chown=user . .

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import pickle
3
+
4
+ import numpy as np
5
+ import torch
6
+ from fastapi import FastAPI, UploadFile, File, Form
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ from PIL import Image
9
+ from transformers import AutoTokenizer, AutoModel
10
+ import open_clip
11
+
12
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Text encoder: tokenizer and weights are resolved by name through
# transformers; the model is moved to `device` and put in eval mode once,
# at import time.
TEXT_MODEL_NAME = "indobenchmark/indobert-large-p1"
tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL_NAME)
text_model = AutoModel.from_pretrained(TEXT_MODEL_NAME).to(device).eval()

# Image encoder and its matching preprocessing transform from open_clip.
# The second value returned by create_model_and_transforms is not used here.
clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
    "EVA01-g-14-plus",
    pretrained="merged2b_s11b_b114k",
)
clip_model = clip_model.to(device).eval()

# Classifier consumed by /predict.
# NOTE: pickle.load executes arbitrary code from the file; this is only safe
# because xgb_full.pkl is an artifact shipped with the application. Never
# point this at a user-supplied path.
with open("xgb_full.pkl", "rb") as model_file:
    xgb_model = pickle.load(model_file)
28
+
29
def preprocess_text(text: str) -> str:
    """Return *text* with leading and trailing whitespace removed.

    TODO: the real preprocessing steps are still to be added here.
    """
    cleaned = text.strip()
    return cleaned
32
+
33
# ASGI application object; the title/description/version below are what the
# generated API docs display.
app = FastAPI(
    title="Multimodal Water Pollution Risk API",
    description=(
        "Input: text + image + geospatial + time\n"
        # The file name here matches the pickle that is actually loaded.
        "Model: IndoBERT + EVA-CLIP (HF Hub) + XGBoost (xgb_full.pkl)\n"
    ),
    version="1.0.0",
)

# CORS is wide open: any origin, method and header is accepted.
# NOTE(review): fine for a public demo API; restrict allow_origins if this is
# ever deployed behind authentication.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
48
+
49
@app.get("/")
def root():
    """Return a static status payload pointing callers at POST /predict."""
    payload = dict(
        status="OK",
        message="Multimodal Water Pollution Risk API is running.",
        info="Use POST /predict with text, image, and features.",
    )
    return payload
56
+
57
def _decode_upload_as_rgb(raw: bytes) -> "Image.Image":
    """Decode uploaded bytes into an RGB PIL image, or fail with HTTP 400.

    Raises:
        HTTPException: status 400 when *raw* is empty or PIL cannot decode it.
    """
    # Local import: HTTPException is not part of the module-level fastapi import.
    from fastapi import HTTPException

    if not raw:
        raise HTTPException(status_code=400, detail="Uploaded image is empty.")
    try:
        # convert() returns a new image, so the source handle can be closed
        # as soon as the conversion is done.
        with Image.open(io.BytesIO(raw)) as source:
            return source.convert("RGB")
    except (OSError, ValueError, Image.DecompressionBombError) as exc:
        # UnidentifiedImageError and truncated-file errors are OSError
        # subclasses; without this they would surface as an HTTP 500.
        raise HTTPException(
            status_code=400,
            detail="Uploaded file is not a readable image.",
        ) from exc


@app.post("/predict")
async def predict(
    text: str = Form(...),
    longitude: float = Form(...),
    latitude: float = Form(...),
    location_cluster: int = Form(...),
    hour: int = Form(...),
    dayofweek: int = Form(...),
    month: int = Form(...),
    image: UploadFile = File(...),
):
    """Classify one multimodal report as "WASPADA" or "KRITIS".

    The text embedding, image embedding and six numeric features are
    concatenated (in that image, text, numeric order) and passed to the
    XGBoost classifier's ``predict_proba``.

    Args:
        text: Free-text report; passed through ``preprocess_text`` first.
        longitude, latitude, location_cluster, hour, dayofweek, month:
            Numeric features, appended in exactly this order.
        image: Uploaded image file; decoded with PIL and converted to RGB.

    Returns:
        A dict with ``"prediction"`` (the label of the highest-probability
        class; index 1 is "KRITIS", anything else "WASPADA") and
        ``"probabilities"`` for both labels.

    Raises:
        HTTPException: status 400 if the upload is empty or not a decodable
            image.
    """
    # Validate the upload before spending time on model inference.
    img_bytes = await image.read()
    pil_img = _decode_upload_as_rgb(img_bytes)

    # NOTE(review): everything below is synchronous work inside an async
    # handler, so the event loop is blocked for the duration of a request.
    # Consider moving it to a worker thread if concurrent requests matter.

    # 1. Preprocess text
    cleaned_text = preprocess_text(text)

    # 2. Encode text -> embedding of the first token position
    #    (presumably the [CLS] token, shape [1, 1024] -- TODO confirm).
    text_inputs = tokenizer(
        cleaned_text,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=128,
    )
    text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
    with torch.no_grad():
        text_emb = text_model(**text_inputs).last_hidden_state[:, 0, :]
    text_emb = text_emb.cpu().numpy()

    # 3. Encode image -> EVA-CLIP image embedding (width depends on the model).
    img_tensor = clip_preprocess(pil_img).unsqueeze(0).to(device)
    with torch.no_grad():
        img_emb = clip_model.encode_image(img_tensor)
    img_emb = img_emb.cpu().numpy()

    # 4. Additional numeric features (same order as training).
    add_feats = np.array(
        [[longitude, latitude, location_cluster, hour, dayofweek, month]],
        dtype=np.float32,
    )

    # 5. Concatenate: [image_emb, text_emb, add_feats]
    #    The result must have shape [1, dim_image + dim_text + 6].
    fused = np.concatenate([img_emb, text_emb, add_feats], axis=1)

    # 6. XGBoost prediction; the code below assumes a two-class model.
    proba = xgb_model.predict_proba(fused)[0]
    pred_idx = int(np.argmax(proba))
    label = "KRITIS" if pred_idx == 1 else "WASPADA"

    return {
        "prediction": label,
        "probabilities": {
            "WASPADA": float(proba[0]),
            "KRITIS": float(proba[1]),
        },
    }
115
+
116
if __name__ == "__main__":
    # Direct-execution entry point (`python app.py`); the Dockerfile's CMD
    # launches uvicorn from the command line with the same host and port.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ xgboost
4
+ open_clip_torch
5
+ timm
6
+ Pillow
7
+ numpy
8
+ uvicorn
9
+ fastapi
10
+ pydantic
11
+ python-multipart
12
+ sentencepiece
13
+ protobuf
xgb_full.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa565eb70fed4e6a4099597161946283b789dec9355c8f11f4e4a9cc23d24bb6
3
+ size 1046727