Spaces:

azzandr
/

ID-gambling-website-detection

Running

App Files Files Community

Azzan Dwi Riski committed 19 days ago

Commit

277db83

1 Parent(s): 589504e

Update code to show the processed image and text in the interface

Browse files

Files changed (1) hide show

app.py +29 -12

app.py CHANGED Viewed

@@ -263,11 +263,12 @@ def predict_single_url(url):
     screenshot_path = take_screenshot(url)
     if not screenshot_path:
-        return f"Error: Failed to take screenshot for {url}", None
-    text = extract_text_from_image(screenshot_path)
-    if not text.strip():  # Jika text kosong
         print(f"No OCR text found for {url}. Using Image-Only Model.")
         image = Image.open(screenshot_path)
         image_tensor = transform(image).unsqueeze(0).to(device)
@@ -283,10 +284,10 @@ def predict_single_url(url):
         confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
         print(f"[Image-Only] URL: {url}")
         print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
-        return label, f"Confidence: {confidence:.2f}"
     else:
-        image_tensor, input_ids, attention_mask = prepare_data_for_model(screenshot_path, text)
         with torch.no_grad():
             fused_logits, image_logits, text_logits, weights = fusion_model(image_tensor, input_ids, attention_mask)
@@ -306,15 +307,15 @@ def predict_single_url(url):
         print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
         print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
-        return label, f"Confidence: {confidence:.2f}"
 def predict_batch_urls(file_obj):
     results = []
     content = file_obj.read().decode('utf-8')
     urls = [line.strip() for line in content.splitlines() if line.strip()]
     for url in urls:
-        label, confidence = predict_single_url(url)
-        results.append({"url": url, "label": label, "confidence": confidence})
     df = pd.DataFrame(results)
     print(f"Batch prediction completed for {len(urls)} URLs.")
@@ -329,10 +330,26 @@ with gr.Blocks() as app:
     with gr.Tab("Single URL"):
         url_input = gr.Textbox(label="Enter Website URL")
         predict_button = gr.Button("Predict")
-        label_output = gr.Label()
-        confidence_output = gr.Textbox(label="Confidence", interactive=False)
-        predict_button.click(fn=predict_single_url, inputs=url_input, outputs=[label_output, confidence_output])
     with gr.Tab("Batch URLs"):
         file_input = gr.File(label="Upload .txt file with URLs (one per line)")

     screenshot_path = take_screenshot(url)
     if not screenshot_path:
+        return f"Error: Failed to take screenshot for {url}", None, None, None, None
+    raw_text = extract_text_from_image(screenshot_path)
+    cleaned_text = clean_text(raw_text) if raw_text.strip() else ""
+    if not raw_text.strip():  # Jika text kosong
         print(f"No OCR text found for {url}. Using Image-Only Model.")
         image = Image.open(screenshot_path)
         image_tensor = transform(image).unsqueeze(0).to(device)
         confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
         print(f"[Image-Only] URL: {url}")
         print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
+        return label, f"Confidence: {confidence:.2f}", screenshot_path, raw_text, cleaned_text
     else:
+        image_tensor, input_ids, attention_mask = prepare_data_for_model(screenshot_path, raw_text)
         with torch.no_grad():
             fused_logits, image_logits, text_logits, weights = fusion_model(image_tensor, input_ids, attention_mask)
         print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
         print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
+        return label, f"Confidence: {confidence:.2f}", screenshot_path, raw_text, cleaned_text
 def predict_batch_urls(file_obj):
     results = []
     content = file_obj.read().decode('utf-8')
     urls = [line.strip() for line in content.splitlines() if line.strip()]
     for url in urls:
+        label, confidence, screenshot_path, raw_text, cleaned_text = predict_single_url(url)
+        results.append({"url": url, "label": label, "confidence": confidence, "screenshot_path": screenshot_path, "raw_text": raw_text, "cleaned_text": cleaned_text})
     df = pd.DataFrame(results)
     print(f"Batch prediction completed for {len(urls)} URLs.")
     with gr.Tab("Single URL"):
         url_input = gr.Textbox(label="Enter Website URL")
         predict_button = gr.Button("Predict")
+        with gr.Row():
+            with gr.Column():
+                label_output = gr.Label()
+                confidence_output = gr.Textbox(label="Confidence", interactive=False)
+            with gr.Column():
+                screenshot_output = gr.Image(label="Screenshot", type="filepath")
+        with gr.Row():
+            with gr.Column():
+                raw_text_output = gr.Textbox(label="Raw OCR Text", lines=5)
+            with gr.Column():
+                cleaned_text_output = gr.Textbox(label="Cleaned Text", lines=5)
+        predict_button.click(
+            fn=predict_single_url,
+            inputs=url_input,
+            outputs=[label_output, confidence_output, screenshot_output, raw_text_output, cleaned_text_output]
+        )
     with gr.Tab("Batch URLs"):
         file_input = gr.File(label="Upload .txt file with URLs (one per line)")