Spaces:
Sleeping
Sleeping
Azzan Dwi Riski
committed on
Commit
·
eaa98a9
1
Parent(s):
f8bc416
add some feature
Browse files
app.py
CHANGED
@@ -269,7 +269,7 @@ def predict_single_url(url):
|
|
269 |
print(f"Processing URL: {url}")
|
270 |
screenshot_path = take_screenshot(url)
|
271 |
if not screenshot_path:
|
272 |
-
return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", ""
|
273 |
|
274 |
text = extract_text_from_image(screenshot_path)
|
275 |
raw_text = text # Store raw text before cleaning
|
@@ -290,7 +290,7 @@ def predict_single_url(url):
|
|
290 |
confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
|
291 |
print(f"[Image-Only] URL: {url}")
|
292 |
print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
|
293 |
-
return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, ""
|
294 |
|
295 |
else:
|
296 |
clean_text_data = clean_text(text)
|
@@ -314,15 +314,22 @@ def predict_single_url(url):
|
|
314 |
print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
|
315 |
print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
|
316 |
|
317 |
-
return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data
|
318 |
|
319 |
def predict_batch_urls(file_obj):
|
320 |
results = []
|
321 |
content = file_obj.read().decode('utf-8')
|
322 |
urls = [line.strip() for line in content.splitlines() if line.strip()]
|
323 |
for url in urls:
|
324 |
-
label, confidence, screenshot_path, raw_text, cleaned_text
|
325 |
-
results.append({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
|
327 |
df = pd.DataFrame(results)
|
328 |
print(f"Batch prediction completed for {len(urls)} URLs.")
|
@@ -338,16 +345,13 @@ with gr.Blocks() as app:
|
|
338 |
predict_button = gr.Button("Predict")
|
339 |
|
340 |
with gr.Row():
|
341 |
-
with gr.Column():
|
342 |
label_output = gr.Label()
|
343 |
confidence_output = gr.Textbox(label="Confidence", interactive=False)
|
344 |
screenshot_output = gr.Image(label="Screenshot", type="filepath")
|
345 |
-
with gr.Column():
|
346 |
-
raw_text_output = gr.Textbox(label="Raw Text", interactive=False)
|
347 |
-
cleaned_text_output = gr.Textbox(label="Cleaned Text", interactive=False)
|
348 |
-
image_model_output = gr.Textbox(label="Image Model Output", interactive=False)
|
349 |
-
text_model_output = gr.Textbox(label="Text Model Output", interactive=False)
|
350 |
-
fusion_weights_output = gr.Textbox(label="Fusion Weights", interactive=False)
|
351 |
|
352 |
predict_button.click(
|
353 |
fn=predict_single_url,
|
@@ -357,10 +361,7 @@ with gr.Blocks() as app:
|
|
357 |
confidence_output,
|
358 |
screenshot_output,
|
359 |
raw_text_output,
|
360 |
-
cleaned_text_output
|
361 |
-
image_model_output,
|
362 |
-
text_model_output,
|
363 |
-
fusion_weights_output
|
364 |
]
|
365 |
)
|
366 |
|
|
|
269 |
print(f"Processing URL: {url}")
|
270 |
screenshot_path = take_screenshot(url)
|
271 |
if not screenshot_path:
|
272 |
+
return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", ""
|
273 |
|
274 |
text = extract_text_from_image(screenshot_path)
|
275 |
raw_text = text # Store raw text before cleaning
|
|
|
290 |
confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
|
291 |
print(f"[Image-Only] URL: {url}")
|
292 |
print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
|
293 |
+
return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, ""
|
294 |
|
295 |
else:
|
296 |
clean_text_data = clean_text(text)
|
|
|
314 |
print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
|
315 |
print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
|
316 |
|
317 |
+
return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data
|
318 |
|
319 |
def predict_batch_urls(file_obj):
|
320 |
results = []
|
321 |
content = file_obj.read().decode('utf-8')
|
322 |
urls = [line.strip() for line in content.splitlines() if line.strip()]
|
323 |
for url in urls:
|
324 |
+
label, confidence, screenshot_path, raw_text, cleaned_text = predict_single_url(url)
|
325 |
+
results.append({
|
326 |
+
"url": url,
|
327 |
+
"label": label,
|
328 |
+
"confidence": confidence,
|
329 |
+
"screenshot_path": screenshot_path,
|
330 |
+
"raw_text": raw_text,
|
331 |
+
"cleaned_text": cleaned_text
|
332 |
+
})
|
333 |
|
334 |
df = pd.DataFrame(results)
|
335 |
print(f"Batch prediction completed for {len(urls)} URLs.")
|
|
|
345 |
predict_button = gr.Button("Predict")
|
346 |
|
347 |
with gr.Row():
|
348 |
+
with gr.Column(scale=1):
|
349 |
label_output = gr.Label()
|
350 |
confidence_output = gr.Textbox(label="Confidence", interactive=False)
|
351 |
screenshot_output = gr.Image(label="Screenshot", type="filepath")
|
352 |
+
with gr.Column(scale=1):
|
353 |
+
raw_text_output = gr.Textbox(label="Raw Text", interactive=False, lines=10)
|
354 |
+
cleaned_text_output = gr.Textbox(label="Cleaned Text", interactive=False, lines=10)
|
|
|
|
|
|
|
355 |
|
356 |
predict_button.click(
|
357 |
fn=predict_single_url,
|
|
|
361 |
confidence_output,
|
362 |
screenshot_output,
|
363 |
raw_text_output,
|
364 |
+
cleaned_text_output
|
|
|
|
|
|
|
365 |
]
|
366 |
)
|
367 |
|