Azzan Dwi Riski committed on
Commit
eaa98a9
·
1 Parent(s): f8bc416

Simplify prediction outputs: drop per-model and fusion-weight fields, enlarge raw/cleaned text boxes

Browse files
Files changed (1) hide show
  1. app.py +17 -16
app.py CHANGED
@@ -269,7 +269,7 @@ def predict_single_url(url):
269
  print(f"Processing URL: {url}")
270
  screenshot_path = take_screenshot(url)
271
  if not screenshot_path:
272
- return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", "", "", "", ""
273
 
274
  text = extract_text_from_image(screenshot_path)
275
  raw_text = text # Store raw text before cleaning
@@ -290,7 +290,7 @@ def predict_single_url(url):
290
  confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
291
  print(f"[Image-Only] URL: {url}")
292
  print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
293
- return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, "", f"Image Model: {image_probs[0]:.2f}", "", ""
294
 
295
  else:
296
  clean_text_data = clean_text(text)
@@ -314,15 +314,22 @@ def predict_single_url(url):
314
  print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
315
  print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
316
 
317
- return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data, f"Image Model: {image_probs[0]:.2f}", f"Text Model: {text_probs[0]:.2f}", f"Fusion Weights: Image={weights[0]:.2f}, Text={weights[1]:.2f}"
318
 
319
  def predict_batch_urls(file_obj):
320
  results = []
321
  content = file_obj.read().decode('utf-8')
322
  urls = [line.strip() for line in content.splitlines() if line.strip()]
323
  for url in urls:
324
- label, confidence, screenshot_path, raw_text, cleaned_text, image_model_output, text_model_output, fusion_weights = predict_single_url(url)
325
- results.append({"url": url, "label": label, "confidence": confidence, "screenshot_path": screenshot_path, "raw_text": raw_text, "cleaned_text": cleaned_text, "image_model_output": image_model_output, "text_model_output": text_model_output, "fusion_weights": fusion_weights})
 
 
 
 
 
 
 
326
 
327
  df = pd.DataFrame(results)
328
  print(f"Batch prediction completed for {len(urls)} URLs.")
@@ -338,16 +345,13 @@ with gr.Blocks() as app:
338
  predict_button = gr.Button("Predict")
339
 
340
  with gr.Row():
341
- with gr.Column():
342
  label_output = gr.Label()
343
  confidence_output = gr.Textbox(label="Confidence", interactive=False)
344
  screenshot_output = gr.Image(label="Screenshot", type="filepath")
345
- with gr.Column():
346
- raw_text_output = gr.Textbox(label="Raw Text", interactive=False)
347
- cleaned_text_output = gr.Textbox(label="Cleaned Text", interactive=False)
348
- image_model_output = gr.Textbox(label="Image Model Output", interactive=False)
349
- text_model_output = gr.Textbox(label="Text Model Output", interactive=False)
350
- fusion_weights_output = gr.Textbox(label="Fusion Weights", interactive=False)
351
 
352
  predict_button.click(
353
  fn=predict_single_url,
@@ -357,10 +361,7 @@ with gr.Blocks() as app:
357
  confidence_output,
358
  screenshot_output,
359
  raw_text_output,
360
- cleaned_text_output,
361
- image_model_output,
362
- text_model_output,
363
- fusion_weights_output
364
  ]
365
  )
366
 
 
269
  print(f"Processing URL: {url}")
270
  screenshot_path = take_screenshot(url)
271
  if not screenshot_path:
272
+ return f"❌ Error: Unable to capture screenshot for {url}. This may be due to:\n• Too many redirects\n• Website blocking automated access\n• Network connectivity issues\n• Invalid URL", "Screenshot capture failed", None, "", ""
273
 
274
  text = extract_text_from_image(screenshot_path)
275
  raw_text = text # Store raw text before cleaning
 
290
  confidence = image_probs[0].item() if is_gambling else 1 - image_probs[0].item()
291
  print(f"[Image-Only] URL: {url}")
292
  print(f"Prediction: {label} | Confidence: {confidence:.2f}\n")
293
+ return label, f"Confidence: {confidence:.2f} (Image-Only Model)", screenshot_path, raw_text, ""
294
 
295
  else:
296
  clean_text_data = clean_text(text)
 
314
  print(f"Text Model Prediction Probability: {text_probs[0]:.2f}")
315
  print(f"Fusion Final Prediction: {label} | Confidence: {confidence:.2f}\n")
316
 
317
+ return label, f"Confidence: {confidence:.2f} (Fusion Model)", screenshot_path, raw_text, clean_text_data
318
 
319
  def predict_batch_urls(file_obj):
320
  results = []
321
  content = file_obj.read().decode('utf-8')
322
  urls = [line.strip() for line in content.splitlines() if line.strip()]
323
  for url in urls:
324
+ label, confidence, screenshot_path, raw_text, cleaned_text = predict_single_url(url)
325
+ results.append({
326
+ "url": url,
327
+ "label": label,
328
+ "confidence": confidence,
329
+ "screenshot_path": screenshot_path,
330
+ "raw_text": raw_text,
331
+ "cleaned_text": cleaned_text
332
+ })
333
 
334
  df = pd.DataFrame(results)
335
  print(f"Batch prediction completed for {len(urls)} URLs.")
 
345
  predict_button = gr.Button("Predict")
346
 
347
  with gr.Row():
348
+ with gr.Column(scale=1):
349
  label_output = gr.Label()
350
  confidence_output = gr.Textbox(label="Confidence", interactive=False)
351
  screenshot_output = gr.Image(label="Screenshot", type="filepath")
352
+ with gr.Column(scale=1):
353
+ raw_text_output = gr.Textbox(label="Raw Text", interactive=False, lines=10)
354
+ cleaned_text_output = gr.Textbox(label="Cleaned Text", interactive=False, lines=10)
 
 
 
355
 
356
  predict_button.click(
357
  fn=predict_single_url,
 
361
  confidence_output,
362
  screenshot_output,
363
  raw_text_output,
364
+ cleaned_text_output
 
 
 
365
  ]
366
  )
367