Spaces:

azzandr
/

ID-gambling-website-detection

Running

Azzan Dwi Riski committed on May 11

Commit

2e4a786

1 Parent(s): 4e933a0

fix tokenizer issues

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ from PIL import Image
 import pytesseract
 from playwright.sync_api import sync_playwright
 import asyncio
-from transformers import AutoTokenizer
 from torchvision import transforms
 from torchvision import models
 from torchvision.transforms import functional as F
@@ -34,9 +34,10 @@ try:
     else:
         # If local not available, try direct download with cache
         print("Local tokenizer not found, downloading from Hugging Face...")
-        tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
-                                                 use_fast=True,
-                                                 cache_dir='/app/tokenizers')
 except Exception as e:
     print(f"Error loading tokenizer: {e}")
     # Fallback to default BERT tokenizer if needed
@@ -117,7 +118,7 @@ if os.path.exists(image_model_path):
     print("Image-only model loaded from state_dict successfully!")
 else:
     # Download from HuggingFace if local file doesn't exist
-    image_model_path = hf_hub_download(repo_id="azzandr/gambling-fusion-model",
                                       filename="best_image_model_Adam_lr0.0001_bs32_state_dict.pt")
     image_only_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
     num_features = image_only_model.classifier[1].in_features

 import pytesseract
 from playwright.sync_api import sync_playwright
 import asyncio
+from transformers import AutoTokenizer, BertTokenizerFast
 from torchvision import transforms
 from torchvision import models
 from torchvision.transforms import functional as F
     else:
         # If local not available, try direct download with cache
         print("Local tokenizer not found, downloading from Hugging Face...")
+        # tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
+        #                                          use_fast=True,
+        #                                          cache_dir='/app/tokenizers')
+        tokenizer = BertTokenizerFast.from_pretrained("indobenchmark/indobert-base-p1")
 except Exception as e:
     print(f"Error loading tokenizer: {e}")
     # Fallback to default BERT tokenizer if needed
     print("Image-only model loaded from state_dict successfully!")
 else:
     # Download from HuggingFace if local file doesn't exist
+    image_model_path = hf_hub_download(repo_id="azzandr/gambling-image-model",
                                       filename="best_image_model_Adam_lr0.0001_bs32_state_dict.pt")
     image_only_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
     num_features = image_only_model.classifier[1].in_features