Azzan Dwi Riski
committed on
Commit
·
2e4a786
1
Parent(s):
4e933a0
fix tokenizer issues
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from PIL import Image
|
|
8 |
import pytesseract
|
9 |
from playwright.sync_api import sync_playwright
|
10 |
import asyncio
|
11 |
-
from transformers import AutoTokenizer
|
12 |
from torchvision import transforms
|
13 |
from torchvision import models
|
14 |
from torchvision.transforms import functional as F
|
@@ -34,9 +34,10 @@ try:
|
|
34 |
else:
|
35 |
# If local not available, try direct download with cache
|
36 |
print("Local tokenizer not found, downloading from Hugging Face...")
|
37 |
-
tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
|
38 |
-
|
39 |
-
|
|
|
40 |
except Exception as e:
|
41 |
print(f"Error loading tokenizer: {e}")
|
42 |
# Fallback to default BERT tokenizer if needed
|
@@ -117,7 +118,7 @@ if os.path.exists(image_model_path):
|
|
117 |
print("Image-only model loaded from state_dict successfully!")
|
118 |
else:
|
119 |
# Download from HuggingFace if local file doesn't exist
|
120 |
-
image_model_path = hf_hub_download(repo_id="azzandr/gambling-
|
121 |
filename="best_image_model_Adam_lr0.0001_bs32_state_dict.pt")
|
122 |
image_only_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
|
123 |
num_features = image_only_model.classifier[1].in_features
|
|
|
8 |
import pytesseract
|
9 |
from playwright.sync_api import sync_playwright
|
10 |
import asyncio
|
11 |
+
from transformers import AutoTokenizer, BertTokenizerFast
|
12 |
from torchvision import transforms
|
13 |
from torchvision import models
|
14 |
from torchvision.transforms import functional as F
|
|
|
34 |
else:
|
35 |
# If local not available, try direct download with cache
|
36 |
print("Local tokenizer not found, downloading from Hugging Face...")
|
37 |
+
# tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
|
38 |
+
# use_fast=True,
|
39 |
+
# cache_dir='/app/tokenizers')
|
40 |
+
tokenizer = BertTokenizerFast.from_pretrained("indobenchmark/indobert-base-p1")
|
41 |
except Exception as e:
|
42 |
print(f"Error loading tokenizer: {e}")
|
43 |
# Fallback to default BERT tokenizer if needed
|
|
|
118 |
print("Image-only model loaded from state_dict successfully!")
|
119 |
else:
|
120 |
# Download from HuggingFace if local file doesn't exist
|
121 |
+
image_model_path = hf_hub_download(repo_id="azzandr/gambling-image-model",
|
122 |
filename="best_image_model_Adam_lr0.0001_bs32_state_dict.pt")
|
123 |
image_only_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
|
124 |
num_features = image_only_model.classifier[1].in_features
|