Azzan Dwi Riski committed on
Commit
2e4a786
·
1 Parent(s): 4e933a0

fix tokenizer issues

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -8,7 +8,7 @@ from PIL import Image
8
  import pytesseract
9
  from playwright.sync_api import sync_playwright
10
  import asyncio
11
- from transformers import AutoTokenizer
12
  from torchvision import transforms
13
  from torchvision import models
14
  from torchvision.transforms import functional as F
@@ -34,9 +34,10 @@ try:
34
  else:
35
  # If local not available, try direct download with cache
36
  print("Local tokenizer not found, downloading from Hugging Face...")
37
- tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
38
- use_fast=True,
39
- cache_dir='/app/tokenizers')
 
40
  except Exception as e:
41
  print(f"Error loading tokenizer: {e}")
42
  # Fallback to default BERT tokenizer if needed
@@ -117,7 +118,7 @@ if os.path.exists(image_model_path):
117
  print("Image-only model loaded from state_dict successfully!")
118
  else:
119
  # Download from HuggingFace if local file doesn't exist
120
- image_model_path = hf_hub_download(repo_id="azzandr/gambling-fusion-model",
121
  filename="best_image_model_Adam_lr0.0001_bs32_state_dict.pt")
122
  image_only_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
123
  num_features = image_only_model.classifier[1].in_features
 
8
  import pytesseract
9
  from playwright.sync_api import sync_playwright
10
  import asyncio
11
+ from transformers import AutoTokenizer, BertTokenizerFast
12
  from torchvision import transforms
13
  from torchvision import models
14
  from torchvision.transforms import functional as F
 
34
  else:
35
  # If local not available, try direct download with cache
36
  print("Local tokenizer not found, downloading from Hugging Face...")
37
+ # tokenizer = AutoTokenizer.from_pretrained('indobenchmark/indobert-base-p1',
38
+ # use_fast=True,
39
+ # cache_dir='/app/tokenizers')
40
+ tokenizer = BertTokenizerFast.from_pretrained("indobenchmark/indobert-base-p1")
41
  except Exception as e:
42
  print(f"Error loading tokenizer: {e}")
43
  # Fallback to default BERT tokenizer if needed
 
118
  print("Image-only model loaded from state_dict successfully!")
119
  else:
120
  # Download from HuggingFace if local file doesn't exist
121
+ image_model_path = hf_hub_download(repo_id="azzandr/gambling-image-model",
122
  filename="best_image_model_Adam_lr0.0001_bs32_state_dict.pt")
123
  image_only_model = models.efficientnet_b3(weights=models.EfficientNet_B3_Weights.DEFAULT)
124
  num_features = image_only_model.classifier[1].in_features