---
license: mit
library_name: transformers
pipeline_tag: image-to-text
---

# Load model

```python
from transformers import AutoProcessor, BlipForConditionalGeneration

processor = AutoProcessor.from_pretrained("trunks/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("trunks/blip-image-captioning-base")
```
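If a GPU is available, inference is noticeably faster with the model on it. This is a minimal sketch, assuming a CUDA-enabled PyTorch build; if you use it, move `pixel_values` to the same device before calling `generate` below.

```python
import torch

# Optional: run on the GPU when one is available
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
```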
# Prepare image for model

```python
from PIL import Image
from IPython.display import display

img1 = Image.open("imagepath/img.jpeg")

# Downscale to 30% of the original size for a compact preview
width, height = img1.size
img1_resized = img1.resize((int(0.3 * width), int(0.3 * height)))
display(img1_resized)
```
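If the image lives at a URL rather than on disk, the same `Image.open` call works on a streamed response. A minimal sketch; the URL here is a placeholder, not a real asset:

```python
import requests
from PIL import Image

# Placeholder URL; substitute any publicly reachable image
url = "https://example.com/img.jpeg"
img1 = Image.open(requests.get(url, stream=True).raw)
```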
# Generate a caption

```python
# The processor resizes and normalizes the original image into model-ready pixel values
inputs = processor(images=img1, return_tensors="pt")
pixel_values = inputs.pixel_values

generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(generated_caption)
```
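BLIP can also generate conditionally: pass a text prefix along with the image and the model completes it into a caption. A minimal sketch; the prefix string here is an arbitrary example:

```python
# Conditional captioning: the model continues the supplied text prefix
text = "a photograph of"
inputs = processor(images=img1, text=text, return_tensors="pt")

generated_ids = model.generate(**inputs, max_length=50)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])
```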