SandraCLV committed on
Commit
84573ef
·
1 Parent(s): 20bee1f

Update image_text_model.py

Browse files
Files changed (1) hide show
  1. image_text_model.py +37 -0
image_text_model.py CHANGED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, BlipForConditionalGeneration, AutoTokenizer
3
+ import librosa
4
+ import numpy as np
5
+ import torch
6
+ import open_clip
7
+
8
# Load the image-to-text captioning model (BLIP, large checkpoint).
_BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
blip_processor_large = AutoProcessor.from_pretrained(_BLIP_CHECKPOINT)
blip_model_large = BlipForConditionalGeneration.from_pretrained(_BLIP_CHECKPOINT)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
blip_model_large.to(device)
15
##### IMAGE MODEL TO TEXT, MODEL 1
def generate_caption(processor, model, image, tokenizer=None, use_float_16=False,
                     max_length=50, device=None):
    """Generate a text caption for an image with a BLIP-style captioning model.

    Args:
        processor: HF processor that converts the image into model inputs and
            can ``batch_decode`` generated token ids.
        model: captioning model exposing ``generate(pixel_values=..., max_length=...)``.
        image: input image in any format the processor accepts (e.g. PIL image).
        tokenizer: optional tokenizer used for decoding; when None, the
            processor's own ``batch_decode`` is used instead.
        use_float_16: cast the processed inputs to float16 — needed when the
            model itself was loaded in half precision.
        max_length: maximum number of generated tokens (default 50, matching
            the previous hard-coded value).
        device: torch device string; when None, auto-detects "cuda" if
            available, else "cpu" (same expression as the module-level setup).

    Returns:
        The decoded caption string for the single batch element.
    """
    if device is None:
        # Same auto-detection the module-level setup uses.
        device = "cuda" if torch.cuda.is_available() else "cpu"

    inputs = processor(images=image, return_tensors="pt").to(device)

    if use_float_16:
        # Match the input dtype to a model loaded in half precision.
        inputs = inputs.to(torch.float16)

    generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=max_length)

    # Prefer an explicitly supplied tokenizer for decoding.
    decoder = tokenizer if tokenizer is not None else processor
    return decoder.batch_decode(generated_ids, skip_special_tokens=True)[0]
30
+
31
+
32
def generate_caption_coca(model, transform, image):
    """Generate a caption for an image with a CoCa model via open_clip.

    Applies *transform* to the image, generates up to 20 tokens under
    no-grad/autocast, and returns the decoded text with the special
    start/end markers stripped.
    """
    # Preprocess and add a batch dimension, then move to the target device.
    batch = transform(image).unsqueeze(0).to(device)
    with torch.no_grad(), torch.cuda.amp.autocast():
        token_ids = model.generate(batch, seq_len=20)
    decoded = open_clip.decode(token_ids[0].detach())
    caption = decoded.split("<end_of_text>")[0]
    return caption.replace("<start_of_text>", "")
#####END IMAGE MODEL TO TEXT