SandraCLV committed on
Commit
69504ad
·
1 Parent(s): 5a087c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -33
app.py CHANGED
@@ -9,45 +9,12 @@ import open_clip
9
 
10
  #CONSTANTS
11
 
12
- # Carga el modelo de clasificación de imagen a texto
13
- blip_processor_large = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
14
- blip_model_large = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
15
-
16
- device = "cuda" if torch.cuda.is_available() else "cpu"
17
- blip_model_large.to(device)
18
-
19
-
20
- ##### IMAGE MODEL TO TEXT, MODEL 1
21
- def generate_caption(processor, model, image, tokenizer=None, use_float_16=False):
22
- inputs = processor(images=image, return_tensors="pt").to(device)
23
-
24
- if use_float_16:
25
- inputs = inputs.to(torch.float16)
26
-
27
- generated_ids = model.generate(pixel_values=inputs.pixel_values, max_length=50)
28
-
29
- if tokenizer is not None:
30
- generated_caption = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
31
- else:
32
- generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
33
-
34
- return generated_caption
35
-
36
-
37
- def generate_caption_coca(model, transform, image):
38
- im = transform(image).unsqueeze(0).to(device)
39
- with torch.no_grad(), torch.cuda.amp.autocast():
40
- generated = model.generate(im, seq_len=20)
41
- return open_clip.decode(generated[0].detach()).split("<end_of_text>")[0].replace("<start_of_text>", "")
42
-
43
-
44
  def generate_captions_speech(image):
45
 
46
  caption_blip_large = generate_caption(blip_processor_large, blip_model_large, image)
47
  print('generate_captions>>>'+caption_blip_large)
48
  return caption_blip_large,text_to_speech(caption_blip_large,"Surprise Me!")
49
 
50
- #####END IMAGE MODEL TO TEXT
51
 
52
  # Define la interfaz de usuario utilizando Gradio entradas y salidas
53
  inputsImg = [
 
9
 
10
  #CONSTANTS
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def generate_captions_speech(image):
13
 
14
  caption_blip_large = generate_caption(blip_processor_large, blip_model_large, image)
15
  print('generate_captions>>>'+caption_blip_large)
16
  return caption_blip_large,text_to_speech(caption_blip_large,"Surprise Me!")
17
 
 
18
 
19
  # Define la interfaz de usuario utilizando Gradio entradas y salidas
20
  inputsImg = [