SandraCLV committed
Commit 97efaa3 · 1 Parent(s): 3e16b4a

Update app.py

Files changed (1)
  1. app.py +20 -8
app.py CHANGED
@@ -2,22 +2,34 @@ import gradio as gr
 from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 import torch
 import librosa
+import datasets
+from transformers.pipelines.pt_utils import KeyDataset
+from tqdm.auto import tqdm
 
-checkpoint = "openai/whisper-base"
+transcriber = pipeline(model="openai/whisper-large-v2", device_map="auto")
 # checkpoint = "/innev/open-ai/huggingface/openai/whisper-base"
 image_to_text_model = pipeline("image-classification")
 text_to_audio_model = pipeline("text-to-speech")
+pipe_audio = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
+dataset = datasets.load_dataset("superb", name="asr", split="test")
 
+for out in tqdm(pipe_audio(KeyDataset(dataset, "file"))):
+    print(out)
+    # {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
+    # {"text": ....}
+    # ....
 def image_to_text(input_image):
     # Convert the image to text
     text_output = image_to_text_model(input_image)[0]['label']
+    print(text_output)
+    #texts = transcriber(text_output)
     return text_output
 
-with gr.Blocks() as demo:
-    gr.Markdown("Start typing below and then click **Run** to see the output.")
-    with gr.Row():
-        inp = gr.Image()
-        out = gr.Textbox(placeholder=image_to_text(inp))
-    gr.Interface(fn=image_to_text, inputs=inp, outputs=out, interpretation="default")
+#with gr.Blocks() as demo:
+#    gr.Markdown("Start typing below and then click **Run** to see the output.")
+#    with gr.Row():
+#        inp = gr.Image()
+#        out = gr.Textbox(placeholder=image_to_text(inp))
+#    gr.Interface(fn=image_to_text, inputs=inp, outputs=out, interpretation="default")
 
-demo.launch()
+#demo.launch()
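
The new loop streams the superb ASR test split through the wav2vec2 pipeline; wrapping the dataset in KeyDataset hands the pipeline just the "file" column, so it can prefetch and batch the audio instead of transcribing one file at a time. A standalone sketch of the same pattern, trimmed with .select(range(5)) so it finishes quickly (the five-utterance cap is an illustration, not part of the commit):

import datasets
from tqdm.auto import tqdm
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset

# LibriSpeech audio via the superb benchmark; keep five utterances for a quick run.
dataset = datasets.load_dataset("superb", name="asr", split="test").select(range(5))
asr = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")

# Each item in the "file" column is a path to an audio file; the pipeline
# loads and transcribes it, yielding dicts like {"text": "..."}.
for out in tqdm(asr(KeyDataset(dataset, "file")), total=len(dataset)):
    print(out["text"])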
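
The commented-out texts = transcriber(text_output) line points at the open question in this commit: transcriber is a speech-to-text pipeline and expects audio input, while text_output is a label string, so speaking the predicted label is a job for the already-defined text_to_audio_model. A minimal sketch of how the commented-out Gradio UI could be rewired once that is settled, assuming the text-to-speech pipeline returns a dict with "audio" and "sampling_rate" keys (the usual transformers TTS output); image_to_speech and the component labels below are hypothetical, not part of the commit:

import gradio as gr
from transformers import pipeline

image_to_text_model = pipeline("image-classification")
text_to_audio_model = pipeline("text-to-speech")

def image_to_speech(input_image):
    # Classify the image, then synthesize speech for the top label.
    label = image_to_text_model(input_image)[0]["label"]
    speech = text_to_audio_model(label)  # assumed {"audio": ndarray, "sampling_rate": int}
    return label, (speech["sampling_rate"], speech["audio"].squeeze())

demo = gr.Interface(
    fn=image_to_speech,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Textbox(label="Predicted label"), gr.Audio(label="Spoken label")],
)
demo.launch()

The sketch drops the original interpretation="default" argument, since Gradio removed interpretation support in its 4.x releases.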