import gradio as gr
import datasets
from transformers import pipeline
from transformers.pipelines.pt_utils import KeyDataset
from tqdm.auto import tqdm

# Speech-to-text: Whisper large-v2, placed automatically across available devices
transcriber = pipeline(model="openai/whisper-large-v2", device_map="auto")
# checkpoint = "/innev/open-ai/huggingface/openai/whisper-base"
# Image classifier used to turn an input image into a text label
image_to_text_model = pipeline("image-classification")
# Text-to-speech pipeline (default model)
text_to_audio_model = pipeline("text-to-speech")
# Wav2Vec2 ASR pipeline on GPU 0, used for the batch transcription below
pipe_audio = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
# LibriSpeech-based ASR test split from the SUPERB benchmark
dataset = datasets.load_dataset("superb", name="asr", split="test")
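# Neither `transcriber` nor `text_to_audio_model` is exercised below; the
# commented calls are illustrative sketches, with "sample.wav" as an assumed
# local file that the script does not ship with:
# transcription = transcriber("sample.wav")             # -> {"text": "..."}
# speech = text_to_audio_model(transcription["text"])   # -> {"audio": ..., "sampling_rate": ...}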

# Stream every audio file in the test split through the wav2vec2 pipeline
for out in tqdm(pipe_audio(KeyDataset(dataset, "file"))):
    print(out)
    # {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
    # {"text": ....}
    # ....
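# A quicker smoke test would transcribe only a few rows via datasets' select():
# for out in tqdm(pipe_audio(KeyDataset(dataset.select(range(5)), "file"))):
#     print(out)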

def image_to_text(input_image):
    # Convert the image to text: return the label of the top prediction
    text_output = image_to_text_model(input_image)[0]['label']
    print(text_output)
    #texts = transcriber(text_output)
    return text_output
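# Hypothetical usage of image_to_text; "cat.jpg" is an assumed local file,
# not something the original script provides (the pipeline also accepts
# PIL images and URLs):
# label = image_to_text("cat.jpg")  # e.g. "tabby, tabby cat"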

# Minimal Gradio UI wiring image_to_text to an image input
with gr.Blocks() as demo:
    gr.Markdown("Upload an image below and then click **Run** to see the output.")
    with gr.Row():
        inp = gr.Image(type="pil")
        out = gr.Textbox()
    run_button = gr.Button("Run")
    run_button.click(fn=image_to_text, inputs=inp, outputs=out)

demo.launch()