Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import os | |
| import torch | |
| from torch import autocast | |
| from diffusers import StableDiffusionPipeline | |
| from PIL import Image | |
| from styles import css, header_html, footer_html | |
| from examples import examples | |
| from transformers import pipeline | |
# --- Model setup (runs once at import time) ---

# Speech-to-text pipeline; the default HF automatic-speech-recognition model.
ars_model = pipeline("automatic-speech-recognition")

model_id = "CompVis/stable-diffusion-v1-4"
device = "cuda" if torch.cuda.is_available() else "cpu"

# If you are running this code locally, you need to either do a
# `huggingface-cli login` or paste your User Access Token from
# https://huggingface.co/settings/tokens into the use_auth_token field below.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    use_auth_token=os.environ.get('auth_token'),
    revision="fp16",
    torch_dtype=torch.float16,
)
pipe = pipe.to(device)
def transcribe(audio):
    """Run the ASR pipeline on *audio* (a filepath) and return the text."""
    result = ars_model(audio)
    return result["text"]
def infer(audio, samples, steps, scale, seed):
    """Transcribe *audio* into a prompt and generate images with Stable Diffusion.

    Parameters
    ----------
    audio : str
        Filepath of the recorded audio clip (fed to the ASR pipeline).
    samples : int
        Number of images to generate (the prompt is repeated this many times).
    steps : int
        Number of denoising inference steps.
    scale : float
        Classifier-free guidance scale.
    seed : int
        RNG seed for reproducible generation.

    Returns
    -------
    list
        Generated PIL images; any output flagged as NSFW is replaced by the
        local "unsafe.png" placeholder.
    """
    from contextlib import nullcontext

    prompt = transcribe(audio)
    generator = torch.Generator(device=device).manual_seed(seed)

    # autocast is only applicable on CUDA; on CPU use a no-op context so the
    # pipeline call is written once instead of duplicated in both branches
    # (the original repeated the identical call under `if`/`else`).
    ctx = autocast("cuda") if device == "cuda" else nullcontext()
    with ctx:
        images_list = pipe(
            [prompt] * samples,
            num_inference_steps=steps,
            guidance_scale=scale,
            generator=generator,
        )

    # Swap any NSFW-flagged sample for the placeholder image.
    safe_image = Image.open(r"unsafe.png")
    images = []
    for i, image in enumerate(images_list["sample"]):
        if images_list["nsfw_content_detected"][i]:
            images.append(safe_image)
        else:
            images.append(image)
    return images
# --- Gradio UI ---
demo = gr.Blocks(css=css)

with demo:
    gr.HTML(header_html)
    with gr.Group():
        with gr.Box():
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                # Microphone input; the recording is saved to a temp file and
                # its path is passed to `infer`.
                audio = gr.Audio(
                    label="Describe a prompt",
                    source="microphone",
                    type="filepath"
                    # ).style(
                    #     border=(True, False, True, True),
                    #     rounded=(True, False, False, True),
                    #     container=False,
                )
                btn = gr.Button("Generate image").style(
                    margin=False,
                    rounded=(False, True, True, False),
                )
        gallery = gr.Gallery(
            label="Generated images", show_label=False, elem_id="gallery"
        ).style(grid=[2], height="auto")

        # Toggles the advanced-options row via the JS snippet below.
        adv_btn = gr.Button("Advanced options", elem_id="advanced-btn")

        with gr.Row(elem_id="advanced-options"):
            samples = gr.Slider(
                label="Images", minimum=1, maximum=4, value=4, step=1
            )
            steps = gr.Slider(
                label="Steps", minimum=1, maximum=50, value=45, step=1
            )
            scale = gr.Slider(
                label="Guidance Scale", minimum=0, maximum=50, value=7.5, step=0.1
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=2147483647,
                step=1,
                randomize=True,
            )

        # ex = gr.Examples(fn=infer, inputs=[
        #     audio, samples, steps, scale, seed], outputs=gallery)
        # ex.dataset.headers = [""]
        # audio.submit(infer, inputs=[audio, samples,
        #     steps, scale, seed], outputs=gallery)

        btn.click(
            infer,
            inputs=[audio, samples, steps, scale, seed],
            outputs=gallery,
        )
        adv_btn.click(
            None,
            [],
            audio,
            _js="""
() => {
const options = document.querySelector("body > gradio-app").querySelector("#advanced-options");
options.style.display = ["none", ""].includes(options.style.display) ? "flex" : "none";
}""",
        )
        gr.HTML(footer_html)

demo.queue(max_size=25).launch()