# Source: Hugging Face Space "SpyC0der77/HRVC" (Space status: Runtime error).
# File size: 17,319 bytes; revision 1a942eb.

"""Module which defines the code for the "One-click generation" tab."""

from collections.abc import Sequence
from functools import partial

import gradio as gr

from ultimate_rvc.core.generate.song_cover import run_pipeline
from ultimate_rvc.typing_extra import AudioExt, F0Method, SampleRate
from ultimate_rvc.web.common import (
    PROGRESS_BAR,
    exception_harness,
    toggle_visible_component,
    update_cached_songs,
    update_output_audio,
    update_song_cover_name,
    update_value,
)
from ultimate_rvc.web.typing_extra import ConcurrencyId, SourceType


def _toggle_intermediate_audio(
    visible: bool,
) -> list[gr.Accordion]:
    """
    Build accordion updates for showing or hiding intermediate audio.

    Eight accordion updates are produced in total: one that carries
    the requested visibility, followed by seven that only request the
    collapsed state.

    Parameters
    ----------
    visible : bool
        Whether the first returned accordion should be visible.

    Returns
    -------
    list[gr.Accordion]
        One accordion update with ``visible`` applied and
        ``open=False``, followed by seven accordion updates with
        ``open=False``.

    """
    # Seven collapsed updates; only their ``open`` state is specified.
    step_updates = [gr.Accordion(open=False) for _ in range(7)]
    # The leading update is the only one whose visibility is toggled.
    parent_update = gr.Accordion(visible=visible, open=False)
    return [parent_update, *step_updates]


def render(
    song_dirs: Sequence[gr.Dropdown],
    cached_song_1click: gr.Dropdown,
    cached_song_multi: gr.Dropdown,
    model_1click: gr.Dropdown,
    intermediate_audio: gr.Dropdown,
    output_audio: gr.Dropdown,
) -> None:
    """
    Render "One-click generation" tab.

    The tab is made up of four option accordions (main, vocal
    conversion, audio mixing and audio output options), a hidden
    accordion holding the intermediate audio tracks, and a row with
    the "Reset settings" button, the "Generate" button and the
    resulting song cover. Event listeners for those components are
    registered here as well.

    Parameters
    ----------
    song_dirs : Sequence[gr.Dropdown]
        Dropdowns for selecting song directories in the
        "Multi-step generation" tab. They are used as outputs of the
        cached-songs refresh that follows a successful generation.
    cached_song_1click : gr.Dropdown
        Dropdown for selecting a cached song in the
        "One-click generation" tab. It is rendered inside this tab.
    cached_song_multi : gr.Dropdown
        Dropdown for selecting a cached song in the
        "Multi-step generation" tab.
    model_1click : gr.Dropdown
        Dropdown for selecting voice model in the
        "One-click generation" tab. It is rendered inside this tab.
    intermediate_audio : gr.Dropdown
        Dropdown for selecting intermediate audio files to delete in the
        "Delete audio" tab.
    output_audio : gr.Dropdown
        Dropdown for selecting output audio files to delete in the
        "Delete audio" tab.

    """
    with gr.Tab("One-click generation"):
        with gr.Accordion("Main options"):
            with gr.Row():
                with gr.Column():
                    # type="index" makes the dropdown pass the position of
                    # the selected choice (not its value) to listeners.
                    source_type = gr.Dropdown(
                        list(SourceType),
                        value=SourceType.PATH,
                        label="Source type",
                        type="index",
                        info="The type of source to retrieve a song from.",
                    )
                with gr.Column():
                    # Three alternative source widgets share this column;
                    # only the textbox is visible initially here
                    # (local_file is created with visible=False).
                    source = gr.Textbox(
                        label="Source",
                        info=(
                            "Link to a song on YouTube or the full path of a local"
                            " audio file."
                        ),
                    )
                    local_file = gr.Audio(
                        label="Source",
                        type="filepath",
                        visible=False,
                    )
                    # Created by the caller; placed in the layout here.
                    cached_song_1click.render()
                # NOTE(review): presumably shows the one widget (out of 3)
                # that matches the selected source type index and hides the
                # others — confirm against toggle_visible_component.
                source_type.input(
                    partial(toggle_visible_component, 3),
                    inputs=source_type,
                    outputs=[source, local_file, cached_song_1click],
                    show_progress="hidden",
                )

                # The source textbox is the only source widget passed to the
                # pipeline, so the other two widgets write into it.
                # NOTE(review): update_value presumably forwards its input
                # unchanged — confirm in ultimate_rvc.web.common.
                local_file.change(
                    update_value,
                    inputs=local_file,
                    outputs=source,
                    show_progress="hidden",
                )
                cached_song_1click.input(
                    update_value,
                    inputs=cached_song_1click,
                    outputs=source,
                    show_progress="hidden",
                )
            with gr.Row():
                # Created by the caller; placed in the layout here.
                model_1click.render()
                n_octaves = gr.Slider(
                    -3,
                    3,
                    value=0,
                    step=1,
                    label="Vocal pitch shift",
                    info=(
                        "The number of octaves to pitch-shift converted vocals by."
                        " Use 1 for male-to-female and -1 for vice-versa."
                    ),
                )
                n_semitones = gr.Slider(
                    -12,
                    12,
                    value=0,
                    step=1,
                    label="Overall pitch shift",
                    info=(
                        "The number of semi-tones to pitch-shift converted vocals,"
                        " instrumentals, and backup vocals by."
                    ),
                )

        # NOTE(review): the trailing "<br><br>" in some info strings below
        # looks like manual padding to even out component heights within a
        # row — confirm before removing.
        with gr.Accordion("Vocal conversion options", open=False):
            with gr.Row():
                index_rate = gr.Slider(
                    0,
                    1,
                    value=0.5,
                    label="Index rate",
                    info=(
                        "How much of the accent in the voice model to keep in the"
                        " converted vocals. Increase to bias the conversion towards the"
                        " accent of the voice model."
                    ),
                )
                filter_radius = gr.Slider(
                    0,
                    7,
                    value=3,
                    step=1,
                    label="Filter radius",
                    info=(
                        "If >=3: apply median filtering to harvested pitch results."
                        " Can help reduce breathiness in the converted vocals."
                    ),
                )
                rms_mix_rate = gr.Slider(
                    0,
                    1,
                    value=0.25,
                    label="RMS mix rate",
                    info=(
                        "How much to mimic the loudness (0) of the input vocals or a"
                        " fixed loudness (1)."
                        "<br><br>"
                    ),
                )
            with gr.Row():
                protect = gr.Slider(
                    0,
                    0.5,
                    value=0.33,
                    label="Protect rate",
                    info=(
                        "Protection of voiceless consonants and breath sounds. Decrease"
                        " to increase protection at the cost of indexing accuracy. Set"
                        " to 0.5 to disable."
                        "<br><br>"
                    ),
                )
                f0_method = gr.Dropdown(
                    list(F0Method),
                    value=F0Method.RMVPE,
                    label="Pitch detection algorithm",
                    info=(
                        "The method to use for pitch detection. Best option is RMVPE"
                        " (clarity in vocals), then Mangio-CREPE (smoother vocals)."
                        "<br><br>"
                    ),
                )
                hop_length = gr.Slider(
                    32,
                    320,
                    value=128,
                    step=1,
                    label="Hop length",
                    info=(
                        "How often the CREPE-based pitch detection algorithm checks for"
                        " pitch changes. Measured in milliseconds. Lower values lead to"
                        " longer conversion times and a higher risk of voice cracks,"
                        " but better pitch accuracy."
                    ),
                )
        with gr.Accordion("Audio mixing options", open=False):
            # NOTE(review): the empty Markdown components appear to act as
            # vertical spacers before each heading — confirm.
            gr.Markdown("")
            gr.Markdown("**Reverb control on converted vocals**")
            with gr.Row():
                room_size = gr.Slider(
                    0,
                    1,
                    value=0.15,
                    label="Room size",
                    info=(
                        "Size of the room which reverb effect simulates. Increase for"
                        " longer reverb time."
                    ),
                )
            with gr.Row():
                wet_level = gr.Slider(
                    0,
                    1,
                    value=0.2,
                    label="Wetness level",
                    info="Loudness of converted vocals with reverb effect applied.",
                )
                dry_level = gr.Slider(
                    0,
                    1,
                    value=0.8,
                    label="Dryness level",
                    info="Loudness of converted vocals without reverb effect applied.",
                )
                damping = gr.Slider(
                    0,
                    1,
                    value=0.7,
                    label="Damping level",
                    info="Absorption of high frequencies in reverb effect.",
                )

            gr.Markdown("")
            gr.Markdown("**Volume controls (dB)**")
            with gr.Row():
                main_gain = gr.Slider(-20, 20, value=0, step=1, label="Main vocals")
                inst_gain = gr.Slider(-20, 20, value=0, step=1, label="Instrumentals")
                backup_gain = gr.Slider(-20, 20, value=0, step=1, label="Backup vocals")
        with gr.Accordion("Audio output options", open=False):
            with gr.Row():
                # The value is a callable with inputs, so it is recomputed
                # whenever the cached song or the voice model changes.
                # NOTE(review): with update_placeholder=True the helper
                # presumably updates the placeholder rather than the text —
                # confirm against update_song_cover_name.
                output_name = gr.Textbox(
                    value=partial(
                        update_song_cover_name,
                        None,
                        update_placeholder=True,
                    ),
                    inputs=[cached_song_1click, model_1click],
                    label="Output name",
                    info=(
                        "If no name is provided, a suitable name will be generated"
                        " automatically."
                    ),
                    placeholder="Ultimate RVC song cover",
                )
                output_sr = gr.Dropdown(
                    choices=list(SampleRate),
                    value=SampleRate.HZ_44100,
                    label="Output sample rate",
                    info="The sample rate to save the generated song cover in.",
                )
                output_format = gr.Dropdown(
                    list(AudioExt),
                    value=AudioExt.MP3,
                    label="Output format",
                    info="The format to save the generated song cover in.",
                )
            with gr.Row():
                show_intermediate_audio = gr.Checkbox(
                    label="Show intermediate audio",
                    value=False,
                    info=(
                        "Show intermediate audio tracks generated during song cover"
                        " generation."
                    ),
                )

        # The per-step accordions and their audio tracks are created up-front
        # with render=False so that they can be collected in lists (used as
        # event outputs further down) and placed in the layout afterwards via
        # explicit .render() calls.
        intermediate_audio_accordions = [
            gr.Accordion(label, open=False, render=False)
            for label in [
                "Step 0: song retrieval",
                "Step 1a: vocals/instrumentals separation",
                "Step 1b: main vocals/ backup vocals separation",
                "Step 1c: main vocals cleanup",
                "Step 2: conversion of main vocals",
                "Step 3: post-processing of converted vocals",
                "Step 4: pitch shift of background tracks",
            ]
        ]
        (
            song_retrieval_accordion,
            vocals_separation_accordion,
            main_vocals_separation_accordion,
            vocal_cleanup_accordion,
            vocal_conversion_accordion,
            vocals_postprocessing_accordion,
            pitch_shift_accordion,
        ) = intermediate_audio_accordions
        # The order of this list is the order in which the tracks are passed
        # as outputs of the "Generate" click handler (after the song cover).
        intermediate_audio_tracks = [
            gr.Audio(label=label, type="filepath", interactive=False, render=False)
            for label in [
                "Song",
                "Vocals",
                "Instrumentals",
                "Main vocals",
                "Backup vocals",
                "De-reverbed main vocals",
                "Main vocals reverb",
                "Converted vocals",
                "Post-processed vocals",
                "Pitch-shifted instrumentals",
                "Pitch-shifted backup vocals",
            ]
        ]
        (
            song,
            vocals_track,
            instrumentals_track,
            main_vocals_track,
            backup_vocals_track,
            main_vocals_dereverbed_track,
            main_vocals_reverb_track,
            converted_vocals_track,
            postprocessed_vocals_track,
            instrumentals_shifted_track,
            backup_vocals_shifted_track,
        ) = intermediate_audio_tracks
        # Parent accordion; hidden until "Show intermediate audio" is ticked.
        # Each step accordion is rendered into it and then re-entered as a
        # context so that its track(s) are placed inside it.
        with gr.Accordion(
            "Intermediate audio tracks",
            open=False,
            visible=False,
        ) as intermediate_audio_accordion:
            song_retrieval_accordion.render()
            with song_retrieval_accordion:
                song.render()
            vocals_separation_accordion.render()
            with vocals_separation_accordion, gr.Row():
                vocals_track.render()
                instrumentals_track.render()
            main_vocals_separation_accordion.render()
            with main_vocals_separation_accordion, gr.Row():
                main_vocals_track.render()
                backup_vocals_track.render()
            vocal_cleanup_accordion.render()
            with vocal_cleanup_accordion, gr.Row():
                main_vocals_dereverbed_track.render()
                main_vocals_reverb_track.render()
            vocal_conversion_accordion.render()
            with vocal_conversion_accordion:
                converted_vocals_track.render()
            vocals_postprocessing_accordion.render()
            with vocals_postprocessing_accordion:
                postprocessed_vocals_track.render()
            pitch_shift_accordion.render()
            with pitch_shift_accordion, gr.Row():
                instrumentals_shifted_track.render()
                backup_vocals_shifted_track.render()

        # The handler returns 8 updates: the first one is applied to the
        # parent accordion (visibility), the remaining 7 to the step
        # accordions (collapsed).
        show_intermediate_audio.change(
            _toggle_intermediate_audio,
            inputs=show_intermediate_audio,
            outputs=[
                intermediate_audio_accordion,
                *intermediate_audio_accordions,
            ],
            show_progress="hidden",
        )

        with gr.Row(equal_height=True):
            reset_btn = gr.Button(value="Reset settings", scale=2)
            generate_btn = gr.Button("Generate", scale=2, variant="primary")
            song_cover = gr.Audio(label="Song cover", scale=3)

        # Generation chain:
        #   1. run the pipeline (wrapped in exception_harness) and fill the
        #      song cover plus the 11 intermediate tracks;
        #   2. only if step 1 succeeded, refresh the cached-song dropdowns;
        #   3. afterwards refresh the output audio dropdown.
        # concurrency_limit/concurrency_id place the pipeline event in the
        # ConcurrencyId.GPU concurrency group with a limit of 1.
        generate_btn.click(
            partial(
                exception_harness(
                    run_pipeline,
                    info_msg="Song cover generated successfully!",
                ),
                progress_bar=PROGRESS_BAR,
            ),
            # Passed positionally to run_pipeline, so the order must match
            # its signature (defined in ultimate_rvc.core.generate.song_cover).
            inputs=[
                source,
                model_1click,
                n_octaves,
                n_semitones,
                f0_method,
                index_rate,
                filter_radius,
                rms_mix_rate,
                protect,
                hop_length,
                room_size,
                wet_level,
                dry_level,
                damping,
                main_gain,
                inst_gain,
                backup_gain,
                output_sr,
                output_format,
                output_name,
            ],
            outputs=[song_cover, *intermediate_audio_tracks],
            concurrency_limit=1,
            concurrency_id=ConcurrencyId.GPU,
        ).success(
            # The first argument equals the number of outputs below
            # (3 fixed dropdowns + one per song directory dropdown).
            # NOTE(review): the meaning of the [] and [2] arguments is
            # defined by update_cached_songs and not visible here; index 2
            # of the outputs is the intermediate_audio dropdown — confirm.
            partial(
                update_cached_songs,
                3 + len(song_dirs),
                [],
                [2],
            ),
            outputs=[
                cached_song_1click,
                cached_song_multi,
                intermediate_audio,
                *song_dirs,
            ],
            show_progress="hidden",
        ).then(
            # NOTE(review): same argument convention as above is presumed
            # (1 output, [] and [0]) — confirm against update_output_audio.
            partial(update_output_audio, 1, [], [0]),
            outputs=[output_audio],
            show_progress="hidden",
        )
        # Restore every option to the default declared on its component
        # above. The values are matched to the outputs by position, so both
        # lists must stay in the same order. The source, voice model and
        # output name are not part of the reset.
        reset_btn.click(
            lambda: [
                0,
                0,
                0.5,
                3,
                0.25,
                0.33,
                F0Method.RMVPE,
                128,
                0.15,
                0.2,
                0.8,
                0.7,
                0,
                0,
                0,
                SampleRate.HZ_44100,
                AudioExt.MP3,
                False,
            ],
            outputs=[
                n_octaves,
                n_semitones,
                index_rate,
                filter_radius,
                rms_mix_rate,
                protect,
                f0_method,
                hop_length,
                room_size,
                wet_level,
                dry_level,
                damping,
                main_gain,
                inst_gain,
                backup_gain,
                output_sr,
                output_format,
                show_intermediate_audio,
            ],
            show_progress="hidden",
        )