ECHOAI

Running on Zero

App Files Files Community

MPCIRCLE committed 20 days ago

Commit

c21ab36

verified ·

1 Parent(s): 61222b7

Update webui.py

Browse files

changed to streamlit

Files changed (1) hide show

webui.py +113 -61

webui.py CHANGED Viewed

@@ -1,81 +1,133 @@
-import spaces
 import os
-import shutil
-import threading
 import time
 import sys
 from huggingface_hub import snapshot_download
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(current_dir)
 sys.path.append(os.path.join(current_dir, "indextts"))
-import gradio as gr
 from indextts.infer import IndexTTS
 from tools.i18n.i18n import I18nAuto
-i18n = I18nAuto(language="zh_CN")
-MODE = 'local'
-snapshot_download("IndexTeam/IndexTTS-1.5",local_dir="checkpoints",)
-tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
-os.makedirs("outputs/tasks",exist_ok=True)
-os.makedirs("prompts",exist_ok=True)
-@spaces.GPU
-def infer(voice, text,output_path=None):
-    if not tts:
-        raise Exception("Model not loaded")
     if not output_path:
         output_path = os.path.join("outputs", f"spk_{int(time.time())}.wav")
-    tts.infer(voice, text, output_path)
     return output_path
-def gen_single(prompt, text):
-    output_path = infer(prompt, text)
-    return gr.update(value=output_path,visible=True)
-def update_prompt_audio():
-    update_button = gr.update(interactive=True)
-    return update_button
-with gr.Blocks() as demo:
-    mutex = threading.Lock()
-    gr.HTML('''
-    <h2><center>IndexTTS: An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System</h2>
-<p align="center">
-<a href='https://arxiv.org/abs/2502.05512'><img src='https://img.shields.io/badge/ArXiv-2502.05512-red'></a>
-    ''')
-    with gr.Tab("音频生成"):
-        with gr.Row():
-            os.makedirs("prompts",exist_ok=True)
-            prompt_audio = gr.Audio(label="请上传参考音频",key="prompt_audio",
-                                    sources=["upload","microphone"],type="filepath")
-            prompt_list = os.listdir("prompts")
-            default = ''
-            if prompt_list:
-                default = prompt_list[0]
-            input_text_single = gr.Textbox(label="请输入目标文本",key="input_text_single")
-            gen_button = gr.Button("生成语音",key="gen_button",interactive=True)
-            output_audio = gr.Audio(label="生成结果", visible=False,key="output_audio")
-    prompt_audio.upload(update_prompt_audio,
-                         inputs=[],
-                         outputs=[gen_button])
-    gen_button.click(gen_single,
-                     inputs=[prompt_audio, input_text_single],
-                     outputs=[output_audio])
-def main():
-    tts.load_normalizer()
-    demo.queue(20)
-    demo.launch(server_name="0.0.0.0")
 if __name__ == "__main__":
-    main()

+import streamlit as st
 import os
 import time
 import sys
+import torch
 from huggingface_hub import snapshot_download
 current_dir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(current_dir)
 sys.path.append(os.path.join(current_dir, "indextts"))
 from indextts.infer import IndexTTS
 from tools.i18n.i18n import I18nAuto
+# Initialize internationalization
+# (i18n is not referenced again in the visible part of this file.)
+i18n = I18nAuto(language="en")  # Changed to English
+# GPU configuration: pick the torch device once at import time,
+# CUDA when available and CPU otherwise.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# App configuration: page title and wide layout.
+st.set_page_config(page_title="echoAI - IndexTTS", layout="wide")
+# Create necessary directories: "prompts" receives the uploaded reference
+# audio and "outputs" the generated files. "outputs/tasks" is created here
+# but not otherwise used in the visible code.
+os.makedirs("outputs/tasks", exist_ok=True)
+os.makedirs("prompts", exist_ok=True)
+# Download checkpoints only when no "checkpoints" path exists yet.
+# NOTE(review): the guard looks at the directory alone, so an incomplete
+# earlier download would also skip this step; confirm that is acceptable.
+if not os.path.exists("checkpoints"):
+    snapshot_download("IndexTeam/IndexTTS-1.5", local_dir="checkpoints")
+# Load TTS model with GPU support
+@st.cache_resource
+def load_model():
+    """Build and return the IndexTTS instance used by the app.
+
+    Constructs IndexTTS from the local "checkpoints" directory and its
+    config.yaml, calls load_normalizer() on it, and, when DEVICE is
+    "cuda", moves tts.model to that device.
+
+    The function is wrapped in st.cache_resource, presumably so the model
+    is built once and reused; confirm against the Streamlit docs.
+    """
+    tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
+    tts.load_normalizer()
+    if DEVICE == "cuda":
+        # NOTE(review): assumes IndexTTS exposes a `.model` attribute with a
+        # `.to()` method; nothing in this file establishes that. Verify
+        # against indextts.infer, otherwise this raises on GPU hosts.
+        tts.model.to(DEVICE)  # Move model to GPU if available
+    return tts
+# Module-level handle used by infer() below.
+tts = load_model()
+# Inference function
+def infer(voice_path, text, output_path=None):
+    """Synthesize `text` using the reference audio at `voice_path`.
+
+    Args:
+        voice_path: Path of the reference audio file passed to tts.infer.
+        text: Target text passed to tts.infer.
+        output_path: Where the result should be written. When falsy, a
+            path of the form outputs/spk_<unix-seconds>.wav is used.
+
+    Returns:
+        The output path that was passed to tts.infer.
+    """
     if not output_path:
+        # The timestamp is truncated to whole seconds, so two calls within
+        # the same second get the same default path.
         output_path = os.path.join("outputs", f"spk_{int(time.time())}.wav")
+    # Delegates to the module-level model; no device handling happens here.
+    tts.infer(voice_path, text, output_path)
     return output_path
+# Streamlit UI: page title followed by a centered subtitle and arXiv badge.
+st.title("echoAI - IndexTTS")
+# The markup below is a fixed literal (no user input is interpolated);
+# unsafe_allow_html=True is passed so the raw HTML tags are rendered.
+st.markdown("""
+<h4 style='text-align: center;'>
+    An Industrial-Level Controllable and Efficient Zero-Shot Text-To-Speech System
+</h4>
+<p style='text-align: center;'>
+    <a href='https://arxiv.org/abs/2502.05512'><img src='https://img.shields.io/badge/ArXiv-2502.05512-red'></a>
+</p>
+""", unsafe_allow_html=True)
+# Device status indicator: shows the DEVICE string in upper case in the sidebar.
+st.sidebar.markdown(f"**Device:** {DEVICE.upper()}")
+# Main interface: inputs on the left, generation result on the right.
+with st.container():
+    st.header("Audio Generation")  # Translated
+    col1, col2 = st.columns(2)
+    # Left column: reference clip, target text and the trigger button.
+    with col1:
+        uploaded_audio = st.file_uploader(
+            "Upload reference audio",  # Translated
+            type=["wav", "mp3", "ogg"],
+            accept_multiple_files=False
+        )
+        input_text = st.text_area(
+            "Input target text",  # Translated
+            height=150,
+            placeholder="Enter text to synthesize..."
+        )
+        generate_btn = st.button("Generate Speech")  # Translated
+    # Right column: run inference once the button is pressed and both
+    # inputs are present; otherwise warn when the button was pressed.
+    with col2:
+        if generate_btn and uploaded_audio and input_text:
+            with st.spinner("Generating audio..."):
+                # Save uploaded audio. The file name is supplied by the
+                # client, so keep only its final path component to make
+                # sure the write stays inside "prompts".
+                safe_name = os.path.basename(uploaded_audio.name)
+                audio_path = os.path.join("prompts", safe_name)
+                with open(audio_path, "wb") as f:
+                    f.write(uploaded_audio.getbuffer())
+                # Perform inference. Any failure is surfaced in the UI
+                # instead of crashing the script run.
+                try:
+                    output_path = infer(audio_path, input_text)
+                    st.audio(output_path, format="audio/wav")
+                    st.success("Generation complete!")
+                    # Download button
+                    with open(output_path, "rb") as f:
+                        st.download_button(
+                            "Download Result",  # Translated
+                            f,
+                            file_name=os.path.basename(output_path),
+                        )
+                except Exception as e:
+                    st.error(f"Error: {str(e)}")
+        elif generate_btn:
+            st.warning("Please upload an audio file and enter text first!")  # Translated
+# Sidebar with additional info: static feature list and usage steps.
+# These are fixed markdown literals; nothing here depends on user input.
+with st.sidebar:
+    st.header("About echoAI")
+    st.markdown("""
+    ### Key Features:
+    - Zero-shot voice cloning
+    - Industrial-grade TTS
+    - Efficient synthesis
+    - Controllable output
+    """)
+    # Horizontal rule between the two sections.
+    st.markdown("---")
+    st.markdown("""
+    ### Usage Instructions:
+    1. Upload a reference audio clip
+    2. Enter target text
+    3. Click 'Generate Speech'
+    """)
 if __name__ == "__main__":
+    # Placeholder: nothing runs here. Cleanup of old files is not
+    # implemented; the UI above is built by the module-level statements.
+    pass