Spaces:

11mlabs
/

IndriVoice

Running

App Files Community

skriller18 committed on Nov 26, 2024

Commit

c39573e

1 Parent(s): 7c4c876

Changes

Browse files

Files changed (1) hide show

app.py +24 -16

app.py CHANGED Viewed

@@ -16,27 +16,35 @@ pipe = pipeline(
 st.title("Indri")
 st.subheader("Ultrafast multi-modal AI")
 speakers = {
-    "[spkr_63] : 🇬🇧 👨 book reader",
-    "[spkr_67] : 🇺🇸 👨 influencer",
-    "[spkr_68] : 🇮🇳 👨 book reader",
-    "[spkr_69] : 🇮🇳 👨 book reader",
-    "[spkr_70] : 🇮🇳 👨 motivational speaker",
-    "[spkr_62] : 🇮🇳 👨 book reader heavy",
-    "[spkr_53] : 🇮🇳 👩 recipe reciter",
-    "[spkr_60] : 🇮🇳 👩 book reader",
-    "[spkr_74] : 🇺🇸 👨 book reader",
-    "[spkr_75] : 🇮🇳 👨 entrepreneur",
-    "[spkr_76] : 🇬🇧 👨 nature lover",
-    "[spkr_77] : 🇮🇳 👨 influencer",
-    "[spkr_66] : 🇮🇳 👨 politician"
 }
-speaker_id = st.selectbox("Select a speaker:", options=list(speakers.keys()), format_func=lambda x: speakers[x])
-text_input = st.text_area("Enter text for TTS (max 200 characters):", max_chars=200)
-if st.button("Generate Audio"):
     if text_input:
         output = pipe([text_input], speaker=speaker_id)
         torchaudio.save('output.wav', output[0]['audio'][0], sample_rate=24000)

 st.title("Indri")
 st.subheader("Ultrafast multi-modal AI")
+# Add some spacing and a description
+st.markdown("<h3 style='text-align: center;'>Text-to-Speech Application</h3>", unsafe_allow_html=True)
+st.markdown("<p style='text-align: center;'>Select a speaker and enter text to generate audio.</p>", unsafe_allow_html=True)
 speakers = {
+    "[spkr_63]" : "🇬🇧 👨 book reader",
+    "[spkr_67]" : "🇺🇸 👨 influencer",
+    "[spkr_68]" : "🇮🇳 👨 book reader",
+    "[spkr_69]" : "🇮🇳 👨 book reader",
+    "[spkr_70]" : "🇮🇳 👨 motivational speaker",
+    "[spkr_62]" : "🇮🇳 👨 book reader heavy",
+    "[spkr_53]" : "🇮🇳 👩 recipe reciter",
+    "[spkr_60]" : "🇮🇳 👩 book reader",
+    "[spkr_74]" : "🇺🇸 👨 book reader",
+    "[spkr_75]" : "🇮🇳 👨 entrepreneur",
+    "[spkr_76]" : "🇬🇧 👨 nature lover",
+    "[spkr_77]" : "🇮🇳 👨 influencer",
+    "[spkr_66]" : "🇮🇳 👨 politician"
 }
+# Create a container for the speaker selection and text input
+with st.container():
+    st.markdown("### Speaker Selection")
+    speaker_id = st.selectbox("Select a speaker:", options=list(speakers.keys()), format_func=lambda x: speakers[x])
+    st.markdown("### Text Input")
+    text_input = st.text_area("Enter text for TTS (max 200 characters):", max_chars=200)
+if st.button("Generate Audio", key="generate_audio"):
     if text_input:
         output = pipe([text_input], speaker=speaker_id)
         torchaudio.save('output.wav', output[0]['audio'][0], sample_rate=24000)