|
|
|
|
|
|
|
import gradio as gr |
|
import soundfile as sf |
|
import os |
|
from transformers import pipeline |
|
|
|
# Build the speech-recognition pipeline once at import time; the
# transcription handler below reuses this single module-level instance.
asr = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)
|
|
|
def transcribe_speech(audio_filepath):
    """Transcribe a recorded or uploaded audio file to text.

    Args:
        audio_filepath: Path of the audio file supplied by the Gradio
            ``Audio`` component (configured with ``type="filepath"``), or
            ``None`` when the user submitted without providing any audio.

    Returns:
        The ``'text'`` field of the result produced by the module-level
        ``asr`` pipeline, or an empty string when no audio was supplied.
    """
    if audio_filepath is None:
        gr.Warning('No audio found. Please try again!')
        # gr.Warning only shows a notification; it does not raise. Return
        # here so we never fall through to sf.read(None).
        return ""

    audio, sr = sf.read(audio_filepath)

    # soundfile yields a (frames, channels) array for multi-channel files,
    # but the ASR pipeline accepts only a 1-D waveform. Average the channels
    # down to mono so stereo recordings/uploads do not fail.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)

    result = asr({"array": audio, "sampling_rate": sr})
    return result['text']
|
|
|
|
|
# Microphone tab: records from the user's microphone, hands the recording to
# transcribe_speech as a file path, and shows the text in a 4-line textbox.
# NOTE(review): the leading characters of the labels look like mis-decoded
# UTF-8 (probably emoji) — confirm the file's encoding.
_mic_audio_input = gr.Audio(
    sources="microphone",
    type="filepath",
    label="π€ Speak into your microphone",
)
_mic_text_output = gr.Textbox(
    label="π Transcription Result",
    lines=4,
    placeholder="Your transcribed text will appear here...",
)
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=_mic_audio_input,
    outputs=_mic_text_output,
    flagging_mode="never",
    description="Record your voice directly using your device's microphone. Get an instant transcription.",
)
|
|
|
|
|
# Upload tab: accepts an uploaded audio file, hands it to transcribe_speech
# as a file path, and shows the text in a 4-line textbox.
# NOTE(review): the leading characters of the labels look like mis-decoded
# UTF-8 (probably emoji) — confirm the file's encoding.
_file_audio_input = gr.Audio(
    sources="upload",
    type="filepath",
    label="π Upload an Audio File",
)
_file_text_output = gr.Textbox(
    label="π Transcription Result",
    lines=4,
    placeholder="Upload an audio file (e.g., .wav, .mp3) to get its transcription.",
)
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=_file_audio_input,
    outputs=_file_text_output,
    flagging_mode="never",
    description="Upload an audio file for transcription.",
)
|
|
|
|
|
# Stylesheet passed to gr.Blocks(css=...).
# The previous version tried to load "Arial" from Google Fonts. Arial is a
# system font that Google Fonts does not serve, so that request could only
# fail on every page load; the font-family rule below already picks Arial up
# from the system (falling back to the default sans-serif).
# NOTE(review): several selectors (.gr-box, .gr-button, .gr-audio-player,
# .tab-nav) target class names that may not exist in the installed Gradio
# version — verify against the rendered DOM.
custom_css = """
/* Arial is a system font, so it is used directly without loading any web font. */

/* Apply Arial to ALL text elements by default within the Gradio container */
.gradio-container, body, button, input, select, textarea, div, p, span, h1, h2, h3, h4, h5, h6 {
    font-family: 'Arial', sans-serif !important;
}

/* Overall container styling */
.gradio-container {
    max-width: 900px; /* Limit overall width for better readability */
    margin: 30px auto; /* Center the app on the page */
    padding: 30px;
    border-radius: 15px; /* Rounded corners for a softer look */
    box-shadow: 0 8px 25px rgba(0, 0, 0, 0.1); /* Subtle shadow for depth */
    background-color: #ffffff; /* White background for the main content area */
}

/* Titles and Headers */
h1 {
    color: #34495e; /* Darker blue-grey for main title */
    text-align: center;
    font-size: 2.5em; /* Larger main title */
    margin-bottom: 10px;
    font-weight: 700; /* Bold */
}

h3 {
    color: #5d6d7e; /* Slightly lighter blue-grey for subtitle */
    text-align: center;
    font-size: 1.2em;
    margin-top: 0;
    margin-bottom: 25px;
}

p {
    text-align: center;
    color: #7f8c8d; /* Muted grey for descriptions */
    font-size: 0.95em;
    margin-bottom: 20px;
}

/* Tabbed Interface Styling */
.tabs {
    border-radius: 10px;
    overflow: hidden; /* Ensures rounded corners on tabs */
    margin-bottom: 20px;
}

.tab-nav button {
    background-color: #ecf0f1; /* Light grey for inactive tabs */
    color: #34495e; /* Dark text for inactive tabs */
    font-weight: bold;
    padding: 12px 20px;
    border-radius: 8px 8px 0 0;
    margin-right: 5px; /* Small space between tabs */
    transition: all 0.3s ease;
}

.tab-nav button.selected {
    background-color: #4a90e2; /* Vibrant blue for active tab */
    color: white; /* White text for active tab */
    box-shadow: 0 4px 10px rgba(74, 144, 226, 0.3); /* Subtle shadow for active tab */
}

/* Input and Output Component Styling (General) */
.gr-box {
    border-radius: 10px; /* Rounded corners for input/output boxes */
    border: 1px solid #dfe6e9; /* Light border */
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05); /* Very subtle shadow */
    padding: 20px;
    background-color: #fcfcfc; /* Slightly off-white background */
}

/* Labels within components (e.g., "Upload Audio File", "Transcription Result") */
.label {
    font-weight: bold;
    color: #2c3e50; /* Dark text for labels */
    font-size: 1.1em;
    margin-bottom: 8px;
}

/* Buttons (Clear, Submit) */
.gr-button {
    background-color: #4a90e2 !important; /* Primary blue for actions */
    color: white !important;
    border: none !important;
    border-radius: 8px !important; /* Rounded buttons */
    padding: 12px 25px !important;
    font-weight: bold !important;
    transition: background-color 0.3s ease, box-shadow 0.3s ease !important;
    margin: 5px; /* Spacing between buttons */
}

.gr-button:hover {
    background-color: #3a7bd2 !important; /* Darker blue on hover */
    box-shadow: 0 4px 15px rgba(74, 144, 226, 0.4) !important;
}

/* Clear button specific */
.gr-button.secondary {
    background-color: #e0e6eb !important; /* Lighter grey for clear */
    color: #34495e !important;
}

.gr-button.secondary:hover {
    background-color: #d1d8df !important;
    box-shadow: none !important;
}

/* Textbox specific */
textarea {
    border-radius: 8px !important;
    border: 1px solid #bdc3c7 !important;
    padding: 10px !important;
    resize: vertical; /* Allow vertical resizing */
}

/* Audio component player */
.gr-audio-player {
    border-radius: 8px;
    background-color: #f0f0f0;
    padding: 10px;
}

/* Footer styling */
hr {
    border: none;
    border-top: 1px solid #e0e0e0;
    margin-top: 30px;
    margin-bottom: 15px;
}

.footer-text {
    font-size: 0.85em;
    color: #a0a0a0;
    text-align: center;
}
"""
|
|
|
|
|
|
|
# Top-level app container: Soft theme plus the custom stylesheet above.
demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())
|
|
|
|
|
# Page layout, top to bottom: header banner, the two transcription tabs
# (file upload first, microphone second), then a footer.
# NOTE(review): some characters in the heading, tab names and footer look
# like mis-decoded UTF-8 (probably emoji) — confirm the file's encoding.
with demo:
    # Header: title, author credit, tagline and a short description.
    gr.Markdown(
        value="""
        <center>
        <h1 style="color: #4A90E2;">
        ποΈ AI-Powered Speech-to-Text Transcriber π
        </h1>
        <h3 style="color: #6C7A89;">
        Developed by Muhammad Farhan Aslam.
        </h3>
        <h3 style="color: #6C7A89;">
        Convert spoken words into accurate text with ease and precision.
        </h3>
        <p style="color: #8C9CA7; font-size: 1.05em;">
        Effortlessly transcribe audio from your microphone or by uploading a file.
        This application leverages advanced AI to provide clear and reliable transcriptions.
        </p>
        </center>
        """
    )

    # One tab per input method; names are matched to interfaces by position.
    gr.TabbedInterface(
        interface_list=[file_transcribe, mic_transcribe],
        tab_names=["π Transcribe Audio File", "π€ Transcribe from Microphone"],
    )

    # Footer: horizontal rule and attribution, styled via .footer-text.
    gr.Markdown(
        value="""
        <hr>
        <p class="footer-text">
        Built with β€οΈ and Gradio on Hugging Face Transformers.
        </p>
        """
    )

# Start the web server; share=True asks Gradio for a public share link.
demo.launch(share=True)
|
|