Update app.py
app.py CHANGED
@@ -1,6 +1,23 @@
-
 import gradio as gr
+import numpy as np
+import matplotlib.pyplot as plt
+import time
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
+import pandas as pd
+from sklearn.feature_extraction.text import CountVectorizer
+import nltk
+from nltk.tokenize import word_tokenize
+import re
+
+# Download necessary NLTK data
+try:
+    nltk.data.find('tokenizers/punkt')
+except LookupError:
+    nltk.download('punkt')
+try:
+    nltk.data.find('taggers/averaged_perceptron_tagger')
+except LookupError:
+    nltk.download('averaged_perceptron_tagger')
 
 # Load Whisper for ASR
 asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3")
@@ -13,36 +30,343 @@ grammar_pipeline = pipeline("text-classification", model=cola_model, tokenizer=c
 # Load Grammar Correction Model (T5)
 correction_pipeline = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
 
+# Add sentiment analysis
+sentiment_pipeline = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+
+# Add fluency analysis (using BERT)
+fluency_pipeline = pipeline("text-classification", model="textattack/bert-base-uncased-CoLA")
+
+# Common English filler words to detect
+FILLER_WORDS = ["um", "uh", "like", "you know", "actually", "basically", "literally",
+                "sort of", "kind of", "i mean", "so", "well", "right", "okay", "yeah"]
+
+def count_filler_words(text):
+    """Count filler words in the text"""
+    text = text.lower()
+    count = 0
+    for word in FILLER_WORDS:
+        count += len(re.findall(r'\b' + word + r'\b', text))
+    return count, count / max(len(text.split()), 1)  # Count and ratio
+
+def calculate_speaking_rate(text, duration):
+    """Calculate words per minute"""
+    if duration <= 0:
+        return 0
+    words = len(text.split())
+    return (words / duration) * 60  # Words per minute
+
+def analyze_vocabulary_richness(text):
+    """Analyze vocabulary richness"""
+    words = word_tokenize(text.lower())
+    if not words:
+        return 0, 0
+
+    # Vocabulary richness (unique words / total words)
+    unique_words = set(words)
+    richness = len(unique_words) / len(words)
+
+    # POS tagging to see variety of word types used
+    pos_tags = nltk.pos_tag(words)
+    pos_counts = {}
+    for _, tag in pos_tags:
+        pos_counts[tag] = pos_counts.get(tag, 0) + 1
+
+    return richness, pos_counts
+
+def analyze_sentence_complexity(text):
+    """Analyze sentence complexity"""
+    sentences = re.split(r'[.!?]+', text)
+    sentences = [s.strip() for s in sentences if s.strip()]
+
+    if not sentences:
+        return 0, 0
+
+    # Average words per sentence
+    words_per_sentence = [len(s.split()) for s in sentences]
+    avg_words = sum(words_per_sentence) / len(sentences)
+
+    # Sentence length variation (standard deviation)
+    sentence_length_variation = np.std(words_per_sentence) if len(sentences) > 1 else 0
+
+    return avg_words, sentence_length_variation
+
+def create_detailed_feedback(transcription, grammar_score, corrected_text,
+                             sentiment, fluency, filler_ratio, speaking_rate,
+                             vocabulary_richness, avg_words_per_sentence):
+    """Create detailed feedback based on all metrics"""
+    feedback = []
+
+    # Grammar feedback
+    if "acceptable" in grammar_score.lower():
+        feedback.append("✅ Your grammar is good!")
+    else:
+        feedback.append("❗ Your grammar needs improvement. Check the corrections provided.")
+
+    # Fluency feedback
+    if fluency > 0.7:
+        feedback.append("✅ Your speech flows naturally.")
+    else:
+        feedback.append("❗ Work on making your speech more fluid and natural.")
+
+    # Filler words feedback
+    if filler_ratio > 0.1:
+        feedback.append(f"❗ You used too many filler words ({filler_ratio:.1%} of your words).")
+    else:
+        feedback.append("✅ Good job minimizing filler words!")
+
+    # Speaking rate feedback
+    if 120 <= speaking_rate <= 160:
+        feedback.append(f"✅ Your speaking pace is good ({speaking_rate:.0f} words/min).")
+    elif speaking_rate < 120:
+        feedback.append(f"❗ Try speaking a bit faster ({speaking_rate:.0f} words/min is slower than ideal).")
+    else:
+        feedback.append(f"❗ Try speaking a bit slower ({speaking_rate:.0f} words/min is faster than ideal).")
+
+    # Vocabulary feedback
+    if vocabulary_richness > 0.6:
+        feedback.append("✅ Excellent vocabulary diversity!")
+    elif vocabulary_richness > 0.4:
+        feedback.append("✅ Good vocabulary usage.")
+    else:
+        feedback.append("❗ Try using more varied vocabulary.")
+
+    # Sentence complexity feedback
+    if 10 <= avg_words_per_sentence <= 20:
+        feedback.append("✅ Good sentence structure and length.")
+    elif avg_words_per_sentence < 10:
+        feedback.append("❗ Try using more complex sentences occasionally.")
+    else:
+        feedback.append("❗ Your sentences are quite long. Consider varying your sentence length.")
+
+    # Overall sentiment feedback
+    if sentiment == "POSITIVE":
+        feedback.append("✅ Your tone is positive and engaging.")
+    else:
+        feedback.append("ℹ️ Your tone is neutral/negative. Consider if this matches your intent.")
+
+    return "\n".join(feedback)
+
 def process_audio(audio):
     if audio is None:
-        return
-
+        return {
+            "transcription": "No audio provided.",
+            "grammar_score": "",
+            "corrected": "",
+            "feedback": "",
+            "metrics_chart": None,
+            "detailed_analysis": ""
+        }
+
+    start_time = time.time()
+
+    # Get audio duration (assuming audio[1] contains the sample rate)
+    sample_rate = 16000  # Default if we can't determine
+    if isinstance(audio, tuple) and len(audio) > 1:
+        sample_rate = audio[1]
+
+    # For file uploads, we need to handle differently
+    if isinstance(audio, str):
+        # This is a file path
+        import librosa
+        y, sr = librosa.load(audio, sr=None)
+        duration = librosa.get_duration(y=y, sr=sr)
+    else:
+        # Assuming a tuple with (samples, sample_rate)
+        try:
+            duration = len(audio[0]) / sample_rate if sample_rate > 0 else 0
+        except:
+            duration = 0
+
     # Step 1: Transcription
-
-
+    transcription_result = asr_pipeline(audio)
+    transcription = transcription_result["text"]
+
     # Step 2: Grammar Scoring
     score_output = grammar_pipeline(transcription)[0]
     label = score_output["label"]
     confidence = score_output["score"]
-
+    grammar_score = f"{label} ({confidence:.2f})"
+
     # Step 3: Grammar Correction
     corrected = correction_pipeline(transcription, max_length=128)[0]["generated_text"]
+
+    # Step 4: Sentiment Analysis
+    sentiment_result = sentiment_pipeline(transcription)[0]
+    sentiment = sentiment_result["label"]
+    sentiment_score = sentiment_result["score"]
+
+    # Step 5: Fluency Analysis
+    fluency_result = fluency_pipeline(transcription)[0]
+    fluency_score = fluency_result["score"] if fluency_result["label"] == "acceptable" else 1 - fluency_result["score"]
+
+    # Step 6: Filler Words Analysis
+    filler_count, filler_ratio = count_filler_words(transcription)
+
+    # Step 7: Speaking Rate
+    speaking_rate = calculate_speaking_rate(transcription, duration)
+
+    # Step 8: Vocabulary Richness
+    vocab_richness, pos_counts = analyze_vocabulary_richness(transcription)
+
+    # Step 9: Sentence Complexity
+    avg_words, sentence_variation = analyze_sentence_complexity(transcription)
+
+    # Create feedback
+    feedback = create_detailed_feedback(
+        transcription, grammar_score, corrected, sentiment,
+        fluency_score, filler_ratio, speaking_rate, vocab_richness, avg_words
+    )
+
+    # Create metrics visualization
+    fig, ax = plt.subplots(figsize=(10, 6))
+
+    # Define metrics for radar chart
+    categories = ['Grammar', 'Fluency', 'Vocabulary', 'Speaking Rate', 'Clarity']
+
+    # Normalize scores between 0 and 1
+    grammar_norm = confidence if label == "acceptable" else 1 - confidence
+    speaking_rate_norm = max(0, min(1, 1 - abs((speaking_rate - 140) / 100)))  # Optimal around 140 wpm
+
+    values = [
+        grammar_norm,
+        fluency_score,
+        vocab_richness,
+        speaking_rate_norm,
+        1 - filler_ratio  # Lower filler ratio is better
+    ]
+
+    # Complete the loop for the radar chart
+    values += values[:1]
+    categories += categories[:1]
+
+    # Convert to radians and plot
+    angles = np.linspace(0, 2*np.pi, len(categories), endpoint=False).tolist()
+    angles += angles[:1]
+
+    ax.plot(angles, values, linewidth=2, linestyle='solid')
+    ax.fill(angles, values, alpha=0.25)
+    ax.set_yticklabels([])
+    ax.set_xticks(angles[:-1])
+    ax.set_xticklabels(categories[:-1])
+    ax.grid(True)
+    plt.title('Speaking Performance Metrics', size=15, color='navy', y=1.1)
+
+    # Create detailed analysis text
+    processing_time = time.time() - start_time
+    detailed_analysis = f"""
+## Detailed Speech Analysis
+
+**Processing Time:** {processing_time:.2f} seconds
+**Audio Duration:** {duration:.2f} seconds
 
-
-
-
-
-
-
-
-
-
-
-
+### Metrics:
+- **Grammar Score:** {confidence:.2f} ({label})
+- **Fluency Score:** {fluency_score:.2f}
+- **Speaking Rate:** {speaking_rate:.1f} words per minute
+- **Vocabulary Richness:** {vocab_richness:.2f} (higher is better)
+- **Filler Words:** {filler_count} occurrences ({filler_ratio:.1%} of speech)
+- **Avg Words Per Sentence:** {avg_words:.1f}
+- **Sentiment:** {sentiment} ({sentiment_score:.2f})
+
+### Word Types Used:
+{', '.join([f"{k}: {v}" for k, v in sorted(pos_counts.items(), key=lambda x: x[1], reverse=True)[:5]])}
+"""
+
+    return {
+        "transcription": transcription,
+        "grammar_score": grammar_score,
+        "corrected": corrected,
+        "feedback": feedback,
+        "metrics_chart": fig,
+        "detailed_analysis": detailed_analysis
+    }
+
+# Create theme
+theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="indigo",
+).set(
+    button_primary_background_fill="*primary_500",
+    button_primary_background_fill_hover="*primary_600",
+    button_primary_text_color="white",
+    block_title_text_weight="600",
+    block_border_width="2px",
+    block_shadow="0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1)",
 )
 
-
-
+with gr.Blocks(theme=theme, css="""
+    .container { max-width: 1000px; margin: auto; }
+    .header { text-align: center; margin-bottom: 20px; }
+    .header h1 { color: #1e40af; font-size: 2.5rem; }
+    .header p { color: #6b7280; font-size: 1.1rem; }
+    .footer { text-align: center; margin-top: 30px; color: #6b7280; }
+    .tips-box { background-color: #f0f9ff; border-radius: 10px; padding: 15px; margin: 10px 0; }
+    .score-card { border: 2px solid #dbeafe; border-radius: 10px; padding: 10px; }
+""") as demo:
+    gr.HTML("""
+        <div class="header">
+            <h1>🎙️ Advanced ENGLISH Speaking Assessment</h1>
+            <p>Record or upload your speech to receive comprehensive feedback on your English speaking skills</p>
+        </div>
+    """)
+
+    with gr.Row():
+        with gr.Column():
+            audio_input = gr.Audio(
+                sources=["microphone", "upload"],
+                type="filepath",
+                label="🎤 Speak or Upload Audio"
+            )
+
+            with gr.Accordion("Speaking Tips", open=False):
+                gr.HTML("""
+                    <div class="tips-box">
+                        <h4>Tips for Better Results:</h4>
+                        <ul>
+                            <li>Speak clearly and at a moderate pace</li>
+                            <li>Minimize background noise</li>
+                            <li>Try to speak for at least 20-30 seconds</li>
+                            <li>Avoid filler words like "um", "uh", "like"</li>
+                            <li>Practice with both prepared and impromptu topics</li>
+                        </ul>
+                    </div>
+                """)
+
+            submit_btn = gr.Button("Analyze Speech", variant="primary")
+
+    with gr.Row():
+        with gr.Column():
+            transcription_output = gr.Textbox(label="📝 Transcription", lines=3)
+            corrected_output = gr.Textbox(label="✍️ Grammar Correction", lines=3)
+            grammar_score_output = gr.Textbox(label="✅ Grammar Score")
+
+    with gr.Row():
+        with gr.Column():
+            metrics_chart = gr.Plot(label="Performance Metrics")
+        with gr.Column():
+            feedback_output = gr.Textbox(label="💬 Feedback", lines=8)
+
+    with gr.Accordion("Detailed Analysis", open=False):
+        detailed_analysis = gr.Markdown()
+
+    gr.HTML("""
+        <div class="footer">
+            <p>This tool provides an assessment of your spoken English. For professional evaluation, consult a qualified language instructor.</p>
+        </div>
+    """)
+
+    submit_btn.click(
+        fn=process_audio,
+        inputs=[audio_input],
+        outputs={
+            "transcription": transcription_output,
+            "grammar_score": grammar_score_output,
+            "corrected": corrected_output,
+            "feedback": feedback_output,
+            "metrics_chart": metrics_chart,
+            "detailed_analysis": detailed_analysis
+        }
+    )
 
-
+if __name__ == "__main__":
+    demo.launch()
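
A note on the event wiring added by this commit: `submit_btn.click(...)` passes `outputs` as a dict keyed by strings, and `process_audio` likewise returns string-keyed dicts. As far as I know, Gradio binds outputs either positionally (a list of components matched against a returned tuple) or, when the function returns a dict, by using the component objects themselves as keys, so the string-keyed form above may not map onto the UI components. A minimal sketch of the positional variant, under the assumption that `process_audio` is refactored to return a plain 6-tuple:

# Sketch only, not part of the commit: positional output wiring.
# Assumes process_audio returns
# (transcription, grammar_score, corrected, feedback, fig, detailed_analysis).
submit_btn.click(
    fn=process_audio,
    inputs=[audio_input],
    outputs=[
        transcription_output,
        grammar_score_output,
        corrected_output,
        feedback_output,
        metrics_chart,
        detailed_analysis,
    ],
)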
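
Similarly, the plotting code prepares radar-chart data (evenly spaced angles, values closed into a loop) but draws it on the default Cartesian axes from `plt.subplots(figsize=(10, 6))`. If an actual radar layout is intended, the axes would likely need a polar projection; a small sketch reusing the same `angles`, `values`, and `categories` variables:

# Sketch only: same data, rendered on a polar axes so the radar layout displays as intended.
fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={"projection": "polar"})
ax.plot(angles, values, linewidth=2, linestyle="solid")
ax.fill(angles, values, alpha=0.25)
ax.set_yticklabels([])
ax.set_xticks(angles[:-1])
ax.set_xticklabels(categories[:-1])
ax.set_title("Speaking Performance Metrics", size=15, color="navy", y=1.1)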