|
|
|
import gradio as gr |
|
import torch |
|
from transformers import AutoTokenizer, AutoModel, T5ForConditionalGeneration |
|
import json |
|
import re |
|
import ast |
|
import time |
|
from typing import Dict, List, Any, Optional |
|
import logging |
|
import traceback |
|
|
|
|
|
# Configure root logging once at import time; per-module logger per convention.
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger(__name__)
|
|
|
class SemanticAnalyzer:
    """Semantic code analyzer combining transformer models with heuristics.

    CodeBERT supplies code embeddings for similarity scoring; CodeT5 supplies
    natural-language summaries and code generation.  Every public method
    degrades gracefully to pure string/regex heuristics when the models are
    unavailable (``models_loaded`` is False), so callers never need to
    special-case a failed model download.
    """

    def __init__(self):
        """Eagerly load both models; fall back to heuristic-only mode on any failure."""
        logger.info("π Initializing CodeLab Semantic Analyzer...")
        self.models_loaded = False

        try:
            logger.info("π Loading CodeBERT...")
            self.codebert_tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
            self.codebert_model = AutoModel.from_pretrained("microsoft/codebert-base")

            logger.info("π§ Loading CodeT5...")
            self.codet5_tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base")
            self.codet5_model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-base")

            # Inference only: disable dropout / train-mode behavior.
            self.codebert_model.eval()
            self.codet5_model.eval()

            self.models_loaded = True
            # BUGFIX: the original literal was split across two physical lines
            # by a corrupted emoji, which is a SyntaxError in Python.
            logger.info("All models loaded successfully!")

        except Exception as e:
            logger.error(f"β Error loading models: {str(e)}")
            self.models_loaded = False

    def generate_code_embedding(self, code: str) -> List[float]:
        """Return a 768-dim L2-normalized CodeBERT [CLS] embedding for ``code``.

        Returns an all-zero vector of the same length when models are not
        loaded or embedding fails, so downstream cosine similarity evaluates
        to 0.0 instead of raising.
        """
        if not self.models_loaded:
            logger.warning("β οΈ Models not loaded, returning zero embedding")
            return [0.0] * 768

        try:
            cleaned_code = self._clean_code_for_analysis(code)

            inputs = self.codebert_tokenizer(
                cleaned_code,
                return_tensors="pt",
                max_length=512,
                truncation=True,
                padding=True
            )

            with torch.no_grad():
                outputs = self.codebert_model(**inputs)

            # The [CLS] token (position 0) serves as the whole-sequence summary.
            embedding = outputs.last_hidden_state[:, 0, :].squeeze()

            # Unit-normalize so dot products equal cosine similarity.
            embedding_norm = torch.nn.functional.normalize(embedding, dim=0)

            return embedding_norm.tolist()

        except Exception as e:
            logger.error(f"β Error generating embedding: {str(e)}")
            return [0.0] * 768

    def analyze_with_codet5(self, code: str, question_text: str) -> Dict[str, Any]:
        """Summarize ``code`` with CodeT5 and attach the heuristic analyses.

        ``question_text`` is accepted for interface stability but is not
        currently used.  Falls back entirely to heuristics when models are
        unavailable or generation fails.
        """
        if not self.models_loaded:
            return self._fallback_analysis(code)

        try:
            results = {}

            summarize_input = f"Summarize the following Python function: {code}"
            inputs = self.codet5_tokenizer(
                summarize_input,
                return_tensors="pt",
                max_length=512,
                truncation=True
            )

            with torch.no_grad():
                summary_ids = self.codet5_model.generate(
                    inputs.input_ids,
                    max_length=100,
                    num_beams=3,
                    early_stopping=True,
                    do_sample=False,
                    pad_token_id=self.codet5_tokenizer.pad_token_id
                )

            results['code_summary'] = self.codet5_tokenizer.decode(
                summary_ids[0],
                skip_special_tokens=True
            )

            # Heuristic analyses are attached regardless of model output.
            results['logic_patterns'] = self.extract_logic_patterns_enhanced(code)
            results['approach_analysis'] = self.analyze_approach_enhanced(code)
            results['complexity_analysis'] = self.analyze_complexity_enhanced(code)
            results['semantic_quality'] = self.assess_semantic_quality(code)

            return results

        except Exception as e:
            logger.error(f"β Error in CodeT5 analysis: {str(e)}")
            return self._fallback_analysis(code)

    def _fallback_analysis(self, code: str) -> Dict[str, Any]:
        """Heuristic-only analysis used when AI models are unavailable or fail."""
        lines_count = len(code.split('\n'))
        return {
            'code_summary': f'Python function with {lines_count} lines',
            'logic_patterns': self.extract_logic_patterns_enhanced(code),
            'approach_analysis': self.analyze_approach_enhanced(code),
            'complexity_analysis': self.analyze_complexity_enhanced(code),
            'semantic_quality': self.assess_semantic_quality(code)
        }

    def extract_logic_patterns_enhanced(self, code: str) -> List[str]:
        """Return a de-duplicated list of logic-pattern tags found in ``code``.

        Detection is substring/regex based, so e.g. ``'for' in code`` also
        matches "for" inside identifiers or strings — acceptable for coarse
        educational tagging, not precise parsing.  Order of the returned
        list is unspecified (it round-trips through a set).
        """
        patterns = []

        # Built-in usage.
        if 'max(' in code: patterns.append('builtin_max')
        if 'min(' in code: patterns.append('builtin_min')
        if 'sum(' in code: patterns.append('builtin_sum')
        if 'len(' in code: patterns.append('length_operations')
        if 'sorted(' in code: patterns.append('sorting_operations')

        # Control-flow keywords.
        if 'for' in code and 'if' in code: patterns.append('iterative_conditional')
        if 'while' in code: patterns.append('loop_based')
        if 'def' in code: patterns.append('function_definition')
        if 'return' in code: patterns.append('return_statement')

        # Structural regexes.
        if re.search(r'for\s+\w+\s+in\s+range', code): patterns.append('indexed_iteration')
        if re.search(r'for\s+\w+\s+in\s+enumerate', code): patterns.append('indexed_enumeration')
        if re.search(r'if\s+.*[<>]=?.*:', code): patterns.append('comparison_logic')
        if re.search(r'\[.*\]', code): patterns.append('list_operations')

        # Defensive-coding hints.
        if 'try:' in code or 'except' in code: patterns.append('error_handling')
        if 'if not' in code or 'if len(' in code: patterns.append('input_validation')

        # Math usage.
        if any(op in code for op in ['**', 'pow(', 'sqrt', 'math.']): patterns.append('mathematical_operations')

        return list(set(patterns))

    def analyze_approach_enhanced(self, code: str) -> str:
        """Classify the dominant algorithmic approach via ordered substring checks.

        Built-in-based approaches are checked first and therefore dominate
        any loops also present in the code.
        """
        if 'max(' in code and 'min(' not in code:
            return 'builtin_maximum_approach'
        elif 'min(' in code and 'max(' not in code:
            return 'builtin_minimum_approach'
        elif 'max(' in code and 'min(' in code:
            return 'dual_builtin_approach'
        elif 'sum(' in code:
            return 'builtin_aggregation_approach'
        elif 'sorted(' in code:
            return 'sorting_based_approach'
        elif 'for' in code and 'if' in code and 'range' in code:
            return 'indexed_iterative_approach'
        elif 'for' in code and 'if' in code:
            return 'iterative_comparison_approach'
        elif 'while' in code:
            return 'loop_based_approach'
        elif 'enumerate' in code:
            return 'enumerated_iteration_approach'
        elif re.search(r'def\s+\w+.*def\s+\w+', code):
            # NOTE: '.' does not match newlines, so this only detects two
            # 'def's on the same physical line (original behavior kept).
            return 'nested_function_approach'
        else:
            return 'custom_logic_approach'

    def analyze_complexity_enhanced(self, code: str) -> Dict[str, str]:
        """Estimate big-O time/space from keyword counts (rough heuristic)."""

        def estimate_time_complexity(code):
            # NOTE: this regex only matches two loop keywords on the SAME
            # line; multi-line nesting is counted as independent loops.
            nested_loops = len(re.findall(r'for.*for|while.*for|for.*while', code))
            single_loops = code.count('for') + code.count('while') - (nested_loops * 2)

            if 'max(' in code or 'min(' in code or 'sum(' in code:
                return 'O(n)'
            elif 'sorted(' in code:
                return 'O(n log n)'
            elif nested_loops >= 1:
                return 'O(nΒ²)' if nested_loops == 1 else 'O(nΒ³)'
            elif single_loops >= 1:
                return 'O(n)'
            else:
                return 'O(1)'

        def estimate_space_complexity(code):
            # sorted() and list comprehensions allocate O(n); bare brackets
            # are assumed to mean list handling as well.
            if 'sorted(' in code or re.search(r'\[.*for.*\]', code):
                return 'O(n)'
            elif '[' in code and ']' in code:
                return 'O(n)'
            else:
                return 'O(1)'

        return {
            'time': estimate_time_complexity(code),
            'space': estimate_space_complexity(code)
        }

    def assess_semantic_quality(self, code: str) -> Dict[str, Any]:
        """Score readability/clarity/efficiency and collect best-practice tags."""
        quality_metrics = {
            'readability_score': 0,
            'logic_clarity': 'unclear',
            'efficiency_level': 'low',
            'best_practices': []
        }

        lines = code.split('\n')
        total_score = 10  # start from a perfect base; bonuses below, capped at 10

        # Any documentation present?
        if '"""' in code or "'''" in code or '#' in code:
            quality_metrics['best_practices'].append('documented_code')
            total_score += 1

        # Conventional, descriptive variable names?
        if re.search(r'\b(max_val|min_val|result|answer|total)\b', code):
            quality_metrics['best_practices'].append('meaningful_variables')
            total_score += 1

        # Guards against empty input?
        if 'if not' in code or 'if len(' in code:
            quality_metrics['best_practices'].append('input_validation')
            total_score += 1

        # Efficiency tier from builtin usage vs. manual loops.
        if any(builtin in code for builtin in ['max(', 'min(', 'sum(']):
            quality_metrics['efficiency_level'] = 'high'
            quality_metrics['best_practices'].append('builtin_functions')
        elif 'for' in code and 'if' in code:
            quality_metrics['efficiency_level'] = 'medium'

        # Clarity from length plus a basic function shape.
        if len(lines) <= 10 and 'def' in code and 'return' in code:
            quality_metrics['logic_clarity'] = 'clear'
        elif len(lines) <= 20:
            quality_metrics['logic_clarity'] = 'moderate'

        quality_metrics['readability_score'] = min(10, max(1, total_score))

        return quality_metrics

    def generate_optimal_solution(self, question_text: str, question_type: str = "auto_detect") -> Dict[str, Any]:
        """Produce a reference solution for ``question_text``.

        Well-known question shapes (max / min / sum) get hand-written optimal
        answers; anything else goes to CodeT5 generation, or to a static
        template when models are unavailable.  ``question_type`` is kept for
        interface stability but is not currently consulted.
        """
        try:
            question_lower = question_text.lower()

            if 'max' in question_lower and 'min' not in question_lower:
                return {
                    'code': 'def find_max(numbers):\n    """Find maximum value in a list"""\n    if not numbers:\n        return None\n    return max(numbers)',
                    'explanation': 'Optimal solution using built-in max() function with input validation',
                    'approach': 'builtin_optimized',
                    'complexity': {'time': 'O(n)', 'space': 'O(1)'},
                    'generated_by': 'pattern_optimized',
                    'quality_score': 10
                }
            elif 'min' in question_lower and 'max' not in question_lower:
                return {
                    'code': 'def find_min(numbers):\n    """Find minimum value in a list"""\n    if not numbers:\n        return None\n    return min(numbers)',
                    'explanation': 'Optimal solution using built-in min() function with input validation',
                    'approach': 'builtin_optimized',
                    'complexity': {'time': 'O(n)', 'space': 'O(1)'},
                    'generated_by': 'pattern_optimized',
                    'quality_score': 10
                }
            elif 'sum' in question_lower or 'total' in question_lower:
                return {
                    'code': 'def calculate_sum(numbers):\n    """Calculate sum of numbers in a list"""\n    return sum(numbers)',
                    'explanation': 'Optimal solution using built-in sum() function',
                    'approach': 'builtin_optimized',
                    'complexity': {'time': 'O(n)', 'space': 'O(1)'},
                    'generated_by': 'pattern_optimized',
                    'quality_score': 10
                }
            else:
                if self.models_loaded:
                    return self._ai_generate_solution(question_text)
                else:
                    return self._template_solution(question_text)

        except Exception as e:
            logger.error(f"β Error generating optimal solution: {str(e)}")
            return self._template_solution(question_text)

    def _ai_generate_solution(self, question_text: str) -> Dict[str, Any]:
        """Generate a candidate solution with CodeT5 beam search."""
        try:
            generate_input = f"Generate optimal Python function for: {question_text}"
            inputs = self.codet5_tokenizer(
                generate_input,
                return_tensors="pt",
                max_length=256,
                truncation=True
            )

            with torch.no_grad():
                generated_ids = self.codet5_model.generate(
                    inputs.input_ids,
                    max_length=200,
                    num_beams=3,
                    early_stopping=True,
                    do_sample=False,
                    pad_token_id=self.codet5_tokenizer.pad_token_id
                )

            generated_code = self.codet5_tokenizer.decode(
                generated_ids[0],
                skip_special_tokens=True
            )

            return {
                'code': generated_code,
                'explanation': 'AI-generated solution using CodeT5',
                'approach': 'ai_generated',
                # CONSISTENCY FIX: every other branch returns complexity as a
                # {'time', 'space'} dict; this used to be the bare string 'O(n)'.
                'complexity': {'time': 'O(n)', 'space': 'O(1)'},
                'generated_by': 'codet5',
                'quality_score': 7
            }

        except Exception as e:
            logger.error(f"β Error in AI generation: {str(e)}")
            return self._template_solution(question_text)

    def _template_solution(self, question_text: str) -> Dict[str, Any]:
        """Static last-resort solution when both patterns and AI generation fail."""
        return {
            'code': 'def solution(data):\n    """Template solution"""\n    # Implementation needed\n    return data[0] if data else None',
            'explanation': 'Template solution - implementation needed based on specific requirements',
            'approach': 'template_fallback',
            # CONSISTENCY FIX: dict form to match the pattern-based solutions
            # (was the bare string 'O(1)').
            'complexity': {'time': 'O(1)', 'space': 'O(1)'},
            'generated_by': 'template',
            'quality_score': 5
        }

    def compare_solutions(self, student_code: str, optimal_code: str) -> Dict[str, Any]:
        """Compare a student solution against the optimal one on several axes.

        Combines embedding cosine similarity, logic-pattern overlap, approach
        classification, quality metrics and complexity.  On any error the
        same dict shape is returned with neutral/zero values so callers can
        always index the result.
        """
        try:
            student_embedding = self.generate_code_embedding(student_code)
            optimal_embedding = self.generate_code_embedding(optimal_code)

            similarity = self.calculate_cosine_similarity(student_embedding, optimal_embedding)

            student_patterns = self.extract_logic_patterns_enhanced(student_code)
            optimal_patterns = self.extract_logic_patterns_enhanced(optimal_code)

            student_approach = self.analyze_approach_enhanced(student_code)
            optimal_approach = self.analyze_approach_enhanced(optimal_code)

            student_quality = self.assess_semantic_quality(student_code)
            optimal_quality = self.assess_semantic_quality(optimal_code)

            return {
                'semantic_similarity': float(similarity),
                'student_patterns': student_patterns,
                'optimal_patterns': optimal_patterns,
                'pattern_overlap': len(set(student_patterns) & set(optimal_patterns)),
                'approach_comparison': {
                    'student': student_approach,
                    'optimal': optimal_approach,
                    'matches': student_approach == optimal_approach
                },
                'quality_comparison': {
                    'student_readability': student_quality['readability_score'],
                    'optimal_readability': optimal_quality['readability_score'],
                    'student_efficiency': student_quality['efficiency_level'],
                    'optimal_efficiency': optimal_quality['efficiency_level']
                },
                'complexity_comparison': self.compare_complexity_enhanced(student_code, optimal_code)
            }

        except Exception as e:
            logger.error(f"β Error comparing solutions: {str(e)}")
            return {
                'semantic_similarity': 0.0,
                'student_patterns': [],
                'optimal_patterns': [],
                'pattern_overlap': 0,
                'approach_comparison': {'error': str(e)},
                'quality_comparison': {'error': str(e)},
                'complexity_comparison': 'unable_to_compare'
            }

    def calculate_cosine_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Cosine similarity of two equal-length vectors; 0.0 on mismatch or error."""
        try:
            if len(vec1) != len(vec2) or not vec1 or not vec2:
                return 0.0

            vec1_tensor = torch.tensor(vec1)
            vec2_tensor = torch.tensor(vec2)

            # unsqueeze(0): cosine_similarity expects batched inputs.
            similarity = torch.nn.functional.cosine_similarity(
                vec1_tensor.unsqueeze(0),
                vec2_tensor.unsqueeze(0)
            )

            return float(similarity.item())

        except Exception as e:
            logger.error(f"β Error calculating similarity: {str(e)}")
            return 0.0

    def compare_complexity_enhanced(self, code1: str, code2: str) -> Dict[str, Any]:
        """Rank the two solutions' estimated time complexity against each other."""
        complexity1 = self.analyze_complexity_enhanced(code1)
        complexity2 = self.analyze_complexity_enhanced(code2)

        # Lower rank = better; unknown classes sort last (999).
        complexity_rank = {
            'O(1)': 1, 'O(log n)': 2, 'O(n)': 3,
            'O(n log n)': 4, 'O(nΒ²)': 5, 'O(nΒ³)': 6
        }

        rank1 = complexity_rank.get(complexity1['time'], 999)
        rank2 = complexity_rank.get(complexity2['time'], 999)

        return {
            'student_complexity': complexity1,
            'optimal_complexity': complexity2,
            'efficiency_comparison': 'better' if rank1 < rank2 else 'worse' if rank1 > rank2 else 'same',
            'recommendation': self._get_complexity_recommendation(complexity1, complexity2)
        }

    def _get_complexity_recommendation(self, student_comp: Dict, optimal_comp: Dict) -> str:
        """Turn a time-complexity comparison into a one-sentence recommendation."""
        if student_comp['time'] == optimal_comp['time']:
            return "Excellent! Your solution has optimal time complexity"
        elif student_comp['time'] in ['O(nΒ²)', 'O(nΒ³)'] and optimal_comp['time'] == 'O(n)':
            return "Consider using built-in functions to improve from quadratic to linear complexity"
        elif student_comp['time'] == 'O(n)' and optimal_comp['time'] == 'O(1)':
            return "Good approach, but there might be a constant-time solution"
        else:
            return "Your complexity is acceptable, but optimization is possible"

    def _clean_code_for_analysis(self, code: str) -> str:
        """Strip indentation and drop blank lines before tokenization."""
        lines = [line.strip() for line in code.split('\n') if line.strip()]
        return '\n'.join(lines)
|
|
|
|
|
# Module-wide singleton; built lazily so importing this module stays cheap
# (model downloads only happen on first real use).
analyzer = None


def get_analyzer():
    """Return the shared SemanticAnalyzer, constructing it on first call."""
    global analyzer
    if analyzer is not None:
        return analyzer
    analyzer = SemanticAnalyzer()
    return analyzer
|
|
|
def process_semantic_analysis(
    student_code: str,
    question_text: str,
    question_id: str = "default",
    need_optimal_solution: bool = True
) -> str:
    """Run the full semantic-analysis pipeline and return a JSON string.

    Steps: validate input -> embed code -> CodeT5/heuristic analysis ->
    optionally generate an optimal solution and compare against it ->
    assemble insights, recommendations and metadata.  Always returns valid
    JSON; failures are reported with ``success: False`` rather than raised.
    """
    start_time = time.time()

    try:
        logger.info(f"π§ Starting enhanced semantic analysis for question: {question_id}")

        semantic_analyzer = get_analyzer()

        # Reject empty / whitespace-only submissions up front.
        if not student_code or not student_code.strip():
            return json.dumps({
                'success': False,
                'error': 'Empty code provided',
                'processing_time_ms': int((time.time() - start_time) * 1000)
            })

        logger.info("π Generating code embedding...")
        code_embedding = semantic_analyzer.generate_code_embedding(student_code)

        logger.info("π Performing enhanced analysis...")
        codet5_analysis = semantic_analyzer.analyze_with_codet5(student_code, question_text)

        optimal_solution = None
        if need_optimal_solution:
            logger.info("π‘ Generating optimal solution...")
            optimal_solution = semantic_analyzer.generate_optimal_solution(question_text)

        comparison = None
        if optimal_solution:
            logger.info("βοΈ Performing enhanced comparison...")
            comparison = semantic_analyzer.compare_solutions(student_code, optimal_solution['code'])

        insights = generate_comprehensive_insights(
            student_code,
            codet5_analysis,
            comparison,
            optimal_solution
        )

        processing_time = time.time() - start_time

        results = {
            'success': True,
            'processing_time_ms': int(processing_time * 1000),
            'semantic_analysis': {
                # Truncated to the first 100 dims to keep the payload small.
                'code_embedding': code_embedding[:100],
                'embedding_size': len(code_embedding),
                'logic_patterns': codet5_analysis['logic_patterns'],
                'approach_analysis': codet5_analysis['approach_analysis'],
                'complexity_analysis': codet5_analysis['complexity_analysis'],
                'semantic_quality': codet5_analysis['semantic_quality'],
                'code_summary': codet5_analysis['code_summary']
            },
            'optimal_solution': optimal_solution,
            'solution_comparison': comparison,
            'semantic_insights': insights,
            'recommendations': generate_recommendations(codet5_analysis, comparison),
            'metadata': {
                'question_id': question_id,
                'analysis_version': '3.1-enhanced-ai',
                'models_used': ['CodeBERT', 'CodeT5'] if semantic_analyzer.models_loaded else ['Fallback'],
                'models_status': 'loaded' if semantic_analyzer.models_loaded else 'fallback',
                'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
                'processing_stage': 'semantic_analysis'
            }
        }

        # BUGFIX: the original log literal was split across two physical
        # lines by a corrupted emoji, which made the module unparseable.
        logger.info(f"Enhanced semantic analysis completed in {processing_time:.2f}s")
        return json.dumps(results, indent=2)

    except Exception as e:
        logger.error(f"β Error in semantic analysis: {str(e)}")
        logger.error(traceback.format_exc())
        return json.dumps({
            'success': False,
            'error': str(e),
            'processing_time_ms': int((time.time() - start_time) * 1000),
            'fallback_analysis': 'Enhanced analysis unavailable due to error',
            'metadata': {
                'analysis_version': '3.1-enhanced-ai',
                'error_occurred': True,
                'timestamp': time.strftime('%Y-%m-%d %H:%M:%S')
            }
        })
|
|
|
def generate_comprehensive_insights(
    student_code: str,
    codet5_analysis: Dict,
    comparison: Optional[Dict] = None,
    optimal_solution: Optional[Dict] = None
) -> List[str]:
    """Build human-readable insight sentences from the analysis results.

    ``student_code`` and ``optimal_solution`` are part of the stable call
    signature; the sentences themselves are derived from ``codet5_analysis``
    and, when present, ``comparison``.  Always returns at least one insight.
    """
    findings: List[str] = []

    # Pattern-level observation (most impressive wins).
    tags = set(codet5_analysis['logic_patterns'])
    if tags & {'builtin_max', 'builtin_min', 'builtin_sum'}:
        findings.append("Excellent! Student demonstrates advanced understanding by using Python built-in functions")
    elif 'iterative_conditional' in tags:
        findings.append("Good logical thinking demonstrated with iterative comparison approach")
    elif {'function_definition', 'return_statement'} <= tags:
        findings.append("Proper function structure with clear return logic")

    # Approach-level observation: first matching keyword decides.
    approach = codet5_analysis['approach_analysis']
    for needle, sentence in (
        ('builtin', "Optimal algorithmic approach chosen - highly efficient solution"),
        ('iterative', "Solid iterative approach, shows good programming fundamentals"),
        ('custom', "Creative custom approach, demonstrates independent problem-solving"),
    ):
        if needle in approach:
            findings.append(sentence)
            break

    # Complexity observation.
    complexity = codet5_analysis['complexity_analysis']
    if complexity['time'] == 'O(n)' and complexity['space'] == 'O(1)':
        findings.append("Excellent time and space complexity - very efficient solution")
    elif complexity['time'] in ('O(nΒ²)', 'O(nΒ³)'):
        findings.append("Solution works correctly but could benefit from complexity optimization")

    # Quality observation.
    quality = codet5_analysis['semantic_quality']
    if quality['readability_score'] >= 8:
        findings.append("Code is highly readable with good programming practices")
    elif quality['efficiency_level'] == 'high':
        findings.append("Solution demonstrates awareness of efficient programming techniques")

    # Comparison-derived observations (only when a comparison was computed).
    if comparison:
        score = comparison['semantic_similarity']
        if score > 0.8:
            findings.append("Student's solution is semantically very similar to the optimal approach")
        elif score > 0.6:
            findings.append("Good understanding shown, with opportunities for further optimization")
        elif score > 0.4:
            findings.append("Correct approach with different implementation style")

        matched = comparison['pattern_overlap']
        expected = len(comparison['optimal_patterns'])
        if expected > 0 and matched / expected > 0.7:
            findings.append("Strong pattern recognition - matches most optimal solution patterns")

    # Guarantee a non-empty result.
    return findings or ["Student shows basic understanding of the problem and provides a working solution"]
|
|
|
def generate_recommendations(codet5_analysis: Dict, comparison: Optional[Dict] = None) -> List[str]:
    """Turn the analysis (and optional comparison) into actionable study tips."""
    tips: List[str] = []

    # Manual loop where a builtin would do the job.
    patterns = codet5_analysis['logic_patterns']
    if 'iterative_conditional' in patterns and 'builtin_max' not in patterns:
        tips.append("Consider using built-in max() or min() functions for better efficiency")

    # Quadratic or worse estimated runtime.
    if codet5_analysis['complexity_analysis']['time'] in ('O(nΒ²)', 'O(nΒ³)'):
        tips.append("Try to reduce algorithmic complexity using more efficient approaches")

    # Readability and defensive-coding gaps.
    quality = codet5_analysis['semantic_quality']
    if quality['readability_score'] < 7:
        tips.append("Add comments or use more descriptive variable names for better readability")
    if 'input_validation' not in quality['best_practices']:
        tips.append("Consider adding input validation for more robust code")

    # Low similarity to the reference solution.
    if comparison and comparison['semantic_similarity'] < 0.6:
        tips.append("Review the optimal solution to learn alternative approaches")

    return tips
|
|
|
|
|
def gradio_interface(student_code, question_text, need_optimal):
    """Thin Gradio wrapper: validate the code box, then delegate to the pipeline."""
    if student_code.strip():
        return process_semantic_analysis(
            student_code=student_code,
            question_text=question_text,
            question_id="gradio_test",
            need_optimal_solution=need_optimal
        )
    # Whitespace-only / empty submission: short-circuit with a JSON error.
    return json.dumps({
        'error': 'Please provide student code for analysis',
        'success': False
    }, indent=2)
|
|
|
|
|
# Gradio UI definition: three inputs (student code, question text, an
# "also generate the optimal solution" flag) mapped to a single JSON
# textbox output.  Examples pre-fill the classic max/min/sum exercises.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(
            label="Student Code",
            placeholder="Enter Python code here...",
            lines=12,
            value="def find_max(numbers):\n    max_val = numbers[0]\n    for num in numbers:\n        if num > max_val:\n            max_val = num\n    return max_val"
        ),
        gr.Textbox(
            label="Question Text",
            placeholder="Enter the question...",
            lines=2,
            value="Find the maximum number in a list"
        ),
        gr.Checkbox(
            label="Generate Optimal Solution",
            value=True
        )
    ],
    outputs=gr.Textbox(
        label="Semantic Analysis Results (JSON)",
        lines=25,
        show_copy_button=True
    ),
    title="π§ CodeLab Semantic Analysis - Stage 3 (Fixed)",
    description="""
    Advanced semantic analysis using CodeBERT and CodeT5 models for educational code evaluation.
    This system analyzes code semantics, generates optimal solutions, and provides educational insights.
    """,
    # Each example is [student_code, question_text, need_optimal].
    examples=[
        [
            "def find_max(numbers):\n    return max(numbers)",
            "Find the maximum number in a list",
            True
        ],
        [
            "def find_min(arr):\n    minimum = arr[0]\n    for i in range(1, len(arr)):\n        if arr[i] < minimum:\n            minimum = arr[i]\n    return minimum",
            "Find the minimum number in an array",
            True
        ],
        [
            "def calculate_sum(nums):\n    total = 0\n    for num in nums:\n        total += num\n    return total",
            "Calculate the sum of all numbers in a list",
            True
        ]
    ],
    theme=gr.themes.Soft(),
    analytics_enabled=False
)
|
|
|
|
|
# Script entry point: serve the Gradio app on all interfaces, port 7860
# (the conventional Hugging Face Spaces port).
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860
    )
|
|
|
|