import logging

logger = logging.getLogger(__name__)
logger.info("Importing Feedback.py...")

import json
import os
import re
import unicodedata
from typing import Any, Dict, List, Optional, Tuple

import openai
import tiktoken
from docx import Document


class Grader:
    def __init__(self, api_key, config: Optional[Dict[str, Any]] = None):
        logger.info("Initializing Grader...")
        self.api_key = api_key
        openai.api_key = self.api_key
        try:
            self.client = openai.OpenAI(api_key=self.api_key)
        except AttributeError:
            # Pre-1.0 openai packages have no OpenAI client class;
            # fall back to the module-level API.
            self.client = openai
        try:
            self.encoding = tiktoken.encoding_for_model("gpt-4o")
            logger.info("Successfully initialized tiktoken encoding")
        except Exception as e:
            logger.warning(f"Failed to initialize tiktoken: {e}")
            self.encoding = None
        # Fixed config; the `config` argument is accepted but not merged at runtime.
        self.config = {
            'enable_validation': True,
            'enable_enhanced_logging': True,
            'fallback_to_legacy': True,
            'aggregate_scores': True,
            'log_missing_categories': True
        }
        logger.info(f"Grader initialized with config: {self.config}")

    def count_tokens(self, text):
        if not self.encoding:
            # Rough heuristic: ~4 characters per token for English text.
            return len(text) // 4
        try:
            return len(self.encoding.encode(text))
        except Exception as e:
            logger.warning(f"Error counting tokens: {e}")
            return len(text) // 4

    def process_full_text(self, text):
        if not text:
            return text, 0, False

        # Store the original text for comparison
        original_text = text

        # Conservative character filtering: drop control-category characters
        # (Cc, Cf, Cs, Co, Cn) but keep newlines, carriage returns, and tabs.
        text = ''.join(
            char for char in text
            if unicodedata.category(char)[0] != 'C' or char in '\n\r\t'
        )

        # Normalize Unicode
        text = unicodedata.normalize('NFKC', text)

        # Selective replacements for characters that commonly break downstream processing
        replacements = {
            '\u201c': '"',    # Left double quotation mark
            '\u201d': '"',    # Right double quotation mark
            '\u2018': "'",    # Left single quotation mark
            '\u2019': "'",    # Right single quotation mark
            '\u2013': '-',    # En dash
            '\u2014': '--',   # Em dash (replace with two dashes)
            '\u2022': '-',    # Bullet (ASCII stand-in)
            '\u00a0': ' ',    # Non-breaking space
            '\u2026': '...',  # Horizontal ellipsis
        }
        for old_char, new_char in replacements.items():
            text = text.replace(old_char, new_char)

        # Log if significant changes were made
        if len(text) != len(original_text):
            logger.info(f"Text processing: {len(original_text)} -> {len(text)} characters")
            if len(text) < len(original_text) * 0.95:  # More than 5% was removed
                logger.warning(
                    f"Significant text reduction detected: "
                    f"{len(original_text)} -> {len(text)} characters"
                )

        token_count = self.count_tokens(text)
        logger.info(f"Full text token count: {token_count} - NO TRUNCATION")
        return text, token_count, False

    def read_file(self, file_path):
        logger.info(f"Reading file: {file_path}")
        if file_path.endswith('.txt'):
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read().strip()
        elif file_path.endswith('.docx'):
            doc = Document(file_path)
            return '\n'.join([para.text for para in doc.paragraphs]).strip()
        else:
            raise ValueError("Unsupported file format. Please use .txt or .docx files.")
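    # Illustrative usage of the helpers above ("sk-..." is a placeholder key,
    # "essay.docx" a hypothetical file):
    #
    #   grader = Grader(api_key="sk-...")
    #   text = grader.read_file("essay.docx")
    #   cleaned, tokens, truncated = grader.process_full_text(text)
    #   # `truncated` is always False: the full text is kept, never cut.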
    def extract_json_from_text(self, text):
        try:
            return json.loads(text)
        except json.JSONDecodeError as e:
            logger.warning(f"Initial JSON parsing failed: {str(e)}")
            logger.info(f"Raw response text: {text[:500]}...")  # First 500 chars for debugging

            start_idx = text.find('{')
            end_idx = text.rfind('}')
            if start_idx == -1 or end_idx == -1:
                logger.error("No JSON object markers found in response")
                raise ValueError("No valid JSON object found in the response")

            json_str = text[start_idx:end_idx + 1]
            logger.info(f"Extracted JSON string: {json_str[:200]}...")  # First 200 chars

            # Remove markdown formatting
            json_str = json_str.replace('```json', '').replace('```', '')

            # Remove control-category characters except newlines, returns, tabs
            json_str = ''.join(
                char for char in json_str
                if unicodedata.category(char)[0] != 'C' or char in '\n\r\t'
            )

            # Normalize Unicode
            json_str = unicodedata.normalize('NFKC', json_str)

            # Replace common problematic characters
            replacements = {
                '\u201c': '"',    # Left double quotation mark
                '\u201d': '"',    # Right double quotation mark
                '\u2018': "'",    # Left single quotation mark
                '\u2019': "'",    # Right single quotation mark
                '\u2013': '-',    # En dash
                '\u2014': '--',   # Em dash
                '\u2022': '-',    # Bullet (ASCII stand-in)
                '\u00a0': ' ',    # Non-breaking space
                '\u2026': '...',  # Horizontal ellipsis
            }
            for old_char, new_char in replacements.items():
                json_str = json_str.replace(old_char, new_char)

            # Clean up whitespace and formatting
            json_str = re.sub(r'[\r\n\t]+', ' ', json_str)
            json_str = re.sub(r'\s+', ' ', json_str)

            # Remove trailing commas before closing brackets/braces
            json_str = re.sub(r',\s*([}\]])', r'\1', json_str)

            # Ensure property names are quoted
            json_str = re.sub(r'([{,])\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', json_str)

            # Handle escaped quotes properly: protect them, strip backslashes
            # that do not begin a valid JSON escape, then restore. (The exact
            # substitution pattern was garbled in the source; this is a
            # conservative reconstruction.)
            json_str = json_str.replace('\\"', '___ESCAPED_QUOTE___')
            json_str = re.sub(r'(?<!\\)\\(?!["\\/bfnrtu])', '', json_str)
            json_str = json_str.replace('___ESCAPED_QUOTE___', '\\"')

            try:
                return json.loads(json_str)
            except json.JSONDecodeError as final_error:
                logger.error(f"JSON parsing failed after cleanup: {final_error}")
                raise ValueError(f"Could not parse JSON from the response: {final_error}")

    # Name reconstructed: the original def line was garbled in the source;
    # "grammar_only_analysis" matches the log message below.
    def grammar_only_analysis(self, text: str) -> Dict[str, Any]:
        logger.info("Starting grammar-only analysis")
        if not text.strip():
            return {
                'line_by_line_grammar': [],
                'overall_grammar_summary': {'error': 'No text provided'}
            }

        text = self.process_full_text(text)[0]
        lines = text.split('\n')
        all_line_grammar = []

        for line_index, line in enumerate(lines):
            if not line.strip():
                all_line_grammar.append({
                    'line_number': line_index + 1,
                    'line_content': line,
                    'line_type': 'empty',
                    'grammar_score': 100,
                    'grammar_issues': [],
                    'positive_points': ['Proper line spacing'],
                    'suggestions': []
                })
                continue
            try:
                line_grammar = self._analyze_line_grammar_only(line, line_index + 1)
                all_line_grammar.append(line_grammar)
            except Exception as e:
                logger.error(f"Error analyzing line {line_index + 1} for grammar: {str(e)}")
                all_line_grammar.append({
                    'line_number': line_index + 1,
                    'line_content': line,
                    'line_type': 'error',
                    'grammar_score': 0,
                    'grammar_issues': [{'type': 'processing_error', 'description': str(e)}],
                    'positive_points': [],
                    'suggestions': ['Please review this line manually']
                })

        return {'line_by_line_grammar': all_line_grammar}
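    # Illustrative example of the JSON-recovery path in extract_json_from_text:
    #
    #   raw = 'Result:\n```json\n{"score": 90,}\n```'
    #   grader.extract_json_from_text(raw)  # -> {'score': 90}
    #
    # The slice between the outermost braces drops the fences, and the
    # trailing comma is removed before the second json.loads attempt.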
}}""" messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": f"Line {line_number}: {line}"} ] try: response = self.client.chat.completions.create( model="gpt-4.1", messages=messages, max_tokens=8000, temperature=0.3, ) feedback_raw = response.choices[0].message.content feedback_dict = self.extract_json_from_text(feedback_raw) feedback_dict['line_number'] = line_number feedback_dict['line_content'] = line return feedback_dict except Exception as e: logger.error(f"Error in grammar-only line analysis: {str(e)}") return { 'line_number': line_number, 'line_content': line, 'line_type': 'error', 'grammar_score': 0, 'grammar_issues': [{'type': 'processing_error', 'description': str(e)}], 'positive_points': [], 'suggestions': ['Please review this line manually'] } def transform_feedback_to_app_format(self, feedback_dict): """ Transform the detailed feedback format to the format expected by the app. This ensures compatibility with existing API responses and focuses on showing issues. """ try: # Check if we have the new detailed format if "categories" in feedback_dict: # Transform from new detailed format to app format evaluation_and_scoring = [] # Map category names to app format category_mapping = { "grammar_punctuation": "Grammar & Punctuation", "vocabulary_usage": "Vocabulary Usage", "sentence_structure": "Sentence Structure", "content_relevance": "Content Relevance & Depth", "argument_development": "Argument Development", "evidence_citations": "Evidence & Citations", "structure_organization": "Structure & Organization", "conclusion_quality": "Conclusion Quality" } for category_key, category_name in category_mapping.items(): if category_key in feedback_dict["categories"]: category_data = feedback_dict["categories"][category_key] # Transform issues to app format with detailed information issues_list = [] for issue in category_data.get("issues", []): issue_info = { "before": issue.get("before", ""), "after": issue.get("after", ""), "explanation": issue.get("explanation", "") } issues_list.append(issue_info) # Create the evaluation and scoring entry (focus on issues, not scores) evaluation_and_scoring.append({ "label": category_name, "analysis": category_data.get("analysis", f"{category_name} analysis completed"), "issuesCount": len(issues_list), "issuesList": issues_list, "positivePoints": category_data.get("positive_points", []) }) # Transform essay structure to match the desired format with new CSS Examiner criteria essay_structure = [] if "essay_structure" in feedback_dict: structure_data = feedback_dict["essay_structure"] # Introduction & Thesis section intro_features = [] if 'Introduction & Thesis' in structure_data: intro_data = structure_data['Introduction & Thesis'] for key, value in intro_data.items(): is_correct = value.get('value', True) explanation = value.get('explanation', '') error_message = f"Missing: {key.lower()}. {explanation}" if not is_correct else None intro_features.append({ "label": key, "isCorrect": is_correct, "errorMessage": error_message }) essay_structure.append({ "label": "Introduction & Thesis", "features": intro_features }) # Body Development section body_features = [] if 'Body Development' in structure_data: body_data = structure_data['Body Development'] for key, value in body_data.items(): is_correct = value.get('value', True) explanation = value.get('explanation', '') error_message = f"Missing: {key.lower()}. 
{explanation}" if not is_correct else None body_features.append({ "label": key, "isCorrect": is_correct, "errorMessage": error_message }) essay_structure.append({ "label": "Body Development", "features": body_features }) # Content Quality section content_features = [] if 'Content Quality' in structure_data: content_data = structure_data['Content Quality'] for key, value in content_data.items(): is_correct = value.get('value', True) explanation = value.get('explanation', '') error_message = f"Missing: {key.lower()}. {explanation}" if not is_correct else None content_features.append({ "label": key, "isCorrect": is_correct, "errorMessage": error_message }) essay_structure.append({ "label": "Content Quality", "features": content_features }) # Evidence & Citations section evidence_features = [] if 'Evidence & Citations' in structure_data: evidence_data = structure_data['Evidence & Citations'] for key, value in evidence_data.items(): is_correct = value.get('value', True) explanation = value.get('explanation', '') error_message = f"Missing: {key.lower()}. {explanation}" if not is_correct else None evidence_features.append({ "label": key, "isCorrect": is_correct, "errorMessage": error_message }) essay_structure.append({ "label": "Evidence & Citations", "features": evidence_features }) # Conclusion section conclusion_features = [] if 'Conclusion' in structure_data: conclusion_data = structure_data['Conclusion'] for key, value in conclusion_data.items(): is_correct = value.get('value', True) explanation = value.get('explanation', '') error_message = f"Missing: {key.lower()}. {explanation}" if not is_correct else None conclusion_features.append({ "label": key, "isCorrect": is_correct, "errorMessage": error_message }) essay_structure.append({ "label": "Conclusion", "features": conclusion_features }) # Create the transformed response with focus on issues transformed_response = { "evaluationAndScoring": evaluation_and_scoring, "essayStructure": essay_structure, "overall_feedback": feedback_dict.get("overall_feedback", "Comprehensive analysis completed"), "improvement_priorities": feedback_dict.get("improvement_priorities", []), "total_issues_found": sum(len(section.get("issuesList", [])) for section in evaluation_and_scoring), "vocabulary_issues": [ issue for section in evaluation_and_scoring if section["label"] == "Vocabulary Usage" for issue in section.get("issuesList", []) ], "grammar_issues": [ issue for section in evaluation_and_scoring if section["label"] == "Grammar & Punctuation" for issue in section.get("issuesList", []) ] } # Add question-specific feedback if present if "question_specific_feedback" in feedback_dict: transformed_response["question_specific_feedback"] = feedback_dict["question_specific_feedback"] return transformed_response else: # Already in app format, return as is return feedback_dict except Exception as e: logger.error(f"Error transforming feedback format: {str(e)}") # Return fallback format return { "evaluationAndScoring": [ { "label": "Grammar & Punctuation", "analysis": "Basic analysis completed", "issuesCount": 0, "issuesList": [], "positivePoints": ["Essay submitted successfully"] } ], "essayStructure": [ { "label": "Introduction & Thesis", "features": [ { "label": "Clear Thesis Statement", "isCorrect": False, "errorMessage": "Missing: clear thesis statement. The essay lacks a clear, well-defined thesis statement that guides the reader." 
    def rephrase_text_with_gpt(self, essay_text: str, system_prompt: str = None) -> dict:
        """
        Rephrase and correct the essay to meet CSS (Central Superior Services)
        standards. Provides comprehensive corrections for grammar, structure,
        style, and content.
        """
        if system_prompt is None:
            system_prompt = """You are an expert CSS (Central Superior Services) essay examiner and editor. Your task is to provide the BEST VERSION of the given essay by making comprehensive improvements while maintaining the original meaning and intent.

IMPORTANT REQUIREMENTS:
1. CORRECT ALL GRAMMAR AND PUNCTUATION ERRORS
2. IMPROVE SENTENCE STRUCTURE AND FLOW
3. ENHANCE VOCABULARY USAGE WITH APPROPRIATE ACADEMIC LANGUAGE
4. STRENGTHEN ARGUMENT DEVELOPMENT AND LOGICAL FLOW
5. IMPROVE ESSAY STRUCTURE (Introduction, Body, Conclusion)
6. ADD TRANSITIONAL PHRASES FOR BETTER COHERENCE
7. ENSURE PROPER PARAGRAPH ORGANIZATION
8. MAINTAIN CSS EXAM STANDARDS AND EXPECTATIONS
9. KEEP THE ORIGINAL MEANING AND ARGUMENTS INTACT
10. USE FORMAL ACADEMIC TONE APPROPRIATE FOR CSS EXAMS

CORRECTION GUIDELINES:
- Fix all grammatical errors (subject-verb agreement, tense consistency, etc.)
- Correct punctuation (commas, semicolons, apostrophes, etc.)
- Improve sentence variety and complexity
- Enhance vocabulary with sophisticated academic terms
- Strengthen topic sentences and supporting evidence
- Add logical transitions between paragraphs
- Ensure clear thesis statement and conclusion
- Maintain professional tone throughout
- Follow CSS essay format and style requirements

Return ONLY the corrected essay text - no explanations, no markdown formatting, just the improved essay ready for CSS examination."""

        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Please provide the BEST VERSION of this CSS essay with all corrections applied:\n\n{essay_text}"}
            ]

            # Size max_tokens from the input: allow roughly 2x the input
            # length plus headroom for corrections, capped at 16k tokens.
            input_tokens = self.count_tokens(essay_text)
            max_tokens_needed = min(input_tokens * 2 + 2000, 16000)

            response = self.client.chat.completions.create(
                model="gpt-4.1",
                messages=messages,
                max_tokens=max_tokens_needed,  # Dynamic token limit
                temperature=0.3,  # Lower temperature for more consistent corrections
            )
            rephrased_text = response.choices[0].message.content.strip()

            # Clean up any potential formatting artifacts
            rephrased_text = rephrased_text.replace('```', '').replace('**', '').replace('*', '')
            rephrased_text = rephrased_text.strip()

            # Verify that no significant content was lost
            original_words = len(essay_text.split())
            rephrased_words = len(rephrased_text.split())
            if rephrased_words < original_words * 0.7:  # Lost more than 30% of content
                logger.warning(
                    f"Significant content loss detected: {original_words} -> {rephrased_words} words"
                )
                # Return the original text with a note
                return {
                    "rephrased_text": essay_text,
                    "error": (
                        f"Content loss detected ({original_words} -> {rephrased_words} words). "
                        f"Returning original text."
                    ),
                    "warning": "Rephrasing may have truncated content"
                }

            logger.info(f"Rephrasing successful: {original_words} -> {rephrased_words} words")
            return {"rephrased_text": rephrased_text, "error": None}

        except Exception as e:
            logger.error(f"Error in rephrasing essay: {str(e)}")
            return {"rephrased_text": essay_text, "error": str(e)}
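
# Illustrative end-to-end sketch, not part of the grading service itself.
# OPENAI_API_KEY and "sample_essay.txt" are assumptions for demonstration;
# running this makes a live API call.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise SystemExit("Set OPENAI_API_KEY to run this demo.")
    grader = Grader(api_key=api_key)
    essay = grader.read_file("sample_essay.txt")  # hypothetical sample file
    cleaned, tokens, _ = grader.process_full_text(essay)
    print(f"Token count: {tokens}")
    result = grader.rephrase_text_with_gpt(cleaned)
    if result["error"]:
        print(f"Rephrasing failed: {result['error']}")
    else:
        print(result["rephrased_text"][:500])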