"""
Generic utility functions used across the application.
"""

import random
import re

# Literal prompt tail (title: \") that some responses echo back verbatim.
_PROMPT_ECHO_PREFIX = 'title: \\"'

# Keys every sanitized document must contain, in the order they are back-filled.
# A tuple (not a set) keeps the output identical from run to run.
_REQUIRED_FIELDS = ("title", "description", "tags")

# Opening Markdown fence with an optional yaml/yml language tag.
_OPENING_FENCE = re.compile(r"^```(?:yaml|yml)?", re.IGNORECASE)

# A YAML document start/end marker standing alone on its line.
_DOCUMENT_MARKER_LINE = re.compile(
    r"^[ \t]*(?:---|\.\.\.)[ \t]*\r?$", re.MULTILINE
)

# "title: value" / "description : value" (whitespace before the colon allowed).
_SCALAR_FIELD_LINE = re.compile(r"^(title|description)\s*:(.*)$")
_TAGS_LINE = re.compile(r"^tags\s*:")

# Either an existing backslash escape (kept as is) or a bare double quote.
_ESCAPE_PAIR_OR_BARE_QUOTE = re.compile(r'\\.|"')

# Rewrites for responses that repeat the key inside the value, e.g.
# title: "title: "Something""  ->  title: "Something".
# The single-quote templates are written with double-quoted Python strings:
# a template containing \' would be left untouched by re.sub and leak a
# backslash into the output.
_DUPLICATE_TITLE_REWRITES = (
    (re.compile(r'title:\s*"title:\s*"([^"]+)""?'), r'title: "\1"'),
    (re.compile(r"title:\s*'title:\s*'([^']+)''?"), r"title: '\1'"),
    (re.compile(r'title:\s*"title:\s*\'([^\']+)\'"?'), r'title: "\1"'),
    (re.compile(r'title:\s*\'title:\s*"([^"]+)"\'?'), r"title: '\1'"),
    (re.compile(r"title:\s*title:\s*(.+)$", re.MULTILINE), r"title: \1"),
)


def generate_seed() -> int:
    """Return a random integer seed in the unsigned 32-bit range.

    The result lies in ``[0, 2**32 - 1]`` inclusive, so ``0`` is possible.
    """
    return random.randint(0, 2**32 - 1)


def _strip_wrappers(text: str) -> str:
    """Remove Markdown code fences and standalone YAML document markers."""
    # Strip first so a leading blank line cannot hide the opening fence.
    text = _OPENING_FENCE.sub("", text.strip(), count=1)
    # Everything from the next fence on is the closing fence plus trailing text.
    text = text.split("```")[0]
    # Only whole-line markers are dropped; "---" inside a value is content.
    return _DOCUMENT_MARKER_LINE.sub("", text).strip()


def _drop_prompt_echo(text: str) -> str:
    r"""Strip an echoed ``title: \"`` prefix and the matching closing ``\"``."""
    if not text.startswith(_PROMPT_ECHO_PREFIX):
        return text
    text = text[len(_PROMPT_ECHO_PREFIX):].strip()
    first_line, newline, rest = text.partition("\n")
    first_line = first_line.rstrip()
    if first_line.endswith('\\"'):
        first_line = first_line[:-2]
    return f"{first_line}{newline}{rest}"


def _escape_double_quotes(value: str) -> str:
    """Backslash-escape bare double quotes, leaving existing escapes alone."""
    return _ESCAPE_PAIR_OR_BARE_QUOTE.sub(
        lambda m: '\\"' if m.group(0) == '"' else m.group(0), value
    )


def _quote_scalar(field_name: str, raw_value: str) -> str:
    """Return *raw_value* as a double-quoted scalar for *field_name*."""
    value = raw_value.strip()
    # Remove one layer of matching outer quotes.
    if (value.startswith('"') and value.endswith('"')) or (
        value.startswith("'") and value.endswith("'")
    ):
        value = value[1:-1]
    # A title whose value again starts with "title:" (any case) is unwrapped.
    if field_name == "title" and value.lower().startswith("title:"):
        value = value[6:].strip().strip("\"'")
    return f'"{_escape_double_quotes(value)}"'


def _normalize_tag(raw_tag: str) -> str:
    """Reduce a tag to lower-case ASCII letters, digits and hyphens."""
    tag = raw_tag.strip().strip("\"'")
    if not tag:
        return ""
    tag = re.sub(r"[^\x00-\x7F]+", "", tag)  # Remove non-ASCII
    tag = re.sub(r"[^a-zA-Z0-9\s-]", "", tag)  # Keep alphanumerics, space, hyphen
    return tag.strip().lower().replace(" ", "-")


def sanitize_yaml_response(response_text: str) -> str:
    """
    Sanitize and format an AI response into a small YAML document.

    The input is treated as loosely formatted text with ``title``,
    ``description`` and ``tags`` keys. Processing steps:

    1. Surrounding whitespace, Markdown code fences (with an optional
       ``yaml``/``yml`` tag) and standalone ``---`` / ``...`` lines are removed.
    2. An echoed ``title: \\"`` prompt prefix is removed; text that does not
       start with a ``title`` key is wrapped as the title value.
    3. Repeated ``title:`` prefixes inside the title value are collapsed.
    4. ``title`` / ``description`` values are emitted double-quoted with
       embedded double quotes escaped; following non-key lines are appended
       to the current value.
    5. ``- item`` lines after ``tags:`` are normalized to lower-case
       hyphenated ASCII; items that normalize to nothing are dropped.
    6. Missing keys are appended with placeholder values, always in the
       order title, description, tags.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The sanitized lines joined with newlines.
    """
    text = _drop_prompt_echo(_strip_wrappers(response_text))

    # Only wrap with title if the text does not already start with one.
    if not text.startswith(("title:", "title :")):
        text = f"title: {text}"

    for pattern, template in _DUPLICATE_TITLE_REWRITES:
        text = pattern.sub(template, text)

    sanitized_lines = []
    seen_fields = set()
    current_field = None

    for raw_line in text.split("\n"):
        stripped = raw_line.strip()
        if not stripped:
            continue

        scalar_match = _SCALAR_FIELD_LINE.match(stripped)
        if scalar_match:
            current_field = scalar_match.group(1)
            seen_fields.add(current_field)
            quoted = _quote_scalar(current_field, scalar_match.group(2))
            sanitized_lines.append(f"{current_field}: {quoted}")
        elif _TAGS_LINE.match(stripped):
            current_field = "tags"
            seen_fields.add("tags")
            sanitized_lines.append("tags:")
        elif stripped.startswith("-") and current_field == "tags":
            tag = _normalize_tag(stripped[1:])
            if tag:
                sanitized_lines.append(f" - {tag}")
        elif current_field in ("title", "description"):
            # Multi-line value: fold this line into the previous quoted scalar.
            continuation = stripped.strip("\"'")
            if continuation:
                previous = sanitized_lines[-1]
                if previous.endswith('"'):
                    # Re-open the closing quote, append, and close it again.
                    # The appended text is escaped like a first-line value.
                    escaped = _escape_double_quotes(continuation)
                    sanitized_lines[-1] = f'{previous[:-1]} {escaped}"'

    # Ensure the YAML has all required fields, in a stable order.
    for field in _REQUIRED_FIELDS:
        if field in seen_fields:
            continue
        if field == "tags":
            sanitized_lines.extend(["tags:", " - default"])
        else:
            sanitized_lines.append(f'{field}: "No {field} provided"')

    return "\n".join(sanitized_lines)