import copy
import os
from typing import Any, Dict, Iterator, List, Optional

import requests
import yaml


class TocTreeHandler:
    """Keep the Korean ``_toctree.yml`` in sync with newly translated documents.

    The handler downloads the English and Korean toctrees of the
    ``huggingface/transformers`` docs, replaces a ``"(번역중) <English title>"``
    placeholder entry ("번역중" = "in translation") with the translated title and
    the real ``local`` path, and commits the result through a PR agent.
    """

    # Seconds to wait for GitHub before giving up; without a timeout
    # ``requests.get`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.en_toctree_url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/en/_toctree.yml"
        self.ko_toctree_url = "https://raw.githubusercontent.com/huggingface/transformers/main/docs/source/ko/_toctree.yml"
        self.local_docs_path = "docs/source/ko"
        # Result of the last successful ``process_pr_commit`` call; ``None``
        # means there is nothing to commit.
        self.updated_ko_toctree: Optional[list] = None

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @classmethod
    def _iter_entries(cls, items: Any) -> Iterator[Dict[str, Any]]:
        """Yield every dict entry of a toctree depth-first, parents first.

        Non-list containers and non-dict items are skipped, so a missing or
        ``None`` ``sections`` value is treated as "no children".
        """
        if not isinstance(items, list):
            return
        for item in items:
            if isinstance(item, dict):
                yield item
                yield from cls._iter_entries(item.get('sections'))

    @staticmethod
    def _local_from_filepath(filepath: str) -> str:
        """Convert an English source path into a toctree ``local`` value.

        ``"docs/source/en/model_doc/bert.md"`` becomes ``"model_doc/bert"``.
        Only a trailing ``.md`` is removed; a ``.md`` occurring elsewhere in
        the path (for example inside a directory name) is left untouched.
        """
        local = filepath.replace("docs/source/en/", "")
        if local.endswith(".md"):
            local = local[:-len(".md")]
        return local

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------
    def fetch_toctree(self, url: str) -> List[Dict[str, Any]]:
        """Fetch and parse YAML from URL.

        Args:
            url: Address of a raw ``_toctree.yml`` file.

        Returns:
            The parsed YAML document (a list of section dicts for a toctree).

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            yaml.YAMLError: If the response body is not valid YAML.
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return yaml.safe_load(response.text)

    def get_en_toctree(self) -> List[Dict[str, Any]]:
        """Get English toctree structure."""
        return self.fetch_toctree(self.en_toctree_url)

    def get_ko_toctree(self) -> List[Dict[str, Any]]:
        """Get Korean toctree structure."""
        return self.fetch_toctree(self.ko_toctree_url)

    # ------------------------------------------------------------------
    # Titles
    # ------------------------------------------------------------------
    def extract_title_mappings(self, en_data: List[Dict], ko_data: List[Dict]) -> Dict[str, str]:
        """Extract title mappings between English and Korean.

        Entries are paired by their ``local`` value rather than by position,
        so the two trees may differ in ordering or completeness.

        Args:
            en_data: Parsed English toctree.
            ko_data: Parsed Korean toctree.

        Returns:
            Mapping of English title to Korean title for every ``local`` that
            appears in both trees with a non-empty title on each side.
        """
        # Index the Korean tree once; the first title seen for a local wins.
        ko_titles: Dict[str, str] = {}
        for entry in self._iter_entries(ko_data):
            local = entry.get('local')
            title = entry.get('title', '')
            if isinstance(local, str) and title:
                ko_titles.setdefault(local, title)

        mappings: Dict[str, str] = {}
        for entry in self._iter_entries(en_data):
            local = entry.get('local')
            if not isinstance(local, str):
                continue
            en_title = entry.get('title', '')
            ko_title = ko_titles.get(local)
            if en_title and ko_title:
                mappings[en_title] = ko_title
        return mappings

    def translate_title(self, en_title: str) -> str:
        """Translate English title to Korean using LLM.

        Args:
            en_title: Title to translate.

        Returns:
            The stripped translation, or ``en_title`` unchanged when the
            translator cannot be imported, raises, or returns an empty string.
        """
        try:
            # Imported lazily so this module can be loaded without the
            # translator package; an ImportError falls through to the
            # English-title fallback below.
            from translator.content import llm_translate

            prompt = (
                "Translate the following English documentation title to Korean. "
                "Return only the translated title, nothing else. \n"
                f"English title: {en_title} Korean title:"
            )

            # llm_translate returns a pair; only the second element is used.
            _callback_result, translated_title = llm_translate(prompt)
            # An empty answer would produce a blank toctree title.
            return translated_title.strip() or en_title
        except Exception as e:
            print(f"Error translating title '{en_title}': {e}")
            return en_title

    def create_local_toctree(self, en_title: str, local_file_path: str) -> Dict[str, str]:
        """Create local toctree entry with Korean title and local path.

        An existing Korean title is reused when the upstream toctrees already
        contain one for ``en_title``; otherwise the title is translated.

        Args:
            en_title: English title of the document.
            local_file_path: Value for the entry's ``local`` key.

        Returns:
            ``{'local': ..., 'title': ...}``. If anything fails (for example
            the toctrees cannot be downloaded) the English title is used.
        """
        try:
            # First try to get Korean title from existing mappings
            en_data = self.get_en_toctree()
            ko_data = self.get_ko_toctree()

            title_mappings = self.extract_title_mappings(en_data, ko_data)
            ko_title = title_mappings.get(en_title)

            # If no existing mapping, translate the title
            if not ko_title:
                ko_title = self.translate_title(en_title)

            return {
                'local': local_file_path,
                'title': ko_title
            }
        except Exception as e:
            print(f"Error creating local toctree: {e}")
            return {
                'local': local_file_path,
                'title': en_title
            }

    # ------------------------------------------------------------------
    # Toctree editing
    # ------------------------------------------------------------------
    def find_and_update_translation_entry(self, ko_toctree_data, target_local: str, english_title: str, korean_title: str):
        """Find the "(번역중) <English title>" placeholder entry and update it.

        The first matching entry (depth-first, parents before children) is
        modified in place.

        Args:
            ko_toctree_data: Parsed Korean toctree; mutated on success.
            target_local: New value for the entry's ``local`` key.
            english_title: English title embedded in the placeholder.
            korean_title: New value for the entry's ``title`` key.

        Returns:
            ``True`` if an entry was updated, ``False`` otherwise.
        """
        target_title_pattern = f"(번역중) {english_title}"

        for entry in self._iter_entries(ko_toctree_data):
            if entry.get('title') == target_title_pattern:
                entry['local'] = target_local
                entry['title'] = korean_title
                return True
        return False

    def create_updated_toctree_with_replacement(self, ko_toctree: list, target_local: str) -> list:
        """Update Korean toctree by finding and updating translation entry.

        Args:
            ko_toctree: Parsed Korean toctree; never modified.
            target_local: ``local`` path of the newly translated document.

        Returns:
            A modified deep copy on success. On any failure (English title not
            found, placeholder not found, or an exception) the original
            ``ko_toctree`` object is returned unchanged.
        """
        try:
            # Step 1: Get English toctree and find the English title for target_local
            en_toctree = self.get_en_toctree()
            english_title = self.find_title_for_local(en_toctree, target_local)

            if not english_title:
                print(f"Could not find English title for local: {target_local}")
                return ko_toctree

            print(f"Found English title: {english_title} for local: {target_local}")

            # Step 2: Translate the English title to Korean
            korean_title = self.translate_title(english_title)
            print(f"Translated Korean title: {korean_title}")

            # Step 3: Make a deep copy to avoid modifying original
            updated_toctree = copy.deepcopy(ko_toctree)

            # Step 4: Find and update the "(번역중) <English title>" entry
            updated = self.find_and_update_translation_entry(
                updated_toctree, target_local, english_title, korean_title
            )

            if updated:
                print(f"Successfully updated translation entry: local={target_local}, title={korean_title}")
                return updated_toctree
            else:
                print(f"Could not find '(번역중) {english_title}' entry to update")
                return ko_toctree

        except Exception as e:
            print(f"Error creating updated toctree: {e}")
            return ko_toctree

    def find_title_for_local(self, toctree_data, target_local: str) -> Optional[str]:
        """Find title for given local path in toctree.

        Args:
            toctree_data: Parsed toctree to search.
            target_local: ``local`` value to look for.

        Returns:
            The first non-empty title whose entry has ``local == target_local``
            (depth-first), or ``None`` when there is no such entry.
        """
        for entry in self._iter_entries(toctree_data):
            if entry.get('local') == target_local:
                title = entry.get('title', '')
                if title:
                    return title
        return None

    # ------------------------------------------------------------------
    # Workflow
    # ------------------------------------------------------------------
    def process_pr_commit(self, filepath: str):
        """Process PR commit by updating Korean toctree with translated entry.

        On success the new toctree is stored in ``self.updated_ko_toctree``;
        on failure that attribute is left as ``None``.

        Args:
            filepath: English source path, e.g. ``docs/source/en/foo.md``.
        """
        # Drop any result left over from a previous call so a failure here can
        # never cause an older toctree to be committed.
        self.updated_ko_toctree = None

        # Get filepath without prefix
        filepath_without_prefix = self._local_from_filepath(filepath)

        # Get Korean toctree
        ko_toctree = self.get_ko_toctree()

        # Update Korean toctree with replacement logic
        updated_ko_toctree = self.create_updated_toctree_with_replacement(ko_toctree, filepath_without_prefix)

        # The helper hands back the untouched input when it fails, so an
        # unchanged tree means there is nothing worth committing.
        if not updated_ko_toctree or updated_ko_toctree == ko_toctree:
            print(f"Failed to create updated Korean toctree for local: {filepath_without_prefix}")
            return

        print("Successfully updated Korean toctree")

        # Store the updated toctree for commit
        self.updated_ko_toctree = updated_ko_toctree

    def commit_and_push_toctree(self, pr_agent, owner: str, repo_name: str, branch_name: str):
        """Commit and push toctree updates as a separate commit.

        Args:
            pr_agent: Object exposing ``create_or_update_file(owner=, repo_name=,
                path=, message=, content=, branch_name=)``. Its return value is
                treated as a status string that starts with ``"SUCCESS"`` on
                success.
            owner: Repository owner passed through to ``pr_agent``.
            repo_name: Repository name passed through to ``pr_agent``.
            branch_name: Branch to commit to.

        Returns:
            Dict with ``status`` (``"success"`` or ``"error"``) and ``message``;
            on success also ``commit_message``. Exceptions are reported as an
            error dict rather than raised.
        """
        try:
            # Use the updated toctree created by LLM
            ko_data = getattr(self, 'updated_ko_toctree', None)
            if not ko_data:
                print("No updated Korean toctree available")
                return {"status": "error", "message": "No updated toctree to commit"}

            # Convert to YAML string
            toctree_content = yaml.dump(ko_data, allow_unicode=True,
                                        default_flow_style=False, sort_keys=False)

            # Create toctree commit message
            commit_message = "docs: update Korean documentation table of contents"

            # Commit toctree file
            file_result = pr_agent.create_or_update_file(
                owner=owner,
                repo_name=repo_name,
                path="docs/source/ko/_toctree.yml",
                message=commit_message,
                content=toctree_content,
                branch_name=branch_name
            )

            if file_result.startswith("SUCCESS"):
                return {
                    "status": "success",
                    "message": f"Toctree committed successfully: {file_result}",
                    "commit_message": commit_message
                }
            else:
                return {
                    "status": "error",
                    "message": f"Toctree commit failed: {file_result}"
                }

        except Exception as e:
            return {
                "status": "error",
                "message": f"Error committing toctree: {str(e)}"
            }

    def update_toctree_after_translation(
        self,
        translation_result: dict,
        filepath: str,
        pr_agent,
        github_config: dict
    ) -> Optional[dict]:
        """Update toctree after successful translation PR.

        Args:
            translation_result: Result from translation PR workflow; must have
                a ``status`` key and, when not an error, a ``branch`` key.
            filepath: Original file path
            pr_agent: GitHub PR agent instance
            github_config: GitHub configuration dictionary with ``owner`` and
                ``repo_name`` keys.

        Returns:
            ``None`` when the translation itself failed (nothing is attempted);
            otherwise a dictionary with the toctree update result, which has
            ``status == "error"`` if no updated toctree could be produced.
        """
        if translation_result["status"] == "error":
            return None

        try:
            # Process toctree update with LLM
            self.process_pr_commit(filepath)

            # Commit toctree as separate commit. commit_and_push_toctree
            # reports an explicit error when no updated toctree is available.
            return self.commit_and_push_toctree(
                pr_agent=pr_agent,
                owner=github_config["owner"],
                repo_name=github_config["repo_name"],
                branch_name=translation_result["branch"]
            )

        except Exception as e:
            return {
                "status": "error",
                "message": f"Error updating toctree: {str(e)}"
            }