def _format_number(value: float) -> str:
    """Format *value* with at most two decimals, dropping trailing zeros."""
    return f"{value:.2f}".rstrip('0').rstrip('.')


def _make_segment(seg_words: list[dict], start: float, end: float) -> dict:
    """Build one segment dict from the words collected for it."""
    joined = " ".join(x["word"] for x in seg_words)
    return {
        "start": start,
        "end": end,
        # 'segment' is the historical key; 'text' is the documented one.
        "segment": joined,
        "text": joined,
        # Copy so later mutation of the caller's accumulator cannot leak in.
        "words": list(seg_words),
    }


def regroup_words(
    words: list[dict],
    max_len: float = 15.0,
    gap: float = 0.50,
) -> list[dict]:
    """
    Group consecutive words into segments.

    A new segment is started before a word when either the segment would
    span more than ``max_len`` (measured from the segment start to that
    word's end) or the silence between the previous word's end and that
    word's start exceeds ``gap``.

    Args:
        words: List of word dictionaries with keys: 'word', 'start', 'end'
        max_len: Maximum span of one segment, in the same unit as the
            word timestamps
        gap: Maximum pause tolerated inside one segment, same unit

    Returns:
        List of segments with keys: 'start', 'end', 'text', 'words'
        ('segment' is also provided and holds the same string as 'text').
    """
    if not words:
        return []

    segs: list[dict] = []
    seg_words: list[dict] = []
    seg_start = words[0]["start"]
    last_end = seg_start

    for w in words:
        over_max = (w["end"] - seg_start) > max_len
        long_gap = (w["start"] - last_end) > gap
        # Never flush an empty segment: a single over-long word still
        # gets a segment of its own.
        if seg_words and (over_max or long_gap):
            segs.append(_make_segment(seg_words, seg_start, last_end))
            seg_words = []
            seg_start = w["start"]
        seg_words.append(w)
        last_end = w["end"]

    # flush final segment
    segs.append(_make_segment(seg_words, seg_start, last_end))
    return segs


def text_to_words(text: str) -> list[dict]:
    """
    Convert text format like "word[start:end] word[start:end]..." to word list.

    The literal placeholder ``xxx`` in either timestamp position is read as
    0.0. Entries whose timestamps are not valid floats are skipped.

    Args:
        text: String in format "It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16]..."

    Returns:
        List of word dictionaries with keys: 'word', 'start', 'end'
    """
    import re

    if not text.strip():
        return []

    # Pattern to match word[start:end] format
    pattern = r'(\S+?)\[([^:]+):([^\]]+)\]'

    words = []
    for word, start_str, end_str in re.findall(pattern, text):
        try:
            start = float(start_str) if start_str != 'xxx' else 0.0
            end = float(end_str) if end_str != 'xxx' else 0.0
        except ValueError:
            # Skip invalid entries (deliberate best-effort parsing)
            continue
        words.append({'word': word, 'start': start, 'end': end})
    return words


def words_to_text(words: list[dict]) -> str:
    """
    Convert word list to text format "word[start:end] word[start:end]...".

    Missing keys fall back to an empty word and 0.0 timestamps.

    Args:
        words: List of word dictionaries with keys: 'word', 'start', 'end'

    Returns:
        String in format "It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16]..."
    """
    if not words:
        return ""

    # Timestamps are formatted to max 2 decimal places
    return " ".join(
        f"{word.get('word', '')}"
        f"[{_format_number(word.get('start', 0.0))}"
        f":{_format_number(word.get('end', 0.0))}]"
        for word in words
    )


def _segments_to_text(segments: list[dict]) -> str:
    """Render each segment's own words as one line, blank line between."""
    # Use the words regroup_words assigned to the segment. Re-deriving
    # membership from time ranges loses zero-duration words at a segment's
    # end and can duplicate overlapping words across segments.
    return '\n\n'.join(
        words_to_text(seg["words"]) for seg in segments if seg["words"]
    )


def json_to_text(json_data: dict) -> str:
    """
    Convert JSON lyrics data to text format for display.
    Only uses the 'word' layer from the JSON structure.
    Groups words into lines (max 5 s per line, split on pauses > 0.5 s).

    Args:
        json_data: Dictionary with 'word' key containing list of word objects

    Returns:
        String with one "word[start:end] word[start:end]..." line per
        segment, segments separated by a blank line. Empty string when
        json_data is not a dict or has no 'word' key.
    """
    if not isinstance(json_data, dict) or 'word' not in json_data:
        return ""

    segments = regroup_words(json_data['word'], max_len=5, gap=0.50)
    return _segments_to_text(segments)


def round_to_quarter_beats(beat_position: float) -> float:
    """Round beat position to nearest quarter note for sample display."""
    return round(beat_position * 4) / 4


def beats_to_seconds(beat_position: float, bpm: float) -> float:
    """Convert beat position to time in seconds.

    Raises:
        ZeroDivisionError: if bpm is 0.
    """
    return (beat_position * 60.0) / bpm


def seconds_to_beats(time_seconds: float, bpm: float) -> float:
    """Convert time in seconds to beat position."""
    return (time_seconds * bpm) / 60.0


def _time_text_to_beat_words(
    text: str, bpm: float, round_to_quarters: bool
) -> list[dict]:
    """Parse time-based text and return the words with beat timestamps."""
    beat_words = []
    for word in text_to_words(text):
        start_beat = seconds_to_beats(word['start'], bpm)
        end_beat = seconds_to_beats(word['end'], bpm)
        # Round to quarter notes for sample display
        if round_to_quarters:
            start_beat = round_to_quarter_beats(start_beat)
            end_beat = round_to_quarter_beats(end_beat)
        beat_words.append(
            {'word': word['word'], 'start': start_beat, 'end': end_beat}
        )
    return beat_words


def _beat_text_to_time_words(text: str, bpm: float) -> list[dict]:
    """Parse beats-based text and return the words with second timestamps."""
    return [
        {
            'word': word['word'],
            'start': beats_to_seconds(word['start'], bpm),
            'end': beats_to_seconds(word['end'], bpm),
        }
        for word in text_to_words(text)
    ]


def convert_text_time_to_beats(
    text: str, bpm: float, round_to_quarters: bool = False
) -> str:
    """
    Convert time-based text format to beats-based format.

    Args:
        text: String in format "word[start_sec:end_sec] ..."
        bpm: Beats per minute for conversion
        round_to_quarters: If True, round beats to quarter notes
            (for sample display)

    Returns:
        String in format "word[start_beat:end_beat] ..."
    """
    if not text.strip():
        return ""
    return words_to_text(_time_text_to_beat_words(text, bpm, round_to_quarters))


def beats_to_text_with_regrouping(
    text: str, bpm: float, round_to_quarters: bool = False
) -> str:
    """
    Convert time-based text to beats format with regrouping (like time mode).

    Words are grouped into lines of at most 20 beats, split on pauses
    longer than 2 beats.

    Args:
        text: String in format "word[start_sec:end_sec] ..."
        bpm: Beats per minute for conversion
        round_to_quarters: If True, round beats to quarter notes
            (for sample display)

    Returns:
        String with beats format grouped into lines
    """
    if not text.strip():
        return ""

    beat_words = _time_text_to_beat_words(text, bpm, round_to_quarters)
    # Group using beat positions instead of seconds: 20 beats max, 2 beat gap
    segments = regroup_words(beat_words, max_len=20, gap=2.0)
    return _segments_to_text(segments)


def convert_text_beats_to_time(text: str, bpm: float) -> str:
    """
    Convert beats-based text format to time-based format.

    Args:
        text: String in format "word[start_beat:end_beat] ..."
        bpm: Beats per minute for conversion

    Returns:
        String in format "word[start_sec:end_sec] ..."
    """
    if not text.strip():
        return ""
    # Beats format uses the same pattern as time format
    return words_to_text(_beat_text_to_time_words(text, bpm))


def convert_text_beats_to_time_with_regrouping(text: str, bpm: float) -> str:
    """
    Convert beats-based text format to time-based format while preserving
    line structure.

    Empty lines are kept as empty lines. Non-empty lines that contain no
    parsable "word[start:end]" entry are omitted from the output.

    Args:
        text: String in format "word[start_beat:end_beat] ..."
            (can be multi-line)
        bpm: Beats per minute for conversion

    Returns:
        String in format "word[start_sec:end_sec] ..." with preserved
        line breaks
    """
    if not text.strip():
        return ""

    # Process each line separately to preserve segmentation
    converted_lines = []
    for line in text.split('\n'):
        line = line.strip()
        if not line:
            # Preserve empty lines
            converted_lines.append("")
            continue
        time_words = _beat_text_to_time_words(line, bpm)
        if time_words:
            converted_lines.append(words_to_text(time_words))
    return "\n".join(converted_lines)


def text_to_json(text: str) -> dict:
    """
    Convert text format to JSON structure expected by the model.
    Creates the 'word' layer that the model needs.
    Handles multi-line input by joining lines.

    Args:
        text: String in format "word[start:end] word[start:end]..."
            (can be multi-line)

    Returns:
        Dictionary with 'word' key containing list of word objects
    """
    # Join multiple lines into single line for parsing
    single_line_text = ' '.join(
        line.strip() for line in text.split('\n') if line.strip()
    )
    return {"word": text_to_words(single_line_text)}