Spaces:

declare-lab
/

JAM

Running on Zero

JAM / utils.py

renhang

update space

bc3ffb2 5 days ago

10.7 kB


	def regroup_words(
	    words: list[dict],
	    max_len: float = 15.0,
	    gap: float = 0.50,
	) -> list[dict]:
	    """
	    Split an ordered word list into consecutive timed segments.

	    A new segment is opened at a word when the current segment already
	    holds at least one word and either
	      * that word's 'end' lies more than ``max_len`` past the segment start, or
	      * that word's 'start' lies more than ``gap`` past the previous word's 'end'.

	    Args:
	        words: Word dictionaries with keys 'word', 'start', 'end'
	        max_len: Largest allowed distance from segment start to a word's end
	        gap: Largest allowed pause between two consecutive words

	    Returns:
	        List of segment dictionaries with keys 'start', 'end', 'segment',
	        where 'segment' is the member words joined by single spaces.
	        An empty input yields an empty list.
	    """
	    if not words:
	        return []

	    def _close(first: float, last: float, members: list[dict]) -> dict:
	        # Package the words collected so far as one finished segment
	        return {
	            "start": first,
	            "end": last,
	            "segment": " ".join(member["word"] for member in members),
	        }

	    finished = []
	    pending = []
	    opened_at = words[0]["start"]
	    previous_end = opened_at

	    for item in words:
	        too_long = (item["end"] - opened_at) > max_len
	        paused = (item["start"] - previous_end) > gap

	        # Never close an empty segment: the first word always joins one
	        if pending and (too_long or paused):
	            finished.append(_close(opened_at, previous_end, pending))
	            pending = []
	            opened_at = item["start"]

	        pending.append(item)
	        previous_end = item["end"]

	    # Whatever is still pending forms the last segment
	    finished.append(_close(opened_at, previous_end, pending))
	    return finished


	def text_to_words(text: str) -> list[dict]:
	    """
	    Convert text format like "word[start:end] word[start:end]..." to word list.

	    A timestamp written as the literal 'xxx' is read as 0.0. Entries whose
	    timestamps cannot be converted with float() are skipped.

	    Timestamps may not contain '[' or ']'. Because of that, an entry with a
	    missing closing bracket is rejected on its own instead of also consuming
	    the entry that follows it, and a word that itself contains brackets
	    (e.g. "a[b][1:2]") keeps them as part of the word.

	    Args:
	        text: String in format "It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16]..."

	    Returns:
	        List of word dictionaries with keys: 'word', 'start', 'end'
	    """
	    import re

	    if not text.strip():
	        return []

	    # word[start:end]; the timestamp groups exclude brackets so a match can
	    # never run past its own "[...]" into a neighbouring entry
	    pattern = r'(\S+?)\[([^:\[\]]+):([^\[\]]+)\]'

	    def _to_time(raw: str) -> float:
	        # 'xxx' is the placeholder for an unknown timestamp
	        return 0.0 if raw == 'xxx' else float(raw)

	    words = []
	    for word, start_str, end_str in re.findall(pattern, text):
	        try:
	            start = _to_time(start_str)
	            end = _to_time(end_str)
	        except ValueError:
	            # Skip invalid entries
	            continue
	        words.append({
	            'word': word,
	            'start': start,
	            'end': end,
	        })

	    return words


	def words_to_text(words: list[dict]) -> str:
	    """
	    Render a word list as "word[start:end] word[start:end]...".

	    Timestamps are printed with at most two decimals; trailing zeros and a
	    trailing decimal point are removed (5.50 -> "5.5", 6.00 -> "6").
	    A missing 'word' falls back to '' and a missing 'start'/'end' to 0.0.

	    Args:
	        words: List of word dictionaries with keys: 'word', 'start', 'end'

	    Returns:
	        String in format "It's[4.96:5.52] a[5.52:5.84] long[5.84:6.16]...",
	        or "" when there are no words
	    """
	    if not words:
	        return ""

	    def _trim(value: float) -> str:
	        # Two decimals, then drop padding zeros and a dangling '.'
	        return f"{value:.2f}".rstrip('0').rstrip('.')

	    return " ".join(
	        f"{entry.get('word', '')}"
	        f"[{_trim(entry.get('start', 0.0))}:{_trim(entry.get('end', 0.0))}]"
	        for entry in words
	    )


	def json_to_text(json_data: dict) -> str:
	    """
	    Convert JSON lyrics data to text format for display.
	    Only uses the 'word' layer from the JSON structure.
	    Groups words into lines with regroup_words (max_len=5, gap=0.50).

	    Every input word appears exactly once in the output, in its original
	    order. Line membership comes straight from the grouping rather than from
	    comparing timestamps afterwards, so zero-duration words are not lost and
	    words whose timings overlap a line boundary are not repeated.

	    Args:
	        json_data: Dictionary with 'word' key containing list of word objects

	    Returns:
	        String with words grouped into lines separated by a blank line:
	        "word[start:end] word[start:end]...\\n\\nword[start:end]..."
	        Returns "" when json_data is not a dict, has no 'word' key, or the
	        word list is empty.
	    """
	    if not isinstance(json_data, dict) or 'word' not in json_data:
	        return ""

	    words = json_data['word']
	    if not words:
	        return ""

	    # regroup_words reports each segment's span and joined text but not its
	    # member words. Feed it stand-ins whose 'word' is the index of the real
	    # word, so the joined text of a segment lists exactly its members.
	    stand_ins = [
	        {'word': str(index), 'start': word['start'], 'end': word['end']}
	        for index, word in enumerate(words)
	    ]
	    segments = regroup_words(stand_ins, max_len=5, gap=0.50)

	    # Convert each segment to text format
	    segment_lines = []
	    for seg in segments:
	        seg_words = [words[int(label)] for label in seg['segment'].split()]
	        segment_lines.append(words_to_text(seg_words))

	    return '\n\n'.join(segment_lines)


	def round_to_quarter_beats(beat_position: float) -> float:
	    """Snap a beat position to the nearest multiple of 0.25 (used for sample display)."""
	    quarter_count = round(beat_position * 4)
	    return quarter_count / 4


	def beats_to_seconds(beat_position: float, bpm: float) -> float:
	    """Return the time in seconds at which ``beat_position`` falls for tempo ``bpm``."""
	    beat_seconds = beat_position * 60.0
	    return beat_seconds / bpm


	def seconds_to_beats(time_seconds: float, bpm: float) -> float:
	    """Return the beat position reached after ``time_seconds`` at tempo ``bpm``."""
	    beats_times_sixty = time_seconds * bpm
	    return beats_times_sixty / 60.0


	def convert_text_time_to_beats(text: str, bpm: float, round_to_quarters: bool = False) -> str:
	    """
	    Rewrite "word[start_sec:end_sec] ..." text with beat positions.

	    The text is parsed with text_to_words, each timestamp is converted with
	    seconds_to_beats, and the result is printed with at most two decimals
	    (trailing zeros and a trailing decimal point removed).

	    Args:
	        text: String in format "word[start_sec:end_sec] ..."
	        bpm: Beats per minute for conversion
	        round_to_quarters: If True, round beats to quarter notes (for sample display)

	    Returns:
	        Single-line string in format "word[start_beat:end_beat] ...",
	        or "" for blank input
	    """
	    if not text.strip():
	        return ""

	    def _trim(value: float) -> str:
	        # Two decimals, then drop padding zeros and a dangling '.'
	        return f"{value:.2f}".rstrip('0').rstrip('.')

	    rendered = []
	    for entry in text_to_words(text):
	        first_beat = seconds_to_beats(entry['start'], bpm)
	        last_beat = seconds_to_beats(entry['end'], bpm)

	        if round_to_quarters:
	            first_beat = round_to_quarter_beats(first_beat)
	            last_beat = round_to_quarter_beats(last_beat)

	        rendered.append(f"{entry['word']}[{_trim(first_beat)}:{_trim(last_beat)}]")

	    return " ".join(rendered)


	def beats_to_text_with_regrouping(text: str, bpm: float, round_to_quarters: bool = False) -> str:
	    """
	    Convert time-based text to beats format with regrouping (like time mode).

	    Words are parsed with text_to_words, converted with seconds_to_beats and
	    grouped into lines with regroup_words (max_len=20 beats, gap=2.0 beats).

	    Every parsed word appears exactly once in the output, in its original
	    order. Line membership comes straight from the grouping rather than from
	    comparing beat positions afterwards, so a word whose start and end round
	    to the same quarter beat is not lost, and words whose beat ranges overlap
	    a line boundary are not repeated.

	    Args:
	        text: String in format "word[start_sec:end_sec] ..."
	        bpm: Beats per minute for conversion
	        round_to_quarters: If True, round beats to quarter notes (for sample display)

	    Returns:
	        String with beats format grouped into lines separated by a blank line,
	        or "" for blank input or when no word could be parsed
	    """
	    if not text.strip():
	        return ""

	    # First convert to beats format
	    beat_words = []
	    for word in text_to_words(text):
	        start_beat = seconds_to_beats(word['start'], bpm)
	        end_beat = seconds_to_beats(word['end'], bpm)

	        # Round to quarter notes for sample display
	        if round_to_quarters:
	            start_beat = round_to_quarter_beats(start_beat)
	            end_beat = round_to_quarter_beats(end_beat)

	        beat_words.append({
	            'word': word['word'],
	            'start': start_beat,
	            'end': end_beat,
	        })

	    # regroup_words reports each segment's span and joined text but not its
	    # member words. Feed it stand-ins whose 'word' is the index of the real
	    # word, so the joined text of a segment lists exactly its members.
	    stand_ins = [
	        {'word': str(index), 'start': item['start'], 'end': item['end']}
	        for index, item in enumerate(beat_words)
	    ]
	    segments = regroup_words(stand_ins, max_len=20, gap=2.0)  # 20 beats max, 2 beat gap

	    # Convert each segment to text format (words_to_text prints word[beat:beat])
	    segment_lines = []
	    for seg in segments:
	        seg_words = [beat_words[int(label)] for label in seg['segment'].split()]
	        segment_lines.append(words_to_text(seg_words))

	    return '\n\n'.join(segment_lines)


	def convert_text_beats_to_time(text: str, bpm: float) -> str:
	    """
	    Rewrite "word[start_beat:end_beat] ..." text with times in seconds.

	    The text is parsed with text_to_words (beats use the same bracket
	    syntax as times), each position is converted with beats_to_seconds, and
	    the result is printed with at most two decimals (trailing zeros and a
	    trailing decimal point removed).

	    Args:
	        text: String in format "word[start_beat:end_beat] ..."
	        bpm: Beats per minute for conversion

	    Returns:
	        Single-line string in format "word[start_sec:end_sec] ...",
	        or "" for blank input
	    """
	    if not text.strip():
	        return ""

	    def _trim(value: float) -> str:
	        # Two decimals, then drop padding zeros and a dangling '.'
	        return f"{value:.2f}".rstrip('0').rstrip('.')

	    rendered = []
	    for entry in text_to_words(text):
	        begins = beats_to_seconds(entry['start'], bpm)
	        finishes = beats_to_seconds(entry['end'], bpm)
	        rendered.append(f"{entry['word']}[{_trim(begins)}:{_trim(finishes)}]")

	    return " ".join(rendered)


	def convert_text_beats_to_time_with_regrouping(text: str, bpm: float) -> str:
	    """
	    Convert beats-based text to time-based text one line at a time.

	    Each input line is stripped and handled independently:
	      * a blank line is kept as an empty output line,
	      * a line with at least one parseable entry becomes one output line,
	      * a non-blank line with no parseable entry produces no output line.

	    Args:
	        text: String in format "word[start_beat:end_beat] ..." (can be multi-line)
	        bpm: Beats per minute for conversion

	    Returns:
	        String in format "word[start_sec:end_sec] ..." with output lines
	        joined by "\\n", or "" for blank input
	    """
	    if not text.strip():
	        return ""

	    def _trim(value: float) -> str:
	        # Two decimals, then drop padding zeros and a dangling '.'
	        return f"{value:.2f}".rstrip('0').rstrip('.')

	    output_lines = []
	    for raw_line in text.split('\n'):
	        content = raw_line.strip()

	        if content:
	            pieces = []
	            for entry in text_to_words(content):
	                begins = beats_to_seconds(entry['start'], bpm)
	                finishes = beats_to_seconds(entry['end'], bpm)
	                pieces.append(f"{entry['word']}[{_trim(begins)}:{_trim(finishes)}]")

	            if pieces:
	                output_lines.append(" ".join(pieces))
	        else:
	            # Blank lines carry the segmentation, so keep them
	            output_lines.append("")

	    return "\n".join(output_lines)


	def text_to_json(text: str) -> dict:
	    """
	    Build the {"word": [...]} structure from timed-word text.

	    Multi-line input is flattened first: every line is stripped, blank lines
	    are dropped, and the rest are joined with single spaces before being
	    parsed by text_to_words.

	    Args:
	        text: String in format "word[start:end] word[start:end]..." (can be multi-line)

	    Returns:
	        Dictionary with 'word' key containing list of word objects
	    """
	    stripped_lines = (raw_line.strip() for raw_line in text.split('\n'))
	    flattened = ' '.join(content for content in stripped_lines if content)
	    return {"word": text_to_words(flattened)}