asigalov61 committed on
Commit
f53e807
·
verified ·
1 Parent(s): 445e7e6

Upload TMIDIX.py

Browse files
Files changed (1) hide show
  1. TMIDIX.py +2020 -56
TMIDIX.py CHANGED
@@ -1,14 +1,12 @@
1
  #! /usr/bin/python3
2
 
3
-
4
  r'''###############################################################################
5
  ###################################################################################
6
  #
7
  #
8
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
9
- # Version 1.0
10
  #
11
- # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1342
12
  #
13
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
14
  #
@@ -21,19 +19,19 @@ r'''############################################################################
21
  #
22
  ###################################################################################
23
  ###################################################################################
24
- # Copyright 2025 Project Los Angeles / Tegridy Code
25
  #
26
- # Licensed under the Apache License, Version 2.0 (the "License");
27
- # you may not use this file except in compliance with the License.
28
- # You may obtain a copy of the License at
29
  #
30
- # http://www.apache.org/licenses/LICENSE-2.0
31
  #
32
- # Unless required by applicable law or agreed to in writing, software
33
- # distributed under the License is distributed on an "AS IS" BASIS,
34
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35
- # See the License for the specific language governing permissions and
36
- # limitations under the License.
37
  ###################################################################################
38
  ###################################################################################
39
  #
@@ -48,9 +46,23 @@ r'''############################################################################
48
  # Copyright 2020 Peter Billam
49
  #
50
  ###################################################################################
51
- ###################################################################################'''
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  import sys, struct, copy
 
54
  Version = '6.7'
55
  VersionDate = '20201120'
56
 
@@ -1440,14 +1452,14 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False,
1440
  ###################################################################################
1441
  #
1442
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
1443
- # Version 1.0
1444
  #
1445
  # Based upon and includes the amazing MIDI.py module v.6.7. by Peter Billam
1446
  # pjb.com.au
1447
  #
1448
  # Project Los Angeles
1449
- # Tegridy Code 2021
1450
- # https://github.com/Tegridy-Code/Project-Los-Angeles
 
1451
  #
1452
  ###################################################################################
1453
  ###################################################################################
@@ -1457,8 +1469,6 @@ import os
1457
 
1458
  import datetime
1459
 
1460
- import copy
1461
-
1462
  from datetime import datetime
1463
 
1464
  import secrets
@@ -1475,12 +1485,13 @@ import multiprocessing
1475
 
1476
  from itertools import zip_longest
1477
  from itertools import groupby
 
1478
  from collections import Counter
 
 
1479
 
1480
  from operator import itemgetter
1481
 
1482
- import sys
1483
-
1484
  from abc import ABC, abstractmethod
1485
 
1486
  from difflib import SequenceMatcher as SM
@@ -1490,6 +1501,18 @@ import math
1490
 
1491
  import matplotlib.pyplot as plt
1492
 
 
 
 
 
 
 
 
 
 
 
 
 
1493
  ###################################################################################
1494
  #
1495
  # Original TMIDI Tegridy helper functions
@@ -3842,7 +3865,10 @@ def chordify_score(score,
3842
  else:
3843
  return None
3844
 
3845
- def fix_monophonic_score_durations(monophonic_score):
 
 
 
3846
 
3847
  fixed_score = []
3848
 
@@ -3854,15 +3880,17 @@ def fix_monophonic_score_durations(monophonic_score):
3854
  nmt = monophonic_score[i+1][1]
3855
 
3856
  if note[1]+note[2] >= nmt:
3857
- note_dur = nmt-note[1]-1
3858
  else:
3859
  note_dur = note[2]
3860
 
3861
  new_note = [note[0], note[1], note_dur] + note[3:]
3862
-
3863
- fixed_score.append(new_note)
3864
-
3865
- fixed_score.append(monophonic_score[-1])
 
 
3866
 
3867
  elif type(monophonic_score[0][0]) == int:
3868
 
@@ -3872,15 +3900,17 @@ def fix_monophonic_score_durations(monophonic_score):
3872
  nmt = monophonic_score[i+1][0]
3873
 
3874
  if note[0]+note[1] >= nmt:
3875
- note_dur = nmt-note[0]-1
3876
  else:
3877
  note_dur = note[1]
3878
-
3879
  new_note = [note[0], note_dur] + note[2:]
3880
-
3881
- fixed_score.append(new_note)
3882
-
3883
- fixed_score.append(monophonic_score[-1])
 
 
3884
 
3885
  return fixed_score
3886
 
@@ -4142,15 +4172,16 @@ def tones_chord_to_pitches(tones_chord, base_pitch=60):
4142
  ###################################################################################
4143
 
4144
  def advanced_score_processor(raw_score,
4145
- patches_to_analyze=list(range(129)),
4146
- return_score_analysis=False,
4147
- return_enhanced_score=False,
4148
- return_enhanced_score_notes=False,
4149
- return_enhanced_monophonic_melody=False,
4150
- return_chordified_enhanced_score=False,
4151
- return_chordified_enhanced_score_with_lyrics=False,
4152
- return_score_tones_chords=False,
4153
- return_text_and_lyric_events=False
 
4154
  ):
4155
 
4156
  '''TMIDIX Advanced Score Processor'''
@@ -4179,6 +4210,20 @@ def advanced_score_processor(raw_score,
4179
  basic_single_track_score.append(ev)
4180
  num_tracks += 1
4181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4182
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4183
  basic_single_track_score.sort(key=lambda x: x[1])
4184
 
@@ -4193,7 +4238,7 @@ def advanced_score_processor(raw_score,
4193
  enhanced_single_track_score.append(event)
4194
  num_patch_changes += 1
4195
 
4196
- if event[0] == 'note':
4197
  if event[3] != 9:
4198
  event.extend([patches[event[3]]])
4199
  all_score_patches.extend([patches[event[3]]])
@@ -4693,7 +4738,8 @@ def augment_enhanced_score_notes(enhanced_score_notes,
4693
  ceil_timings=False,
4694
  round_timings=False,
4695
  legacy_timings=True,
4696
- sort_drums_last=False
 
4697
  ):
4698
 
4699
  esn = copy.deepcopy(enhanced_score_notes)
@@ -4736,6 +4782,16 @@ def augment_enhanced_score_notes(enhanced_score_notes,
4736
  e[4] = max(1, min(127, e[4] + pitch_shift))
4737
 
4738
  pe = enhanced_score_notes[i]
 
 
 
 
 
 
 
 
 
 
4739
 
4740
  if full_sorting:
4741
 
@@ -6676,12 +6732,23 @@ def find_next_bar(escore_notes, bar_time, start_note_idx, cur_bar):
6676
  def align_escore_notes_to_bars(escore_notes,
6677
  bar_time=4000,
6678
  trim_durations=False,
6679
- split_durations=False
 
6680
  ):
6681
 
6682
  #=============================================================================
 
 
 
 
 
 
 
 
 
 
6683
 
6684
- aligned_escore_notes = copy.deepcopy(escore_notes)
6685
 
6686
  abs_time = 0
6687
  nidx = 0
@@ -6693,13 +6760,13 @@ def align_escore_notes_to_bars(escore_notes,
6693
 
6694
  while next_bar:
6695
 
6696
- next_bar = find_next_bar(escore_notes, bar_time, nidx, bcount)
6697
 
6698
  if next_bar:
6699
-
6700
- gescore_notes = escore_notes[nidx:next_bar[1]]
6701
  else:
6702
- gescore_notes = escore_notes[nidx:]
6703
 
6704
  original_timings = [delta] + [(b[1]-a[1]) for a, b in zip(gescore_notes[:-1], gescore_notes[1:])]
6705
  adj_timings = adjust_numbers_to_sum(original_timings, bar_time)
@@ -6714,7 +6781,8 @@ def align_escore_notes_to_bars(escore_notes,
6714
  nidx += 1
6715
 
6716
  if next_bar:
6717
- delta = escore_notes[next_bar[1]][1]-escore_notes[next_bar[1]-1][1]
 
6718
  bcount += 1
6719
 
6720
  #=============================================================================
@@ -11125,13 +11193,17 @@ def escore_notes_core(escore_notes, core_len=128):
11125
 
11126
  ###################################################################################
11127
 
11128
- def multiprocessing_wrapper(function, data_list):
11129
 
11130
  with multiprocessing.Pool() as pool:
11131
 
11132
  results = []
11133
 
11134
- for result in tqdm.tqdm(pool.imap_unordered(function, data_list), total=len(data_list)):
 
 
 
 
11135
  results.append(result)
11136
 
11137
  return results
@@ -11182,7 +11254,1899 @@ def rle_decode_ones(encoding, size=(128, 128)):
11182
  return matrix
11183
 
11184
  ###################################################################################
11185
- #
11186
- # This is the end of the TMIDI X Python module
11187
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11188
  ###################################################################################
 
1
  #! /usr/bin/python3
2
 
 
3
  r'''###############################################################################
4
  ###################################################################################
5
  #
6
  #
7
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
 
8
  #
9
+ # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1450
10
  #
11
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
12
  #
 
19
  #
20
  ###################################################################################
21
  ###################################################################################
22
+ # Copyright 2025 Project Los Angeles / Tegridy Code
23
  #
24
+ # Licensed under the Apache License, Version 2.0 (the "License");
25
+ # you may not use this file except in compliance with the License.
26
+ # You may obtain a copy of the License at
27
  #
28
+ # http://www.apache.org/licenses/LICENSE-2.0
29
  #
30
+ # Unless required by applicable law or agreed to in writing, software
31
+ # distributed under the License is distributed on an "AS IS" BASIS,
32
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
33
+ # See the License for the specific language governing permissions and
34
+ # limitations under the License.
35
  ###################################################################################
36
  ###################################################################################
37
  #
 
46
  # Copyright 2020 Peter Billam
47
  #
48
  ###################################################################################
49
+ ###################################################################################
50
+ '''
51
+
52
+ ###################################################################################
53
+
54
+ __version__ = "25.5.6"
55
+
56
+ print('=' * 70)
57
+ print('TMIDIX Python module')
58
+ print('Version:', __version__)
59
+ print('=' * 70)
60
+ print('Loading module...')
61
+
62
+ ###################################################################################
63
 
64
  import sys, struct, copy
65
+
66
  Version = '6.7'
67
  VersionDate = '20201120'
68
 
 
1452
  ###################################################################################
1453
  #
1454
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
 
1455
  #
1456
  # Based upon and includes the amazing MIDI.py module v.6.7. by Peter Billam
1457
  # pjb.com.au
1458
  #
1459
  # Project Los Angeles
1460
+ # Tegridy Code 2025
1461
+ #
1462
+ # https://github.com/Tegridy-Code/Project-Los-Angeles
1463
  #
1464
  ###################################################################################
1465
  ###################################################################################
 
1469
 
1470
  import datetime
1471
 
 
 
1472
  from datetime import datetime
1473
 
1474
  import secrets
 
1485
 
1486
  from itertools import zip_longest
1487
  from itertools import groupby
1488
+
1489
  from collections import Counter
1490
+ from collections import defaultdict
1491
+ from collections import OrderedDict
1492
 
1493
  from operator import itemgetter
1494
 
 
 
1495
  from abc import ABC, abstractmethod
1496
 
1497
  from difflib import SequenceMatcher as SM
 
1501
 
1502
  import matplotlib.pyplot as plt
1503
 
1504
+ import psutil
1505
+
1506
+ import json
1507
+
1508
+ from pathlib import Path
1509
+
1510
+ import shutil
1511
+
1512
+ import hashlib
1513
+
1514
+ from array import array
1515
+
1516
  ###################################################################################
1517
  #
1518
  # Original TMIDI Tegridy helper functions
 
3865
  else:
3866
  return None
3867
 
3868
+ def fix_monophonic_score_durations(monophonic_score,
3869
+ min_notes_gap=1,
3870
+ min_notes_dur=1
3871
+ ):
3872
 
3873
  fixed_score = []
3874
 
 
3880
  nmt = monophonic_score[i+1][1]
3881
 
3882
  if note[1]+note[2] >= nmt:
3883
+ note_dur = max(1, nmt-note[1]-min_notes_gap)
3884
  else:
3885
  note_dur = note[2]
3886
 
3887
  new_note = [note[0], note[1], note_dur] + note[3:]
3888
+
3889
+ if new_note[2] >= min_notes_dur:
3890
+ fixed_score.append(new_note)
3891
+
3892
+ if monophonic_score[-1][2] >= min_notes_dur:
3893
+ fixed_score.append(monophonic_score[-1])
3894
 
3895
  elif type(monophonic_score[0][0]) == int:
3896
 
 
3900
  nmt = monophonic_score[i+1][0]
3901
 
3902
  if note[0]+note[1] >= nmt:
3903
+ note_dur = max(1, nmt-note[0]-min_notes_gap)
3904
  else:
3905
  note_dur = note[1]
3906
+
3907
  new_note = [note[0], note_dur] + note[2:]
3908
+
3909
+ if new_note[1] >= min_notes_dur:
3910
+ fixed_score.append(new_note)
3911
+
3912
+ if monophonic_score[-1][1] >= min_notes_dur:
3913
+ fixed_score.append(monophonic_score[-1])
3914
 
3915
  return fixed_score
3916
 
 
4172
  ###################################################################################
4173
 
4174
  def advanced_score_processor(raw_score,
4175
+ patches_to_analyze=list(range(129)),
4176
+ return_score_analysis=False,
4177
+ return_enhanced_score=False,
4178
+ return_enhanced_score_notes=False,
4179
+ return_enhanced_monophonic_melody=False,
4180
+ return_chordified_enhanced_score=False,
4181
+ return_chordified_enhanced_score_with_lyrics=False,
4182
+ return_score_tones_chords=False,
4183
+ return_text_and_lyric_events=False,
4184
+ apply_sustain=False
4185
  ):
4186
 
4187
  '''TMIDIX Advanced Score Processor'''
 
4210
  basic_single_track_score.append(ev)
4211
  num_tracks += 1
4212
 
4213
+ for e in basic_single_track_score:
4214
+
4215
+ if e[0] == 'note':
4216
+ e[3] = e[3] % 16
4217
+ e[4] = e[4] % 128
4218
+ e[5] = e[5] % 128
4219
+
4220
+ if e[0] == 'patch_change':
4221
+ e[2] = e[2] % 16
4222
+ e[3] = e[3] % 128
4223
+
4224
+ if apply_sustain:
4225
+ apply_sustain_to_ms_score([1000, basic_single_track_score])
4226
+
4227
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4228
  basic_single_track_score.sort(key=lambda x: x[1])
4229
 
 
4238
  enhanced_single_track_score.append(event)
4239
  num_patch_changes += 1
4240
 
4241
+ if event[0] == 'note':
4242
  if event[3] != 9:
4243
  event.extend([patches[event[3]]])
4244
  all_score_patches.extend([patches[event[3]]])
 
4738
  ceil_timings=False,
4739
  round_timings=False,
4740
  legacy_timings=True,
4741
+ sort_drums_last=False,
4742
+ even_timings=False
4743
  ):
4744
 
4745
  esn = copy.deepcopy(enhanced_score_notes)
 
4782
  e[4] = max(1, min(127, e[4] + pitch_shift))
4783
 
4784
  pe = enhanced_score_notes[i]
4785
+
4786
+
4787
+ if even_timings:
4788
+
4789
+ for e in esn:
4790
+ if e[1] % 2 != 0:
4791
+ e[1] += 1
4792
+
4793
+ if e[2] % 2 != 0:
4794
+ e[2] += 1
4795
 
4796
  if full_sorting:
4797
 
 
6732
  def align_escore_notes_to_bars(escore_notes,
6733
  bar_time=4000,
6734
  trim_durations=False,
6735
+ split_durations=False,
6736
+ even_timings=False
6737
  ):
6738
 
6739
  #=============================================================================
6740
+
6741
+ escore = copy.deepcopy(escore_notes)
6742
+
6743
+ if even_timings:
6744
+ for e in escore:
6745
+ if e[1] % 2 != 0:
6746
+ e[1] += 1
6747
+
6748
+ if e[2] % 2 != 0:
6749
+ e[2] += 1
6750
 
6751
+ aligned_escore_notes = copy.deepcopy(escore)
6752
 
6753
  abs_time = 0
6754
  nidx = 0
 
6760
 
6761
  while next_bar:
6762
 
6763
+ next_bar = find_next_bar(escore, bar_time, nidx, bcount)
6764
 
6765
  if next_bar:
6766
+ gescore_notes = escore[nidx:next_bar[1]]
6767
+
6768
  else:
6769
+ gescore_notes = escore[nidx:]
6770
 
6771
  original_timings = [delta] + [(b[1]-a[1]) for a, b in zip(gescore_notes[:-1], gescore_notes[1:])]
6772
  adj_timings = adjust_numbers_to_sum(original_timings, bar_time)
 
6781
  nidx += 1
6782
 
6783
  if next_bar:
6784
+ delta = escore[next_bar[1]][1]-escore[next_bar[1]-1][1]
6785
+
6786
  bcount += 1
6787
 
6788
  #=============================================================================
 
11193
 
11194
  ###################################################################################
11195
 
11196
def multiprocessing_wrapper(function, data_list, verbose=True):

    '''Maps function over data_list using a process pool.

    Results are collected in completion order (imap_unordered), so the
    returned list is NOT guaranteed to match the order of data_list.

    function: a picklable callable applied to each element.
    data_list: a sized iterable of inputs.
    verbose: when True, shows a tqdm progress bar.
    '''

    with multiprocessing.Pool() as pool:

        progress = tqdm.tqdm(pool.imap_unordered(function, data_list),
                             total=len(data_list),
                             disable=not verbose
                             )

        return list(progress)
 
11254
  return matrix
11255
 
11256
  ###################################################################################
11257
+
11258
def vertical_list_search(list_of_lists, trg_list):

    '''Finds every run of consecutive rows in list_of_lists that contains
    trg_list "vertically": for a match starting at row s, trg_list[j] must be
    a member of row s+j for every j.

    Returns a list of index runs, e.g. [[2, 3, 4], ...]; empty list when
    either input is empty or no run matches.
    '''

    if not list_of_lists or not trg_list:
        return []

    window = len(trg_list)

    # Pre-convert rows to sets for O(1) membership tests.
    sets_per_row = [set(row) for row in list_of_lists]

    matches = []

    for top in range(len(list_of_lists) - window + 1):

        if all(value in sets_per_row[top + j] for j, value in enumerate(trg_list)):
            matches.append(list(range(top, top + window)))

    return matches
11285
+
11286
+ ###################################################################################
11287
+
11288
def smooth_values(values, window_size=3):

    '''Returns a moving-average copy of values.

    Each output element is the integer-truncated mean of a window centered on
    the corresponding input element; the window is clipped at both ends of the
    sequence, so edge elements average over fewer neighbours.
    '''

    half = window_size // 2
    total = len(values)

    averaged = []

    for idx in range(total):

        lo = max(0, idx - half)
        hi = min(total, idx + half + 1)

        segment = values[lo:hi]

        averaged.append(int(sum(segment) / len(segment)))

    return averaged
11302
+
11303
+ ###################################################################################
11304
+
11305
def is_mostly_wide_peaks_and_valleys(values,
                                     min_range=32,
                                     threshold=0.7,
                                     smoothing_window=5
                                     ):

    '''Heuristic check that a value sequence is dominated by wide rises and
    falls rather than being flat or noisy.

    The sequence is first smoothed (see smooth_values); it qualifies when the
    smoothed range is at least min_range AND the fraction of neighbouring
    pairs that strictly rise or fall meets threshold.

    Returns False for an empty or constant (after smoothing) sequence.
    '''

    if not values:
        return False

    smoothed = smooth_values(values, smoothing_window)

    # Reject sequences whose overall span is too narrow.
    if max(smoothed) - min(smoothed) < min_range:
        return False

    # Guard against a perfectly flat sequence (also prevents a zero-length
    # trend list below when min_range <= 0).
    first = smoothed[0]
    if all(v == first for v in smoothed):
        return False

    # +1 for a rise, -1 for a fall, 0 for a plateau between neighbours.
    directions = []

    for prev, cur in zip(smoothed, smoothed[1:]):

        if cur > prev:
            directions.append(1)

        elif cur < prev:
            directions.append(-1)

        else:
            directions.append(0)

    moving = sum(1 for d in directions if d != 0)

    return moving / len(directions) >= threshold
11341
+
11342
+ ###################################################################################
11343
+
11344
def system_memory_utilization(return_dict=False):

    '''Reports system RAM utilization via psutil.

    return_dict: when True, returns psutil.virtual_memory() as a plain dict;
    otherwise prints percent-used and used-GB figures and returns None.
    '''

    if return_dict:
        return dict(psutil.virtual_memory()._asdict())

    print('RAM memory % used:', psutil.virtual_memory()[2])
    print('RAM Used (GB):', psutil.virtual_memory()[3]/(1024**3))
11352
+
11353
+ ###################################################################################
11354
+
11355
def system_cpus_utilization(return_dict=False):

    '''Reports CPU count and utilization via psutil.

    return_dict: when True, returns {'num_cpus': ..., 'cpus_util': ...};
    otherwise prints both figures and returns None.
    '''

    if return_dict:
        return {'num_cpus': psutil.cpu_count(),
                'cpus_util': psutil.cpu_percent()
                }

    print('Number of CPUs:', psutil.cpu_count())
    print('CPUs utilization:', psutil.cpu_percent())
11365
+
11366
+ ###################################################################################
11367
+
11368
def create_files_list(datasets_paths=['./'],
                      files_exts=['.mid', '.midi', '.kar', '.MID', '.MIDI', '.KAR'],
                      max_num_files_per_dir=-1,
                      randomize_dir_files=False,
                      max_total_files=-1,
                      randomize_files_list=True,
                      check_for_dupes=False,
                      use_md5_hashes=False,
                      return_dupes=False,
                      verbose=True
                      ):

    '''Recursively collects file paths from datasets_paths.

    datasets_paths: directories to walk recursively.
    files_exts: accepted file extensions (case-sensitive endswith match).
    max_num_files_per_dir: per-directory cap; <= 0 means no cap.
    randomize_dir_files: shuffle each directory's file list before capping.
    max_total_files: overall cap on the returned list; <= 0 means no cap.
    randomize_files_list: shuffle the final list.
    check_for_dupes: de-duplicate by file name, or by MD5 content hash when
        use_md5_hashes is True; without it, de-duplication is by full path only.
    return_dupes: also return the list of skipped duplicate paths.
    verbose: print progress and show a tqdm progress bar.

    Returns filez (list of paths), or (filez, dupes_list) when return_dupes.
    '''

    if verbose:
        print('=' * 70)
        print('Searching for files...')
        print('This may take a while on a large dataset in particular...')
        print('=' * 70)

    files_exts = tuple(files_exts)

    # Maps a de-dupe key (name, md5 hash, or full path) to the file path.
    # NOTE: was defaultdict(None), which behaves exactly like a plain dict
    # but reads as if a default factory exists — a plain dict is clearer.
    filez_set = {}
    dupes_list = []

    for dataset_addr in datasets_paths:

        # Fix: these prints were previously emitted even with verbose=False,
        # unlike every other print in this function.
        if verbose:
            print('=' * 70)
            print('Processing', dataset_addr)
            print('=' * 70)

        for dirpath, dirnames, filenames in tqdm.tqdm(os.walk(dataset_addr), disable=not verbose):

            if randomize_dir_files:
                random.shuffle(filenames)

            if max_num_files_per_dir > 0:
                max_num_files = max_num_files_per_dir

            else:
                max_num_files = len(filenames)

            for file in filenames[:max_num_files]:
                if file.endswith(files_exts):

                    fpath = os.path.join(dirpath, file)

                    if check_for_dupes:

                        if use_md5_hashes:
                            # Fix: close the file handle (was an unclosed
                            # open(...).read() call).
                            with open(fpath, 'rb') as f:
                                md5_hash = hashlib.md5(f.read()).hexdigest()

                            if md5_hash not in filez_set:
                                filez_set[md5_hash] = fpath

                            else:
                                dupes_list.append(fpath)

                        else:
                            if file not in filez_set:
                                filez_set[file] = fpath

                            else:
                                dupes_list.append(fpath)
                    else:
                        filez_set[fpath] = fpath

    filez = list(filez_set.values())

    if verbose:
        print('Done!')
        print('=' * 70)

    if filez:
        if randomize_files_list:

            if verbose:
                print('Randomizing file list...')

            random.shuffle(filez)

            if verbose:
                print('Done!')
                print('=' * 70)

        if verbose:
            print('Found', len(filez), 'files.')
            print('Skipped', len(dupes_list), 'duplicate files.')
            print('=' * 70)

    else:
        if verbose:
            print('Could not find any files...')
            print('Please check dataset dirs and files extensions...')
            print('=' * 70)

    if max_total_files > 0:
        if return_dupes:
            return filez[:max_total_files], dupes_list

        else:
            return filez[:max_total_files]

    else:
        if return_dupes:
            return filez, dupes_list

        else:
            return filez
11473
+
11474
+ ###################################################################################
11475
+
11476
def has_consecutive_trend(nums, count):

    '''True when nums contains a run of `count` consecutive values that is
    strictly increasing or strictly decreasing; equal neighbours break any
    run. Returns False when nums is shorter than count (and for a single
    element, since runs are measured across neighbour pairs).
    '''

    if len(nums) < count:
        return False

    up = down = 1

    for prev, cur in zip(nums, nums[1:]):

        if cur > prev:
            up += 1
            down = 1

        elif cur < prev:
            down += 1
            up = 1

        else:
            up = down = 1

        if count in (up, down):
            return True

    return False
11500
+
11501
+ ###################################################################################
11502
+
11503
+ def escore_notes_primary_features(escore_notes):
11504
+
11505
+ #=================================================================
11506
+
11507
+ def mean(values):
11508
+ return sum(values) / len(values) if values else None
11509
+
11510
+ def std(values):
11511
+ if not values:
11512
+ return None
11513
+ m = mean(values)
11514
+ return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None
11515
+
11516
+ def skew(values):
11517
+ if not values:
11518
+ return None
11519
+ m = mean(values)
11520
+ s = std(values)
11521
+ if s is None or s == 0:
11522
+ return None
11523
+ return sum(((x - m) / s) ** 3 for x in values) / len(values)
11524
+
11525
+ def kurtosis(values):
11526
+ if not values:
11527
+ return None
11528
+ m = mean(values)
11529
+ s = std(values)
11530
+ if s is None or s == 0:
11531
+ return None
11532
+ return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3
11533
+
11534
+ def median(values):
11535
+ if not values:
11536
+ return None
11537
+ srt = sorted(values)
11538
+ n = len(srt)
11539
+ mid = n // 2
11540
+ if n % 2 == 0:
11541
+ return (srt[mid - 1] + srt[mid]) / 2.0
11542
+ return srt[mid]
11543
+
11544
+ def percentile(values, p):
11545
+ if not values:
11546
+ return None
11547
+ srt = sorted(values)
11548
+ n = len(srt)
11549
+ k = (n - 1) * p / 100.0
11550
+ f = int(k)
11551
+ c = k - f
11552
+ if f + 1 < n:
11553
+ return srt[f] * (1 - c) + srt[f + 1] * c
11554
+ return srt[f]
11555
+
11556
+ def diff(values):
11557
+ if not values or len(values) < 2:
11558
+ return []
11559
+ return [values[i + 1] - values[i] for i in range(len(values) - 1)]
11560
+
11561
+ def mad(values):
11562
+ if not values:
11563
+ return None
11564
+ m = median(values)
11565
+ return median([abs(x - m) for x in values])
11566
+
11567
+ def entropy(values):
11568
+ if not values:
11569
+ return None
11570
+ freq = {}
11571
+ for v in values:
11572
+ freq[v] = freq.get(v, 0) + 1
11573
+ total = len(values)
11574
+ ent = 0.0
11575
+ for count in freq.values():
11576
+ p_val = count / total
11577
+ ent -= p_val * math.log2(p_val)
11578
+ return ent
11579
+
11580
+ def mode(values):
11581
+ if not values:
11582
+ return None
11583
+ freq = {}
11584
+ for v in values:
11585
+ freq[v] = freq.get(v, 0) + 1
11586
+ max_count = max(freq.values())
11587
+ modes = [k for k, count in freq.items() if count == max_count]
11588
+ return min(modes)
11589
+
11590
+
11591
+ #=================================================================
11592
+
11593
+ sp_score = solo_piano_escore_notes(escore_notes)
11594
+
11595
+ dscore = delta_score_notes(sp_score)
11596
+
11597
+ seq = []
11598
+
11599
+ for d in dscore:
11600
+ seq.extend([d[1], d[2], d[4]])
11601
+
11602
+ #=================================================================
11603
+
11604
+ n = len(seq)
11605
+ if n % 3 != 0:
11606
+ seq = seq[: n - (n % 3)]
11607
+ arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]
11608
+
11609
+ #=================================================================
11610
+
11611
+ features = {}
11612
+
11613
+ delta_times = [row[0] for row in arr]
11614
+ if delta_times:
11615
+ features['delta_times_mean'] = mean(delta_times)
11616
+ features['delta_times_std'] = std(delta_times)
11617
+ features['delta_times_min'] = min(delta_times)
11618
+ features['delta_times_max'] = max(delta_times)
11619
+ features['delta_times_skew'] = skew(delta_times)
11620
+ features['delta_times_kurtosis'] = kurtosis(delta_times)
11621
+ delta_zero_count = sum(1 for x in delta_times if x == 0)
11622
+ features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
11623
+ nonzero_dt = [x for x in delta_times if x != 0]
11624
+ if nonzero_dt:
11625
+ features['delta_times_nonzero_mean'] = mean(nonzero_dt)
11626
+ features['delta_times_nonzero_std'] = std(nonzero_dt)
11627
+ else:
11628
+ features['delta_times_nonzero_mean'] = None
11629
+ features['delta_times_nonzero_std'] = None
11630
+ features['delta_times_mad'] = mad(delta_times)
11631
+ features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
11632
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11633
+ features['delta_times_entropy'] = entropy(delta_times)
11634
+ features['delta_times_range'] = max(delta_times) - min(delta_times)
11635
+ features['delta_times_median'] = median(delta_times)
11636
+ features['delta_times_quantile_25'] = percentile(delta_times, 25)
11637
+ features['delta_times_quantile_75'] = percentile(delta_times, 75)
11638
+ if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
11639
+ features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
11640
+ else:
11641
+ features['delta_times_iqr'] = None
11642
+ else:
11643
+ for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
11644
+ 'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
11645
+ 'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
11646
+ 'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
11647
+ 'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
11648
+ features[key] = None
11649
+
11650
+ #=================================================================
11651
+
11652
+ durations = [row[1] for row in arr]
11653
+ if durations:
11654
+ features['durations_mean'] = mean(durations)
11655
+ features['durations_std'] = std(durations)
11656
+ features['durations_min'] = min(durations)
11657
+ features['durations_max'] = max(durations)
11658
+ features['durations_skew'] = skew(durations)
11659
+ features['durations_kurtosis'] = kurtosis(durations)
11660
+ features['durations_mad'] = mad(durations)
11661
+ features['durations_cv'] = (features['durations_std'] / features['durations_mean']
11662
+ if features['durations_mean'] and features['durations_mean'] != 0 else None)
11663
+ features['durations_entropy'] = entropy(durations)
11664
+ features['durations_range'] = max(durations) - min(durations)
11665
+ features['durations_median'] = median(durations)
11666
+ features['durations_quantile_25'] = percentile(durations, 25)
11667
+ features['durations_quantile_75'] = percentile(durations, 75)
11668
+ if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
11669
+ features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
11670
+ else:
11671
+ features['durations_iqr'] = None
11672
+ else:
11673
+ for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
11674
+ 'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
11675
+ 'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
11676
+ 'durations_quantile_75', 'durations_iqr']:
11677
+ features[key] = None
11678
+
11679
+ #=================================================================
11680
+
11681
+ pitches = [row[2] for row in arr]
11682
+ if pitches:
11683
+ features['pitches_mean'] = mean(pitches)
11684
+ features['pitches_std'] = std(pitches)
11685
+ features['pitches_min'] = min(pitches)
11686
+ features['pitches_max'] = max(pitches)
11687
+ features['pitches_skew'] = skew(pitches)
11688
+ features['pitches_kurtosis'] = kurtosis(pitches)
11689
+ features['pitches_range'] = max(pitches) - min(pitches)
11690
+ features['pitches_median'] = median(pitches)
11691
+ features['pitches_quantile_25'] = percentile(pitches, 25)
11692
+ features['pitches_quantile_75'] = percentile(pitches, 75)
11693
+ if len(pitches) > 1:
11694
+ dps = diff(pitches)
11695
+ features['pitches_diff_mean'] = mean(dps)
11696
+ features['pitches_diff_std'] = std(dps)
11697
+ else:
11698
+ features['pitches_diff_mean'] = None
11699
+ features['pitches_diff_std'] = None
11700
+ features['pitches_mad'] = mad(pitches)
11701
+ if len(pitches) > 2:
11702
+ peaks = sum(1 for i in range(1, len(pitches)-1)
11703
+ if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
11704
+ valleys = sum(1 for i in range(1, len(pitches)-1)
11705
+ if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
11706
+ else:
11707
+ peaks, valleys = None, None
11708
+ features['pitches_peak_count'] = peaks
11709
+ features['pitches_valley_count'] = valleys
11710
+ if len(pitches) > 1:
11711
+ x = list(range(len(pitches)))
11712
+ denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
11713
+ if denominator != 0:
11714
+ slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
11715
+ sum(x) * sum(pitches)) / denominator
11716
+ else:
11717
+ slope = None
11718
+ features['pitches_trend_slope'] = slope
11719
+ else:
11720
+ features['pitches_trend_slope'] = None
11721
+
11722
+ features['pitches_unique_count'] = len(set(pitches))
11723
+ pitch_class_hist = {i: 0 for i in range(12)}
11724
+ for p in pitches:
11725
+ pitch_class_hist[p % 12] += 1
11726
+ total_pitch = len(pitches)
11727
+ for i in range(12):
11728
+ features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None
11729
+
11730
+ max_asc = 0
11731
+ cur_asc = 0
11732
+ max_desc = 0
11733
+ cur_desc = 0
11734
+ for i in range(1, len(pitches)):
11735
+ if pitches[i] > pitches[i-1]:
11736
+ cur_asc += 1
11737
+ max_asc = max(max_asc, cur_asc)
11738
+ cur_desc = 0
11739
+ elif pitches[i] < pitches[i-1]:
11740
+ cur_desc += 1
11741
+ max_desc = max(max_desc, cur_desc)
11742
+ cur_asc = 0
11743
+ else:
11744
+ cur_asc = 0
11745
+ cur_desc = 0
11746
+ features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
11747
+ features['pitches_max_consecutive_descending'] = max_desc if pitches else None
11748
+ p_intervals = diff(pitches)
11749
+ features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
11750
+ if p_intervals:
11751
+ dc = sum(1 for i in range(1, len(p_intervals))
11752
+ if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
11753
+ features['pitches_direction_changes'] = dc
11754
+ else:
11755
+ features['pitches_direction_changes'] = None
11756
+ else:
11757
+ for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
11758
+ 'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
11759
+ 'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
11760
+ 'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
11761
+ 'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
11762
+ 'pitches_median_diff', 'pitches_direction_changes'] +
11763
+ [f'pitches_pc_{i}' for i in range(12)]):
11764
+ features[key] = None
11765
+
11766
+ #=================================================================
11767
+
11768
+ overall = [x for row in arr for x in row]
11769
+ if overall:
11770
+ features['overall_mean'] = mean(overall)
11771
+ features['overall_std'] = std(overall)
11772
+ features['overall_min'] = min(overall)
11773
+ features['overall_max'] = max(overall)
11774
+ features['overall_cv'] = (features['overall_std'] / features['overall_mean']
11775
+ if features['overall_mean'] and features['overall_mean'] != 0 else None)
11776
+ else:
11777
+ for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
11778
+ features[key] = None
11779
+
11780
+ #=================================================================
11781
+
11782
+ onsets = []
11783
+ cumulative = 0
11784
+ for dt in delta_times:
11785
+ onsets.append(cumulative)
11786
+ cumulative += dt
11787
+ if onsets and durations:
11788
+ overall_piece_duration = onsets[-1] + durations[-1]
11789
+ else:
11790
+ overall_piece_duration = None
11791
+ features['overall_piece_duration'] = overall_piece_duration
11792
+ features['overall_notes_density'] = (len(arr) / overall_piece_duration
11793
+ if overall_piece_duration and overall_piece_duration > 0 else None)
11794
+ features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
11795
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11796
+ features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
11797
+ features['overall_sum_durations'] = (sum(durations) if durations else None)
11798
+ features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
11799
+ if overall_piece_duration and durations else None)
11800
+ features['overall_onset_std'] = std(onsets) if onsets else None
11801
+
11802
+ #=================================================================
11803
+
11804
+ chords_raw = []
11805
+ chords_pc = []
11806
+ current_group = []
11807
+ for i, note in enumerate(arr):
11808
+ dt = note[0]
11809
+ if i == 0:
11810
+ current_group = [i]
11811
+ else:
11812
+ if dt == 0:
11813
+ current_group.append(i)
11814
+ else:
11815
+ if len(current_group) >= 2:
11816
+ chord_notes = [arr[j][2] for j in current_group]
11817
+ chords_raw.append(tuple(sorted(chord_notes)))
11818
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11819
+
11820
+ current_group = [i]
11821
+
11822
+ if current_group and len(current_group) >= 2:
11823
+ chord_notes = [arr[j][2] for j in current_group]
11824
+ chords_raw.append(tuple(sorted(chord_notes)))
11825
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11826
+
11827
+ if chords_raw:
11828
+ chord_count = len(chords_raw)
11829
+ features['chords_count'] = chord_count
11830
+ features['chords_density'] = (chord_count / overall_piece_duration
11831
+ if overall_piece_duration and chord_count is not None else None)
11832
+ chord_sizes = [len(ch) for ch in chords_raw]
11833
+ features['chords_size_mean'] = mean(chord_sizes)
11834
+ features['chords_size_std'] = std(chord_sizes)
11835
+ features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
11836
+ features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
11837
+ features['chords_unique_raw_count'] = len(set(chords_raw))
11838
+ features['chords_unique_pc_count'] = len(set(chords_pc))
11839
+ features['chords_entropy_raw'] = entropy(chords_raw)
11840
+ features['chords_entropy_pc'] = entropy(chords_pc)
11841
+ if len(chords_raw) > 1:
11842
+ rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
11843
+ features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
11844
+ else:
11845
+ features['chords_repeat_ratio_raw'] = None
11846
+ if len(chords_pc) > 1:
11847
+ rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
11848
+ features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
11849
+ else:
11850
+ features['chords_repeat_ratio_pc'] = None
11851
+ if len(chords_raw) > 1:
11852
+ bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
11853
+ features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
11854
+ else:
11855
+ features['chords_bigram_entropy_raw'] = None
11856
+ if len(chords_pc) > 1:
11857
+ bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
11858
+ features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
11859
+ else:
11860
+ features['chords_bigram_entropy_pc'] = None
11861
+ features['chords_mode_raw'] = mode(chords_raw)
11862
+ features['chords_mode_pc'] = mode(chords_pc)
11863
+ if chords_pc:
11864
+ pc_sizes = [len(ch) for ch in chords_pc]
11865
+ features['chords_pc_size_mean'] = mean(pc_sizes)
11866
+ else:
11867
+ features['chords_pc_size_mean'] = None
11868
+ else:
11869
+ for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
11870
+ 'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
11871
+ 'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
11872
+ 'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
11873
+ 'chords_pc_size_mean']:
11874
+ features[key] = None
11875
+
11876
+ #=================================================================
11877
+
11878
+ if delta_times:
11879
+ med_dt = features['delta_times_median']
11880
+ iqr_dt = features['delta_times_iqr']
11881
+ threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
11882
+ threshold_b = percentile(delta_times, 90)
11883
+ if threshold_a is not None and threshold_b is not None:
11884
+ phrase_threshold = max(threshold_a, threshold_b)
11885
+ elif threshold_a is not None:
11886
+ phrase_threshold = threshold_a
11887
+ elif threshold_b is not None:
11888
+ phrase_threshold = threshold_b
11889
+ else:
11890
+ phrase_threshold = None
11891
+ else:
11892
+ phrase_threshold = None
11893
+
11894
+ phrases = []
11895
+ current_phrase = []
11896
+ if onsets:
11897
+ current_phrase.append(0)
11898
+ for i in range(len(onsets) - 1):
11899
+ gap = onsets[i + 1] - onsets[i]
11900
+ if phrase_threshold is not None and gap > phrase_threshold:
11901
+ phrases.append(current_phrase)
11902
+ current_phrase = []
11903
+ current_phrase.append(i + 1)
11904
+ if current_phrase:
11905
+ phrases.append(current_phrase)
11906
+ if phrases:
11907
+ phrase_note_counts = []
11908
+ phrase_durations = []
11909
+ phrase_densities = []
11910
+ phrase_mean_pitches = []
11911
+ phrase_pitch_ranges = []
11912
+ phrase_start_times = []
11913
+ phrase_end_times = []
11914
+ for phrase in phrases:
11915
+ note_count = len(phrase)
11916
+ phrase_note_counts.append(note_count)
11917
+ ph_start = onsets[phrase[0]]
11918
+ ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
11919
+ phrase_start_times.append(ph_start)
11920
+ phrase_end_times.append(ph_end)
11921
+ ph_duration = ph_end - ph_start
11922
+ phrase_durations.append(ph_duration)
11923
+ density = note_count / ph_duration if ph_duration > 0 else None
11924
+ phrase_densities.append(density)
11925
+ ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
11926
+ phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
11927
+ phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
11928
+ if len(phrases) > 1:
11929
+ phrase_gaps = []
11930
+ for i in range(len(phrases) - 1):
11931
+ gap = phrase_start_times[i + 1] - phrase_end_times[i]
11932
+ phrase_gaps.append(gap if gap > 0 else 0)
11933
+ else:
11934
+ phrase_gaps = []
11935
+ features['phrases_count'] = len(phrases)
11936
+ features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
11937
+ features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
11938
+ features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
11939
+ features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
11940
+ features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
11941
+ features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
11942
+ features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
11943
+ features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
11944
+ features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
11945
+ features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
11946
+ features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
11947
+ features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
11948
+ if phrase_gaps:
11949
+ features['phrases_avg_gap'] = mean(phrase_gaps)
11950
+ features['phrases_std_gap'] = std(phrase_gaps)
11951
+ features['phrases_min_gap'] = min(phrase_gaps)
11952
+ features['phrases_max_gap'] = max(phrase_gaps)
11953
+ else:
11954
+ features['phrases_avg_gap'] = None
11955
+ features['phrases_std_gap'] = None
11956
+ features['phrases_min_gap'] = None
11957
+ features['phrases_max_gap'] = None
11958
+ features['phrases_threshold'] = phrase_threshold
11959
+ else:
11960
+ for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
11961
+ 'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
11962
+ 'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
11963
+ 'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
11964
+ 'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
11965
+ 'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
11966
+ features[key] = None
11967
+
11968
+ #=================================================================
11969
+
11970
+ return features
11971
+
11972
+ ###################################################################################
11973
+
11974
def winsorized_normalize(data, new_range=(0, 255), clip=1.5):
    """Winsorize *data* via the IQR rule, then scale to integers in *new_range*.

    Values outside [Q1 - clip*IQR, Q3 + clip*IQR] are clamped to those fences
    (restricted to the actual data range) before min-max scaling, which makes
    the normalization robust to outliers.

    Parameters:
        data (list[int | float]): values to normalize; may be empty.
        new_range (tuple[int, int]): inclusive (min, max) of the output scale.
        clip (float): IQR multiplier for the winsorization fences.

    Returns:
        list[int]: rounded, scaled values (one per input); [] for empty input.
    """

    new_min, new_max = new_range

    # Robustness fix: the original crashed on empty input (min()/max() of []).
    if not data:
        return []

    def percentile(values, p):
        # Linear-interpolation percentile (same convention as numpy's default).
        srt = sorted(values)
        n = len(srt)
        if n == 1:
            return srt[0]
        k = (n - 1) * p / 100.0
        f = int(k)
        c = k - f
        if f + 1 < n:
            return srt[f] * (1 - c) + srt[f + 1] * c
        return srt[f]

    q1 = percentile(data, 25)
    q3 = percentile(data, 75)
    iqr = q3 - q1

    # Winsorization fences, clamped to the observed data range.
    lower_bound_w = q1 - clip * iqr
    upper_bound_w = q3 + clip * iqr

    data_min = min(data)
    data_max = max(data)
    effective_low = max(lower_bound_w, data_min)
    effective_high = min(upper_bound_w, data_max)

    if effective_high == effective_low:
        # Degenerate fences: fall back to plain min-max scaling,
        # or a constant output when every value is identical.
        if data_max == data_min:
            return [int(new_min)] * len(data)

        normalized = [(x - data_min) / (data_max - data_min) for x in data]
        return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]

    # Clamp into the fences, then min-max scale to [0, 1].
    clipped = [min(max(x, effective_low), effective_high) for x in data]
    normalized = [(x - effective_low) / (effective_high - effective_low) for x in clipped]

    return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
12031
+
12032
+ ###################################################################################
12033
+
12034
def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):
    """Convert a features dict into parallel lists of raw values, integer
    tokens, and winsorize-normalized tokens.

    Keys are processed in sorted order so the output ordering is stable.
    None values map to *none_token* in both the values and tokens lists.

    Parameters:
        features (dict): feature name -> number, sequence of numbers, or None.
        new_range (tuple[int, int]): output range for the normalized tokens.
        clip (float): IQR multiplier passed to winsorized_normalize.
        none_token (int): token used for None features.

    Returns:
        (values, tokens, norm_tokens): raw values, integer tokens, and
        winsorized_normalize(tokens, new_range, clip).
    """

    import hashlib  # local import: only needed for non-numeric feature values

    values = []
    tokens = []

    def process_value(val):
        # Numbers tokenize to their rounded magnitude; sequences to the
        # rounded magnitude of their mean; anything else to a stable
        # digest-based integer.
        if isinstance(val, (int, float)):
            return int(round(abs(val)))

        elif isinstance(val, (list, tuple)):
            return int(round(abs(sum(val) / len(val))))

        else:
            # Bug fix: built-in hash() of strings is randomized per process
            # (PYTHONHASHSEED), so tokens were not reproducible across runs.
            # A digest-based hash is deterministic.
            digest = hashlib.md5(repr(val).encode('utf-8')).hexdigest()
            return int(digest, 16) % (10 ** 8)

    for key in sorted(features.keys()):

        value = features[key]

        if value is None:
            tokens.append(none_token)
            values.append(none_token)

        else:
            tokens.append(process_value(value))

            if isinstance(value, (list, tuple)):
                values.append(sum(value) / len(value))

            else:
                values.append(value)

    norm_tokens = winsorized_normalize(tokens, new_range, clip)

    return values, tokens, norm_tokens
12078
+
12079
+ ###################################################################################
12080
+
12081
def write_jsonl(records_dicts_list,
                file_name='data',
                file_ext='.jsonl',
                file_mode='w',
                line_sep='\n',
                verbose=True
                ):
    """Write a list of dict records to a JSON Lines file, one record per line.

    Parameters:
        records_dicts_list (list[dict]): records to serialize.
        file_name (str): output path; *file_ext* is appended when the name
            has no extension.
        file_ext (str): default extension.
        file_mode (str): open() mode ('w' to overwrite, 'a' to append).
        line_sep (str): separator written after each serialized record.
        verbose (bool): print progress and totals.
    """

    if verbose:
        print('=' * 70)
        print('Writing', len(records_dicts_list), 'records to jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    l_count = 0

    # The `with` block closes the file; the original's extra f.close()
    # inside the block was redundant and has been removed.
    with open(file_name, mode=file_mode) as f:
        for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
            f.write(json.dumps(record) + line_sep)
            l_count += 1

    if verbose:
        print('=' * 70)
        print('Written total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)
12112
+
12113
+ ###################################################################################
12114
+
12115
def read_jsonl(file_name='data',
               file_ext='.jsonl',
               verbose=True
               ):
    """Read a JSON Lines file into a list of dicts.

    Corrupted lines are skipped with a warning; Ctrl-C during parsing
    returns the records collected so far instead of raising.

    Parameters:
        file_name (str): input path; *file_ext* is appended when the name
            has no extension.
        file_ext (str): default extension.
        verbose (bool): print progress and diagnostics.

    Returns:
        list[dict]: parsed records.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    rec_count = 0

    # The `with` block closes the file on every exit path; the original's
    # explicit f.close() calls were redundant.
    with open(file_name, 'r') as f:

        for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                records.append(json.loads(line))
                rec_count += 1

            except KeyboardInterrupt:
                # Graceful stop: hand back whatever was read so far.
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', i, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', rec_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12168
+
12169
+ ###################################################################################
12170
+
12171
def read_jsonl_lines(lines_indexes_list,
                     file_name='data',
                     file_ext='.jsonl',
                     verbose=True
                     ):
    """Read only the specified (0-based) line numbers from a JSON Lines file.

    Scanning stops as soon as every requested line has been found.
    Corrupted lines are skipped with a warning; Ctrl-C returns the records
    collected so far.

    Parameters:
        lines_indexes_list (list[int]): line numbers to load (any order);
            the caller's list is no longer mutated.
        file_name (str): input path; *file_ext* is appended when the name
            has no extension.
        file_ext (str): default extension.
        verbose (bool): print progress and diagnostics.

    Returns:
        list[dict]: requested records, in file order.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    l_count = 0

    # Bug fix: the original reverse-sorted the caller's list *in place* and
    # trimmed it with [:-1] on every hit, mutating the caller's argument and
    # doing an O(n) membership test per file line. A local set gives O(1)
    # lookups and leaves the argument untouched.
    remaining = set(lines_indexes_list)

    with open(file_name, 'r') as f:
        for current_line_number, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            if current_line_number not in remaining:
                continue

            try:
                records.append(json.loads(line))
                l_count += 1
                remaining.discard(current_line_number)

                if not remaining:
                    break

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12231
+
12232
+ ###################################################################################
12233
+
12234
def compute_base(x: int, n: int) -> int:
    """Return the smallest base b >= 2 such that x fits in n base-b digits,
    i.e. the smallest b with b ** n > x.

    Parameters:
        x: non-negative integer to encode.
        n: number of digits available (must be >= 1).

    Raises:
        ValueError: if x is negative or n is not positive.
    """

    if x < 0:
        raise ValueError("x must be non-negative.")
    if n < 1:
        raise ValueError("n must be a positive integer.")
    if x == 0:
        return 2

    # Float estimate of the n-th root (may undershoot for large integers).
    b = max(2, int(x ** (1 / n)))

    # Bug fix: a single `if b ** n <= x: b += 1` correction is not always
    # enough, because the floating-point root can underestimate by more than
    # one step for very large x. Loop until the invariant b ** n > x holds.
    while b ** n <= x:
        b += 1

    return b
12247
+
12248
+ ###################################################################################
12249
+
12250
def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:
    """Encode non-negative *x* as exactly *n* digits in the smallest base
    that can hold it.

    Returns:
        (base, digits): chosen base and the digit list, most significant first.
    """

    base = compute_base(x, n)

    # Peel digits least-significant first, then flip to MSB-first order.
    remainder = x
    digits = []
    for _ in range(n):
        digits.append(remainder % base)
        remainder //= base
    digits.reverse()

    return base, digits
12260
+
12261
+ ###################################################################################
12262
+
12263
def decode_int_auto(base: int, digits: list[int]) -> int:
    """Decode a most-significant-first digit list back into an integer.

    Raises:
        ValueError: if any digit is outside the range 0..base-1.
    """

    # Validate every digit up front before accumulating.
    invalid = [d for d in digits if not 0 <= d < base]
    if invalid:
        raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {invalid[0]}")

    result = 0
    for d in digits:
        result = result * base + d

    return result
12273
+
12274
+ ###################################################################################
12275
+
12276
def encode_int_manual(x, base, n):
    """Encode *x* as exactly *n* base-*base* digits, most significant first.

    High-order digits that do not fit in *n* positions are discarded
    (the loop runs exactly n times and the leftover quotient is dropped).
    """

    remainder = x
    reversed_digits = []
    for _ in range(n):
        remainder, digit = divmod(remainder, base)
        reversed_digits.append(digit)

    return reversed_digits[::-1]
12285
+
12286
+ ###################################################################################
12287
+
12288
def escore_notes_pitches_chords_signature(escore_notes,
                                          max_patch=128,
                                          sort_by_counts=False,
                                          use_full_chords=False
                                          ):
    """Compute a pitches/chords signature for enhanced score notes.

    Each chordified slice contributes one token: a single pitch (0-127) or
    a tones-chord index offset by 128; drum pitches (channel 9) map to a
    separate token range above the chord tokens. The signature is a list of
    [token, count] pairs, plus the pair [-1, bad_chords_count] recording how
    many chords had to be repaired via check_and_fix_tones_chord.

    Parameters:
        escore_notes: enhanced score notes list.
        max_patch (int): keep only notes whose patch (e[6]) is <= this; the
            value is clamped to 0..128.
        sort_by_counts (bool): sort the signature by descending counts.
        use_full_chords (bool): use ALL_CHORDS_FULL instead of ALL_CHORDS_SORTED.

    Returns:
        list[list[int]]: [token, count] pairs, or [] when no notes survive
        the patch filter.
    """

    CHORDS = ALL_CHORDS_FULL if use_full_chords else ALL_CHORDS_SORTED

    max_patch = max(0, min(128, max_patch))

    escore_notes = [e for e in escore_notes if e[6] <= max_patch]

    if not escore_notes:
        return []

    cscore = chordify_score([1000, escore_notes])

    sig = []
    dsig = []

    # Drum tokens live above both the pitch (0-127) and chord token ranges.
    drums_offset = len(CHORDS) + 128

    bad_chords_counter = 0

    for c in cscore:

        # Channel 9 is drums; shift its pitches above the melodic range
        # so they can be separated out below.
        all_pitches = [e[4] if e[3] != 9 else e[4] + 128 for e in c]
        chord = sorted(set(all_pitches))

        pitches = sorted([p for p in chord if p < 128], reverse=True)
        drums = [(d + drums_offset) - 128 for d in chord if d > 127]

        if pitches:
            if len(pitches) > 1:
                tones_chord = sorted(set([p % 12 for p in pitches]))

                # Bug fix: the original bare `except:` also swallowed
                # KeyboardInterrupt/SystemExit. Only a failed lookup
                # (list.index raising ValueError) should trigger repair.
                try:
                    sig_token = CHORDS.index(tones_chord) + 128
                except ValueError:
                    checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
                    sig_token = CHORDS.index(checked_tones_chord) + 128
                    bad_chords_counter += 1

            else:
                sig_token = pitches[0]

            sig.append(sig_token)

        if drums:
            dsig.extend(drums)

    # Count token frequencies (kept dependency-free on purpose).
    sig_p = {}
    for item in sig + dsig:
        sig_p[item] = sig_p.get(item, 0) + 1

    # Token -1 stores the number of repaired (unknown) chords.
    sig_p[-1] = bad_chords_counter

    fsig = [list(v) for v in sig_p.items()]

    if sort_by_counts:
        fsig.sort(key=lambda x: x[1], reverse=True)

    return fsig
12363
+
12364
+ ###################################################################################
12365
+
12366
def compute_sustain_intervals(events):
    """Convert a time-sorted list of (time, cc_value) sustain-pedal events
    into merged (start, end) intervals during which the pedal is held down.

    A cc value >= 64 presses the pedal and < 64 releases it; repeated
    presses/releases in the same state are ignored. If the pedal is still
    down after the last event, the final interval ends at +infinity.
    """

    raw_intervals = []
    press_time = None  # None means the pedal is currently up

    for when, value in events:
        if press_time is None:
            if value >= 64:
                press_time = when
        elif value < 64:
            raw_intervals.append((press_time, when))
            press_time = None

    # Pedal never released: open-ended interval.
    if press_time is not None:
        raw_intervals.append((press_time, float('inf')))

    # Merge touching/overlapping intervals (inputs arrive time-ordered).
    merged = []
    for start, end in raw_intervals:
        if merged and start <= merged[-1][1]:
            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
        else:
            merged.append((start, end))

    return merged
12394
+
12395
+ ###################################################################################
12396
+
12397
def apply_sustain_to_ms_score(score):
    """Lengthen note durations in an ms score according to sustain-pedal
    (CC 64) events, mutating the score in place.

    A note whose nominal note-off falls strictly inside a pedal-down
    interval on its own channel is extended to the end of that interval.
    Open-ended pedal intervals are capped at the latest note-off anywhere
    in the score.

    Returns:
        The same (mutated) score object.
    """

    # Gather (time, value) pairs of CC 64 events, grouped by channel.
    pedal_events = {}
    for track in score[1:]:
        for event in track:
            if event[0] == 'control_change' and event[3] == 64:
                pedal_events.setdefault(event[2], []).append((event[1], event[4]))

    intervals_per_channel = {
        channel: compute_sustain_intervals(sorted(ch_events, key=lambda ev: ev[0]))
        for channel, ch_events in pedal_events.items()
    }

    # Latest note-off in the whole score; used to cap open-ended intervals.
    last_note_off = 0
    for track in score[1:]:
        for event in track:
            if event[0] == 'note':
                last_note_off = max(last_note_off, event[1] + event[2])

    for channel, intervals in intervals_per_channel.items():
        intervals_per_channel[channel] = [
            (start, last_note_off if end == float('inf') else end)
            for start, end in intervals
        ]

    if intervals_per_channel:

        for track in score[1:]:
            for event in track:
                if event[0] != 'note':
                    continue

                start = event[1]
                note_off = start + event[2]

                # Extend to the end of the first pedal interval that
                # strictly contains this note's nominal note-off.
                for iv_start, iv_end in intervals_per_channel.get(event[3], []):
                    if iv_start < note_off < iv_end:
                        note_off = iv_end
                        break

                event[2] = note_off - start

    return score
12451
+
12452
+ ###################################################################################
12453
+
12454
def copy_file(src_file: str, trg_dir: str, add_subdir: bool = False, verbose: bool = False):
    """Copy *src_file* (with metadata, via shutil.copy2) into *trg_dir*,
    creating the target directory if needed.

    Parameters:
        src_file: path of the file to copy.
        trg_dir: destination directory.
        add_subdir: if True, copy into a one-character subdirectory named
            after the first character of the file name (simple sharding for
            large collections).
        verbose: print success/failure diagnostics.

    Returns:
        None in all cases; failures are only reported when *verbose* is True.
    """

    src_path = Path(src_file)
    target_directory = Path(trg_dir)

    if not src_path.is_file():
        if verbose:
            print("Source file does not exist or is not a file.")

        return None

    target_directory.mkdir(parents=True, exist_ok=True)

    if add_subdir:
        # Shard by the first character of the file name.
        first_letter = src_path.name[0]
        target_directory = target_directory / first_letter
        target_directory.mkdir(parents=True, exist_ok=True)

    destination = target_directory / src_path.name

    try:
        shutil.copy2(src_path, destination)

    except OSError:
        # Bug fix: the original bare `except:` also swallowed
        # KeyboardInterrupt/SystemExit; file-system failures from
        # shutil.copy2 surface as OSError subclasses.
        if verbose:
            print('File could not be copied!')

        return None

    if verbose:
        print('File copied!')

    return None
12487
+
12488
+ ###################################################################################
12489
+
12490
def escore_notes_even_timings(escore_notes, in_place=True):
    """Round every odd start time (e[1]) and duration (e[2]) up by one so
    that all timings are even.

    With in_place=True the input notes are mutated and [] is returned
    (matching the module's in-place convention); otherwise an adjusted deep
    copy is returned and the input is left untouched.
    """

    notes = escore_notes if in_place else copy.deepcopy(escore_notes)

    for note in notes:
        if note[1] % 2:  # odd start time -> bump to the next even value
            note[1] += 1

        if note[2] % 2:  # odd duration -> bump likewise
            note[2] += 1

    return [] if in_place else notes
12513
+
12514
+ ###################################################################################
12515
+
12516
def both_chords(chord1, chord2, merge_threshold=2):

    """Return True when two adjacent chords should be merged.

    They merge when both are non-empty, at least one of them has more than
    one note, and their first notes start within merge_threshold ticks.
    """

    if not chord1 or not chord2:
        return False

    close_enough = chord2[0][1] - chord1[0][1] <= merge_threshold

    return close_enough and (len(chord1) > 1 or len(chord2) > 1)
12526
+
12527
def merge_chords(chord1, chord2, sort_drums_last=False):

    """Merge chord2's notes into chord1 (in place) and return the result.

    Notes from chord2 whose (pitch, patch) pair repeats within chord2 are
    skipped.  All notes are re-stamped with the first note's start time,
    then sorted by descending pitch / ascending patch; with
    sort_drums_last=True, patch-128 (drum) notes sort after everything else.
    """

    merged = chord1

    seen_keys = set()

    for note in chord2:
        key = (note[4], note[6])

        if key not in seen_keys:
            merged.append(note)
            seen_keys.add(key)

    # align every note to the chord's first start time
    base_time = merged[0][1]

    for note in merged[1:]:
        note[1] = base_time

    if sort_drums_last:
        sort_key = lambda x: (-x[4], x[6]) if x[6] != 128 else (x[6], -x[4])

    else:
        sort_key = lambda x: (-x[4], x[6])

    merged.sort(key=sort_key)

    return merged
12548
+
12549
def merge_escore_notes(escore_notes, merge_threshold=2, sort_drums_last=False):

    """Merge chords whose start times fall within merge_threshold ticks.

    The score is chordified, adjacent chords satisfying both_chords() are
    folded together with merge_chords(), and the flattened note list is
    returned.
    """

    cscore = chordify_score([1000, escore_notes])

    merged_chords = []
    merged_chord = cscore[0]

    for i in range(1, len(cscore)):

        cchord = cscore[i]

        if both_chords(merged_chord, cchord, merge_threshold=merge_threshold):
            merged_chord = merge_chords(merged_chord, cchord, sort_drums_last=sort_drums_last)

        else:
            merged_chords.append(merged_chord)
            merged_chord = cchord

    # Fix: the last accumulated chord was never appended, so the final
    # chord group of the score was silently dropped from the output.
    merged_chords.append(merged_chord)

    return flatten(merged_chords)
12568
+
12569
+ ###################################################################################
12570
+
12571
def solo_piano_escore_notes_tokenized(escore_notes,
                                      compress_start_times=True,
                                      encode_velocities=False,
                                      verbose=False
                                      ):

    """Encode a score as a flat solo-piano token sequence.

    The score is reduced to solo piano, zeroed to time origin and converted
    to delta times (clipped to 127).  Token value ranges:
    [0..127] delta start-time, [128..255] duration+128, [256..383] pitch+256,
    [384..511] velocity+384 (only when encode_velocities=True).
    With compress_start_times=True, a zero delta time is omitted for notes
    that share a chord (except for the very first note, which always emits
    a leading 0).
    """

    if verbose:
        print('=' * 70)
        print('Encoding MIDI...')

    # reduce to solo piano, rebase timings to 0, then delta-encode
    sp_escore_notes = solo_piano_escore_notes(escore_notes)
    zscore = recalculate_score_timings(sp_escore_notes)
    dscore = delta_score_notes(zscore, timings_clip_value=127)

    score = []

    notes_counter = 0
    chords_counter = 1  # first chord is counted up front

    for i, e in enumerate(dscore):

        dtime = e[1]
        dur = e[2]
        ptc = e[4]
        vel = e[5]

        if compress_start_times:

            if i == 0:
                # first note always starts the sequence with an explicit 0
                score.extend([0, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

            else:
                if dtime == 0:
                    # same chord as previous note: drop the redundant delta
                    score.extend([dur+128, ptc+256])

                else:
                    score.extend([dtime, dur+128, ptc+256])

                if encode_velocities:
                    score.append(vel+384)

                if dtime != 0:
                    chords_counter += 1

        else:
            # uncompressed: every note carries its delta time
            score.extend([dtime, dur+128, ptc+256])

            if encode_velocities:
                score.append(vel+384)

            if dtime != 0:
                chords_counter += 1

        notes_counter += 1

    if verbose:
        print('Done!')
        print('=' * 70)

        print('Source MIDI composition has', len(zscore), 'notes')
        print('Source MIDI composition has', len([d[1] for d in dscore if d[1] !=0 ])+1, 'chords')
        print('-' * 70)
        print('Encoded sequence has', notes_counter, 'pitches')
        print('Encoded sequence has', chords_counter, 'chords')
        print('-' * 70)
        print('Final encoded sequence has', len(score), 'tokens')
        print('=' * 70)

    return score
12643
+
12644
+ ###################################################################################
12645
+
12646
def equalize_closest_elements_dynamic(seq,
                                      min_val=128,
                                      max_val=256,
                                      splitting_factor=1.5,
                                      tightness_threshold=0.15
                                      ):

    """Snap the tightest cluster of in-range values to a common value.

    Values of seq within [min_val, max_val] are clustered by gap size
    (a gap larger than splitting_factor * median gap starts a new cluster).
    The largest cluster (ties broken by smaller spread) is replaced by its
    integer mean, but only if its spread relative to the allowed range is
    within tightness_threshold.  Otherwise (or with fewer than two
    candidates) an unchanged copy of seq is returned.
    """

    candidates = [(idx, val) for idx, val in enumerate(seq) if min_val <= val <= max_val]

    if len(candidates) < 2:
        return seq.copy()

    ranked = sorted(candidates, key=lambda pair: pair[1])
    values = [val for _, val in ranked]

    gaps = [b - a for a, b in zip(values, values[1:])]

    def _median(nums):

        ordered = sorted(nums)
        half = len(ordered) // 2

        if len(ordered) % 2:
            return ordered[half]

        return (ordered[half - 1] + ordered[half]) / 2.0

    gap_limit = splitting_factor * _median(gaps)

    # build clusters in a single pass: a large gap starts a new cluster
    clusters = []
    current = [ranked[0]]

    for pair, gap in zip(ranked[1:], gaps):
        if gap > gap_limit:
            clusters.append(current)
            current = [pair]
        else:
            current.append(pair)

    clusters.append(current)

    usable = [cluster for cluster in clusters if len(cluster) >= 2]

    if not usable:
        return seq.copy()

    def _spread(cluster):
        vals = [val for _, val in cluster]
        return max(vals) - min(vals)

    # biggest cluster first; ties resolved by tighter spread
    usable.sort(key=lambda cluster: (-len(cluster), _spread(cluster)))
    chosen = usable[0]

    allowed_width = max_val - min_val

    if _spread(chosen) / allowed_width > tightness_threshold:
        return seq.copy()

    level = sum(val for _, val in chosen) // len(chosen)

    result = list(seq)

    for idx, _ in chosen:
        result[idx] = level

    return result
12719
+
12720
+ ###################################################################################
12721
+
12722
def chunk_list(lst, chunk_size):
    """Split lst into consecutive chunks of at most chunk_size items."""
    return [lst[start:start + chunk_size] for start in range(0, len(lst), chunk_size)]
12724
+
12725
+ ###################################################################################
12726
+
12727
def compress_tokens_sequence(seq,
                             min_val=128,
                             max_val=256,
                             group_size=2,
                             splitting_factor=1.5,
                             tightness_threshold=0.15
                             ):

    """Equalize near-identical in-range tokens, then regroup the sequence.

    After equalization the sequence is chunked into groups of group_size,
    sorted descending, and runs sharing the same leading token are merged
    so that the shared token is emitted once followed by the groups' tails.
    """

    equalized = equalize_closest_elements_dynamic(seq,
                                                  min_val,
                                                  max_val,
                                                  splitting_factor=splitting_factor,
                                                  tightness_threshold=tightness_threshold
                                                  )

    # pair up tokens and order pairs descending by (first, second)
    pairs_desc = sorted(chunk_list(equalized, group_size), key=lambda pair: (-pair[0], -pair[1]))

    # collapse runs with the same leading token: [[lead]] + tails
    regrouped = []

    for lead, members in groupby(pairs_desc, key=lambda pair: pair[0]):
        regrouped.append([[lead]] + [member[1:] for member in members])

    regrouped.sort(key=lambda group: -group[1][0])

    return flatten(flatten(regrouped))
12747
+
12748
+ ###################################################################################
12749
+
12750
def merge_adjacent_pairs(values_counts):

    """Merge histogram bins whose values differ by exactly 1.

    values_counts is a list of (value, count) pairs sorted by value.
    Each merged pair keeps the value with the larger count (ties keep the
    smaller value) and sums the counts; non-adjacent entries pass through.
    """

    merged = []
    idx = 0
    total = len(values_counts)

    while idx < total:

        if idx + 1 < total:
            val_a, cnt_a = values_counts[idx]
            val_b, cnt_b = values_counts[idx + 1]

            if val_b - val_a == 1:
                winner = val_b if cnt_b > cnt_a else val_a
                merged.append((winner, cnt_a + cnt_b))

                idx += 2

                continue

        merged.append(values_counts[idx])

        idx += 1

    return merged
12780
+
12781
+ ###################################################################################
12782
+
12783
def merge_escore_notes_start_times(escore_notes, num_merges=1):

    """Quantize note start times by merging adjacent delta-time histogram bins.

    The score is delta-encoded, a histogram of non-zero delta start times is
    built, and merge_adjacent_pairs() is applied up to num_merges times (or
    until stable).  Each note's delta time is then snapped to the closest
    surviving bin value and the absolute-time score is returned.
    """

    new_dscore = delta_score_notes(escore_notes)

    # histogram of non-zero delta start-times, ordered by value
    times = [e[1] for e in new_dscore if e[1] != 0]
    times_counts = sorted(Counter(times).most_common())

    prev_counts = []
    new_times_counts = times_counts

    mcount = 0

    # repeatedly merge value-adjacent bins until the histogram stops
    # changing or num_merges passes have run
    while prev_counts != new_times_counts:
        prev_counts = new_times_counts
        new_times_counts = merge_adjacent_pairs(new_times_counts)

        mcount += 1

        if mcount == num_merges:
            break

    # surviving quantization grid values
    gtimes = [r[0] for r in new_times_counts]

    for e in new_dscore:
        if e[1] > 0:
            e[1] = find_closest_value(gtimes, e[1])[0]
            # NOTE(review): durations are shortened by num_merges here,
            # presumably to offset the time quantization — confirm intent;
            # this can drive small durations to zero or negative values.
            e[2] -= num_merges

    return delta_score_to_abs_score(new_dscore)
12812
+
12813
+ ###################################################################################
12814
+
12815
def multi_instrumental_escore_notes_tokenized(escore_notes, compress_seq=False):

    """Encode a multi-instrument score as a flat token sequence.

    Per note the tokens are: delta time [0..255], combined duration/velocity
    (8*dur + velocity bucket, offset +256) and combined patch/pitch
    (129*patch + pitch, offset +2304; channel 9 maps to patch 128 = drums).
    With compress_seq=True the delta-time token is omitted for notes that
    start together with the previous one (except the very first note).
    """

    tokens = []

    prev_event = escore_notes[0]

    for idx, event in enumerate(escore_notes):

        delta_time = max(0, min(255, event[1] - prev_event[1]))

        duration = max(0, min(255, event[2]))

        channel = max(0, min(15, event[3]))

        # channel 9 is the MIDI drum channel -> synthetic patch 128
        patch = 128 if channel == 9 else max(0, min(127, event[6]))

        pitch = max(0, min(127, event[4]))

        velocity = max(8, min(127, event[5]))
        vel_bucket = round(velocity / 15) - 1

        dur_vel = (8 * duration) + vel_bucket
        pat_ptc = (129 * patch) + pitch

        if compress_seq and delta_time == 0 and idx != 0:
            # same start time as previous note: skip the delta-time token
            tokens.extend([dur_vel + 256, pat_ptc + 2304])

        else:
            tokens.extend([delta_time, dur_vel + 256, pat_ptc + 2304])

        prev_event = event

    return tokens
12856
+
12857
+ ###################################################################################
12858
+
12859
def merge_counts(data, return_lists=True):

    """Sum counts per value over (value, count) pairs, preserving first-seen order.

    Returns [value, total] lists by default, or (value, total) tuples when
    return_lists=False.
    """

    totals = defaultdict(int)

    for value, count in data:
        totals[value] += count

    if return_lists:
        return [[value, total] for value, total in totals.items()]

    return list(totals.items())
12871
+
12872
+ ###################################################################################
12873
+
12874
def convert_escore_notes_pitches_chords_signature(signature, convert_to_full_chords=True):

    """Convert a pitches/chords signature between chord dictionaries.

    Re-indexes chord tokens from ALL_CHORDS_SORTED to ALL_CHORDS_FULL (or
    the reverse when convert_to_full_chords=False), shifting drum tokens by
    the dictionary size difference.  Chords missing from the target
    dictionary are repaired with check_and_fix_tones_chord() and tallied in
    the bad-chords entry (value -1).
    """

    if convert_to_full_chords:
        SRC_CHORDS = ALL_CHORDS_SORTED
        TRG_CHORDS = ALL_CHORDS_FULL

    else:
        SRC_CHORDS = ALL_CHORDS_FULL
        TRG_CHORDS = ALL_CHORDS_SORTED

    # drum tokens shift by the difference in dictionary sizes
    cdiff = len(TRG_CHORDS) - len(SRC_CHORDS)

    pitches_counts = [c for c in signature if -1 < c[0] < 128]
    chords_counts = [c for c in signature if 127 < c[0] < len(SRC_CHORDS)+128]
    drums_counts = [[c[0]+cdiff, c[1]] for c in signature if len(SRC_CHORDS)+127 < c[0] < len(SRC_CHORDS)+256]
    bad_chords_count = [c for c in signature if c[0] == -1]

    new_chords_counts = []

    for c in chords_counts:
        tones_chord = SRC_CHORDS[c[0]-128]

        if tones_chord not in TRG_CHORDS:
            tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=convert_to_full_chords)

            # Fix: previously this indexed bad_chords_count[0] unconditionally,
            # raising IndexError when the source signature had no -1 entry.
            if not bad_chords_count:
                bad_chords_count.append([-1, 0])

            bad_chords_count[0][1] += 1

        new_chords_counts.append([TRG_CHORDS.index(tones_chord)+128, c[1]])

    return pitches_counts + merge_counts(new_chords_counts) + drums_counts + bad_chords_count
12903
+
12904
+ ###################################################################################
12905
+
12906
def convert_bytes_in_nested_list(lst, encoding='utf-8', errors='ignore'):

    """Recursively decode every bytes object in a nested list structure.

    Lists are descended into, bytes are decoded with the given encoding and
    error policy, all other items are passed through unchanged.  Returns a
    new nested list; the input is not mutated.
    """

    new_list = []

    for item in lst:
        if isinstance(item, list):
            # Fix: the recursive call previously dropped encoding/errors,
            # silently resetting nested levels to the defaults.
            new_list.append(convert_bytes_in_nested_list(item, encoding, errors))

        elif isinstance(item, bytes):
            new_list.append(item.decode(encoding, errors=errors))

        else:
            new_list.append(item)

    return new_list
12921
+
12922
+ ###################################################################################
12923
+
12924
def mult_pitches(pitches, min_oct=4, max_oct=6):

    """Spread the pitch classes of pitches across octaves [min_oct, max_oct).

    Duplicated pitch classes are collapsed and sorted; for each class one
    pitch per octave is produced (octave * 12 + class).
    """

    unique_tones = sorted(set(pitch % 12 for pitch in pitches))

    return [octave * 12 + tone
            for tone in unique_tones
            for octave in range(min_oct, max_oct)]
12935
+
12936
+ ###################################################################################
12937
+
12938
def find_next(pitches, cur_ptc):

    """Index of the first pitch that differs from cur_ptc.

    Falls back to the last index when every pitch equals cur_ptc, and to 0
    for an empty list.
    """

    if not pitches:
        return 0

    return next((idx for idx, ptc in enumerate(pitches) if ptc != cur_ptc),
                len(pitches) - 1)
12947
+
12948
+ ###################################################################################
12949
+
12950
def ordered_groups_unsorted(data, key_index):

    """Group consecutive sublists sharing the same value at key_index.

    Unlike a dict-based grouping, non-adjacent repeats of a key form
    separate groups (itertools.groupby semantics on unsorted data).
    Returns a list of (key, [sublists...]) tuples in encounter order.
    """

    return [(key, list(run))
            for key, run in groupby(data, key=lambda item: item[key_index])]
12961
+
12962
+ ###################################################################################
12963
+
12964
def ordered_groups(data, key_index):

    """Group sublists by the value at key_index, keeping first-seen key order.

    All occurrences of a key are collected into one group regardless of
    adjacency.  Returns a list of (key, [sublists...]) tuples.
    """

    groups = OrderedDict()

    for row in data:
        groups.setdefault(row[key_index], []).append(row)

    return list(groups.items())
12977
+
12978
+ ###################################################################################
12979
+
12980
def merge_melody_notes(escore_notes, pitches_idx=4, max_dur=255, last_dur=128):

    """Collapse runs of consecutive same-pitch melody notes into single notes.

    Notes are grouped by consecutive equal pitch (index pitches_idx).  A
    multi-note run is replaced by one note starting at the run's first note
    and sustained until the next run begins (capped at max_dur).  The final
    run contributes only its first note with a fixed last_dur duration,
    since there is no following run to measure against.
    """

    groups = ordered_groups_unsorted(escore_notes, pitches_idx)

    merged_melody_notes = []

    for i, (k, g) in enumerate(groups[:-1]):

        if len(g) == 1:
            # single note: keep as-is
            merged_melody_notes.extend(g)

        else:
            # sustain from the run's first note until the next run starts
            dur = min(max_dur, groups[i+1][1][0][1] - g[0][1])

            merged_melody_notes.append(['note',
                                        g[0][1],
                                        dur,
                                        g[0][3],
                                        g[0][4],
                                        g[0][5],
                                        g[0][6]
                                        ])

    # last run: no successor to measure duration against, use last_dur
    merged_melody_notes.append(['note',
                                groups[-1][1][0][1],
                                last_dur,
                                groups[-1][1][0][3],
                                groups[-1][1][0][4],
                                groups[-1][1][0][5],
                                groups[-1][1][0][6]
                                ])

    return merged_melody_notes
13013
+
13014
+ ###################################################################################
13015
+
13016
def add_expressive_melody_to_enhanced_score_notes(escore_notes,
                                                  melody_start_chord=0,
                                                  melody_prime_pitch=60,
                                                  melody_step=1,
                                                  melody_channel=3,
                                                  melody_patch=40,
                                                  melody_notes_max_duration=255,
                                                  melody_last_note_dur=128,
                                                  melody_clip_max_min_durs=[],
                                                  melody_max_velocity=120,
                                                  acc_max_velocity=90,
                                                  return_melody=False
                                                  ):

    """Generate an expressive melody line over a score and merge it in.

    The score is chordified; every melody_step-th chord (starting at
    melody_start_chord) contributes a melody pitch chosen as the
    octave-multiplied chord tone closest to the previous melody pitch.
    Melody notes are written on melody_channel/melody_patch, pitch folded
    into a two-octave band above 60, velocity derived from the pitch class.
    Accompaniment and melody velocities are scaled to acc_max_velocity and
    melody_max_velocity respectively.  Returns the combined score, or the
    bare melody when return_melody=True.

    NOTE(review): melody_clip_max_min_durs=[] is a mutable default — safe
    here because it is only read, never mutated.
    """

    score = copy.deepcopy(escore_notes)

    # tame accompaniment dynamics so the melody stands out
    adjust_score_velocities(score, acc_max_velocity)

    cscore = chordify_score([1000, score])

    melody_pitches = [melody_prime_pitch]

    # pass 1: pick a melody pitch for each sampled chord (drums excluded)
    for i, c in enumerate(cscore[melody_start_chord:]):

        if i % melody_step == 0:

            pitches = [e[4] for e in c if e[3] != 9]

            if pitches:
                # closest octave-spread chord tone to the previous melody pitch
                cptc = find_closest_value(mult_pitches(pitches), melody_pitches[-1])[0]
                melody_pitches.append(cptc)

    song_f = []
    mel_f = []

    idx = 1  # melody_pitches[0] is the prime pitch, consumed pitches start at 1

    # pass 2: emit melody notes alongside the original chords
    for i, c in enumerate(cscore[:-melody_step]):
        pitches = [e[4] for e in c if e[3] != 9]

        if pitches and i >= melody_start_chord and i % melody_step == 0:
            # sustain until the next sampled chord, capped at the max duration
            dur = min(cscore[i+melody_step][0][1] - c[0][1], melody_notes_max_duration)

            mel_f.append(['note',
                          c[0][1],
                          dur,
                          melody_channel,
                          60+(melody_pitches[idx] % 24),
                          100 + ((melody_pitches[idx] % 12) * 2),
                          melody_patch
                          ])
            idx += 1

        song_f.extend(c)

    # append the trailing chords that pass 2 could not look ahead past
    song_f.extend(flatten(cscore[-melody_step:]))

    # optional duration clipping: [threshold, replacement]
    if len(melody_clip_max_min_durs) == 2:
        for e in mel_f:
            if e[2] >= melody_clip_max_min_durs[0]:
                e[2] = melody_clip_max_min_durs[1]

    adjust_score_velocities(mel_f, melody_max_velocity)

    # collapse repeated melody pitches into sustained notes
    merged_melody_notes = merge_melody_notes(mel_f,
                                             max_dur=melody_notes_max_duration,
                                             last_dur=melody_last_note_dur
                                             )

    song_f = sorted(merged_melody_notes + song_f,
                    key=lambda x: x[1]
                    )

    if return_melody:
        return mel_f

    else:
        return song_f
13095
+
13096
+ ###################################################################################
13097
+
13098
def list_md5_hash(ints_list):

    """MD5 hex digest of ints_list packed as native unsigned 16-bit integers.

    Values must fit in 0..65535; the digest depends on the platform's
    native byte order of array('H').
    """

    packed = array('H', ints_list).tobytes()

    return hashlib.md5(packed).hexdigest()
13104
+
13105
+ ###################################################################################
13106
+
13107
def fix_escore_notes_durations(escore_notes,
                               min_notes_gap=1,
                               min_notes_dur=1,
                               times_idx=1,
                               durs_idx=2,
                               channels_idx = 3,
                               pitches_idx=4
                               ):

    """Trim overlapping durations of same-pitch notes.

    Non-drum notes are grouped by pitch; groups of three or more are passed
    to fix_monophonic_score_durations(), two-note overlaps are trimmed so
    the first note ends before the second starts, and drum notes (channel 9)
    are left untouched.  Returns the full score re-sorted by start time.
    """

    # drums never get duration fixes; handle them separately
    notes = [e for e in escore_notes if e[channels_idx] != 9]
    drums = [e for e in escore_notes if e[channels_idx] == 9]

    escore_groups = ordered_groups(notes, pitches_idx)

    merged_score = []

    for k, g in escore_groups:
        if len(g) > 2:
            fg = fix_monophonic_score_durations(g,
                                                min_notes_gap=min_notes_gap,
                                                min_notes_dur=min_notes_dur
                                                )
            merged_score.extend(fg)

        elif len(g) == 2:

            # two-note overlap: shorten the first so it ends before the second
            if g[0][times_idx]+g[0][durs_idx] >= g[1][times_idx]:
                g[0][durs_idx] = max(1, g[1][times_idx] - g[0][times_idx] - 1)

            merged_score.extend(g)

        else:
            merged_score.extend(g)

    return sorted(merged_score + drums, key=lambda x: x[times_idx])
13142
+
13143
+ ###################################################################################
13144
+
13145
+ print('Module loaded!')
13146
+ print('=' * 70)
13147
+ print('Enjoy! :)')
13148
+ print('=' * 70)
13149
+
13150
+ ###################################################################################
13151
+ # This is the end of the TMIDI X Python module
13152
  ###################################################################################