Spaces:
Running
on
Zero
Running
on
Zero
PRamoneda
committed on
Commit
·
c66e52a
0
Parent(s):
Initial commit for Hugging Face Space
Browse files- .idea/.gitignore +8 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/interface-audio-difficulty.iml +7 -0
- .idea/misc.xml +7 -0
- .idea/vcs.xml +7 -0
- README.md +9 -0
- __pycache__/get_difficulty.cpython-310.pyc +0 -0
- __pycache__/get_difficulty.cpython-312.pyc +0 -0
- __pycache__/model.cpython-310.pyc +0 -0
- __pycache__/model.cpython-312.pyc +0 -0
- __pycache__/model.cpython-38.pyc +0 -0
- __pycache__/utils.cpython-310.pyc +0 -0
- __pycache__/utils.cpython-312.pyc +0 -0
- __pycache__/utils.cpython-38.pyc +0 -0
- app.py +39 -0
- app.txt +1 -0
- clean.py +15 -0
- get_difficulty.py +115 -0
- poetry.lock +0 -0
- pyproject.toml +23 -0
- requirements.txt +106 -0
- utils.py +37 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default ignored files
|
2 |
+
/shelf/
|
3 |
+
/workspace.xml
|
4 |
+
# Editor-based HTTP Client requests
|
5 |
+
/httpRequests/
|
6 |
+
# Datasource local storage ignored files
|
7 |
+
/dataSources/
|
8 |
+
/dataSources.local.xml
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
.idea/interface-audio-difficulty.iml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module version="4">
|
3 |
+
<component name="PyDocumentationSettings">
|
4 |
+
<option name="format" value="PLAIN" />
|
5 |
+
<option name="myDocStringFormat" value="Plain" />
|
6 |
+
</component>
|
7 |
+
</module>
|
.idea/misc.xml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="Black">
|
4 |
+
<option name="sdkName" value="Poetry (interface-audio-difficulty)" />
|
5 |
+
</component>
|
6 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Poetry (interface-audio-difficulty) (2)" project-jdk-type="Python SDK" />
|
7 |
+
</project>
|
.idea/vcs.xml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="VcsDirectoryMappings">
|
4 |
+
<mapping directory="" vcs="Git" />
|
5 |
+
<mapping directory="$PROJECT_DIR$/temp-space" vcs="Git" />
|
6 |
+
</component>
|
7 |
+
</project>
|
README.md
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Music Difficulty Estimator 🎹
|
2 |
+
|
3 |
+
Upload an MP3 or MP4 file, or paste a YouTube link. The app extracts the audio, predicts the difficulty of the corresponding piano score, and generates a MIDI file.
|
4 |
+
|
5 |
+
- Supports video/audio inputs
|
6 |
+
- Uses Gradio and ffmpeg-python
|
7 |
+
- Fully Python-based, no system-level ffmpeg required for conversion
|
8 |
+
|
9 |
+
Built with ❤️ using Poetry + Gradio.
|
__pycache__/get_difficulty.cpython-310.pyc
ADDED
Binary file (3.75 kB). View file
|
|
__pycache__/get_difficulty.cpython-312.pyc
ADDED
Binary file (6.19 kB). View file
|
|
__pycache__/model.cpython-310.pyc
ADDED
Binary file (10.8 kB). View file
|
|
__pycache__/model.cpython-312.pyc
ADDED
Binary file (20.5 kB). View file
|
|
__pycache__/model.cpython-38.pyc
ADDED
Binary file (10.7 kB). View file
|
|
__pycache__/utils.cpython-310.pyc
ADDED
Binary file (1.29 kB). View file
|
|
__pycache__/utils.cpython-312.pyc
ADDED
Binary file (1.94 kB). View file
|
|
__pycache__/utils.cpython-38.pyc
ADDED
Binary file (1.26 kB). View file
|
|
app.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from huggingface_hub import hf_hub_download
|
2 |
+
import torch
|
3 |
+
import os
|
4 |
+
|
5 |
+
REPO_ID = "pramoneda/audio"
|
6 |
+
CACHE_BASE = "models"
|
7 |
+
|
8 |
+
|
9 |
+
def download_model_checkpoint(model_name: str, checkpoint_id: int):
    """Fetch one ``_clean`` checkpoint from the Hub repo and load it on the CPU.

    Args:
        model_name: Folder inside the Hub repo that holds the checkpoint; it is
            also used as the sub-folder of ``CACHE_BASE`` for the local cache.
        checkpoint_id: Number inserted into ``checkpoint_<id>_clean.pth``.

    Returns:
        Whatever ``torch.load`` produces for the downloaded file (the original
        code names it a state dict).
    """
    remote_file = f"{model_name}/checkpoint_{checkpoint_id}_clean.pth"
    target_dir = os.path.join(CACHE_BASE, model_name)
    print(f"Downloading {remote_file} from {REPO_ID} to {target_dir}")

    local_path = hf_hub_download(
        repo_id=REPO_ID, filename=remote_file, cache_dir=target_dir
    )
    # map_location="cpu" so loading does not require a GPU.
    return torch.load(local_path, map_location="cpu")
|
23 |
+
|
24 |
+
|
25 |
+
def ensure_local_checkpoints():
    """Download checkpoint 0 of every listed model, reporting failures.

    A failure for one model is printed and does not stop the remaining
    downloads; the loaded checkpoints themselves are discarded.
    """
    wanted = (
        ("audio_midi_cqt5_ps_v5", 0),
        ("audio_midi_pianoroll_ps_5_v4", 0),
        ("audio_midi_multi_ps_v5", 0),
    )
    for name, ckpt_id in wanted:
        try:
            download_model_checkpoint(name, ckpt_id)
        except Exception as err:
            # Best effort: keep going so the other models are still fetched.
            print(f"❌ Failed to download {name}: {err}")
|
36 |
+
|
37 |
+
|
38 |
+
# Script entry point: run the checkpoint downloads when executed directly.
if __name__ == "__main__":
    ensure_local_checkpoints()
|
app.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
clean.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
def delete_clean_checkpoints(root_dir="models"):
    """Delete every ``*_clean.pth`` file found anywhere under *root_dir*.

    Args:
        root_dir: Directory tree to scan. A path that does not exist simply
            yields no files, because ``os.walk`` ignores the error.

    Returns:
        The number of files that were removed (also printed as a summary).
    """
    deleted = 0
    for dirpath, _, filenames in os.walk(root_dir):
        for fname in filenames:
            if not fname.endswith("_clean.pth"):
                continue
            file_path = os.path.join(dirpath, fname)
            print(f"🗑️ Deleting: {file_path}")
            os.remove(file_path)
            deleted += 1
    print(f"\n✅ Deleted {deleted} clean checkpoint(s) from '{root_dir}'")
    # Returned so callers can act on the result instead of parsing stdout.
    return deleted
|
13 |
+
|
14 |
+
# Script entry point: remove the cleaned checkpoints under "models".
if __name__ == "__main__":
    delete_clean_checkpoints("models")
|
get_difficulty.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pdb
|
3 |
+
from statistics import mean
|
4 |
+
|
5 |
+
import torch
|
6 |
+
from torch import nn
|
7 |
+
import numpy as np
|
8 |
+
import librosa
|
9 |
+
from piano_transcription_inference import PianoTranscription, sample_rate, load_audio
|
10 |
+
import pretty_midi
|
11 |
+
from utils import prediction2label
|
12 |
+
from model import AudioModel
|
13 |
+
from scipy.signal import resample
|
14 |
+
|
15 |
+
|
16 |
+
def downsample_log_cqt(cqt_matrix, target_fs=5, original_fs=44100 / 160):
    """Resample a time-major CQT matrix to *target_fs* frames per second.

    Args:
        cqt_matrix: Array whose axis 0 is time (frames); other axes are kept.
        target_fs: Desired frame rate in frames per second.
        original_fs: Frame rate of *cqt_matrix*. The default, 44100 / 160,
            matches a 44100 Hz signal analysed with a hop length of 160
            samples, which is what ``get_cqt_from_mp3`` uses.

    Returns:
        The matrix resampled along axis 0 with ``scipy.signal.resample``; the
        output has ``int(n_frames / (original_fs / target_fs))`` frames.
    """
    ratio = original_fs / target_fs
    n_frames_out = int(cqt_matrix.shape[0] / ratio)
    return resample(cqt_matrix, n_frames_out, axis=0)
|
21 |
+
|
22 |
+
def downsample_matrix(mat, original_fs, target_fs):
    """Resample *mat* along axis 0 from *original_fs* to *target_fs*.

    The number of output rows is the input row count divided by
    ``original_fs / target_fs``, truncated to an integer.
    """
    decimation = original_fs / target_fs
    n_rows_out = int(mat.shape[0] / decimation)
    resampled = resample(mat, n_rows_out, axis=0)
    return resampled
|
25 |
+
|
26 |
+
def get_cqt_from_mp3(mp3_path):
    """Compute a 5 fps log-magnitude CQT tensor for an audio file.

    The audio is loaded as mono at 44100 Hz, analysed with an 88-bin CQT
    (12 bins per octave, hop length 160), converted to dB, transposed to
    time-major order and downsampled with ``downsample_log_cqt``.

    Args:
        mp3_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        A float32 tensor of shape ``(1, 1, T, 88)`` placed on the GPU when
        CUDA is available and on the CPU otherwise.
    """
    # Named audio_sr (not sample_rate) so it does not shadow the constant
    # imported from piano_transcription_inference at module level.
    audio_sr = 44100
    hop_length = 160

    y, sr = librosa.load(mp3_path, sr=audio_sr, mono=True)
    cqt = librosa.cqt(y, sr=sr, hop_length=hop_length, n_bins=88, bins_per_octave=12)
    log_cqt = librosa.amplitude_to_db(np.abs(cqt)).T  # shape (T, 88)
    log_cqt = downsample_log_cqt(log_cqt, target_fs=5)

    # Fall back to the CPU instead of crashing on hosts without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    cqt_tensor = (
        torch.tensor(log_cqt, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(device)
    )
    print(f"cqt shape: {log_cqt.shape}")
    return cqt_tensor
|
38 |
+
|
39 |
+
def get_pianoroll_from_mp3(mp3_path, midi_path="temp.mid"):
    """Transcribe an audio file to MIDI and build a piano-roll/onset tensor.

    The audio is transcribed with ``PianoTranscription``, the resulting MIDI
    is written to *midi_path* and read back with ``pretty_midi``. The file is
    left on disk after the call.

    Args:
        mp3_path: Path of the audio file to transcribe.
        midi_path: Where the transcribed MIDI is written. Defaults to
            ``"temp.mid"`` in the working directory (the original behaviour);
            pass a unique path when several calls may run concurrently.

    Returns:
        A float tensor of shape ``(1, 2, T, 88)``: channel 0 is the piano roll
        divided by 127, channel 1 marks note onsets with 1.0. It lives on the
        GPU when CUDA is available and on the CPU otherwise.
    """
    # Fall back to the CPU instead of crashing on hosts without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    audio, _ = load_audio(mp3_path, sr=sample_rate, mono=True)
    transcriptor = PianoTranscription(device=device)
    transcriptor.transcribe(audio, midi_path)
    midi_data = pretty_midi.PrettyMIDI(midi_path)

    # Piano roll sampled at 5 frames per second; rows 21..108 keep 88 pitches.
    fs = 5
    piano_roll = midi_data.get_piano_roll(fs=fs)[21:109].T  # shape: (T, 88)
    piano_roll = piano_roll / 127  # presumably maps velocities to [0, 1]
    time_steps = piano_roll.shape[0]

    # Onset matrix: 1.0 at the frame in which each in-range note starts.
    onsets = np.zeros_like(piano_roll)
    for instrument in midi_data.instruments:
        for note in instrument.notes:
            pitch = note.pitch - 21
            onset_frame = int(note.start * fs)
            if 0 <= pitch < 88 and onset_frame < time_steps:
                onsets[onset_frame, pitch] = 1.0

    pr_tensor = torch.tensor(piano_roll.T).unsqueeze(0).unsqueeze(1).to(device).float()
    on_tensor = torch.tensor(onsets.T).unsqueeze(0).unsqueeze(1).to(device).float()
    out_tensor = torch.cat([pr_tensor, on_tensor], dim=1)
    # The printed shape is (1, 2, 88, T), i.e. before the final transpose.
    print(f"piano_roll shape: {out_tensor.shape}")
    return out_tensor.transpose(2, 3)
|
65 |
+
|
66 |
+
def predict_difficulty(mp3_path, model_name, rep):
    """Predict the difficulty of a recording as the mean over five checkpoints.

    Args:
        mp3_path: Path of the audio file to analyse.
        model_name: Folder under ``models/`` containing ``checkpoint_0.pth``
            through ``checkpoint_4.pth``.
        rep: Input representation. ``"cqt5"`` and ``"pianoroll5"`` use a single
            input; ``"multimodal5"`` uses both. A value containing
            ``"only_cqt"`` or ``"only_pr"`` builds the multimodal model with
            the matching flag set and still feeds it both inputs.

    Returns:
        The arithmetic mean of the labels predicted by the five checkpoints.

    Raises:
        ValueError: If *rep* is not one of the supported representations.
    """
    if "only_cqt" in rep:
        only_cqt, only_pr = True, False
        rep_clean = "multimodal5"
    elif "only_pr" in rep:
        only_cqt, only_pr = False, True
        rep_clean = "multimodal5"
    else:
        only_cqt = only_pr = False
        rep_clean = rep

    # Reject unsupported representations before building the model and
    # loading five checkpoints, so the error is cheap and unambiguous.
    if rep not in ("cqt5", "pianoroll5") and rep_clean != "multimodal5":
        raise ValueError(f"Representation {rep} not supported")

    # Fall back to the CPU instead of crashing on hosts without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    model = AudioModel(
        num_classes=11,
        rep=rep_clean,
        modality_dropout=False,
        only_cqt=only_cqt,
        only_pr=only_pr,
    )
    # NOTE(security): weights_only=False unpickles arbitrary objects; only
    # load checkpoint files that come from a trusted source.
    checkpoints = [
        torch.load(
            f"models/{model_name}/checkpoint_{i}.pth",
            map_location=device,
            weights_only=False,
        )
        for i in range(5)
    ]

    if rep == "cqt5":
        inp_data = get_cqt_from_mp3(mp3_path)
    elif rep == "pianoroll5":
        inp_data = get_pianoroll_from_mp3(mp3_path)
    else:
        # Multimodal input: piano roll first, then CQT.
        inp_data = [get_pianoroll_from_mp3(mp3_path), get_cqt_from_mp3(mp3_path)]

    preds = []
    for state in checkpoints:
        model.load_state_dict(state["model_state_dict"])
        model = model.to(device).eval()
        with torch.inference_mode():
            logits = model(inp_data, None)
            preds.append(prediction2label(logits).item())

    return mean(preds)
|
104 |
+
|
105 |
+
# Script entry point: predict the difficulty of a sample file with the
# multimodal model.
if __name__ == "__main__":
    mp3_path = "yt_audio.mp3"
    # Other model/representation pairs that can be passed instead:
    #   model_name="audio_midi_cqt5_ps_v5", rep="cqt5"
    #   model_name="audio_midi_pianoroll_ps_5_v4", rep="pianoroll5"
    pred_multi = predict_difficulty(mp3_path, model_name="audio_midi_multi_ps_v5", rep="multimodal5")
    print(f"Predicción dificultad multimodal: {pred_multi}")
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "interface-audio-difficulty"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["PRamoneda <PRamoneda@github.com>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.10"
|
10 |
+
gradio = "^5.29.0"
|
11 |
+
pydub = "^0.25.1"
|
12 |
+
yt-dlp = "^2025.4.30"
|
13 |
+
librosa = "0.9.2"
|
14 |
+
pretty_midi = "^0.2.10"
|
15 |
+
ffmpeg-python = "^0.2.0"
|
16 |
+
scipy = "^1.13.0"
|
17 |
+
torch = "^2.2.0"
|
18 |
+
piano-transcription-inference = "^0.0.6"
|
19 |
+
seaborn = "^0.13.2"
|
20 |
+
|
21 |
+
[build-system]
|
22 |
+
requires = ["poetry-core"]
|
23 |
+
build-backend = "poetry.core.masonry.api"
|
requirements.txt
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==24.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
2 |
+
annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "4.0"
|
3 |
+
anyio==4.9.0 ; python_version >= "3.10" and python_version < "4.0"
|
4 |
+
audioop-lts==0.2.1 ; python_version >= "3.13" and python_version < "4.0"
|
5 |
+
audioread==3.0.1 ; python_version >= "3.10" and python_version < "4.0"
|
6 |
+
certifi==2025.4.26 ; python_version >= "3.10" and python_version < "4.0"
|
7 |
+
cffi==1.17.1 ; python_version >= "3.10" and python_version < "4.0"
|
8 |
+
charset-normalizer==3.4.2 ; python_version >= "3.10" and python_version < "4.0"
|
9 |
+
click==8.2.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
10 |
+
colorama==0.4.6 ; python_version >= "3.10" and python_version < "4.0" and platform_system == "Windows"
|
11 |
+
contourpy==1.3.2 ; python_version >= "3.10" and python_version < "4.0"
|
12 |
+
cycler==0.12.1 ; python_version >= "3.10" and python_version < "4.0"
|
13 |
+
decorator==5.2.1 ; python_version >= "3.10" and python_version < "4.0"
|
14 |
+
exceptiongroup==1.3.0 ; python_version >= "3.10" and python_version < "3.11"
|
15 |
+
fastapi==0.115.12 ; python_version >= "3.10" and python_version < "4.0"
|
16 |
+
ffmpeg-python==0.2.0 ; python_version >= "3.10" and python_version < "4.0"
|
17 |
+
ffmpy==0.5.0 ; python_version >= "3.10" and python_version < "4.0"
|
18 |
+
filelock==3.18.0 ; python_version >= "3.10" and python_version < "4.0"
|
19 |
+
fonttools==4.58.0 ; python_version >= "3.10" and python_version < "4.0"
|
20 |
+
fsspec==2025.3.2 ; python_version >= "3.10" and python_version < "4.0"
|
21 |
+
future==1.0.0 ; python_version >= "3.10" and python_version < "4.0"
|
22 |
+
gradio-client==1.10.1 ; python_version >= "3.10" and python_version < "4.0"
|
23 |
+
gradio==5.29.1 ; python_version >= "3.10" and python_version < "4.0"
|
24 |
+
groovy==0.1.2 ; python_version >= "3.10" and python_version < "4.0"
|
25 |
+
h11==0.16.0 ; python_version >= "3.10" and python_version < "4.0"
|
26 |
+
httpcore==1.0.9 ; python_version >= "3.10" and python_version < "4.0"
|
27 |
+
httpx==0.28.1 ; python_version >= "3.10" and python_version < "4.0"
|
28 |
+
huggingface-hub==0.31.2 ; python_version >= "3.10" and python_version < "4.0"
|
29 |
+
idna==3.10 ; python_version >= "3.10" and python_version < "4.0"
|
30 |
+
jinja2==3.1.6 ; python_version >= "3.10" and python_version < "4.0"
|
31 |
+
joblib==1.5.0 ; python_version >= "3.10" and python_version < "4.0"
|
32 |
+
kiwisolver==1.4.8 ; python_version >= "3.10" and python_version < "4.0"
|
33 |
+
librosa==0.9.2 ; python_version >= "3.10" and python_version < "4.0"
|
34 |
+
llvmlite==0.44.0 ; python_version >= "3.10" and python_version < "4.0"
|
35 |
+
markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
36 |
+
markupsafe==3.0.2 ; python_version >= "3.10" and python_version < "4.0"
|
37 |
+
matplotlib==3.10.3 ; python_version >= "3.10" and python_version < "4.0"
|
38 |
+
mdurl==0.1.2 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
39 |
+
mido==1.3.3 ; python_version >= "3.10" and python_version < "4.0"
|
40 |
+
mpmath==1.3.0 ; python_version >= "3.10" and python_version < "4.0"
|
41 |
+
networkx==3.4.2 ; python_version >= "3.10" and python_version < "4.0"
|
42 |
+
numba==0.61.2 ; python_version >= "3.10" and python_version < "4.0"
|
43 |
+
numpy==2.2.5 ; python_version >= "3.10" and python_version < "4.0"
|
44 |
+
nvidia-cublas-cu12==12.6.4.1 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
45 |
+
nvidia-cuda-cupti-cu12==12.6.80 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
46 |
+
nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
47 |
+
nvidia-cuda-runtime-cu12==12.6.77 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
48 |
+
nvidia-cudnn-cu12==9.5.1.17 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
49 |
+
nvidia-cufft-cu12==11.3.0.4 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
50 |
+
nvidia-cufile-cu12==1.11.1.6 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
51 |
+
nvidia-curand-cu12==10.3.7.77 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
52 |
+
nvidia-cusolver-cu12==11.7.1.2 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
53 |
+
nvidia-cusparse-cu12==12.5.4.2 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
54 |
+
nvidia-cusparselt-cu12==0.6.3 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
55 |
+
nvidia-nccl-cu12==2.26.2 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
56 |
+
nvidia-nvjitlink-cu12==12.6.85 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
57 |
+
nvidia-nvtx-cu12==12.6.77 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
58 |
+
orjson==3.10.18 ; python_version >= "3.10" and python_version < "4.0"
|
59 |
+
packaging==25.0 ; python_version >= "3.10" and python_version < "4.0"
|
60 |
+
pandas==2.2.3 ; python_version >= "3.10" and python_version < "4.0"
|
61 |
+
piano-transcription-inference==0.0.6 ; python_version >= "3.10" and python_version < "4.0"
|
62 |
+
pillow==11.2.1 ; python_version >= "3.10" and python_version < "4.0"
|
63 |
+
platformdirs==4.3.8 ; python_version >= "3.10" and python_version < "4.0"
|
64 |
+
pooch==1.8.2 ; python_version >= "3.10" and python_version < "4.0"
|
65 |
+
pretty-midi==0.2.10 ; python_version >= "3.10" and python_version < "4.0"
|
66 |
+
pycparser==2.22 ; python_version >= "3.10" and python_version < "4.0"
|
67 |
+
pydantic-core==2.33.2 ; python_version >= "3.10" and python_version < "4.0"
|
68 |
+
pydantic==2.11.4 ; python_version >= "3.10" and python_version < "4.0"
|
69 |
+
pydub==0.25.1 ; python_version >= "3.10" and python_version < "4.0"
|
70 |
+
pygments==2.19.1 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
71 |
+
pyparsing==3.2.3 ; python_version >= "3.10" and python_version < "4.0"
|
72 |
+
python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "4.0"
|
73 |
+
python-multipart==0.0.20 ; python_version >= "3.10" and python_version < "4.0"
|
74 |
+
pytz==2025.2 ; python_version >= "3.10" and python_version < "4.0"
|
75 |
+
pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "4.0"
|
76 |
+
requests==2.32.3 ; python_version >= "3.10" and python_version < "4.0"
|
77 |
+
resampy==0.4.3 ; python_version >= "3.10" and python_version < "4.0"
|
78 |
+
rich==14.0.0 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
79 |
+
ruff==0.11.10 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
80 |
+
safehttpx==0.1.6 ; python_version >= "3.10" and python_version < "4.0"
|
81 |
+
scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "4.0"
|
82 |
+
scipy==1.15.3 ; python_version >= "3.10" and python_version < "4.0"
|
83 |
+
seaborn==0.13.2 ; python_version >= "3.10" and python_version < "4.0"
|
84 |
+
semantic-version==2.10.0 ; python_version >= "3.10" and python_version < "4.0"
|
85 |
+
setuptools==80.7.1 ; python_version >= "3.12" and python_version < "4.0" or platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
86 |
+
shellingham==1.5.4 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
87 |
+
six==1.17.0 ; python_version >= "3.10" and python_version < "4.0"
|
88 |
+
sniffio==1.3.1 ; python_version >= "3.10" and python_version < "4.0"
|
89 |
+
soundfile==0.13.1 ; python_version >= "3.10" and python_version < "4.0"
|
90 |
+
starlette==0.46.2 ; python_version >= "3.10" and python_version < "4.0"
|
91 |
+
sympy==1.14.0 ; python_version >= "3.10" and python_version < "4.0"
|
92 |
+
threadpoolctl==3.6.0 ; python_version >= "3.10" and python_version < "4.0"
|
93 |
+
tomlkit==0.13.2 ; python_version >= "3.10" and python_version < "4.0"
|
94 |
+
torch==2.7.0 ; python_version >= "3.10" and python_version < "4.0"
|
95 |
+
torchlibrosa==0.1.0 ; python_version >= "3.10" and python_version < "4.0"
|
96 |
+
tqdm==4.67.1 ; python_version >= "3.10" and python_version < "4.0"
|
97 |
+
triton==3.3.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.10" and python_version < "4.0"
|
98 |
+
typer==0.15.3 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
99 |
+
typing-extensions==4.13.2 ; python_version >= "3.10" and python_version < "4.0"
|
100 |
+
typing-inspection==0.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
101 |
+
tzdata==2025.2 ; python_version >= "3.10" and python_version < "4.0"
|
102 |
+
urllib3==2.4.0 ; python_version >= "3.10" and python_version < "4.0"
|
103 |
+
uvicorn==0.34.2 ; python_version >= "3.10" and python_version < "4.0" and sys_platform != "emscripten"
|
104 |
+
websockets==15.0.1 ; python_version >= "3.10" and python_version < "4.0"
|
105 |
+
yt-dlp==2025.4.30 ; python_version >= "3.10" and python_version < "4.0"
|
106 |
+
huggingface_hub
|
utils.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import pickle
|
3 |
+
|
4 |
+
|
5 |
+
def save_json(dictionary, name_file):
    """Write *dictionary* to *name_file* as JSON with sorted keys, indent 4."""
    with open(name_file, 'w') as handle:
        json.dump(dictionary, handle, indent=4, sort_keys=True)
|
8 |
+
|
9 |
+
|
10 |
+
def prediction2label(pred):
    """Turn ordinal predictions into class labels.

    Each row's label is the count of leading entries above 0.5, minus one:

    [0.9, 0.1, 0.1, 0.1] -> 0
    [0.9, 0.9, 0.1, 0.1] -> 1
    [0.9, 0.9, 0.9, 0.1] -> 2

    A row whose first entry is not above 0.5 therefore yields -1.
    """
    above_threshold = pred > 0.5
    # cumprod zeroes everything after the first entry that is not above 0.5.
    leading_run = above_threshold.cumprod(axis=1)
    return leading_run.sum(axis=1) - 1
|
19 |
+
|
20 |
+
|
21 |
+
def load_json(name_file):
    """Read *name_file* and return the parsed JSON content."""
    with open(name_file, 'r') as source:
        return json.load(source)
|
26 |
+
|
27 |
+
|
28 |
+
def save_binary(dictionary, name_file):
    """Pickle *dictionary* into *name_file* using the highest protocol."""
    with open(name_file, 'wb') as sink:
        pickle.dump(dictionary, sink, protocol=pickle.HIGHEST_PROTOCOL)
|
31 |
+
|
32 |
+
|
33 |
+
def load_binary(name_file):
    """Unpickle and return the object stored in *name_file*.

    Note: unpickling can execute arbitrary code, so only use this on files
    from a trusted source.
    """
    with open(name_file, 'rb') as source:
        return pickle.load(source)
|