Spaces:
Sleeping
Sleeping
Commit
·
5332e66
1
Parent(s):
d8a0f82
update libs
Browse files- README.md +3 -1
- app.py +20 -17
- pyproject.toml +4 -0
- requirements.txt +2 -2
- utils.py +48 -2
README.md
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
---
|
| 2 |
title: Pop2Piano Demo
|
| 3 |
emoji: 🎹
|
|
|
|
|
|
|
| 4 |
sdk: gradio
|
| 5 |
-
sdk_version:
|
| 6 |
app_file: app.py
|
| 7 |
pinned: true
|
| 8 |
---
|
|
|
|
| 1 |
---
|
| 2 |
title: Pop2Piano Demo
|
| 3 |
emoji: 🎹
|
| 4 |
+
python_version: 3.10
|
| 5 |
+
models: ["sweetcocoa/pop2piano"]
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.39.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: true
|
| 10 |
---
|
app.py
CHANGED
|
@@ -1,18 +1,15 @@
|
|
| 1 |
-
import os
|
| 2 |
import binascii
|
| 3 |
-
import
|
| 4 |
|
| 5 |
import gradio as gr
|
| 6 |
import librosa
|
| 7 |
import numpy as np
|
| 8 |
-
import torch
|
| 9 |
import pretty_midi
|
| 10 |
-
import
|
| 11 |
-
|
| 12 |
-
from pytube.exceptions import VideoUnavailable
|
| 13 |
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
| 14 |
|
| 15 |
-
from utils import mp3_write, normalize
|
| 16 |
|
| 17 |
yt_video_dir = "./yt_dir"
|
| 18 |
outputs_dir = "./midi_wav_outputs"
|
|
@@ -26,14 +23,20 @@ composers = model.generation_config.composer_to_feature_token.keys()
|
|
| 26 |
|
| 27 |
|
| 28 |
def get_audio_from_yt_video(yt_link: str):
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
return filename, filename
|
| 39 |
|
|
@@ -82,7 +85,7 @@ block = gr.Blocks()
|
|
| 82 |
with block:
|
| 83 |
gr.HTML(
|
| 84 |
"""
|
| 85 |
-
<div style="text-align: center; max-width:
|
| 86 |
<div
|
| 87 |
style="
|
| 88 |
display: inline-flex;
|
|
@@ -131,7 +134,7 @@ with block:
|
|
| 131 |
<div> <h3> <center> Listen to the generated MIDI. </h3> </div>
|
| 132 |
"""
|
| 133 |
)
|
| 134 |
-
with gr.Row(
|
| 135 |
stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
|
| 136 |
wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
|
| 137 |
|
|
|
|
|
|
|
| 1 |
import binascii
|
| 2 |
+
import os
|
| 3 |
|
| 4 |
import gradio as gr
|
| 5 |
import librosa
|
| 6 |
import numpy as np
|
|
|
|
| 7 |
import pretty_midi
|
| 8 |
+
import torch
|
| 9 |
+
import yt_dlp
|
|
|
|
| 10 |
from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor
|
| 11 |
|
| 12 |
+
from utils import cli_to_api, mp3_write, normalize
|
| 13 |
|
| 14 |
yt_video_dir = "./yt_dir"
|
| 15 |
outputs_dir = "./midi_wav_outputs"
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
def get_audio_from_yt_video(yt_link: str):
    """Download the audio of ``yt_link`` as an MP3 file inside ``yt_video_dir``.

    The output file is named with 16 random hexadecimal characters plus the
    ``.mp3`` suffix. The download itself is delegated to ``yt_dlp`` using
    options translated from their command-line spelling by ``cli_to_api``.

    Args:
        yt_link: URL handed to ``yt_dlp`` for download.

    Returns:
        A 2-tuple holding the output path twice (presumably to feed two
        separate UI outputs -- confirm against the caller).
    """
    # 8 random bytes rendered as lowercase hex -> 16-character file stem.
    random_stem = os.urandom(8).hex()
    filename = os.path.join(yt_video_dir, random_stem + ".mp3")

    # Same flags one would pass to the yt-dlp command line.
    cli_args = [
        "--extract-audio",
        "--audio-format",
        "mp3",
        "--restrict-filenames",
        "-o",
        filename,
    ]
    yt_opt = cli_to_api(cli_args)

    with yt_dlp.YoutubeDL(yt_opt) as downloader:
        downloader.download([yt_link])

    return filename, filename
|
| 42 |
|
|
|
|
| 85 |
with block:
|
| 86 |
gr.HTML(
|
| 87 |
"""
|
| 88 |
+
<div style="text-align: center; max-width: 400px; margin: 0 auto;">
|
| 89 |
<div
|
| 90 |
style="
|
| 91 |
display: inline-flex;
|
|
|
|
| 134 |
<div> <h3> <center> Listen to the generated MIDI. </h3> </div>
|
| 135 |
"""
|
| 136 |
)
|
| 137 |
+
with gr.Row(equal_height=True):
|
| 138 |
stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
|
| 139 |
wav_output1 = gr.Audio(label="Listen to the Generated MIDI")
|
| 140 |
|
pyproject.toml
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.ruff]
|
| 2 |
+
line-length=100
|
| 3 |
+
select = ["F", "I"]
|
| 4 |
+
ignore = []
|
requirements.txt
CHANGED
|
@@ -4,7 +4,7 @@ pretty-midi==0.2.9
|
|
| 4 |
essentia==2.1b6.dev1034
|
| 5 |
pyFluidSynth==1.3.0
|
| 6 |
transformers
|
| 7 |
-
|
| 8 |
-
gradio
|
| 9 |
resampy
|
| 10 |
pydub
|
|
|
|
| 4 |
essentia==2.1b6.dev1034
|
| 5 |
pyFluidSynth==1.3.0
|
| 6 |
transformers
|
| 7 |
+
yt-dlp>=2024.7.25
|
| 8 |
+
gradio
|
| 9 |
resampy
|
| 10 |
pydub
|
utils.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import numpy as np
|
| 2 |
import pydub
|
|
|
|
|
|
|
| 3 |
|
| 4 |
|
| 5 |
def mp3_write(f: str, sr: int, x: np.ndarray, normalized: bool = False):
|
|
@@ -8,14 +10,58 @@ def mp3_write(f: str, sr: int, x: np.ndarray, normalized: bool = False):
|
|
| 8 |
y = np.int16(x * 2**15)
|
| 9 |
else:
|
| 10 |
y = np.int16(x)
|
| 11 |
-
song = pydub.AudioSegment(
|
|
|
|
|
|
|
| 12 |
song.export(f, format="mp3", bitrate="256k")
|
| 13 |
|
| 14 |
|
| 15 |
-
def normalize(
|
|
|
|
|
|
|
| 16 |
max_y -= eps
|
| 17 |
min_y += eps
|
| 18 |
amax = audio.max()
|
| 19 |
amin = audio.min()
|
| 20 |
audio = (max_y - min_y) * (audio - amin) / (amax - amin) + min_y
|
| 21 |
return audio
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import pydub
|
| 3 |
+
import yt_dlp
|
| 4 |
+
import yt_dlp.options
|
| 5 |
|
| 6 |
|
| 7 |
def mp3_write(f: str, sr: int, x: np.ndarray, normalized: bool = False):
|
|
|
|
| 10 |
y = np.int16(x * 2**15)
|
| 11 |
else:
|
| 12 |
y = np.int16(x)
|
| 13 |
+
song = pydub.AudioSegment(
|
| 14 |
+
y.tobytes(), frame_rate=sr, sample_width=2, channels=channels
|
| 15 |
+
)
|
| 16 |
song.export(f, format="mp3", bitrate="256k")
|
| 17 |
|
| 18 |
|
| 19 |
+
def normalize(
    audio: np.ndarray, min_y: float = -1.0, max_y: float = 1.0, eps: float = 1e-8
):
    """Linearly rescale ``audio`` so its values span ``[min_y + eps, max_y - eps]``.

    The smallest sample maps to ``min_y + eps`` and the largest to
    ``max_y - eps``; everything in between is scaled proportionally.

    Args:
        audio: Array of samples to rescale. It is not modified in place.
        min_y: Lower bound of the target range (before the ``eps`` margin).
        max_y: Upper bound of the target range (before the ``eps`` margin).
        eps: Margin pulled in from both ends of the target range.

    Returns:
        A new floating-point array with the rescaled samples. An empty input
        yields an empty float array. A constant input (no dynamic range)
        yields an array filled with the midpoint of the target range instead
        of NaN.
    """
    max_y -= eps
    min_y += eps

    # Nothing to scale; ``.max()`` would raise on an empty array.
    if audio.size == 0:
        return audio.astype(np.float64)

    # Integer dtypes can overflow in ``amax - amin`` / ``audio - amin``
    # (e.g. full-range int16), so do the arithmetic in float instead.
    # Floating inputs keep their original dtype.
    if not np.issubdtype(audio.dtype, np.floating):
        audio = audio.astype(np.float64)

    amax = audio.max()
    amin = audio.min()
    span = amax - amin

    # A constant signal has zero range; dividing by it would produce NaN.
    if span == 0:
        return np.full(audio.shape, (max_y + min_y) / 2.0, dtype=np.float64)

    return (max_y - min_y) * (audio - amin) / span + min_y
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# yt_dlp script copied from https://github.com/yt-dlp/yt-dlp/blob/28d485714fef88937c82635438afba5db81f9089/devscripts/cli_to_api.py
|
| 31 |
+
# Keep a reference to yt-dlp's original parser factory so that
# parse_patched_options can put it back after temporarily replacing it.
create_parser = yt_dlp.options.create_parser
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def parse_patched_options(opts):
    """Parse yt-dlp command-line arguments using a parser with altered defaults.

    A fresh parser is built from the saved ``create_parser`` factory and a
    handful of its defaults are overridden. ``yt_dlp.options.create_parser``
    is then temporarily replaced by a factory returning that parser while
    ``yt_dlp.parse_options`` runs, and is always restored afterwards.

    Args:
        opts: List of command-line style arguments for yt-dlp.

    Returns:
        Whatever ``yt_dlp.parse_options`` returns for ``opts``.
    """
    default_overrides = {
        "ignoreerrors": False,
        "retries": 0,
        "fragment_retries": 0,
        "extract_flat": False,
        "concat_playlist": "never",
    }
    parser_with_overrides = create_parser()
    parser_with_overrides.defaults.update(default_overrides)

    def _patched_factory():
        # Always hand back the single pre-configured parser instance.
        return parser_with_overrides

    # Presumably yt_dlp.parse_options looks the factory up on yt_dlp.options
    # at call time, which is why swapping the attribute takes effect.
    yt_dlp.options.create_parser = _patched_factory
    try:
        return yt_dlp.parse_options(opts)
    finally:
        # Restore the real factory even if parsing raised.
        yt_dlp.options.create_parser = create_parser
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Baseline options produced by parsing an empty argument list with the patched
# defaults; cli_to_api compares parsed options against this to find overrides.
default_opts = parse_patched_options([]).ydl_opts
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def cli_to_api(opts, cli_defaults=False):
    """Translate yt-dlp command-line arguments into a ``YoutubeDL`` options dict.

    Only options whose parsed value differs from ``default_opts`` are kept, so
    the result contains just the settings implied by ``opts``.

    Args:
        opts: List of command-line style arguments for yt-dlp.
        cli_defaults: When true, parse with ``yt_dlp.parse_options`` directly;
            otherwise parse with ``parse_patched_options``.

    Returns:
        Dict of option name -> value for every option that differs from the
        baseline. If ``"postprocessors"`` is among them, its list is reduced
        to the entries not already present in the baseline list.
    """
    parse = yt_dlp.parse_options if cli_defaults else parse_patched_options
    parsed = parse(opts).ydl_opts

    diff = {}
    for key, value in parsed.items():
        if default_opts[key] != value:
            diff[key] = value

    if "postprocessors" in diff:
        baseline_pps = default_opts["postprocessors"]
        diff["postprocessors"] = [
            pp for pp in diff["postprocessors"] if pp not in baseline_pps
        ]
    return diff
|