import gradio as gr
import torch
from diffusers import SpectrogramDiffusionPipeline, MidiProcessor

# Load the pretrained MIDI-to-audio pipeline; fall back to CPU/float32 without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
pipe = SpectrogramDiffusionPipeline.from_pretrained("google/music-spectrogram-diffusion", torch_dtype=dtype).to(device)
if device == "cuda":
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception:
        pass  # xformers is optional; keep the default attention if it is unavailable
processor = MidiProcessor()

def predict(midi_file):
    # gr.File passes a tempfile-like object in Gradio 3.x and a plain path string in 4.x.
    path = midi_file if isinstance(midi_file, str) else midi_file.name
    with torch.inference_mode():
        # MidiProcessor splits the MIDI into tokenised segments; synthesise the first two.
        output = pipe(processor(path)[:2])
    audio = output.audios[0]
    # Gradio's "numpy" audio format is (sample_rate, waveform); the model outputs 16 kHz audio.
    return (16000, audio.ravel())
| title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion" | |
| description = """ | |
| In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in realtime. | |
| This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments. | |
| They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter. | |
| """ | |
gr.Interface(
    fn=predict,
    inputs=[
        gr.File(label="Upload MIDI", file_count="single", file_types=[".mid"]),
    ],
    outputs=[
        gr.Audio(label="Synthesised Music", type="numpy"),
    ],
    title=title,
    description=description,
    theme="gstaff/xkcd",
).launch(debug=True)
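
# For reference, the same two-stage pipeline can be exercised without the Gradio UI.
# A minimal sketch, assuming a local MIDI file named "input.mid" (hypothetical) and
# scipy available for writing the 16 kHz result to disk:
#
#     from scipy.io import wavfile
#
#     segments = processor("input.mid")       # stage 1 input: tokenised note segments
#     audio = pipe(segments[:2]).audios[0]    # MIDI -> spectrogram -> waveform
#     wavfile.write("output.wav", 16000, audio.ravel())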