import gradio as gr
import torch
from diffusers import SpectrogramDiffusionPipeline, MidiProcessor

# Load the pretrained MIDI-to-audio pipeline; fall back to CPU/float32 without a GPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
pipe = SpectrogramDiffusionPipeline.from_pretrained("google/music-spectrogram-diffusion", torch_dtype=dtype).to(device)
if device == "cuda":
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception:
        pass  # xformers is optional; keep the default attention if it is unavailable
processor = MidiProcessor()

def predict(midi_file):
    # gr.File passes a tempfile-like object in Gradio 3.x and a plain path string in 4.x.
    path = midi_file if isinstance(midi_file, str) else midi_file.name
    with torch.inference_mode():
        # MidiProcessor splits the MIDI into tokenised segments; synthesise the first two.
        output = pipe(processor(path)[:2])
    audio = output.audios[0]
    # Gradio's "numpy" audio format is (sample_rate, waveform); the model outputs 16 kHz audio.
    return (16000, audio.ravel())
| title = "Music Spectrogram Diffusion: Multi-instrument Music Synthesis with Spectrogram Diffusion" | |
| description = """ | |
| In this work, the authors focus on a middle ground of neural synthesizers that can generate audio from MIDI sequences with arbitrary combinations of instruments in realtime. | |
| This enables training on a wide range of transcription datasets with a single model, which in turn offers note-level control of composition and instrumentation across a wide range of instruments. | |
| They use a simple two-stage process: MIDI to spectrograms with an encoder-decoder Transformer, then spectrograms to audio with a generative adversarial network (GAN) spectrogram inverter. | |
| """ | |
gr.Interface(
    fn=predict,
    inputs=[
        gr.File(label="Upload MIDI", file_count="single", file_types=[".mid"]),
    ],
    outputs=[
        gr.Audio(label="Synthesised Music", type="numpy"),
    ],
    title=title,
    description=description,
    theme="gstaff/xkcd",
).launch(debug=True)
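
# For reference, the same two-stage pipeline can be exercised without the Gradio UI.
# A minimal sketch, assuming a local MIDI file named "input.mid" (hypothetical) and
# scipy available for writing the 16 kHz result to disk:
#
#     from scipy.io import wavfile
#
#     segments = processor("input.mid")       # stage 1 input: tokenised note segments
#     audio = pipe(segments[:2]).audios[0]    # MIDI -> spectrogram -> waveform
#     wavfile.write("output.wav", 16000, audio.ravel())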