Commit
·
042ba6c
1
Parent(s):
f1ab612
Update README.md
Browse files
README.md
CHANGED
|
@@ -71,7 +71,6 @@ sf.write("speech.wav", waveform.squeeze().detach().numpy(), samplerate=22050)
|
|
| 71 |
3. Run inference via the Transformers modelling code with the model and hifigan combined
|
| 72 |
|
| 73 |
```python
|
| 74 |
-
|
| 75 |
from transformers import FastSpeech2ConformerTokenizer, FastSpeech2ConformerWithHifiGan
|
| 76 |
import soundfile as sf
|
| 77 |
|
|
@@ -86,6 +85,19 @@ waveform = output_dict["waveform"]
|
|
| 86 |
sf.write("speech.wav", waveform.squeeze().detach().numpy(), samplerate=22050)
|
| 87 |
```
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
### Direct Use
|
| 90 |
|
| 91 |
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
|
|
|
| 71 |
3. Run inference via the Transformers modelling code with the model and hifigan combined
|
| 72 |
|
| 73 |
```python
|
|
|
|
| 74 |
from transformers import FastSpeech2ConformerTokenizer, FastSpeech2ConformerWithHifiGan
|
| 75 |
import soundfile as sf
|
| 76 |
|
|
|
|
| 85 |
sf.write("speech.wav", waveform.squeeze().detach().numpy(), samplerate=22050)
|
| 86 |
```
|
| 87 |
|
| 88 |
+
4. Run inference with a text-to-speech pipeline, explicitly passing the HiFi-GAN vocoder to use
|
| 89 |
+
```python
|
| 90 |
+
from transformers import pipeline, FastSpeech2ConformerHifiGan
|
| 91 |
+
import soundfile as sf
|
| 92 |
+
|
| 93 |
+
vocoder = FastSpeech2ConformerHifiGan.from_pretrained("espnet/fastspeech2_conformer_hifigan")
|
| 94 |
+
synthesiser = pipeline(model="espnet/fastspeech2_conformer", vocoder=vocoder)
|
| 95 |
+
|
| 96 |
+
speech = synthesiser("Hello, my dog is cooler than you!")
|
| 97 |
+
|
| 98 |
+
sf.write("speech.wav", speech["audio"].squeeze(), samplerate=speech["sampling_rate"])
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
### Direct Use
|
| 102 |
|
| 103 |
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|