Commit 793b7f5b authored by Gabriela Marica

TTS + WaveRNN

linters:
- pylint:
#!/bin/bash
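# Container setup: install system tools (sox, ffmpeg, espeak, tmux, zsh), PyTorch, and this repo in develop mode, then keep the container alive.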
yes | apt-get install sox
yes | apt-get install ffmpeg
yes | apt-get install espeak
yes | apt-get install tmux
yes | apt-get install zsh
sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"
pip3 install https://download.pytorch.org/whl/cu100/torch-1.3.0%2Bcu100-cp36-cp36m-linux_x86_64.whl
sudo sh install.sh
pip install torch==1.3.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html
python3 setup.py develop
# python3 distribute.py --config_path config.json --data_path /data/ro/shared/data/keithito/LJSpeech-1.1/
# cp -R ${USER_DIR}/Mozilla_22050 ../tmp/
# python3 distribute.py --config_path config_tacotron_gst.json --data_path ../tmp/Mozilla_22050/
# python3 distribute.py --config_path config.json --data_path /data/rw/home/LibriTTS/train-clean-360
# python3 distribute.py --config_path config.json
while true; do sleep 1000000; done
.git/
language: python
git:
  quiet: true
matrix:
  include:
    - name: "Lint check"
      python: "3.6"
      install: pip install --quiet --upgrade cardboardlint pylint
      env: TEST_SUITE="lint"
    - name: "Unit tests"
      python: "3.6"
      install: pip install --quiet -r requirements_tests.txt
      env: TEST_SUITE="unittest"
script: ./.travis/script
#!/bin/bash
set -ex

git remote set-branches --add origin $TRAVIS_BRANCH
git fetch

if [[ ( "$TRAVIS_PULL_REQUEST" != "false" ) && ( "$TEST_SUITE" == "lint" ) ]]; then
    # Run cardboardlinter, in case of pull requests
    cardboardlinter --refspec origin/$TRAVIS_BRANCH -n auto
fi

if [[ "$TEST_SUITE" == "unittest" ]]; then
    # Run tests on all pushes
    pushd tts_namespace
    python -m unittest
    popd
    # Test server package
    ./tests/test_server_package.sh
fi
# Ethical Notice
Please consider the possible consequences of your work and be mindful of any adversarial use cases of this project. If you have any concerns in this regard, please contact us.
# Community Participation Guidelines
This repository is governed by Mozilla's code of conduct and etiquette guidelines.
For more details, please read the
[Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
## How to Report
For more information on how to report violations of the Community Participation Guidelines, please read our '[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)' page.
<!--
## Project Specific Etiquette
In some cases, there will be additional project etiquette, e.g. (https://bugzilla.mozilla.org/page.cgi?id=etiquette.html).
Please update for your project.
-->
# Contribution guidelines
This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/).
Before making a Pull Request, check your changes for basic mistakes and style problems by using a linter. We have cardboardlinter set up in this repository, so for example, if you've made some changes and would like to run the linter on just the differences between your work and master, you can use the following command:
```bash
pip install pylint cardboardlint
cardboardlinter --refspec master
```
This will compare the code against master and run the linter on all the changes. To run it automatically as a git pre-commit hook, you can do the following:
```bash
cat <<\EOF > .git/hooks/pre-commit
#!/bin/bash
if [ ! -x "$(command -v cardboardlinter)" ]; then
    exit 0
fi
# First, stash index and work dir, keeping only the
# to-be-committed changes in the working directory.
echo "Stashing working tree changes..." 1>&2
old_stash=$(git rev-parse -q --verify refs/stash)
git stash save -q --keep-index
new_stash=$(git rev-parse -q --verify refs/stash)
# If there were no changes (e.g., `--amend` or `--allow-empty`)
# then nothing was stashed, and we should skip everything,
# including the tests themselves. (Presumably the tests passed
# on the previous commit, so there is no need to re-run them.)
if [ "$old_stash" = "$new_stash" ]; then
echo "No changes, skipping lint." 1>&2
exit 0
fi
# Run tests
cardboardlinter --refspec HEAD -n auto
status=$?
# Restore changes
echo "Restoring working tree changes..." 1>&2
git reset --hard -q && git stash apply --index -q && git stash drop -q
# Exit with status from test-run: nonzero prevents commit
exit $status
EOF
chmod +x .git/hooks/pre-commit
```
This will run the linters on just the changes made in your commit.
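Once installed, the hook runs automatically on every `git commit`; a typical interaction looks like this (the file name and commit messages below are purely illustrative):

```bash
git add layers/tacotron.py
git commit -m "Fix attention masking"      # the pre-commit hook lints the staged changes first
# In exceptional cases you can skip the hook with git's --no-verify flag:
git commit --no-verify -m "WIP: skip lint"
```

Because the hook exits with cardboardlinter's status, a nonzero result aborts the commit until the reported issues are fixed.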
FROM pytorch/pytorch:1.0.1-cuda10.0-cudnn7-runtime
WORKDIR /srv/app
RUN apt-get update && \
    apt-get install -y libsndfile1 espeak && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Copy Source later to enable dependency caching
COPY requirements.txt /srv/app/
RUN pip install -r requirements.txt
COPY . /srv/app
# http://bugs.python.org/issue19846
# > At the moment, setting "LANG=C" on a Linux system *fundamentally breaks Python 3*, and that's not OK.
ENV LANG C.UTF-8
CMD python3.6 server/server.py -c server/conf.json
#!/bin/bash
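# WaveRNN container setup: install libsndfile, the CUDA 10.0 build of PyTorch 1.1, and the Python requirements, then keep the container alive.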
yes | apt-get install libsndfile1
pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
pip3 install -r requirements.txt
# python3 preprocess_data.py --config_path config_libri360.json --num_procs 16 --out_path /data/rw/home/LibriTTS/wavernn_360/ --data_path /data/rw/home/LibriTTS/all_wavs_360/
# ln -s /data/rw/home/LibriTTS/all_wavs_360/ wavernn_360/wavs/
# python3 distribute.py --config_path config_libri360.json --data_path /data/rw/home/LibriTTS/wavernn_360/ --output_path ../keep/
while true; do sleep 1000000; done
#!/bin/bash
virtualenv -p python3 ../tmp/venv
source ../tmp/venv/bin/activate
pip install -r requirements.txt
# python setup.py develop
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# WaveRNN - Fit a Sine Wave"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import time, sys, math\n",
"import numpy as np\n",
"import torch\n",
"from torch import optim\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"from scipy.io import wavfile\n",
"from utils.display import *\n",
"from models.wavernn import WaveRNN"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"notebook_name = 'nb1'\n",
"sample_rate = 24000"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def sine_wave(freq, length, sample_rate=sample_rate) : \n",
" return np.sin(np.arange(length) * 2 * math.pi * freq / sample_rate).astype(np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"signal = sine_wave(1, 100000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"plot(signal[:sample_rate])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Splitting the Signal"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def encode_16bits(x) : \n",
" return np.clip(x * 2**15, -2**15, 2**15 - 1).astype(np.int16)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def split_signal(x) :\n",
" encoded = encode_16bits(x)\n",
" unsigned = encoded + 2**15\n",
" coarse = unsigned // 256\n",
" fine = unsigned % 256\n",
" return coarse, fine"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"c, f = split_signal(signal)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot(c[30000:32000])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"plot(f[30000:32000])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"c.max(), c.min(), f.max(), f.min()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Reconstructing the Signal"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def combine_signal(coarse, fine) :\n",
" signal = coarse * 256 + fine \n",
" signal -= 2**15\n",
" return signal.astype(np.int16) "
]
},
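{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick worked example of the round trip (illustrative values, not outputs from this run): a sample of 0.5 encodes to 16384 in int16, shifts to 16384 + 2**15 = 49152 unsigned, and splits into coarse = 49152 // 256 = 192 and fine = 49152 % 256 = 0; combining gives 192 * 256 + 0 - 2**15 = 16384 again."
]
},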
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reconstructed = combine_signal(c, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot(reconstructed[:sample_rate])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"before_signal = sine_wave(freq=500, length=sample_rate * 5)\n",
"c, f = split_signal(before_signal)\n",
"before_signal = encode_16bits(before_signal)\n",
"reconstructed = combine_signal(c, f)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"wavfile.write('model_outputs/nb1/combine_test_before.wav', sample_rate, before_signal)\n",
"wavfile.write('model_outputs/nb1/combine_test_after.wav', sample_rate, reconstructed)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"model = WaveRNN().cuda()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Check Tensor Shapes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"hidden = model.init_hidden()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = torch.rand(1, 2).cuda()\n",
"current_course = torch.rand(1, 1).cuda()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"c, f, hidden = model(x, hidden, current_course)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"c.size(), f.size(), hidden.size()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Train Model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = sine_wave(freq=500, length=sample_rate * 30)\n",
"coarse_classes, fine_classes = split_signal(x)\n",
"coarse_classes = np.reshape(coarse_classes, (1, -1))\n",
"fine_classes = np.reshape(fine_classes, (1, -1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train(model, optimizer, num_steps, seq_len=960) :\n",
" \n",
" start = time.time()\n",
" running_loss = 0\n",
" \n",
" for step in range(num_steps) :\n",
" \n",
" loss = 0\n",
" hidden = model.init_hidden()\n",
" optimizer.zero_grad()\n",
" rand_idx = np.random.randint(0, coarse_classes.shape[1] - seq_len - 1)\n",
" \n",
" for i in range(seq_len) :\n",
" \n",
" j = rand_idx + i\n",
" \n",
" x_coarse = coarse_classes[:, j:j + 1]\n",
" x_fine = fine_classes[:, j:j + 1]\n",
" x_input = np.concatenate([x_coarse, x_fine], axis=1)\n",
" x_input = x_input / 127.5 - 1.\n",
" x_input = torch.FloatTensor(x_input).cuda()\n",
" \n",
" y_coarse = coarse_classes[:, j + 1]\n",
" y_fine = fine_classes[:, j + 1]\n",
" y_coarse = torch.LongTensor(y_coarse).cuda()\n",
" y_fine = torch.LongTensor(y_fine).cuda()\n",
" \n",
" current_coarse = y_coarse.float() / 127.5 - 1.\n",
" current_coarse = current_coarse.unsqueeze(-1)\n",
" \n",
" out_coarse, out_fine, hidden = model(x_input, hidden, current_coarse)\n",
" \n",
" loss_coarse = F.cross_entropy(out_coarse, y_coarse)\n",
" loss_fine = F.cross_entropy(out_fine, y_fine)\n",
" loss += (loss_coarse + loss_fine)\n",
" \n",
" running_loss += (loss.item() / seq_len)\n",
" loss.backward()\n",
" optimizer.step()\n",
" \n",
" speed = (step + 1) / (time.time() - start)\n",
" \n",
" stream('Step: %i/%i --- Loss: %.1f --- Speed: %.1f batches/second ', \n",
" (step + 1, num_steps, running_loss / (step + 1), speed)) "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"optimizer = optim.Adam(model.parameters(), lr=1e-3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train(model, optimizer, num_steps=300)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Generate"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"output, c, f = model.generate(10000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot(output[:300])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot(c[:300])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot(f[:300])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And compare the fine output to the ground truth..."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plot(fine_classes[0, :300])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"wavfile.write('outputs/nb1/model_output.wav', sample_rate, output)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# WaveRNN - Fit a Short Sample"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"import time, sys, math\n",
"import numpy as np\n",
"import torch\n",
"from torch import optim\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"from scipy.io import wavfile\n",
"from utils.display import *\n",
"from utils.dsp import *\n",
"from models.wavernn import WaveRNN"
]
},
{
"cell_type": "code",
"execution_count": null, <