bin/SuperFluxNN

#!/usr/bin/env python
# encoding: utf-8
"""
SuperFlux with neural network based peak picking onset detection algorithm.

"""

from __future__ import absolute_import, division, print_function

import argparse

import numpy as np

from madmom.audio import (FramedSignalProcessor,
                          LogarithmicSpectrogramProcessor, SignalProcessor,
                          SpectrogramDifferenceProcessor)
from madmom.audio.filters import FilterbankProcessor, LogarithmicFilterbank
from madmom.features import (ActivationsProcessor, OnsetPeakPickingProcessor,
                             SpectralOnsetProcessor)
from madmom.io import write_onsets
from madmom.ml.nn import NeuralNetworkEnsemble
from madmom.models import ONSETS_BRNN_PP
from madmom.processors import IOProcessor, io_arguments


def main():
    """
    Run the SuperFluxNN command line program.

    Sets up the argument parser, parses the command line, overrides the
    settings which are not user-configurable, assembles the input and output
    processing chains and finally passes the resulting IOProcessor (together
    with all parsed arguments) to the callable stored in `args.func`.

    """
    # help text shown by the parser (kept verbatim, hence the raw formatter)
    description = '''
    The SuperFluxNN program detects all onsets in an audio file with the
    SuperFlux algorithm with neural network based peak-picking as described in:

    "Enhanced Peak Picking for Onset Detection with Recurrent Neural Networks"
    Sebastian Böck, Jan Schlüter and Gerhard Widmer.
    Proceedings of the 6th International Workshop on Machine Learning and
    Music (MML), 2013.

    Please note that this implementation uses 100 frames per second (instead
    of 200), because it is faster and produces highly comparable results.

    This program can be run in 'single' file mode to process a single audio
    file and write the detected onsets to STDOUT or the given output file.

      $ SuperFluxNN single INFILE [-o OUTFILE]

    If multiple audio files should be processed, the program can also be run
    in 'batch' mode to save the detected onsets to files with the given suffix.

      $ SuperFluxNN batch [-o OUTPUT_DIR] [-s OUTPUT_SUFFIX] FILES

    If no output directory is given, the program writes the files with the
    detected onsets to the same location as the audio files.

    The 'pickle' mode can be used to store the used parameters to be able to
    exactly reproduce experiments.


    '''
    parser = argparse.ArgumentParser(
        description=description,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='SuperFluxNN.2016')

    # general input/output handling and loading/saving of activations
    io_arguments(parser, output_suffix='.onsets.txt')
    ActivationsProcessor.add_arguments(parser)

    # signal processing options with the defaults used by this program
    SignalProcessor.add_arguments(parser, norm=False, gain=0)
    FramedSignalProcessor.add_arguments(parser)
    FilterbankProcessor.add_arguments(
        parser, num_bands=24, fmin=30, fmax=17000, norm_filters=False)
    LogarithmicSpectrogramProcessor.add_arguments(
        parser, log=True, mul=1, add=1)
    SpectrogramDifferenceProcessor.add_arguments(
        parser, diff_ratio=0.5, diff_max_bins=3, positive_diffs=True)

    # peak picking options
    OnsetPeakPickingProcessor.add_arguments(
        parser, threshold=0.4, smooth=0.07, combine=0.04, delay=0)

    args = parser.parse_args()

    # these settings are fixed and always override the parsed values
    args.num_channels = 1
    args.fps = 100
    args.onset_method = 'superflux'
    args.filterbank = LogarithmicFilterbank

    # all arguments as a dictionary, passed as keywords to every processor
    options = vars(args)

    # show the effective arguments if requested
    if args.verbose:
        print(args)

    # input side: either read stored activations or compute them from audio
    source = (ActivationsProcessor(mode='r', **options) if args.load
              else SpectralOnsetProcessor(**options))

    # output side: either store the activations or detect and write onsets
    if args.save:
        sink = ActivationsProcessor(mode='w', **options)
    else:
        # ensemble of RNNs whose predictions are averaged
        ensemble = NeuralNetworkEnsemble.load(ONSETS_BRNN_PP, **options)
        # peak picking on the network output
        peak_picker = OnsetPeakPickingProcessor(**options)
        # Note: np.atleast_2d and np.transpose are needed in front of the
        #       RNN, since it expects 2D data (1D means framewise processing)
        sink = [np.atleast_2d, np.transpose, ensemble, peak_picker,
                write_onsets]

    # chain input and output processing and run the selected function
    args.func(IOProcessor(source, sink), **options)


# only run the program when this file is executed as a script
if __name__ == '__main__':
    main()