# Source code for malaya_speech.speechsplit_conversion

from herpetologist import check_type
from malaya_speech.supervised import speechsplit_conversion
from malaya_speech.utils import describe_availability

# Model catalogue keyed first by F0 backend, then by model name. Each entry
# records the regular and quantized sizes in MB. Both backends currently list
# the same models with the same sizes, so the table is generated per backend;
# the inner literal is re-evaluated on every iteration, which keeps each
# backend's table an independent dict object.
_availability = {
    f0_backend: {
        'fastspeechsplit-vggvox-v2': {
            'Size (MB)': 232,
            'Quantized Size (MB)': 59.2,
        },
        'fastspeechsplit-v2-vggvox-v2': {
            'Size (MB)': 411,
            'Quantized Size (MB)': 105,
        },
    }
    for f0_backend in ('pysptk', 'pyworld')
}


# F0 backends accepted by this module; names are compared after lowercasing.
f0_modes = ['pysptk', 'pyworld']


def check_f0_mode(f0_mode='pysptk'):
    """
    Normalize and validate an F0 mode name.

    Parameters
    ----------
    f0_mode : str, optional (default='pysptk')
        Name of the F0 backend, matched case-insensitively against
        ``f0_modes``.

    Returns
    -------
    result : str
        The lowercased ``f0_mode``.

    Raises
    ------
    ValueError
        If the lowercased name is not listed in ``f0_modes``.
    """
    normalized = f0_mode.lower()
    if normalized in f0_modes:
        return normalized
    raise ValueError("`f0_mode` only support one of ['pysptk', 'pyworld']")


def available_deep_conversion(f0_mode: str = 'pysptk'):
    """
    List available Voice Conversion models.

    Parameters
    ----------
    f0_mode : str, optional (default='pysptk')
        F0 conversion supported. Allowed values:

        * ``'pysptk'`` - https://github.com/r9y9/pysptk, sensitive towards gender.
        * ``'pyworld'`` - https://pypi.org/project/pyworld/

    Returns
    -------
    result
        Whatever ``describe_availability`` produces for the model table of
        the selected ``f0_mode``.

    Raises
    ------
    ValueError
        Propagated from ``check_f0_mode`` when ``f0_mode`` is not supported.
    """
    # Validation also lowercases the name so it can index `_availability`.
    f0_mode = check_f0_mode(f0_mode=f0_mode)
    return describe_availability(_availability[f0_mode])
def deep_conversion(
    model: str = 'fastspeechsplit-v2-vggvox-v2',
    f0_mode: str = 'pysptk',
    quantized: bool = False,
    **kwargs,
):
    """
    Load Voice Conversion model.

    Parameters
    ----------
    model : str, optional (default='fastspeechsplit-v2-vggvox-v2')
        Check available models at
        `malaya_speech.speechsplit_conversion.available_deep_conversion(f0_mode = ...)`,
        passing the same ``f0_mode`` used here.
    f0_mode : str, optional (default='pysptk')
        F0 conversion supported. Allowed values:

        * ``'pysptk'`` - https://github.com/r9y9/pysptk, sensitive towards gender.
        * ``'pyworld'`` - https://pypi.org/project/pyworld/

    quantized : bool, optional (default=False)
        if True, will load 8-bit quantized model.
        Quantized model not necessary faster, totally depends on the machine.
    **kwargs
        Forwarded unchanged to ``speechsplit_conversion.load``.

    Returns
    -------
    result : malaya_speech.model.splitter.FastSpeechSplit class

    Raises
    ------
    ValueError
        If ``f0_mode`` is rejected by ``check_f0_mode`` or ``model`` is not
        listed for the selected ``f0_mode``.
    """
    # Both names are matched case-insensitively against the catalogue.
    model = model.lower()
    f0_mode = check_f0_mode(f0_mode=f0_mode)
    if model not in _availability[f0_mode]:
        # f-string so the hint shows the actual f0_mode rather than a literal
        # '{f0_mode}' placeholder.
        raise ValueError(
            f"model not supported, please check supported models from `malaya_speech.speechsplit_conversion.available_deep_conversion(f0_mode = '{f0_mode}')`."
        )

    return speechsplit_conversion.load(
        model=model,
        module=f'speechsplit-conversion-{f0_mode}',
        f0_mode=f0_mode,
        quantized=quantized,
        **kwargs,
    )