Source code for malaya_speech.super_resolution

from malaya_speech.supervised import (
    unet as load_unet,
    enhancement as load_enhancement,
)
from malaya_speech.utils import describe_availability
from herpetologist import check_type
import logging

logger = logging.getLogger(__name__)

# Registry of the UNET super-resolution checkpoints accepted by `unet()`.
# Each row below is (model name, size in MB, quantized size in MB, SDR, ISR,
# SAR); the comprehension expands every row into the per-model record passed
# to `describe_availability`.
_availability_unet = {
    name: {
        'Size (MB)': size_mb,
        'Quantized Size (MB)': quantized_mb,
        'SDR': sdr,
        'ISR': isr,
        'SAR': sar,
    }
    for name, size_mb, quantized_mb, sdr, isr, sar in (
        ('srgan-128', 7.37, 2.04, 17.03345, 22.33026, 17.454372),
        ('srgan-256', 29.5, 7.55, 16.34558, 22.067493, 17.02439),
    )
}

# Registry of the vocoder-based super-resolution checkpoints accepted by
# `vocoder()`. Every entry uses the same 'Size (MB)' key (the 'hifigan-bwe'
# entry previously spelled it 'size (MB)'), so all models report their size
# under one field when the table is passed to `describe_availability`.
_availability_vocoder = {
    'voicefixer': {
        'Size (MB)': 489.0,
        'original from': 'https://github.com/haoheliu/voicefixer',
    },
    'nvsr': {
        'Size (MB)': 468.0,
        'original from': 'https://github.com/haoheliu/ssr_eval/tree/main/examples/NVSR',
    },
    'hifigan-bwe': {
        'Size (MB)': 4.3,
        'original from': 'https://github.com/brentspell/hifi-gan-bwe',
    },
}

# Registry of the diffusion-based super-resolution checkpoints accepted by
# `diffusion()`; currently a single model.
_availability_diffusion = dict(
    [
        (
            'nuwave2',
            {
                'Size (MB)': 20.9,
                'original from': 'https://github.com/mindslab-ai/nuwave2',
            },
        ),
    ]
)


def available_unet():
    """
    List available Super Resolution 4x deep learning UNET models.

    Logs a reminder that the SDR, ISR and SAR figures were computed on voice
    samples only (higher is better), then returns whatever
    `describe_availability` produces for the UNET model table.
    """
    logger.info('Only calculate SDR, ISR, SAR on voice sample. Higher is better.')
    summary = describe_availability(_availability_unet)
    return summary
def available_vocoder():
    """
    List available Super Resolution deep learning vocoder models.

    Returns whatever `describe_availability` produces for the vocoder model
    table.
    """
    summary = describe_availability(_availability_vocoder)
    return summary
def available_diffusion():
    """
    List available Super Resolution deep learning diffusion models.

    Returns whatever `describe_availability` produces for the diffusion model
    table.
    """
    summary = describe_availability(_availability_diffusion)
    return summary
@check_type
def unet(model: str = 'srgan-256', quantized: bool = False, **kwargs):
    """
    Load Super Resolution 4x deep learning UNET model.

    Parameters
    ----------
    model : str, optional (default='srgan-256')
        Check available models at `malaya_speech.super_resolution.available_unet()`.
        The name is matched case-insensitively.
    quantized : bool, optional (default=False)
        If True, will load the 8-bit quantized model.
        A quantized model is not necessarily faster; it depends on the machine.
    **kwargs
        Forwarded unchanged to `load_unet.load_1d`.

    Returns
    -------
    result : malaya_speech.model.tf.UNET1D class

    Raises
    ------
    ValueError
        If `model` is not one of the supported UNET models.
    """
    model_name = model.lower()

    # Happy path first: a known checkpoint is handed straight to the loader.
    if model_name in _availability_unet:
        return load_unet.load_1d(
            model=model_name,
            module='super-resolution',
            quantized=quantized,
            **kwargs
        )

    raise ValueError(
        'model not supported, please check supported models from `malaya_speech.super_resolution.available_unet()`.'
    )
def vocoder(model: str = 'hifigan-bwe', **kwargs):
    """
    Load vocoder based super resolution.

    Parameters
    ----------
    model : str, optional (default='hifigan-bwe')
        Check available models at `malaya_speech.super_resolution.available_vocoder()`.
        The name is matched case-insensitively.
    **kwargs
        Accepted for interface compatibility; currently not forwarded to the
        underlying loader.

    Returns
    -------
    result : malaya_speech.torch_model.super_resolution.*

    Raises
    ------
    ValueError
        If `model` is not one of the supported vocoder models.
    """
    model = model.lower()
    if model not in _availability_vocoder:
        # Point users at the listing function that actually exists in this
        # module (the message previously referenced `available_tfgan()`).
        raise ValueError(
            'model not supported, please check supported models from `malaya_speech.super_resolution.available_vocoder()`.'
        )

    # Each supported model name maps to the loader responsible for it.
    loads = {
        'voicefixer': load_enhancement.load_tfgan,
        'nvsr': load_enhancement.load_tfgan,
        'hifigan-bwe': load_enhancement.load_vocoder,
    }
    return loads[model](model=model)
def diffusion(model: str = 'nuwave2', **kwargs):
    """
    Load audio diffusion based super resolution.

    Parameters
    ----------
    model : str, optional (default='nuwave2')
        Check available models at `malaya_speech.super_resolution.available_diffusion()`.
        The name is matched case-insensitively.
    **kwargs
        Accepted for interface compatibility; currently not forwarded to the
        underlying loader.

    Returns
    -------
    result : malaya_speech.torch_model.super_resolution.NuWave2

    Raises
    ------
    ValueError
        If `model` is not one of the supported diffusion models.
    """
    model_name = model.lower()

    # Happy path first: a known checkpoint is handed straight to the loader.
    if model_name in _availability_diffusion:
        return load_enhancement.load_diffusion(model=model_name)

    raise ValueError(
        'model not supported, please check supported models from `malaya_speech.super_resolution.available_diffusion()`.'
    )