Source code for malaya_speech.noise_reduction

from malaya_speech.supervised import unet
from malaya_speech.utils.astype import int_to_float
from herpetologist import check_type
from malaya_speech.utils import describe_availability
import librosa
import numpy as np
import logging

logger = logging.getLogger(__name__)

# https://github.com/sigsep/sigsep-mus-eval/blob/master/museval/__init__.py#L364
# Only calculate SDR, ISR, SAR on voice sample

# Registry of the noise-reduction models this module can load.
# The top-level keys are the model names accepted by `deep_model`, which
# rejects any name that is not a key here; the whole mapping is handed to
# `describe_availability` by `available_model`.
# Each entry records the model size in MB (regular and quantized) plus its
# reported MAE / SDR / ISR / SAR figures. Per the note above and the log
# message in `available_model`, SDR, ISR and SAR are computed on the voice
# sample only, and higher is better.
_availability = {
    'unet': {
        'Size (MB)': 78.9,
        'Quantized Size (MB)': 20,
        'SUM MAE': 0.862316,
        'MAE_SPEAKER': 0.460676,
        'MAE_NOISE': 0.401640,
        'SDR': 9.17312,
        'ISR': 13.92435,
        'SAR': 13.20592,
    },
    'resnet-unet': {
        'Size (MB)': 96.4,
        'Quantized Size (MB)': 24.6,
        'SUM MAE': 0.82535,
        'MAE_SPEAKER': 0.43885,
        'MAE_NOISE': 0.38649,
        'SDR': 9.45413,
        'ISR': 13.9639,
        'SAR': 13.60276,
    },
    'resnext-unet': {
        'Size (MB)': 75.4,
        'Quantized Size (MB)': 19,
        'SUM MAE': 0.81102,
        'MAE_SPEAKER': 0.44719,
        'MAE_NOISE': 0.363830,
        'SDR': 8.992832,
        'ISR': 13.49194,
        'SAR': 13.13210,
    },
}


def available_model():
    """
    List available Noise Reduction deep learning models.

    Logs a reminder that SDR, ISR and SAR were calculated on the voice
    sample only (higher is better), then returns whatever
    `describe_availability` builds from the module-level model registry.
    """
    logger.info('Only calculate SDR, ISR, SAR on voice sample. Higher is better.')
    summary = describe_availability(_availability)
    return summary
@check_type
def deep_model(model: str = 'resnet-unet', quantized: bool = False, **kwargs):
    """
    Load Noise Reduction deep learning model.

    Parameters
    ----------
    model : str, optional (default='resnet-unet')
        Check available models at `malaya_speech.noise_reduction.available_model()`.
        The name is matched case-insensitively.
    quantized : bool, optional (default=False)
        if True, will load 8-bit quantized model.
        A quantized model is not necessarily faster, it totally depends on the machine.
    **kwargs
        Forwarded unchanged to `unet.load_stft`.

    Returns
    -------
    result : malaya_speech.model.tf.UNET_STFT class

    Raises
    ------
    ValueError
        If `model` is not one of the supported model names.
    """
    name = model.lower()

    if name in _availability:
        return unet.load_stft(
            model=name,
            module='noise-reduction',
            instruments=['voice', 'noise'],
            quantized=quantized,
            **kwargs
        )

    raise ValueError(
        'model not supported, please check supported models from `malaya_speech.noise_reduction.available_model()`.'
    )
def _load_audio_effects_chain():
    """Import and return `pysndfx.AudioEffectsChain`, raising an install hint if it is missing."""
    try:
        from pysndfx import AudioEffectsChain
    except ImportError as e:
        raise ModuleNotFoundError(
            'pysndfx not installed. Please install it by `pip install pysndfx` and try again.'
        ) from e
    return AudioEffectsChain


def _load_mfcc_dependencies():
    """Import and return `(AudioEffectsChain, python_speech_features)`, raising an install hint if either is missing."""
    try:
        from pysndfx import AudioEffectsChain
        import python_speech_features
    except ImportError as e:
        raise ModuleNotFoundError(
            'pysndfx, python_speech_features not installed. Please install it by `pip install pysndfx python_speech_features` and try again.'
        ) from e
    return AudioEffectsChain, python_speech_features


def _strongest_frame_min_hz(y, python_speech_features):
    """Return the minimum of `mel2hz` applied to the liftered log-filterbank frame with the largest sum of squares."""
    features = python_speech_features.base.logfbank(y)
    features = np.asarray(python_speech_features.base.lifter(features))
    # One energy value per frame (row); argmax picks the first strongest
    # frame, matching `list.index(max(...))`.
    frame_energy = np.sum(np.square(features), axis=1)
    strongest_frame = int(np.argmax(frame_energy))
    hz = python_speech_features.base.mel2hz(features[strongest_frame])
    return min(hz)


# https://github.com/dodiku/noise_reduction/blob/master/noise.py
def reduce_noise_power(y, sr=16000):
    """
    Attenuate the bands below and above thresholds derived from the median
    spectral centroid.

    The low shelf (-30 dB) is placed at 0.1x and the high shelf (-12 dB) at
    1.5x the rounded median spectral centroid of the signal.

    Parameters
    ----------
    y : audio signal, converted with `int_to_float` before processing.
    sr : int, optional (default=16000)
        Sample rate passed to `librosa.feature.spectral_centroid`.

    Returns
    -------
    result : output of the pysndfx effects chain applied to `y`.

    Raises
    ------
    ModuleNotFoundError
        If pysndfx is not installed.
    """
    AudioEffectsChain = _load_audio_effects_chain()

    y = int_to_float(y)
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)

    median_centroid = round(np.median(cent))
    threshold_h = median_centroid * 1.5
    threshold_l = median_centroid * 0.1

    less_noise = (
        AudioEffectsChain()
        .lowshelf(gain=-30.0, frequency=threshold_l, slope=0.8)
        .highshelf(gain=-12.0, frequency=threshold_h, slope=0.5)
    )
    y_clean = less_noise(y)

    return y_clean


# https://github.com/dodiku/noise_reduction/blob/master/noise.py
def reduce_noise_centroid_s(y, sr=16000):
    """
    Attenuate the bands outside the observed spectral-centroid range.

    Applies a -12 dB low shelf at the minimum centroid, a -12 dB high shelf
    at the maximum centroid, then a limiter with 6 dB gain.

    Parameters
    ----------
    y : audio signal, converted with `int_to_float` before processing.
    sr : int, optional (default=16000)
        Sample rate passed to `librosa.feature.spectral_centroid`.

    Returns
    -------
    result : output of the pysndfx effects chain applied to `y`.

    Raises
    ------
    ModuleNotFoundError
        If pysndfx is not installed.
    """
    AudioEffectsChain = _load_audio_effects_chain()

    y = int_to_float(y)
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)

    threshold_h = np.max(cent)
    threshold_l = np.min(cent)

    less_noise = (
        AudioEffectsChain()
        .lowshelf(gain=-12.0, frequency=threshold_l, slope=0.5)
        .highshelf(gain=-12.0, frequency=threshold_h, slope=0.5)
        .limiter(gain=6.0)
    )
    y_cleaned = less_noise(y)

    return y_cleaned


# https://github.com/dodiku/noise_reduction/blob/master/noise.py
def reduce_noise_centroid_mb(y, sr=16000):
    """
    Attenuate the bands outside the spectral-centroid range, then boost with
    a low shelf.

    First applies -30 dB low/high shelves at the minimum/maximum spectral
    centroid and a limiter with 10 dB gain, then applies a +16 dB low shelf
    to the cleaned signal.

    Parameters
    ----------
    y : audio signal, converted with `int_to_float` before processing.
    sr : int, optional (default=16000)
        Sample rate passed to `librosa.feature.spectral_centroid`.

    Returns
    -------
    result : output of the second pysndfx effects chain.

    Raises
    ------
    ModuleNotFoundError
        If pysndfx is not installed.
    """
    # Imported locally: the module does not import `math` at file level,
    # which previously made this function fail with NameError.
    import math

    AudioEffectsChain = _load_audio_effects_chain()

    y = int_to_float(y)
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)

    threshold_h = np.max(cent)
    threshold_l = np.min(cent)

    less_noise = (
        AudioEffectsChain()
        .lowshelf(gain=-30.0, frequency=threshold_l, slope=0.5)
        .highshelf(gain=-30.0, frequency=threshold_h, slope=0.5)
        .limiter(gain=10.0)
    )
    y_cleaned = less_noise(y)

    cent_cleaned = librosa.feature.spectral_centroid(y=y_cleaned, sr=sr)
    _, n_frames = cent_cleaned.shape
    # NOTE(review): the shelf frequency is derived from the second dimension
    # of the centroid array (not from a frequency value), exactly as in the
    # upstream implementation - confirm this is intended.
    boost_h = math.floor(n_frames / 3 * 2)

    boost_bass = AudioEffectsChain().lowshelf(
        gain=16.0, frequency=boost_h, slope=0.5
    )
    y_clean_boosted = boost_bass(y_cleaned)

    return y_clean_boosted


# https://github.com/dodiku/noise_reduction/blob/master/noise.py
def reduce_noise_mfcc_down(y, sr=16000):
    """
    Apply a -12 dB high shelf, positioned from the strongest log-filterbank
    frame, followed by a limiter with 8 dB gain.

    The shelf frequency is `min_hz * (-1) * 1.2`, where `min_hz` is the
    minimum of `mel2hz` applied to the liftered log-filterbank frame with the
    largest sum of squares.

    Parameters
    ----------
    y : audio signal, converted with `int_to_float` before processing.
    sr : int, optional (default=16000)
        Accepted for signature parity with the other reducers; it is not
        used here (`logfbank` is called with its default settings).

    Returns
    -------
    result : output of the pysndfx effects chain applied to `y`.

    Raises
    ------
    ModuleNotFoundError
        If pysndfx or python_speech_features is not installed.
    """
    AudioEffectsChain, python_speech_features = _load_mfcc_dependencies()

    y = int_to_float(y)
    min_hz = _strongest_frame_min_hz(y, python_speech_features)

    speech_booster = (
        AudioEffectsChain()
        .highshelf(frequency=min_hz * (-1) * 1.2, gain=-12.0, slope=0.6)
        .limiter(gain=8.0)
    )
    y_speach_boosted = speech_booster(y)

    return y_speach_boosted


# https://github.com/dodiku/noise_reduction/blob/master/noise.py
def reduce_noise_mfcc_up(y, sr=16000):
    """
    Apply a +12 dB low shelf positioned from the strongest log-filterbank
    frame.

    The shelf frequency is `min_hz * (-1)`, where `min_hz` is the minimum of
    `mel2hz` applied to the liftered log-filterbank frame with the largest
    sum of squares.

    Parameters
    ----------
    y : audio signal, converted with `int_to_float` before processing.
    sr : int, optional (default=16000)
        Accepted for signature parity with the other reducers; it is not
        used here (`logfbank` is called with its default settings).

    Returns
    -------
    result : output of the pysndfx effects chain applied to `y`.

    Raises
    ------
    ModuleNotFoundError
        If pysndfx or python_speech_features is not installed.
    """
    AudioEffectsChain, python_speech_features = _load_mfcc_dependencies()

    y = int_to_float(y)
    min_hz = _strongest_frame_min_hz(y, python_speech_features)

    speech_booster = AudioEffectsChain().lowshelf(
        frequency=min_hz * (-1), gain=12.0, slope=0.5
    )
    y_speach_boosted = speech_booster(y)

    return y_speach_boosted


def trim_silence(
    y,
    top_db=20,
    frame_length=2,
    hop_length=500,
    return_trimmed_length=False,
    sr=22050,
):
    """
    Trim leading and trailing silence using `librosa.effects.trim`.

    Parameters
    ----------
    y : audio signal, converted with `int_to_float` before processing.
    top_db : int, optional (default=20)
        Passed to `librosa.effects.trim`.
    frame_length : int, optional (default=2)
        Passed to `librosa.effects.trim`.
    hop_length : int, optional (default=500)
        Passed to `librosa.effects.trim`.
    return_trimmed_length : bool, optional (default=False)
        If True, also return the duration removed by trimming.
    sr : int, optional (default=22050)
        Sample rate used only to convert the trimmed amount to a duration.
        The default matches the `librosa.get_duration` default that was
        implicitly used before, so existing results are unchanged; pass the
        real sample rate of `y` to get the duration in true seconds.

    Returns
    -------
    result : the trimmed signal, or `(trimmed signal, trimmed duration)` when
        `return_trimmed_length` is True.
    """
    y = int_to_float(y)
    y_trimmed, _ = librosa.effects.trim(
        y, top_db=top_db, frame_length=frame_length, hop_length=hop_length
    )

    if not return_trimmed_length:
        return y_trimmed

    # Keyword arguments are required by recent librosa releases, where
    # `get_duration` no longer accepts the signal positionally.
    trimmed_length = librosa.get_duration(y=y, sr=sr) - librosa.get_duration(
        y=y_trimmed, sr=sr
    )
    return y_trimmed, trimmed_length