Source code for malaya_speech.force_alignment

from malaya_speech.supervised import stt
from malaya_speech.stt import _ctc_availability, _huggingface_availability
from malaya_speech.utils import describe_availability
from herpetologist import check_type

# Encoder-Transducer models selectable through this module, keyed by model
# name. Every entry reports the same full and quantized sizes; only the list
# of supported languages differs between entries.
_availability = {
    name: {
        'Size (MB)': 120,
        'Quantized Size (MB)': 32.3,
        'Language': languages,
    }
    for name, languages in (
        ('conformer-transducer', ['malay']),
        ('conformer-transducer-mixed', ['malay', 'singlish']),
        ('conformer-transducer-singlish', ['singlish']),
    )
}


def available_transducer():
    """
    List available Encoder-Transducer Aligner models.

    Returns
    -------
    result : output of `describe_availability` applied to the module-level
        `_availability` table.
    """
    transducer_table = describe_availability(_availability)
    return transducer_table
def available_ctc():
    """
    List available Encoder-CTC Aligner models.

    Returns
    -------
    result : output of `describe_availability` applied to `_ctc_availability`,
        the table imported from `malaya_speech.stt`.
    """
    ctc_table = describe_availability(_ctc_availability)
    return ctc_table
def available_huggingface():
    """
    List available HuggingFace Malaya-Speech Aligner models.

    Returns
    -------
    result : output of `describe_availability` applied to
        `_huggingface_availability`, the table imported from
        `malaya_speech.stt`.
    """
    huggingface_table = describe_availability(_huggingface_availability)
    return huggingface_table
@check_type
def deep_transducer(
    model: str = 'conformer-transducer', quantized: bool = False, **kwargs
):
    """
    Load Encoder-Transducer Aligner model.

    Parameters
    ----------
    model : str, optional (default='conformer-transducer')
        Check available models at `malaya_speech.force_alignment.available_transducer()`.
        The name is lower-cased before it is looked up.
    quantized : bool, optional (default=False)
        if True, will load 8-bit quantized model.
        Quantized model not necessary faster, totally depends on the machine.
    **kwargs
        Forwarded unchanged to `stt.transducer_load`.

    Returns
    -------
    result : malaya_speech.model.transducer.TransducerAligner class

    Raises
    ------
    ValueError
        If the lower-cased `model` is not a key of `_availability`.
    """
    model = model.lower()
    if model not in _availability:
        # The message names `available_transducer()`, the listing function
        # this module defines for the `_availability` table.
        raise ValueError(
            'model not supported, please check supported models from `malaya_speech.force_alignment.available_transducer()`.'
        )

    return stt.transducer_load(
        model=model,
        module='force-alignment',
        languages=_availability[model]['Language'],
        quantized=quantized,
        stt=False,
        **kwargs
    )
@check_type
def deep_ctc(
    model: str = 'hubert-conformer', quantized: bool = False, **kwargs
):
    """
    Load Encoder-CTC Aligner model.

    Parameters
    ----------
    model : str, optional (default='hubert-conformer')
        Check available models at `malaya_speech.stt.available_ctc()`.
        The name is lower-cased before it is looked up.
    quantized : bool, optional (default=False)
        if True, will load 8-bit quantized model.
        Quantized model not necessary faster, totally depends on the machine.
    **kwargs
        Forwarded unchanged to `stt.wav2vec2_ctc_load`.

    Returns
    -------
    result : malaya_speech.model.wav2vec.Wav2Vec2_Aligner class

    Raises
    ------
    ValueError
        If the lower-cased `model` is not a key of `_ctc_availability`.
    """
    model = model.lower()
    if model in _ctc_availability:
        # The availability entry for the model is handed to the loader as
        # its `mode` argument.
        mode = _ctc_availability[model]
        return stt.wav2vec2_ctc_load(
            model=model,
            module='speech-to-text-ctc-v2',
            quantized=quantized,
            mode=mode,
            stt=False,
            **kwargs
        )

    raise ValueError(
        'model not supported, please check supported models from `malaya_speech.stt.available_ctc()`.'
    )
@check_type
def huggingface(model: str = 'mesolitica/wav2vec2-xls-r-300m-mixed'):
    """
    Load Finetuned models from HuggingFace.

    The model is loaded through `stt.huggingface_load` with `stt=False`.

    Parameters
    ----------
    model : str, optional (default='mesolitica/wav2vec2-xls-r-300m-mixed')
        Check available models at `malaya_speech.stt.available_huggingface()`.
        The name is lower-cased before it is looked up.

    Returns
    -------
    result : malaya_speech.model.huggingface.CTC class

    Raises
    ------
    ValueError
        If the lower-cased `model` is not a key of
        `_huggingface_availability`.
    """
    repo_id = model.lower()
    if repo_id not in _huggingface_availability:
        raise ValueError(
            'model not supported, please check supported models from `malaya_speech.stt.available_huggingface()`.'
        )

    loaded = stt.huggingface_load(model=repo_id, stt=False)
    return loaded