Source code for malaya_speech.utils.generator

from malaya_speech.model.frame import Frame
from herpetologist import check_type
from typing import List


@check_type
def frames(
    audio,
    frame_duration_ms: int = 30,
    sample_rate: int = 16000,
    append_ending_trail: bool = True,
):
    """
    Generates audio frames from audio.
    Takes the desired frame duration in milliseconds, the audio, and the sample rate.

    Parameters
    ----------
    audio: np.array
    frame_duration_ms: int, optional (default=30)
    sample_rate: int, optional (default=16000)
    append_ending_trail: bool, optional (default=True)
        if True, will append last trail and this last trail might not same length as `frame_duration_ms`.

    Returns
    -------
    result: List[malaya_speech.model.frame.Frame]

    Raises
    ------
    ValueError
        if `frame_duration_ms` and `sample_rate` give a frame of less than one sample.
    """
    # Number of samples per frame.
    n = int(sample_rate * (frame_duration_ms / 1000.0))
    # Duration of one full frame, in the time unit implied by `sample_rate`.
    duration = float(n) / sample_rate

    # A non-positive frame size would keep `offset + n <= len(audio)` true
    # forever without advancing `offset`, so fail loudly instead of hanging.
    if n <= 0:
        raise ValueError(
            'frame_duration_ms and sample_rate must produce at least one '
            'sample per frame, got %d' % n
        )

    total = len(audio)
    offset = 0
    timestamp = 0.0
    results = []

    # Emit every complete, non-overlapping frame.
    while offset + n <= total:
        results.append(Frame(audio[offset: offset + n], timestamp, duration))
        timestamp += duration
        offset += n

    # Whatever is left is shorter than a full frame; its duration is the
    # remaining time after the last emitted timestamp.
    if append_ending_trail and offset < total:
        results.append(
            Frame(
                audio[offset:], timestamp, total / sample_rate - timestamp
            )
        )
    return results
def mel_sampling(
    audio, frame_duration_ms=1200, overlap_ms=200, sample_rate=16000
):
    """
    Generates audio frames from audio. This is for melspectrogram generative model.
    Takes the desired frame duration in milliseconds, the audio, and the sample rate.

    Consecutive frames share `overlap_ms` worth of samples. Any samples left
    after the last full frame are returned as a final, shorter frame.

    Parameters
    ----------
    audio: np.array
    frame_duration_ms: int, optional (default=1200)
    overlap_ms: int, optional (default=200)
    sample_rate: int, optional (default=16000)

    Returns
    -------
    result: List[np.array]

    Raises
    ------
    ValueError
        if `overlap_ms` is not smaller than `frame_duration_ms` (in samples)
        while at least one full frame fits in `audio`.
    """
    n = int(sample_rate * (frame_duration_ms / 1000.0))
    n_overlap = int(sample_rate * (overlap_ms / 1000.0))
    # Number of samples the window advances between consecutive frames.
    step = n - n_overlap
    total = len(audio)

    # With a non-positive step the window never moves forward, so once the
    # loop below is entered it could never terminate. Reject only that case
    # so every input that used to return still returns the same result.
    if step <= 0 and n <= total:
        raise ValueError(
            'overlap_ms must be smaller than frame_duration_ms, got '
            'frame of %d samples and overlap of %d samples' % (n, n_overlap)
        )

    offset = 0
    results = []
    while offset + n <= total:
        results.append(audio[offset: offset + n])
        offset += step

    # Trailing remainder that is shorter than a full frame.
    if offset < total:
        results.append(audio[offset:])
    return results
def combine_mel_sampling(
    samples, overlap_ms=200, sample_rate=16000, padded_ms=50
):
    """
    To combine results from `mel_sampling`, output from melspectrogram generative model.

    Every sample has `padded_ms` trimmed from its tail. Every sample except
    the first additionally has `overlap_ms - padded_ms` trimmed from its head.
    The trimmed samples are returned as a list; they are not concatenated.

    Parameters
    ----------
    samples: List[np.array]
    overlap_ms: int, optional (default=200)
    sample_rate: int, optional (default=16000)
    padded_ms: int, optional (default=50)
        length trimmed from the end of each sample. 0 disables the tail trim.

    Returns
    -------
    result: List[np.array]
    """
    n_overlap = int(sample_rate * (overlap_ms / 1000.0))
    n_padded = int(sample_rate * (padded_ms / 1000.0))

    # `sample[:-0]` is `sample[:0]` (empty), so use an open-ended slice when
    # there is nothing to trim from the tail.
    tail_stop = -n_padded if n_padded else None
    # NOTE(review): if padded_ms > overlap_ms this start index is negative and
    # slices from the end of the sample - confirm whether that is intended.
    head_start = n_overlap - n_padded

    results = []
    for no, sample in enumerate(samples):
        if no:
            sample = sample[head_start:]
        results.append(sample[:tail_stop])
    return results