# Source code for malaya_speech.utils.combine
import numpy as np
from typing import List
from malaya_speech.utils.split import group_frames, group_frames_threshold
def without_silent(
    frames, threshold_to_stop: float = 0.1, silent_trail: int = 500
):
    """
    Group multiple frames based on label and threshold to stop,
    then concatenate them into one audio array, trimming silent
    segments down to a short head and tail.

    Parameters
    ----------
    frames: List[Tuple[Frame, label]]
        Output from VAD.
    threshold_to_stop: float, optional (default = 0.1)
        If `threshold_to_stop` is 0.1, means that, length same label samples must at least 0.1 second.
    silent_trail: int, optional (default = 500)
        if detected a silent, will append first N frames and last N frames.

    Returns
    -------
    result : np.array
    """
    grouped = group_frames(frames)
    grouped = group_frames_threshold(grouped, threshold_to_stop)
    r = []
    for g in grouped:
        arr = g[0].array
        if not g[1]:
            # Silent segment: keep only a short head and tail.
            # Guard: if the segment is shorter than 2 * silent_trail the
            # head and tail slices overlap and samples would be duplicated,
            # so keep the whole segment unchanged in that case.
            if len(arr) > 2 * silent_trail:
                arr = np.concatenate(
                    [arr[:silent_trail], arr[-silent_trail:]]
                )
        r.append(arr)
    if not r:
        # No groups survived thresholding; np.concatenate([]) would raise.
        return np.array([])
    return np.concatenate(r)