# Source code for malaya_speech.utils.split

from malaya_speech.model.frame import Frame
from malaya_speech.utils.group import (
    combine_frames,
    group_frames,
    group_frames_threshold,
)
import numpy as np


def split_vad(frames, n: int = 3, negative_threshold: float = 0.1):
    """
    Split a sample into multiple samples, cutting after every `n` negative VAD groups.

    The frames are first grouped with `group_frames` and then regrouped with
    `group_frames_threshold`. The grouped items are accumulated in order, and
    a subsample is emitted each time `n` negative (falsy label) groups have
    been collected. The group that closes a subsample is also used as the
    first group of the next one, so consecutive subsamples share that
    boundary group.

    Parameters
    ----------
    frames: List[Tuple[Frame, label]]
    n: int, optional (default=3)
        Number of negative VAD groups to collect in one subsample before cutting.
    negative_threshold: float, optional (default = 0.1)
        Forwarded to `group_frames_threshold` as `threshold_to_stop`.
        If `negative_threshold` is 0.1, it means the length of negative samples
        must be at least 0.1 second.

    Returns
    -------
    result : List[Frame]
        One combined frame (built with `combine_frames`) per subsample.
    """
    grouped = group_frames(frames)
    grouped = group_frames_threshold(
        grouped, threshold_to_stop=negative_threshold
    )
    results, temp, not_activities = [], [], 0
    for g in grouped:
        frame, label = g[0], g[1]
        # A falsy label marks a negative (no voice activity) group.
        if not label:
            not_activities += 1
        temp.append(frame)
        if not_activities >= n:
            results.append(combine_frames(temp))
            # Carry the closing group over so the next subsample starts with it.
            temp = [frame]
            not_activities = 0

    # Flush whatever is left after the last cut (this may be only the
    # carried-over boundary group).
    if temp:
        results.append(combine_frames(temp))
    return results


def split_vad_duration(
    frames,
    max_duration: float = 5.0,
    negative_threshold: float = 0.1,
):
    """
    Split a sample into multiple samples based on the accumulated duration of the grouped frames.

    The frames are first grouped with `group_frames` and then regrouped with
    `group_frames_threshold`. The grouped items are accumulated in order, and
    a sample is emitted as soon as the summed `duration` of the accumulated
    groups reaches `max_duration`. The check happens after a group has been
    appended, so an emitted sample can be longer than `max_duration`.

    Parameters
    ----------
    frames: List[Tuple[Frame, label]]
    max_duration: float, optional (default = 5.0)
        Accumulated duration at which the current sample is closed.
    negative_threshold: float, optional (default = 0.1)
        Forwarded to `group_frames_threshold` as `threshold_to_stop`.
        If `negative_threshold` is 0.1, it means the length of negative samples
        must be at least 0.1 second.

    Returns
    -------
    result : List[Frame]
        One combined frame (built with `combine_frames`) per sample.
    """
    grouped = group_frames(frames)
    grouped = group_frames_threshold(
        grouped, threshold_to_stop=negative_threshold
    )
    results, temp, lengths = [], [], 0
    for g in grouped:
        frame = g[0]
        lengths += frame.duration
        temp.append(frame)
        if lengths >= max_duration:
            results.append(combine_frames(temp))
            # Start the next sample from scratch; nothing is carried over.
            temp = []
            lengths = 0

    # Flush the trailing groups that never reached `max_duration`.
    if temp:
        results.append(combine_frames(temp))
    return results