Source code for malaya_speech.extra.rttm

from herpetologist import check_type
from malaya_speech.model.annotation import Annotation
from malaya_speech.model.frame import Segment


[docs]@check_type def load(file: str): """ Load RTTM file. Parameters ---------- file: str Returns ------- result : Dict[str, malaya_speech.model.annotation.Annotation] """ try: import pandas as pd except BaseException: raise ValueError( 'pandas not installed. Please install it by `pip install pandas` and try again.' ) names = [ 'NA1', 'uri', 'NA2', 'start', 'duration', 'NA3', 'NA4', 'speaker', 'NA5', 'NA6', ] dtype = {'uri': str, 'start': float, 'duration': float, 'speaker': str} data = pd.read_csv( file, names=names, dtype=dtype, delim_whitespace=True, keep_default_na=False, ) annotations = {} for uri, turns in data.groupby('uri'): annotation = Annotation(uri) for i, turn in turns.iterrows(): segment = Segment(turn['start'], turn['start'] + turn['duration']) annotation[segment, i] = turn['speaker'] annotations[uri] = annotation return annotations