Load RTTM#

[1]:
# !wget https://raw.githubusercontent.com/pyannote/pyannote-audio/develop/tutorials/data_preparation/AMI/MixHeadset.test.rttm

This tutorial is available as an IPython notebook at malaya-speech/example/load-rttm.

[2]:
%matplotlib inline

import malaya_speech

Load .rttm#

def load(file: str):
    """
    Load RTTM file.

    Parameters
    ----------
    file: str
        Path to the RTTM file to read.

    Returns
    -------
    result : Dict[str, malaya_speech.model.annotation.Annotation]
        One Annotation per key. In this tutorial the keys are recording
        names such as ``'ES2004a.Mix-Headset'``.
    """
[3]:
# NOTE: this path is presumably relative to the repository root (the notebook lives in
# example/load-rttm). The commented-out wget in the first cell would save the file into
# the current working directory instead, so adjust the path if you fetched it that way.
annotations = malaya_speech.extra.rttm.load('example/load-rttm/MixHeadset.test.rttm')

Get available samples#

[4]:
# `load` returns a dict; each key names one sample that can be looked up below.
annotations.keys()
[4]:
dict_keys(['EN2002a.Mix-Headset', 'EN2002b.Mix-Headset', 'EN2002c.Mix-Headset', 'EN2002d.Mix-Headset', 'ES2004a.Mix-Headset', 'ES2004b.Mix-Headset', 'ES2004c.Mix-Headset', 'ES2004d.Mix-Headset', 'ES2014a.Mix-Headset', 'ES2014b.Mix-Headset', 'ES2014c.Mix-Headset', 'ES2014d.Mix-Headset', 'IS1009a.Mix-Headset', 'IS1009b.Mix-Headset', 'IS1009c.Mix-Headset', 'IS1009d.Mix-Headset', 'TS3003a.Mix-Headset', 'TS3003b.Mix-Headset', 'TS3003c.Mix-Headset', 'TS3003d.Mix-Headset', 'TS3007a.Mix-Headset', 'TS3007b.Mix-Headset', 'TS3007c.Mix-Headset', 'TS3007d.Mix-Headset'])

Get a sample#

[5]:
# Pick a single Annotation by its key; the bare expression on the last line displays its repr.
sample = annotations['ES2004a.Mix-Headset']
sample
[5]:
<malaya_speech.model.annotation.Annotation at 0x149711810>

Get minimum frame#

[6]:
# Lower time bound of the annotation; `crop` uses it as the default for `from_t`.
sample.min
[6]:
0.0

Get maximum frame#

[7]:
# Upper time bound of the annotation; `crop` uses it as the default for `to_t`.
# NOTE(review): for this sample the value equals the end of the last listed track, while an
# earlier-listed track ends slightly later (about 1049.21) -- confirm how `max` is computed.
sample.max
[7]:
1048.672

Get tracks#

[8]:
# Each item yielded by itertracks() unpacks into (segment, track, label):
# the time span, the track identifier, and the speaker label.
for segment, track, label in sample.itertracks():
    print(segment, track, label)
<Segment(0, 1.575)> 5531 FEE013
<Segment(0, 1.792)> 5530 MEO015
<Segment(10.944, 14.737)> 5532 FEE013
<Segment(17.619, 18.391)> 5533 MEO015
<Segment(18.722, 20.32)> 5534 FEE013
<Segment(22.352, 23.872)> 5535 FEE013
<Segment(24.992, 27.024)> 5536 FEE016
<Segment(25.163, 25.98)> 5537 FEE013
<Segment(28.56, 30.59)> 5538 FEE016
<Segment(29.098, 32.297)> 5539 FEE013
<Segment(31.28, 33.48)> 5540 FEE016
<Segment(34.112, 34.764)> 5541 FEE013
<Segment(42.017, 43.386)> 5542 FEE016
<Segment(44.48, 47.136)> 5543 FEE013
<Segment(49.613, 53.228)> 5544 FEE016
<Segment(63.063, 66.416)> 5545 MEO015
<Segment(71.087, 72.624)> 5546 MEO015
<Segment(80.272, 94.743)> 5547 FEE013
<Segment(80.443, 81.985)> 5548 MEO015
<Segment(83.2, 84.332)> 5549 MEO015
<Segment(95.664, 115.952)> 5550 FEE013
<Segment(117.6, 161.04)> 5551 FEE013
<Segment(162.288, 167.904)> 5552 FEE013
<Segment(168.768, 169.04)> 5553 MEO015
<Segment(170.288, 186.128)> 5554 FEE013
<Segment(180.246, 181.722)> 5555 MEO015
<Segment(180.288, 180.8)> 5556 FEE016
<Segment(187.957, 188.461)> 5557 MEO015
<Segment(188.577, 192.976)> 5558 FEE013
<Segment(211.44, 212.016)> 5559 FEE013
<Segment(243.2, 244.432)> 5560 FEE013
<Segment(246.912, 250.64)> 5561 FEE013
<Segment(252.157, 252.833)> 5562 MEO015
<Segment(254.512, 257.44)> 5563 FEE013
<Segment(255.725, 256.426)> 5564 MEO015
<Segment(257.875, 258.688)> 5565 MEO015
<Segment(259.872, 262.388)> 5566 FEE013
<Segment(266.16, 267.072)> 5567 FEE013
<Segment(268.688, 269.216)> 5568 FEE013
<Segment(272.208, 275.84)> 5569 FEE013
<Segment(274.992, 275.6)> 5570 MEO015
<Segment(275.76, 276.624)> 5571 FEE016
<Segment(277.312, 279.795)> 5572 MEO015
<Segment(284.816, 287.36)> 5573 MEO015
<Segment(287.482, 290.368)> 5574 FEE013
<Segment(289.072, 296.176)> 5575 MEO015
<Segment(306.208, 307.248)> 5576 FEE016
<Segment(307.225, 308.062)> 5577 FEE013
<Segment(308.144, 310.262)> 5578 MEO015
<Segment(312.984, 322.112)> 5579 MEO015
<Segment(315.593, 316.535)> 5580 FEE016
<Segment(315.696, 317.696)> 5581 MEE014
<Segment(317.872, 318.896)> 5582 FEE016
<Segment(323.904, 352.368)> 5583 MEO015
<Segment(344.616, 345.49)> 5584 FEE016
<Segment(349.968, 351.33)> 5585 FEE013
<Segment(353.461, 355.754)> 5586 MEE014
<Segment(353.873, 354.929)> 5587 MEO015
<Segment(356.083, 357.213)> 5588 MEO015
<Segment(356.257, 362.224)> 5589 FEE016
<Segment(356.773, 357.433)> 5590 MEE014
<Segment(358.816, 360.912)> 5591 FEE013
<Segment(359.088, 367.697)> 5592 MEE014
<Segment(364.336, 368.077)> 5593 MEO015
<Segment(364.608, 366.423)> 5594 FEE013
<Segment(371.376, 375.525)> 5595 MEE014
<Segment(373.52, 374.626)> 5596 FEE016
<Segment(373.576, 376.864)> 5597 FEE013
<Segment(374.885, 375.843)> 5598 MEO015
<Segment(376.784, 379.584)> 5599 FEE016
<Segment(377.914, 379.452)> 5600 FEE013
<Segment(378.48, 384.768)> 5601 MEE014
<Segment(382.048, 383.223)> 5602 FEE013
<Segment(390.412, 406.304)> 5603 MEE014
<Segment(390.818, 391.776)> 5604 FEE016
<Segment(393.968, 395.001)> 5605 FEE013
<Segment(396.063, 400.142)> 5606 FEE013
<Segment(397.008, 398.565)> 5607 FEE016
<Segment(397.65, 399.178)> 5608 MEO015
<Segment(403.097, 403.707)> 5609 FEE016
<Segment(405.28, 407.376)> 5610 FEE016
<Segment(405.306, 407.104)> 5611 FEE013
<Segment(416.688, 417.376)> 5612 FEE013
<Segment(420.08, 420.992)> 5613 FEE013
<Segment(421.182, 422.34)> 5614 FEE016
<Segment(421.184, 426.592)> 5615 MEE014
<Segment(428.096, 430.32)> 5616 MEE014
<Segment(432.464, 434.447)> 5617 MEE014
<Segment(438.344, 442.4)> 5618 MEE014
<Segment(439.803, 441.808)> 5619 FEE013
<Segment(440.149, 442.369)> 5620 FEE016
<Segment(443.136, 450.928)> 5621 FEE016
<Segment(445.082, 446.682)> 5622 MEE014
<Segment(450.144, 451.744)> 5623 MEE014
<Segment(453.968, 457.059)> 5624 FEE016
<Segment(456.505, 456.992)> 5625 MEE014
<Segment(456.912, 458.633)> 5626 FEE013
<Segment(457.872, 460.64)> 5627 FEE016
<Segment(461.072, 462.192)> 5628 FEE013
<Segment(465.28, 466.028)> 5629 MEE014
<Segment(465.36, 470.88)> 5630 FEE016
<Segment(467.92, 468.768)> 5631 MEE014
<Segment(474.384, 479.472)> 5632 FEE016
<Segment(476.672, 477.792)> 5633 MEE014
<Segment(481.018, 484.848)> 5634 FEE016
<Segment(481.248, 481.719)> 5635 FEE013
<Segment(484.274, 484.884)> 5636 MEE014
<Segment(486.928, 487.747)> 5637 FEE016
<Segment(487.408, 487.92)> 5638 MEE014
<Segment(488.752, 491.648)> 5639 FEE016
<Segment(488.812, 491.385)> 5640 MEE014
<Segment(489.66, 491.405)> 5641 FEE013
<Segment(492.88, 518.752)> 5642 FEE016
<Segment(497.126, 500.592)> 5643 MEE014
<Segment(500.94, 505.375)> 5644 FEE013
<Segment(501.616, 502.854)> 5645 MEO015
<Segment(501.744, 503.474)> 5646 MEE014
<Segment(519.744, 521.013)> 5647 MEE014
<Segment(520.256, 536.8)> 5648 FEE016
<Segment(524.131, 526)> 5649 FEE013
<Segment(524.288, 529.024)> 5650 MEE014
<Segment(527.237, 529.002)> 5651 FEE013
<Segment(541.12, 549.664)> 5652 FEE016
<Segment(545.078, 545.68)> 5653 FEE013
<Segment(547.101, 547.952)> 5654 FEE013
<Segment(553.476, 556.832)> 5655 FEE016
<Segment(554.412, 555.402)> 5656 FEE013
<Segment(558.112, 559.777)> 5657 FEE013
<Segment(558.528, 559.51)> 5658 FEE016
<Segment(559.504, 559.872)> 5659 MEE014
<Segment(561.055, 561.632)> 5660 FEE013
<Segment(563.104, 575.536)> 5661 FEE013
<Segment(574.6, 577.751)> 5662 MEE014
<Segment(576.323, 577.554)> 5663 FEE016
<Segment(576.454, 581.547)> 5664 MEO015
<Segment(580.784, 587.648)> 5665 MEE014
<Segment(582.538, 583.04)> 5666 MEO015
<Segment(583.728, 588.016)> 5667 FEE013
<Segment(584.698, 586.17)> 5668 FEE016
<Segment(588.24, 592.368)> 5669 FEE016
<Segment(590.677, 591.37)> 5670 MEE014
<Segment(590.755, 595.995)> 5671 FEE013
<Segment(593.792, 594.517)> 5672 FEE016
<Segment(594.038, 608.506)> 5673 MEE014
<Segment(603.488, 604.316)> 5674 FEE016
<Segment(603.61, 604.256)> 5675 FEE013
<Segment(605.824, 606.223)> 5676 FEE016
<Segment(606.492, 607.072)> 5677 FEE013
<Segment(608.016, 614.106)> 5678 FEE016
<Segment(608.062, 609.291)> 5679 FEE013
<Segment(613.489, 617.856)> 5680 FEE013
<Segment(614.016, 615.056)> 5681 MEE014
<Segment(617.335, 619.75)> 5682 MEO015
<Segment(619.072, 629.44)> 5683 FEE013
<Segment(621.2, 622.416)> 5684 MEE014
<Segment(622, 622.416)> 5685 FEE016
<Segment(623.824, 624.288)> 5686 FEE016
<Segment(630.352, 631.248)> 5687 FEE016
<Segment(630.896, 637.195)> 5688 FEE013
<Segment(635.408, 635.936)> 5689 FEE016
<Segment(636.16, 637.104)> 5690 MEO015
<Segment(638, 651.52)> 5691 FEE016
<Segment(645.152, 645.632)> 5692 FEE013
<Segment(648.883, 650.357)> 5693 FEE013
<Segment(653.072, 655.552)> 5694 FEE016
<Segment(653.549, 670.376)> 5695 FEE013
<Segment(661.776, 662.272)> 5696 FEE016
<Segment(666.576, 666.976)> 5697 FEE016
<Segment(675.184, 704.832)> 5698 FEE013
<Segment(687.149, 687.873)> 5699 MEO015
<Segment(687.803, 688.544)> 5700 MEE014
<Segment(690.978, 692.231)> 5701 MEE014
<Segment(690.982, 692.467)> 5702 MEO015
<Segment(691.104, 692.011)> 5703 FEE016
<Segment(700.641, 701.952)> 5704 MEE014
<Segment(700.979, 701.878)> 5705 FEE016
<Segment(704.937, 707.28)> 5706 MEE014
<Segment(705.781, 708.976)> 5707 FEE013
<Segment(707.968, 708.672)> 5708 FEE016
<Segment(708.486, 711.04)> 5709 MEO015
<Segment(711.067, 713.328)> 5710 MEE014
<Segment(711.856, 736.096)> 5711 FEE016
<Segment(712.272, 712.768)> 5712 MEO015
<Segment(727.648, 728.209)> 5713 FEE013
<Segment(734.731, 751.93)> 5714 FEE013
<Segment(737.248, 737.888)> 5715 FEE016
<Segment(746.352, 746.876)> 5716 FEE016
<Segment(747.488, 748.36)> 5717 MEE014
<Segment(749.639, 755.328)> 5718 FEE016
<Segment(752.4, 752.986)> 5719 FEE013
<Segment(754.864, 755.952)> 5720 FEE013
<Segment(754.938, 758.268)> 5721 MEO015
<Segment(757.408, 779.84)> 5722 FEE013
<Segment(768.48, 769.024)> 5723 FEE016
<Segment(772.304, 772.72)> 5724 FEE016
<Segment(775.552, 775.872)> 5725 FEE016
<Segment(777.296, 777.736)> 5726 FEE016
<Segment(777.776, 778.721)> 5727 MEE014
<Segment(778.928, 794.512)> 5728 FEE016
<Segment(784.96, 785.387)> 5729 MEO015
<Segment(787.056, 787.87)> 5730 MEO015
<Segment(791.899, 793.054)> 5731 MEE014
<Segment(792.661, 793.601)> 5732 MEO015
<Segment(794.432, 812.064)> 5733 MEO015
<Segment(803.776, 805.952)> 5734 FEE016
<Segment(811.072, 811.856)> 5735 FEE016
<Segment(811.44, 815.12)> 5736 FEE013
<Segment(815.216, 819.161)> 5737 MEO015
<Segment(816.128, 823.872)> 5738 FEE013
<Segment(819.832, 821.056)> 5739 MEE014
<Segment(821.333, 828.176)> 5740 FEE016
<Segment(825.949, 826.456)> 5741 MEO015
<Segment(828.096, 831.516)> 5742 FEE013
<Segment(829.265, 834.544)> 5743 MEE014
<Segment(830.032, 830.747)> 5744 FEE016
<Segment(833.2, 834.052)> 5745 FEE016
<Segment(834.608, 844.688)> 5746 FEE016
<Segment(838.557, 839.344)> 5747 MEE014
<Segment(840.952, 843.364)> 5748 FEE013
<Segment(843.392, 849.648)> 5749 MEE014
<Segment(850.96, 862.224)> 5750 MEE014
<Segment(862.288, 862.8)> 5751 FEE013
<Segment(863.168, 863.584)> 5752 MEO015
<Segment(864.07, 865.885)> 5753 MEE014
<Segment(864.208, 871.84)> 5754 FEE013
<Segment(870.16, 870.864)> 5755 MEE014
<Segment(872.176, 872.64)> 5756 FEE016
<Segment(872.928, 883.28)> 5757 FEE013
<Segment(878.336, 878.922)> 5758 FEE016
<Segment(884.288, 885.2)> 5759 FEE013
<Segment(884.976, 899.344)> 5760 MEE014
<Segment(885.286, 885.89)> 5761 FEE016
<Segment(893.28, 894.581)> 5762 FEE013
<Segment(895.376, 895.979)> 5763 FEE016
<Segment(898.688, 912.407)> 5764 FEE016
<Segment(905.221, 905.764)> 5765 MEO015
<Segment(905.487, 907.78)> 5766 FEE013
<Segment(911.84, 912.384)> 5767 FEE013
<Segment(913.516, 935.076)> 5768 FEE013
<Segment(923.632, 924.08)> 5769 FEE016
<Segment(930.704, 931.532)> 5770 FEE016
<Segment(932.96, 933.408)> 5771 FEE016
<Segment(934.332, 947.76)> 5772 FEE016
<Segment(945.413, 946.031)> 5773 FEE013
<Segment(946, 946.744)> 5774 MEO015
<Segment(948.278, 949.177)> 5775 MEO015
<Segment(948.464, 953.165)> 5776 FEE013
<Segment(949.62, 950.52)> 5777 MEE014
<Segment(950.694, 959.392)> 5778 MEO015
<Segment(951.04, 951.44)> 5779 FEE016
<Segment(954.896, 955.41)> 5780 FEE016
<Segment(956, 958.152)> 5781 FEE013
<Segment(956.752, 958.64)> 5782 FEE016
<Segment(957.152, 957.856)> 5783 MEE014
<Segment(959.456, 960.373)> 5784 FEE013
<Segment(959.461, 972.975)> 5785 FEE016
<Segment(965.056, 965.6)> 5786 FEE013
<Segment(966.192, 966.731)> 5787 MEO015
<Segment(968.816, 969.264)> 5788 FEE013
<Segment(970.384, 971.79)> 5789 MEE014
<Segment(970.976, 980.032)> 5790 FEE013
<Segment(981.049, 989.552)> 5791 FEE013
<Segment(984.944, 1014.85)> 5792 MEE014
<Segment(990.987, 991.663)> 5793 FEE013
<Segment(1004.98, 1006.02)> 5794 FEE016
<Segment(1005.52, 1006.43)> 5795 MEO015
<Segment(1005.62, 1006.5)> 5796 FEE013
<Segment(1006.66, 1007.57)> 5797 FEE016
<Segment(1008.78, 1011.34)> 5798 FEE016
<Segment(1013.68, 1024.51)> 5799 FEE016
<Segment(1018.34, 1019.57)> 5800 MEE014
<Segment(1019.96, 1020.63)> 5801 FEE013
<Segment(1021.92, 1023.54)> 5802 MEE014
<Segment(1022.3, 1024.03)> 5803 FEE013
<Segment(1025.02, 1041.62)> 5804 FEE013
<Segment(1032.9, 1033.5)> 5805 FEE016
<Segment(1037.07, 1038.08)> 5806 FEE016
<Segment(1042, 1042.82)> 5807 FEE016
<Segment(1042.33, 1044.21)> 5808 FEE013
<Segment(1042.33, 1043.63)> 5809 MEE014
<Segment(1046.4, 1048.58)> 5810 FEE016
<Segment(1047.49, 1049.21)> 5811 FEE013
<Segment(1048.02, 1048.67)> 5812 MEO015

So we are able to know which speaker is talking in each segment.

Crop sample by time#

def crop(
    self,
    from_t: float = None,
    to_t: float = None,
    mode: str = 'intersection',
):

    """
    Crop sample by time.

    Parameters
    ----------
    from_t: float, optional (default=None)
        Start of the time window. If None, self.min is used.
    to_t: float, optional (default=None)
        End of the time window. If None, self.max is used.
    mode: str, optional (default='intersection')
        How tracks at the window boundary are handled. Allowed values:

        * ``'intersection'`` - a track that lies partly inside the window is cropped to it.
        * ``'strict'`` - tracks are never cropped (presumably only tracks lying
          entirely inside the window are kept -- confirm against the implementation).
        * ``'loose'`` - tracks are never cropped; a selected track is kept in its entirety.

    Returns
    -------
    result : malaya_speech.model.annotation.Annotation
    """

Visualize tracks#

[9]:
# 'strict' mode: tracks are not cropped (see the `mode` description in the crop docstring).
a = sample.crop(600, 660, mode = 'strict')
a.plot()
[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x1488a0bd0>
_images/load-load-rttm_19_1.png
[10]:
# 'loose' mode: selected tracks are kept whole, so they may extend past the 600-660 window.
a = sample.crop(600, 660, mode = 'loose')
a.plot()
[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x1489eb350>
_images/load-load-rttm_20_1.png
[18]:
# 'intersection' mode (the default): tracks that cross the window boundary are cropped to it.
a = sample.crop(600, 660, mode = 'intersection')
a.plot()
_images/load-load-rttm_21_0.png
[ ]: