{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Speech-to-Text RNNT" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Encoder model + RNNT loss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "\n", "This tutorial is available as an IPython notebook at [malaya-speech/example/stt-transducer-model](https://github.com/huseinzol05/malaya-speech/tree/master/example/stt-transducer-model).\n", " \n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "\n", "This module is not language independent, so it is not safe to use on different languages. Pretrained models are trained on hyperlocal languages.\n", " \n", "
" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ['CUDA_VISIBLE_DEVICES'] = ''" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "`pyaudio` is not available, `malaya_speech.streaming.stream` is not able to use.\n" ] } ], "source": [ "import malaya_speech\n", "import numpy as np\n", "from malaya_speech import Pipeline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import logging\n", "\n", "logging.basicConfig(level=logging.INFO)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List available RNNT model" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:malaya_speech.stt:for `malay-fleur102` language, tested on FLEURS102 `ms_my` test set, https://github.com/huseinzol05/malaya-speech/tree/master/pretrained-model/prepare-stt\n", "INFO:malaya_speech.stt:for `malay-malaya` language, tested on malaya-speech test set, https://github.com/huseinzol05/malaya-speech/tree/master/pretrained-model/prepare-stt\n", "INFO:malaya_speech.stt:for `singlish` language, tested on IMDA malaya-speech test set, https://github.com/huseinzol05/malaya-speech/tree/master/pretrained-model/prepare-stt\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Size (MB)Quantized Size (MB)malay-malayamalay-fleur102Languagesinglish
tiny-conformer24.49.14{'WER': 0.2128108, 'CER': 0.08136871, 'WER-LM'...{'WER': 0.2682816, 'CER': 0.13052725, 'WER-LM'...[malay]NaN
small-conformer49.218.1{'WER': 0.19853302, 'CER': 0.07449528, 'WER-LM...{'WER': 0.23412149, 'CER': 0.1138314813, 'WER-...[malay]NaN
conformer12537.1{'WER': 0.16340855635999124, 'CER': 0.05897205...{'WER': 0.20090442596, 'CER': 0.09616901, 'WER...[malay]NaN
large-conformer404107{'WER': 0.1566839, 'CER': 0.0619715, 'WER-LM':...{'WER': 0.1711028238, 'CER': 0.077953559, 'WER...[malay]NaN
conformer-stack-2mixed13038.5{'WER': 0.1889883954, 'CER': 0.0726845531, 'WE...{'WER': 0.244836948, 'CER': 0.117409327, 'WER-...[malay, singlish]{'WER': 0.08535878149, 'CER': 0.0452357273822,...
small-conformer-singlish49.218.1NaNNaN[singlish]{'WER': 0.087831, 'CER': 0.0456859, 'WER-LM': ...
conformer-singlish12537.1NaNNaN[singlish]{'WER': 0.07779246, 'CER': 0.0403616, 'WER-LM'...
large-conformer-singlish404107NaNNaN[singlish]{'WER': 0.07014733, 'CER': 0.03587201, 'WER-LM...
\n", "
" ], "text/plain": [ " Size (MB) Quantized Size (MB) \\\n", "tiny-conformer 24.4 9.14 \n", "small-conformer 49.2 18.1 \n", "conformer 125 37.1 \n", "large-conformer 404 107 \n", "conformer-stack-2mixed 130 38.5 \n", "small-conformer-singlish 49.2 18.1 \n", "conformer-singlish 125 37.1 \n", "large-conformer-singlish 404 107 \n", "\n", " malay-malaya \\\n", "tiny-conformer {'WER': 0.2128108, 'CER': 0.08136871, 'WER-LM'... \n", "small-conformer {'WER': 0.19853302, 'CER': 0.07449528, 'WER-LM... \n", "conformer {'WER': 0.16340855635999124, 'CER': 0.05897205... \n", "large-conformer {'WER': 0.1566839, 'CER': 0.0619715, 'WER-LM':... \n", "conformer-stack-2mixed {'WER': 0.1889883954, 'CER': 0.0726845531, 'WE... \n", "small-conformer-singlish NaN \n", "conformer-singlish NaN \n", "large-conformer-singlish NaN \n", "\n", " malay-fleur102 \\\n", "tiny-conformer {'WER': 0.2682816, 'CER': 0.13052725, 'WER-LM'... \n", "small-conformer {'WER': 0.23412149, 'CER': 0.1138314813, 'WER-... \n", "conformer {'WER': 0.20090442596, 'CER': 0.09616901, 'WER... \n", "large-conformer {'WER': 0.1711028238, 'CER': 0.077953559, 'WER... \n", "conformer-stack-2mixed {'WER': 0.244836948, 'CER': 0.117409327, 'WER-... \n", "small-conformer-singlish NaN \n", "conformer-singlish NaN \n", "large-conformer-singlish NaN \n", "\n", " Language \\\n", "tiny-conformer [malay] \n", "small-conformer [malay] \n", "conformer [malay] \n", "large-conformer [malay] \n", "conformer-stack-2mixed [malay, singlish] \n", "small-conformer-singlish [singlish] \n", "conformer-singlish [singlish] \n", "large-conformer-singlish [singlish] \n", "\n", " singlish \n", "tiny-conformer NaN \n", "small-conformer NaN \n", "conformer NaN \n", "large-conformer NaN \n", "conformer-stack-2mixed {'WER': 0.08535878149, 'CER': 0.0452357273822,... \n", "small-conformer-singlish {'WER': 0.087831, 'CER': 0.0456859, 'WER-LM': ... \n", "conformer-singlish {'WER': 0.07779246, 'CER': 0.0403616, 'WER-LM'... 
\n", "large-conformer-singlish {'WER': 0.07014733, 'CER': 0.03587201, 'WER-LM... " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "malaya_speech.stt.transducer.available_transformer()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'malay-malaya': {'WER': 0.16477548774, 'CER': 0.05973209121},\n", " 'malay-fleur102': {'WER': 0.109588779, 'CER': 0.047891527},\n", " 'singlish': {'WER': 0.4941349, 'CER': 0.3026296}}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "malaya_speech.stt.google_accuracy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load RNNT model\n", "\n", "```python\n", "def transformer(\n", " model: str = 'conformer',\n", " quantized: bool = False,\n", " **kwargs,\n", "):\n", " \"\"\"\n", " Load Encoder-Transducer ASR model.\n", "\n", " Parameters\n", " ----------\n", " model : str, optional (default='conformer')\n", " Check available models at `malaya_speech.stt.transducer.available_transformer()`.\n", " quantized : bool, optional (default=False)\n", " if True, will load 8-bit quantized model.\n", " Quantized model not necessary faster, totally depends on the machine.\n", "\n", " Returns\n", " -------\n", " result : malaya_speech.model.transducer.Transducer class\n", " \"\"\"\n", "```" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2023-02-01 11:53:39.010188: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-02-01 11:53:39.015470: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: 
CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n", "2023-02-01 11:53:39.015489: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: husein-MS-7D31\n", "2023-02-01 11:53:39.015496: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: husein-MS-7D31\n", "2023-02-01 11:53:39.015568: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: Not found: was unable to find libcuda.so DSO loaded into this program\n", "2023-02-01 11:53:39.015587: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.161.3\n" ] } ], "source": [ "small_model = malaya_speech.stt.transducer.transformer(model = 'small-conformer')\n", "model = malaya_speech.stt.transducer.transformer(model = 'conformer')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Quantized deep model\n", "\n", "To load an 8-bit quantized model, simply pass `quantized = True`; the default is `False`.\n", "\n", "We can expect a slight accuracy drop from the quantized model, and it is not necessarily faster than the normal 32-bit float model; this depends entirely on the machine." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "quantized_small_model = malaya_speech.stt.transducer.transformer(model = 'small-conformer', quantized = True)\n", "quantized_model = malaya_speech.stt.transducer.transformer(model = 'conformer', quantized = True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load sample" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "ceramah, sr = malaya_speech.load('speech/khutbah/wadi-annuar.wav')\n", "record1, sr = malaya_speech.load('speech/record/savewav_2020-11-26_22-36-06_294832.wav')\n", "record2, sr = malaya_speech.load('speech/record/savewav_2020-11-26_22-40-56_929661.wav')\n", "shafiqah_idayu, sr = malaya_speech.load('speech/example-speaker/shafiqah-idayu.wav')\n", "mas_aisyah, sr = malaya_speech.load('speech/example-speaker/mas-aisyah.wav')\n", "khalil, sr = malaya_speech.load('speech/example-speaker/khalil-nooh.wav')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import IPython.display as ipd\n", "\n", "ipd.Audio(ceramah, rate = sr)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "As we can hear, the speaker speaks in the Kedahan dialect with some Arabic words; let's see how good our model is." 
] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipd.Audio(record1, rate = sr)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipd.Audio(record2, rate = sr)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipd.Audio(shafiqah_idayu, rate = sr)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipd.Audio(mas_aisyah, rate = sr)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ipd.Audio(khalil, rate = sr)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Predict using greedy decoder\n", "\n", "```python\n", "def greedy_decoder(self, inputs):\n", " \"\"\"\n", " Transcribe inputs using greedy decoder.\n", "\n", " Parameters\n", " ----------\n", " inputs: List[np.array]\n", " List[np.array] or List[malaya_speech.model.frame.Frame].\n", "\n", " Returns\n", " -------\n", " result: List[str]\n", " \"\"\"\n", "```" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 7.09 s, sys: 1.8 s, total: 8.88 s\n", "Wall time: 6.05 s\n" 
] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni allah maha ini',\n", " 'helo nama saya husin saya tak suka mandi ketat saya masak',\n", " 'helo nama saya husin saya suka mandi saya mandi tetek hari',\n", " 'nama saya syafiqah hidayah',\n", " 'sebut perkataan uncle',\n", " 'tolong sebut anti kata']" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "small_model.greedy_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 11.3 s, sys: 3.47 s, total: 14.8 s\n", "Wall time: 11.2 s\n" ] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni alah maaf ini',\n", " 'helo nama saya send saya tak suka mandi ke tak saya masam',\n", " 'helo nama saya husin saya suka mandi saya mandi setiap hari',\n", " 'nama saya syafiqah idayu',\n", " 'sebut perkataan angka',\n", " 'tolong sebut antika']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "model.greedy_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 6.58 s, sys: 1.34 s, total: 7.92 s\n", "Wall time: 5.24 s\n" ] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni allah maha ini',\n", " 'helo nama saya husin saya tak suka mandi ketat saya masak',\n", " 'helo nama saya husin saya suka mandi saya mandi tetek hari',\n", " 'nama saya syafiqah hidayah',\n", " 'sebut perkataan uncle',\n", " 'tolong sebut anti kata']" ] }, "execution_count": 16, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "quantized_small_model.greedy_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil])" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 10.8 s, sys: 3.02 s, total: 13.8 s\n", "Wall time: 8.91 s\n" ] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni alah maaf ini',\n", " 'helo nama saya send saya tak suka mandi ke tak saya masam',\n", " 'helo nama saya pusing saya suka mandi saya mandi setiap hari',\n", " 'nama saya syafiqah idayu',\n", " 'sebut perkataan angka',\n", " 'tolong sebut antika']" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "quantized_model.greedy_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Predict using beam decoder\n", "\n", "```python\n", "def beam_decoder(self, inputs, beam_width: int = 5,\n", " temperature: float = 0.0,\n", " score_norm: bool = True):\n", " \"\"\"\n", " Transcribe inputs using beam decoder.\n", "\n", " Parameters\n", " ----------\n", " inputs: List[np.array]\n", " List[np.array] or List[malaya_speech.model.frame.Frame].\n", " beam_width: int, optional (default=5)\n", " beam size for beam decoder.\n", " temperature: float, optional (default=0.0)\n", " apply temperature function for logits, can help for certain case,\n", " logits += -np.log(-np.log(uniform_noise_shape_logits)) * temperature\n", " score_norm: bool, optional (default=True)\n", " descending sort beam based on score / length of decoded.\n", "\n", " Returns\n", " -------\n", " result: List[str]\n", " \"\"\"\n", "```" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ "CPU times: user 11.2 s, sys: 1.97 s, total: 13.2 s\n", "Wall time: 8.14 s\n" ] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni allah maha ini',\n", " 'helo nama saya pusing saya tak suka mandi ketat saya masak',\n", " 'helo nama saya husin saya suka mandi saya mandi tetek hari',\n", " 'nama saya syafiqah hidayah',\n", " 'sebut perkataan uncle',\n", " 'tolong sebut anti kata']" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "small_model.beam_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil], beam_width = 5)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 21.3 s, sys: 3.23 s, total: 24.6 s\n", "Wall time: 13.6 s\n" ] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni alah maaf ini',\n", " 'helo nama saya pusing saya tak suka mandi ke tak saya masam',\n", " 'helo nama saya husin saya suka mandi saya mandi tiap tiap hari',\n", " 'nama saya syafiqah idayu',\n", " 'sebut perkataan angka',\n", " 'tolong sebut antika']" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "model.beam_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil], beam_width = 5)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 10.6 s, sys: 1.71 s, total: 12.3 s\n", "Wall time: 7.53 s\n" ] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni allah maha ini',\n", " 'helo nama saya pusing saya tak suka mandi ketat saya masak',\n", " 'helo nama saya husin saya suka mandi saya mandi tetek hari',\n", " 'nama 
saya syafiqah hidayah',\n", " 'sebut perkataan uncle',\n", " 'tolong sebut anti kata']" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "quantized_small_model.beam_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil], beam_width = 5)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 16.8 s, sys: 1.67 s, total: 18.5 s\n", "Wall time: 6.45 s\n" ] }, { "data": { "text/plain": [ "['jadi dalam perjalanan ini dunia yang susah ini ketika nabi mengajar muaz bin jabal tadi ni alah maaf ini',\n", " 'helo nama saya pusing saya tak suka mandi ke tak saya masam',\n", " 'helo nama saya pusing saya suka mandi saya mandi tiap tiap hari',\n", " 'nama saya syafiqah idayu',\n", " 'sebut perkataan angka',\n", " 'tolong sebut antika']" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "quantized_model.beam_decoder([ceramah, record1, record2, shafiqah_idayu, mas_aisyah, khalil], beam_width = 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**The RNNT beam decoder is not able to utilise batch processing; if you feed it a batch, it will process the inputs one by one**." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Predict alignment\n", "\n", "We want to know when the speakers speak certain words, so we can use `predict_alignment`:\n", "\n", "```python\n", "def predict_alignment(self, input, combined = True):\n", " \"\"\"\n", " Transcribe input and get timestamp, only support greedy decoder.\n", "\n", " Parameters\n", " ----------\n", " input: np.array\n", " np.array or malaya_speech.model.frame.Frame.\n", " combined: bool, optional (default=True)\n", " If True, will combined subwords to become a word.\n", "\n", " Returns\n", " -------\n", " result: List[Dict[text, start, end]]\n", " \"\"\"\n", "```" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 3.7 s, sys: 784 ms, total: 4.48 s\n", "Wall time: 4.11 s\n" ] }, { "data": { "text/plain": [ "[{'text': 'nama', 'start': 0.28, 'end': 0.57},\n", " {'text': 'saya', 'start': 0.68, 'end': 0.97},\n", " {'text': 'syafiqah', 'start': 1.28, 'end': 1.69},\n", " {'text': 'idri', 'start': 1.8, 'end': 2.01}]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "small_model.predict_alignment(shafiqah_idayu)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 405 ms, sys: 84.8 ms, total: 489 ms\n", "Wall time: 128 ms\n" ] }, { "data": { "text/plain": [ "[{'text': 'nam', 'start': 0.28, 'end': 0.29},\n", " {'text': 'a_', 'start': 0.56, 'end': 0.57},\n", " {'text': 'say', 'start': 0.68, 'end': 0.69},\n", " {'text': 'a_', 'start': 0.96, 'end': 0.97},\n", " {'text': 'sya', 'start': 1.28, 'end': 1.29},\n", " {'text': 'fi', 'start': 1.44, 'end': 1.45},\n", " {'text': 'q', 'start': 1.52, 'end': 1.53},\n", " {'text': 'ah_', 'start': 1.68, 'end': 1.69},\n", " {'text': 'id', 'start': 1.8, 'end': 1.81},\n", " {'text': 'ri', 'start': 
2.0, 'end': 2.01}]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "small_model.predict_alignment(shafiqah_idayu, combined = False)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 1.25 s, sys: 324 ms, total: 1.58 s\n", "Wall time: 348 ms\n" ] }, { "data": { "text/plain": [ "[{'text': 'jadi', 'start': 0.36, 'end': 0.53},\n", " {'text': 'dalam', 'start': 0.6, 'end': 0.73},\n", " {'text': 'perjalanan', 'start': 0.84, 'end': 1.33},\n", " {'text': 'ini', 'start': 1.4, 'end': 1.41},\n", " {'text': 'dunia', 'start': 2.44, 'end': 2.65},\n", " {'text': 'yang', 'start': 2.76, 'end': 2.81},\n", " {'text': 'susah', 'start': 2.88, 'end': 3.13},\n", " {'text': 'ini', 'start': 3.24, 'end': 3.25},\n", " {'text': 'ketika', 'start': 5.64, 'end': 5.85},\n", " {'text': 'nabi', 'start': 6.12, 'end': 6.37},\n", " {'text': 'mengajar', 'start': 6.44, 'end': 6.81},\n", " {'text': 'muaz', 'start': 6.96, 'end': 7.21},\n", " {'text': 'bin', 'start': 7.28, 'end': 7.29},\n", " {'text': 'jabal', 'start': 7.44, 'end': 7.73},\n", " {'text': 'tadi', 'start': 7.84, 'end': 8.05},\n", " {'text': 'ni', 'start': 8.12, 'end': 8.13},\n", " {'text': 'allah', 'start': 8.52, 'end': 8.69},\n", " {'text': 'maha', 'start': 8.8, 'end': 9.01},\n", " {'text': 'ini', 'start': 9.4, 'end': 9.41}]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "small_model.predict_alignment(ceramah)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 5.89 s, sys: 2.04 s, total: 7.93 s\n", "Wall time: 7.37 s\n" ] }, { "data": { "text/plain": [ "[{'text': 'nama', 'start': 0.28, 'end': 0.57},\n", " {'text': 'saya', 'start': 0.64, 'end': 0.97},\n", " {'text': 'syafiqah', 'start': 1.28, 'end': 1.69},\n", " 
{'text': 'idayu', 'start': 1.8, 'end': 2.05}]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "model.predict_alignment(shafiqah_idayu)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 2.35 s, sys: 515 ms, total: 2.87 s\n", "Wall time: 593 ms\n" ] }, { "data": { "text/plain": [ "[{'text': 'jadi', 'start': 0.36, 'end': 0.53},\n", " {'text': 'dalam', 'start': 0.6, 'end': 0.73},\n", " {'text': 'perjalanan', 'start': 0.8, 'end': 1.29},\n", " {'text': 'ini', 'start': 1.4, 'end': 1.41},\n", " {'text': 'dunia', 'start': 2.44, 'end': 2.65},\n", " {'text': 'yang', 'start': 2.72, 'end': 2.81},\n", " {'text': 'susah', 'start': 2.88, 'end': 3.13},\n", " {'text': 'ini', 'start': 3.24, 'end': 3.25},\n", " {'text': 'ketika', 'start': 5.64, 'end': 5.85},\n", " {'text': 'nabi', 'start': 6.12, 'end': 6.37},\n", " {'text': 'mengajar', 'start': 6.44, 'end': 6.81},\n", " {'text': 'muaz', 'start': 6.96, 'end': 7.21},\n", " {'text': 'bin', 'start': 7.28, 'end': 7.29},\n", " {'text': 'jabal', 'start': 7.44, 'end': 7.73},\n", " {'text': 'tadi', 'start': 7.84, 'end': 8.05},\n", " {'text': 'ni', 'start': 8.12, 'end': 8.13},\n", " {'text': 'alah', 'start': 8.52, 'end': 8.69},\n", " {'text': 'maaf', 'start': 8.8, 'end': 9.01}]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "model.predict_alignment(ceramah)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 3.82 s, sys: 743 ms, total: 4.56 s\n", "Wall time: 4.19 s\n" ] }, { "data": { "text/plain": [ "[{'text': 'nama', 'start': 0.28, 'end': 0.57},\n", " {'text': 'saya', 'start': 0.68, 'end': 0.97},\n", " {'text': 'syafiqah', 'start': 1.28, 'end': 1.69},\n", " {'text': 'idri', 'start': 1.8, 'end': 2.01}]" ] }, 
"execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "quantized_small_model.predict_alignment(shafiqah_idayu)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 5.59 s, sys: 1.85 s, total: 7.44 s\n", "Wall time: 6.8 s\n" ] }, { "data": { "text/plain": [ "[{'text': 'nama', 'start': 0.28, 'end': 0.57},\n", " {'text': 'saya', 'start': 0.64, 'end': 0.97},\n", " {'text': 'syafiqah', 'start': 1.28, 'end': 1.69},\n", " {'text': 'id', 'start': 1.8, 'end': 1.81}]" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%%time\n", "\n", "quantized_model.predict_alignment(shafiqah_idayu)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }