{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Speech-to-Text RNNT PyTorch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Encoder model + RNNT loss using PyTorch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "| | Size (MB) | malay-malaya | malay-fleur102 | Language | singlish | whisper-mixed |\n",
"|---|---|---|---|---|---|---|\n",
"| mesolitica/conformer-tiny | 38.5 | {'WER': 0.17341180814, 'CER': 0.05957485024} | {'WER': 0.19524478979, 'CER': 0.0830808938} | [malay] | NaN | NaN |\n",
"| mesolitica/conformer-base | 121 | {'WER': 0.122076123261, 'CER': 0.03879606324} | {'WER': 0.1326737206665, 'CER': 0.05032914857} | [malay] | NaN | NaN |\n",
"| mesolitica/conformer-medium | 243 | {'WER': 0.1054817492564, 'CER': 0.0313518992842} | {'WER': 0.1172708897486, 'CER': 0.0431050488} | [malay] | NaN | NaN |\n",
"| mesolitica/emformer-base | 162 | {'WER': 0.175762423786, 'CER': 0.06233919000537} | {'WER': 0.18303839134, 'CER': 0.0773853362} | [malay] | NaN | NaN |\n",
"| mesolitica/conformer-base-singlish | 121 | NaN | NaN | [singlish] | {'WER': 0.06517537334361, 'CER': 0.03265430876} | NaN |\n",
"| mesolitica/conformer-medium-mixed | 243 | {'WER': 0.111166517935, 'CER': 0.03410958328} | {'WER': 0.108354748, 'CER': 0.037785722} | [malay, singlish] | {'WER': 0.091969755225, 'CER': 0.044627194623} | NaN |\n",
"| mesolitica/conformer-medium-malay-whisper | 243 | {'WER': 0.092561502, 'CER': 0.0245421736} | {'WER': 0.097128574, 'CER': 0.03392603} | [malay, mixed] | NaN | {'WER': 0.1705298134, 'CER': 0.10580679153} |\n",
"| mesolitica/conformer-large-malay-whisper | 413 | {'WER': 0.10028492039, 'CER': 0.0310868406} | {'WER': 0.09544850396, 'CER': 0.03258454692} | [malay, mixed] | NaN | {'WER': 0.20429079189, 'CER': 0.12111372327} |\n", "