# Source code for malaya_speech.model.abstract

from malaya_speech.utils.execute import execute_graph
from malaya_speech.utils.astype import float_to_int
from typing import Callable


class Abstract:
    """
    Shared base for graph-backed models.

    The methods below read ``self.__name__``, ``self.__model__``,
    ``self._sess``, ``self._input_nodes`` and ``self._output_nodes``; none of
    them is assigned in this class, so the concrete model is presumably
    responsible for setting them -- TODO confirm against subclasses.
    """

    def __str__(self):
        """Return a short ``<name: model>`` tag built from instance attributes."""
        return f'<{self.__name__}: {self.__model__}>'

    def _execute(self, inputs, input_labels, output_labels):
        """
        Forward the request to ``execute_graph`` using this instance's
        session and node mappings, and return whatever it returns.

        Parameters
        ----------
        inputs:
            values passed through unchanged as ``inputs``.
        input_labels:
            passed through unchanged as ``input_labels``.
        output_labels:
            passed through unchanged as ``output_labels``.
        """
        # Collect everything first so the call site stays a single line.
        call_kwargs = {
            'inputs': inputs,
            'input_labels': input_labels,
            'output_labels': output_labels,
            'sess': self._sess,
            'input_nodes': self._input_nodes,
            'output_nodes': self._output_nodes,
        }
        return execute_graph(**call_kwargs)


class TTS:
    """
    Base for Text-to-Speech models that adds a Gradio demo entry point.

    ``predict`` is called by `gradio` but is not defined here; it is expected
    to be provided by the concrete model class.

    Parameters
    ----------
    e2e: bool, optional (default=False)
        True when the model produces audio directly (no vocoder needed).
    """

    def __init__(self, e2e=False):
        self.e2e = e2e

    def gradio(self, vocoder: Callable = None, **kwargs):
        """
        Text-to-Speech on Gradio interface.

        Parameters
        ----------
        vocoder: Callable, optional (default=None)
            vocoder object, invoked directly as `vocoder(output)`,
            prefer from malaya_speech itself.
            Not required if using End-to-End TTS model such as VITS.
        **kwargs: keyword arguments for `predict` and `iface.launch`.
            The same mapping is forwarded to BOTH calls, so every key must be
            accepted by both of them.

        Returns
        -------
        result: whatever `iface.launch(**kwargs)` returns.

        Raises
        ------
        ValueError
            if the model is not End-to-End and no vocoder is given.
        ModuleNotFoundError
            if `gradio` cannot be imported.
        """
        if not self.e2e and vocoder is None:
            raise ValueError('TTS model is not End-to-End, required vocoder.')

        try:
            import gradio as gr
        except ImportError as e:
            # Only a failed import means "not installed"; KeyboardInterrupt,
            # SystemExit and unrelated errors must propagate untouched.
            raise ModuleNotFoundError(
                'gradio not installed. Please install it by `pip install gradio` and try again.'
            ) from e

        def pred(string):
            r = self.predict(string=string, **kwargs)
            if self.e2e:
                y_ = r['y']
            else:
                # The output key is chosen from the vocoder's string form.
                if 'universal' in str(vocoder):
                    o = r['universal-output']
                else:
                    o = r['mel-output']
                y_ = vocoder(o)
            data = float_to_int(y_)
            # NOTE(review): 22050 is presumably the sample rate expected by
            # the 'audio' output component -- confirm against the models.
            return (22050, data)

        if self.e2e:
            title = 'End-to-End Text-to-Speech'
        else:
            title = 'Text-to-Speech + Neural Vocoder'

        description = 'It will take sometime for the first time, after that, should be really fast.'

        # Keep the legacy `gr.inputs.Textbox` when the installed gradio still
        # ships it; otherwise fall back to the top-level `gr.Textbox`.
        legacy_inputs = getattr(gr, 'inputs', None)
        textbox_cls = getattr(legacy_inputs, 'Textbox', None) or gr.Textbox

        iface = gr.Interface(
            pred,
            textbox_cls(lines=3, label='Input Text'),
            'audio',
            title=title,
            description=description,
        )
        return iface.launch(**kwargs)