# Source code for malaya_speech.model.abstract

from malaya_speech.utils.execute import execute_graph
from malaya_speech.utils.astype import float_to_int
from typing import Callable


class Abstract:
    """
    Base helper shared by model wrappers.

    The methods below read `__name__`, `__model__`, `_sess`, `_input_nodes`
    and `_output_nodes` from the instance; none of them is assigned here, so
    they are presumably set by the concrete subclass -- confirm against callers.
    """

    def __str__(self):
        """Return `<name: model>` built from `self.__name__` and `self.__model__`."""
        return '<{}: {}>'.format(self.__name__, self.__model__)

    def _execute(self, inputs, input_labels, output_labels):
        """
        Forward a request to `execute_graph`.

        Parameters
        ----------
        inputs:
            passed through unchanged as `inputs`.
        input_labels:
            passed through unchanged as `input_labels`.
        output_labels:
            passed through unchanged as `output_labels`.

        Returns
        -------
        result:
            whatever `execute_graph` returns.
        """
        # Caller-supplied arguments are combined with the session and node
        # lookups stored on the instance.
        graph_kwargs = {
            'inputs': inputs,
            'input_labels': input_labels,
            'output_labels': output_labels,
            'sess': self._sess,
            'input_nodes': self._input_nodes,
            'output_nodes': self._output_nodes,
        }
        return execute_graph(**graph_kwargs)


class TTS:
    """
    Mixin that adds a Gradio demo to a Text-to-Speech model.

    `gradio` calls `self.predict(string=..., **kwargs)`, which is not defined
    in this class; it is presumably supplied by the concrete model class --
    confirm against the subclasses.

    Parameters
    ----------
    e2e: bool, optional (default=False)
        True when the model is End-to-End, i.e. `predict` already returns the
        waveform under key `'y'` and no vocoder is needed.
    """

    def __init__(self, e2e=False):
        self.e2e = e2e

    def gradio(self, vocoder: Callable = None, **kwargs):
        """
        Text-to-Speech on Gradio interface.

        Parameters
        ----------
        vocoder: Callable, optional (default=None)
            vocoder object that has `predict` method, prefer from malaya_speech itself.
            Not required if using End-to-End TTS model such as VITS.

        **kwargs: keyword arguments for `predict` and `iface.launch`.
            The same keyword arguments are forwarded to both calls.

        Returns
        -------
        result:
            whatever `iface.launch(**kwargs)` returns.

        Raises
        ------
        ValueError
            if the model is not End-to-End and no `vocoder` is given.
        ModuleNotFoundError
            if `gradio` cannot be imported.
        """
        if not self.e2e and vocoder is None:
            raise ValueError('TTS model is not End-to-End, required vocoder.')

        try:
            import gradio as gr
        except ImportError as e:
            # Only a failed import means "not installed"; anything else
            # (KeyboardInterrupt, errors raised inside gradio) should surface
            # untouched instead of being reported as a missing package.
            raise ModuleNotFoundError(
                'gradio not installed. Please install it by `pip install gradio` and try again.'
            ) from e

        def pred(string):
            # Synthesize `string` and return `(sample_rate, samples)` for the
            # 'audio' output component.
            r = self.predict(string=string, **kwargs)

            if self.e2e:
                y_ = r['y']
            else:
                # The vocoder's string representation decides which predicted
                # feature is fed to it.
                if 'universal' in str(vocoder):
                    o = r['universal-output']
                else:
                    o = r['mel-output']
                y_ = vocoder(o)
            data = float_to_int(y_)
            # NOTE(review): sample rate is hard-coded to 22050 -- confirm it
            # matches every model that uses this mixin.
            return (22050, data)

        if self.e2e:
            title = 'End-to-End Text-to-Speech'
        else:
            title = 'Text-to-Speech + Neural Vocoder'
        description = 'It will take sometime for the first time, after that, should be really fast.'

        # `gr.inputs.Textbox` is the legacy location of the component; newer
        # gradio releases expose it as `gr.Textbox`. Prefer the new name and
        # fall back to the legacy one so both generations keep working.
        textbox_cls = getattr(gr, 'Textbox', None)
        if textbox_cls is None:
            textbox_cls = gr.inputs.Textbox

        iface = gr.Interface(
            pred,
            textbox_cls(lines=3, label='Input Text'),
            'audio',
            title=title,
            description=description,
        )
        return iface.launch(**kwargs)