# Source code for malaya_speech.model.classification

import numpy as np
from malaya_speech.model.frame import Frame
from malaya_speech.utils.padding import (
    sequence_nd as padding_sequence_nd,
    sequence_1d,
)
from malaya_speech.utils.activation import softmax
from malaya_speech.model.abstract import Abstract


class Transformer2Vec(Abstract):
    """
    Model wrapper that pads a batch of inputs and returns the `logits`
    output produced by `self._execute`.
    """

    def __init__(
        self,
        input_nodes,
        output_nodes,
        sess,
        model,
        label,
        name,
        vectorizer=None,
        extra=None,
    ):
        # NOTE: `vectorizer` and `extra` are accepted but not stored by
        # this class.
        self.__name__ = name
        self.__model__ = model
        self.labels = label
        self._sess = sess
        self._output_nodes = output_nodes
        self._input_nodes = input_nodes

    def vectorize(self, inputs):
        """
        Turn a batch of inputs into vectors.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: np.array
            returned [B, D].
        """

        # Frame objects are unwrapped to their underlying array; anything
        # else is passed through untouched.
        arrays = []
        for item in inputs:
            arrays.append(item.array if isinstance(item, Frame) else item)

        # Pad the batch and keep the per-item lengths reported by the
        # padding helper, both are fed to the model.
        padded, lengths = padding_sequence_nd(arrays, dim=0, return_len=True)

        outputs = self._execute(
            inputs=[padded, lengths],
            input_labels=['X_placeholder', 'X_len_placeholder'],
            output_labels=['logits'],
        )
        return outputs['logits']

    def __call__(self, inputs):
        """Shortcut for `vectorize(inputs)`."""
        return self.vectorize(inputs)


class Speakernet(Abstract):
    """
    Model wrapper that featurizes each input with `vectorizer`, pads the
    batch and returns the `logits` output produced by `self._execute`.
    """

    def __init__(
        self,
        input_nodes,
        output_nodes,
        vectorizer,
        sess,
        model,
        extra,
        label,
        name,
    ):
        self._input_nodes = input_nodes
        self._output_nodes = output_nodes
        self._vectorizer = vectorizer
        self._sess = sess
        self._extra = extra
        self.labels = label
        self.__model__ = model
        self.__name__ = name

    def vectorize(self, inputs):
        """
        Vectorize inputs.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: np.array
            returned [B, D].
        """
        # Frame objects are unwrapped to their underlying array.
        signals = [
            item.array if isinstance(item, Frame) else item
            for item in inputs
        ]
        # The vectorizer is called with the signal only; `self._extra` is
        # not forwarded by this class.
        features = [self._vectorizer(signal) for signal in signals]
        padded, lengths = padding_sequence_nd(
            features, dim=0, return_len=True
        )

        r = self._execute(
            inputs=[padded, lengths],
            input_labels=['Placeholder', 'Placeholder_1'],
            output_labels=['logits'],
        )
        return r['logits']

    def __call__(self, inputs):
        """Shortcut for `vectorize(inputs)`."""
        return self.vectorize(inputs)
class Speaker2Vec(Abstract):
    """
    Model wrapper that featurizes each input with `vectorizer` (called
    with the `extra` keyword arguments), pads the batch and returns the
    `logits` output produced by `self._execute`.
    """

    def __init__(
        self,
        input_nodes,
        output_nodes,
        vectorizer,
        sess,
        model,
        extra,
        label,
        name,
    ):
        self._input_nodes = input_nodes
        self._output_nodes = output_nodes
        self._vectorizer = vectorizer
        self._sess = sess
        self._extra = extra
        self.labels = label
        self.__model__ = model
        self.__name__ = name

    def vectorize(self, inputs):
        """
        Vectorize inputs.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: np.array
            returned [B, D].
        """
        # Frame objects are unwrapped to their underlying array.
        signals = [
            item.array if isinstance(item, Frame) else item
            for item in inputs
        ]
        # `self._extra` is unpacked as keyword arguments, so it must be a
        # mapping.
        features = [
            self._vectorizer(signal, **self._extra) for signal in signals
        ]

        # The 'deep-speaker' model is padded along dim 0, every other
        # model along dim 1.
        dim = 0 if self.__model__ == 'deep-speaker' else 1
        padded = padding_sequence_nd(features, dim=dim)
        # Append a trailing axis of size 1 before feeding the model.
        padded = np.expand_dims(padded, -1)

        r = self._execute(
            inputs=[padded],
            input_labels=['Placeholder'],
            output_labels=['logits'],
        )
        return r['logits']

    def __call__(self, inputs):
        """Shortcut for `vectorize(inputs)`."""
        return self.vectorize(inputs)
class SpeakernetClassification(Abstract):
    """
    Classifier wrapper that featurizes each input with `vectorizer`, pads
    the batch, and turns the `logits` output of `self._execute` into
    probabilities / labels.
    """

    def __init__(
        self,
        input_nodes,
        output_nodes,
        vectorizer,
        sess,
        model,
        extra,
        label,
        name,
    ):
        self._input_nodes = input_nodes
        self._output_nodes = output_nodes
        self._vectorizer = vectorizer
        self._sess = sess
        self._extra = extra
        self.labels = label
        self.__model__ = model
        self.__name__ = name

    def predict_proba(self, inputs):
        """
        Predict inputs, will return probability.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: np.array
            returned [B, D].
        """
        # Frame objects are unwrapped to their underlying array.
        signals = [
            item.array if isinstance(item, Frame) else item
            for item in inputs
        ]
        # The vectorizer is called with the signal only; `self._extra` is
        # not forwarded by this class.
        features = [self._vectorizer(signal) for signal in signals]
        padded, lengths = padding_sequence_nd(
            features, dim=0, return_len=True
        )

        r = self._execute(
            inputs=[padded, lengths],
            input_labels=['Placeholder', 'Placeholder_1'],
            output_labels=['logits'],
        )
        # Softmax is applied along the last axis of the logits.
        return softmax(r['logits'], axis=-1)

    def predict(self, inputs):
        """
        Predict inputs, will return labels.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: List[str]
            returned [B].
        """
        # Index of the highest probability per row, mapped through
        # `self.labels`.
        indices = np.argmax(self.predict_proba(inputs), axis=1)
        return [self.labels[index] for index in indices]

    def __call__(self, input):
        """
        Predict input, will return label.

        Parameters
        ----------
        input: np.array
            np.array or malaya_speech.model.frame.Frame.

        Returns
        -------
        result: str
        """
        # Wrap the single input in a batch of one and unwrap the result.
        return self.predict([input])[0]
class Classification(Abstract):
    """
    Classifier wrapper that featurizes each input with `vectorizer`
    (called with the `extra` keyword arguments), pads the batch, and turns
    the `logits` output of `self._execute` into probabilities / labels.
    """

    def __init__(
        self,
        input_nodes,
        output_nodes,
        vectorizer,
        sess,
        model,
        extra,
        label,
        name,
    ):
        self._input_nodes = input_nodes
        self._output_nodes = output_nodes
        self._vectorizer = vectorizer
        self._sess = sess
        self._extra = extra
        self.labels = label
        self.__model__ = model
        self.__name__ = name

    def predict_proba(self, inputs):
        """
        Predict inputs, will return probability.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: np.array
            returned [B, D].
        """
        # Frame objects are unwrapped to their underlying array.
        signals = [
            item.array if isinstance(item, Frame) else item
            for item in inputs
        ]
        # `self._extra` is unpacked as keyword arguments, so it must be a
        # mapping.
        features = [
            self._vectorizer(signal, **self._extra) for signal in signals
        ]

        # The 'deep-speaker' model is padded along dim 0, every other
        # model along dim 1.
        dim = 0 if self.__model__ == 'deep-speaker' else 1
        padded = padding_sequence_nd(features, dim=dim)
        # Append a trailing axis of size 1 before feeding the model.
        padded = np.expand_dims(padded, -1)

        r = self._execute(
            inputs=[padded],
            input_labels=['Placeholder'],
            output_labels=['logits'],
        )
        # Softmax is applied along the last axis of the logits.
        return softmax(r['logits'], axis=-1)

    def predict(self, inputs):
        """
        Predict inputs, will return labels.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: List[str]
            returned [B].
        """
        # Index of the highest probability per row, mapped through
        # `self.labels`.
        indices = np.argmax(self.predict_proba(inputs), axis=1)
        return [self.labels[index] for index in indices]

    def __call__(self, input):
        """
        Predict input, will return label.

        Parameters
        ----------
        input: np.array
            np.array or malaya_speech.model.frame.Frame.

        Returns
        -------
        result: str
        """
        # Wrap the single input in a batch of one and unwrap the result.
        return self.predict([input])[0]
class MarbleNetClassification(Abstract):
    """
    Classifier wrapper that pads the input arrays with `sequence_1d` and
    turns the `logits` output of `self._execute` into probabilities /
    labels.
    """

    def __init__(
        self,
        input_nodes,
        output_nodes,
        vectorizer,
        sess,
        model,
        extra,
        label,
        name,
    ):
        self._input_nodes = input_nodes
        self._output_nodes = output_nodes
        self._vectorizer = vectorizer
        self._sess = sess
        self._extra = extra
        self.labels = label
        self.__model__ = model
        self.__name__ = name

    def predict_proba(self, inputs):
        """
        Predict inputs, will return probability.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: np.array
            returned [B, D].
        """
        # Frame objects are unwrapped to their underlying array.
        signals = [
            item.array if isinstance(item, Frame) else item
            for item in inputs
        ]
        # The stored vectorizer is not applied here; the arrays are padded
        # directly and fed to the model together with their lengths.
        padded, lengths = sequence_1d(signals, return_len=True)

        r = self._execute(
            inputs=[padded, lengths],
            input_labels=['X_placeholder', 'X_len_placeholder'],
            output_labels=['logits'],
        )
        # Softmax is applied along the last axis of the logits.
        return softmax(r['logits'], axis=-1)

    def predict(self, inputs):
        """
        Predict inputs, will return labels.

        Parameters
        ----------
        inputs: List[np.array]
            List[np.array] or List[malaya_speech.model.frame.Frame].

        Returns
        -------
        result: List[str]
            returned [B].
        """
        # Index of the highest probability per row, mapped through
        # `self.labels`.
        indices = np.argmax(self.predict_proba(inputs), axis=1)
        return [self.labels[index] for index in indices]

    def __call__(self, input):
        """
        Predict input, will return label.

        Parameters
        ----------
        input: np.array
            np.array or malaya_speech.model.frame.Frame.

        Returns
        -------
        result: str
        """
        # Wrap the single input in a batch of one and unwrap the result.
        return self.predict([input])[0]