Source code for malaya_speech.stack
import numpy as np
from scipy.stats.mstats import gmean
from typing import List, Callable
class Stack:
    """
    Ensemble wrapper that aggregates the `predict_proba` outputs of several models.

    The display name (`__name__`), the model identifier (`__model__`) and the
    label list (`labels`) are all taken from the first model in `models`.
    """

    def __init__(self, models):
        """
        Parameters
        ----------
        models: List[Callable]
            Non-empty sequence of models. Every model must provide
            `predict_proba`; the first one must also provide `__name__`,
            `__model__` and `labels`.

        Raises
        ------
        ValueError
            If `models` is empty.
        """
        # The first model supplies the name, identifier and labels, so an
        # empty sequence cannot be represented; fail with a clear message
        # instead of an opaque IndexError from `models[0]`.
        if len(models) == 0:
            raise ValueError('models must not be empty')
        self._models = models
        self.__name__ = self._models[0].__name__
        self.__model__ = self._models[0].__model__

    def __str__(self):
        return f'<{self.__name__}: {self.__model__}>'

    def predict_proba(self, inputs, aggregate: Callable = gmean):
        """
        Stacking for predictive models, will return probability.

        Parameters
        ----------
        inputs: List[np.array]
        aggregate : Callable, optional (default=scipy.stats.mstats.gmean)
            Aggregate function. It is called as `aggregate(array, axis=0)`,
            where `array` stacks the per-model outputs along the first axis.

        Returns
        -------
        result: np.array
            Whatever `aggregate` returns for the stacked outputs.
        """
        stacked = np.array(
            [model.predict_proba(inputs) for model in self._models]
        )
        return aggregate(stacked, axis=0)

    def predict(self, inputs, aggregate: Callable = gmean) -> List[str]:
        """
        Stacking for predictive models, will return labels.

        Parameters
        ----------
        inputs: List[np.array]
        aggregate : Callable, optional (default=scipy.stats.mstats.gmean)
            Aggregate function, forwarded to `predict_proba`.

        Returns
        -------
        result: List[str]
            For each input, the first model's label at the index of the
            highest aggregated score (argmax over axis 1).
        """
        best = np.argmax(
            self.predict_proba(inputs, aggregate=aggregate), axis=1
        )
        labels = self._models[0].labels
        return [labels[index] for index in best]

    # The parameter name `input` shadows the builtin, but it is kept so that
    # existing keyword callers (`stack(input=...)`) continue to work.
    def __call__(self, input):
        """Predict the label of a single input using the default aggregate."""
        return self.predict([input])[0]
def classification_stack(models):
    """
    Stacking for classification models. All models should be in the same domain classification.

    Parameters
    ----------
    models: List[Callable]
        Non-empty list of models. Every model must provide `predict_proba`
        and `labels`.

    Returns
    -------
    result: malaya_speech.stack.Stack class

    Raises
    ------
    ValueError
        If `models` is empty, if any model has no `predict_proba`
        attribute, or if the models do not all share the same `labels`.
    """
    # Reject an empty list here so it is reported as a validation error like
    # the other checks, rather than failing later inside `Stack`.
    if len(models) == 0:
        raise ValueError('models must not be empty')

    labels = None
    for model in models:
        # `hasattr` also accepts attributes resolved dynamically (for example
        # through `__getattr__` delegation), which `dir()` does not list.
        if not hasattr(model, 'predict_proba'):
            raise ValueError('all models must able to `predict_proba`')
        if labels is None:
            labels = model.labels
        elif labels != model.labels:
            raise ValueError('domain classification must be same!')
    return Stack(models)