from matplotlib import pyplot
from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_val_score
from sklearn.datasets import make_classification, make_regression
Voting Ensemble
Stacking
# create a synthetic classification dataset
def make_clas_dataset(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=6):
    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               n_informative=n_informative, n_redundant=n_redundant,
                               random_state=random_state)
    return X, y
# create a synthetic regression dataset
def make_reg_dataset(n_samples=1000, n_features=20, n_informative=15, noise=0.1, random_state=6):
    X, y = make_regression(n_samples=n_samples, n_features=n_features,
                           n_informative=n_informative, noise=noise,
                           random_state=random_state)
    return X, y
# evaluate each model with cross-validation, report the mean and standard deviation
# of its scores, and plot the score distributions side by side
def show_results(X, y, models, scoring, cv):
    results, names = [], []
    for name, model in models.items():
        scores = cross_val_score(model, X, y, scoring=scoring, cv=cv)
        results.append(scores)
        names.append(name)
        print(f'{name}: {scores.mean():.3f} ({scores.std():.3f})')
    pyplot.boxplot(results, labels=names, showmeans=True)
    pyplot.show()
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
Voting Ensemble for Classification
Hard Voting Ensemble for Classification
# standalone KNN models with different neighborhood sizes, plus a hard voting ensemble of them
def get_models():
    models = dict()
    neighbors = [1, 3, 5, 7, 9]
    for n in neighbors:
        key = 'knn' + str(n)
        models[key] = KNeighborsClassifier(n_neighbors=n)
    members = [(n, m) for n, m in models.items()]
    models['hard_voting'] = VotingClassifier(estimators=members, voting='hard')
    return models
X, y = make_clas_dataset()
models = get_models()
models
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
show_results(X, y, models, 'accuracy', cv)
# fit a final hard voting ensemble on all available data
models = []
neighbors = [1, 3, 5, 7, 9]
for n in neighbors:
    models.append(('knn' + str(n), KNeighborsClassifier(n_neighbors=n)))
ensemble = VotingClassifier(estimators=models, voting='hard')
ensemble.fit(X, y)
row = [5.88891819, 2.64867662, -0.42728226, -1.24988856, -0.00822, -3.57895574, 2.87938412,
       -1.55614691, -0.38168784, 7.50285659, -1.16710354, -5.02492712, -0.46196105,
       -0.64539455, -1.71297469, 0.25987852, -0.193401, -5.52022952, 0.0364453, -1.960039]
yhat = ensemble.predict([row])
print(f'{yhat[0]}')
Soft Voting Ensemble for Classification
from sklearn.svm import SVC
# polynomial SVMs of increasing degree, plus a soft voting ensemble of them
def get_models():
    models = dict()
    for n in range(1, 6):
        key = 'svm' + str(n)
        models[key] = SVC(probability=True, kernel='poly', degree=n)
    members = [(n, m) for n, m in models.items()]
    models['soft_voting'] = VotingClassifier(estimators=members, voting='soft')
    return models
X, y = make_clas_dataset()
models = get_models()
models
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
show_results(X, y, models, 'accuracy', cv)
models = []
for n in range(1, 6):
    models.append(('svm' + str(n), SVC(probability=True, kernel='poly', degree=n)))
models
ensemble = VotingClassifier(estimators=models, voting='soft')
ensemble.fit(X, y)
row = [5.88891819, 2.64867662, -0.42728226, -1.24988856, -0.00822, -3.57895574, 2.87938412,
       -1.55614691, -0.38168784, 7.50285659, -1.16710354, -5.02492712, -0.46196105, -0.64539455,
       -1.71297469, 0.25987852, -0.193401, -5.52022952, 0.0364453, -1.960039]
yhat = ensemble.predict([row])
print(f'{yhat[0]}')
Voting Ensemble for Regression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor
# decision tree regressors with increasing depth, plus a voting ensemble of them
def get_models():
    models = dict()
    for n in range(1, 6):
        key = 'cart' + str(n)
        models[key] = DecisionTreeRegressor(max_depth=n)
    members = [(n, m) for n, m in models.items()]
    models['voting'] = VotingRegressor(estimators=members)
    return models
X, y = make_reg_dataset()
models = get_models()
models
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
show_results(X, y, models, 'neg_mean_absolute_error', cv)
models = []
for n in range(1, 6):
    models.append(('cart' + str(n), DecisionTreeRegressor(max_depth=n)))
models
ensemble = VotingRegressor(estimators=models)
ensemble.fit(X, y)
row = [0.59332206, -0.56637507, 1.34808718, -0.57054047, -0.72480487, 1.05648449,
       0.77744852, 0.07361796, 0.88398267, 2.02843157, 1.01902732, 0.11227799, 0.94218853,
       0.26741783, 0.91458143, -0.72759572, 1.08842814, -0.61450942, -0.69387293, 1.69169009]
yhat = ensemble.predict([row])
print(f'{yhat[0]}')
Weighted Average Ensemble
Weighted Average Ensemble for Classification
X, y = make_classification()
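As a minimal sketch of where this section is heading, and assuming illustrative member models and hand-picked weights rather than the ones used in this notebook, a weighted average ensemble can be expressed with scikit-learn's VotingClassifier by passing per-model weights that scale each member's predicted probabilities before they are averaged.
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import VotingClassifier
# illustrative members; the weights below are placeholders, in practice they would
# be derived from each member's hold-out performance
members = [('lr', LogisticRegression()), ('cart', DecisionTreeClassifier()),
           ('knn', KNeighborsClassifier())]
weighted = VotingClassifier(estimators=members, voting='soft', weights=[0.5, 0.3, 0.2])
weighted.fit(X, y)
print(weighted.predict(X[:1]))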