from matplotlib import pyplot
from sklearn.model_selection import RepeatedStratifiedKFold, RepeatedKFold, cross_val_score
from sklearn.datasets import make_classification, make_regression

Voting Ensemble
Stacking
def make_clas_dataset(n_samples=1000, n_features=20, n_informative=15, n_redundant=5, random_state=6):
    # create a synthetic binary classification dataset
    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               n_informative=n_informative, n_redundant=n_redundant,
                               random_state=random_state)
    return X, y

def make_reg_dataset(n_samples=1000, n_features=20, n_informative=15, noise=0.1, random_state=6):
    # create a synthetic regression dataset
    X, y = make_regression(n_samples=n_samples, n_features=n_features,
                           n_informative=n_informative, noise=noise,
                           random_state=random_state)
    return X, y

def show_results(X, y, models, scoring, cv):
    # evaluate each model with cross-validation and summarise the scores
    results, names = [], []
    for name, model in models.items():
        scores = cross_val_score(model, X, y, scoring=scoring, cv=cv)
        results.append(scores)
        names.append(name)
        print(f'{name}: {scores.mean():.3f} ({scores.std():.3f})')
    # compare the score distributions with a box-and-whisker plot
    pyplot.boxplot(results, labels=names, showmeans=True)
    pyplot.show()

from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier

Voting Ensemble for Classification
Hard Voting Ensemble for Classification
def get_models():
    # ensemble members: KNN models with different numbers of neighbors
    models = dict()
    neighbors = [1, 3, 5, 7, 9]
    for n in neighbors:
        key = 'knn' + str(n)
        models[key] = KNeighborsClassifier(n_neighbors=n)
    # combine the members into a hard (majority) voting ensemble
    members = [(n, m) for n, m in models.items()]
    models['hard_voting'] = VotingClassifier(estimators=members, voting='hard')
    return models

X, y = make_clas_dataset()
models = get_models()
models

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
show_results(X, y, models, 'accuracy', cv)

models = []
neighbors = [1, 3, 5, 7, 9]
for n in neighbors:
    models.append(('knn' + str(n), KNeighborsClassifier(n_neighbors=n)))

# fit a final hard voting ensemble on all data and make a prediction
ensemble = VotingClassifier(estimators=models, voting='hard')
ensemble.fit(X, y)
row = [5.88891819, 2.64867662, -0.42728226, -1.24988856, -0.00822, -3.57895574, 2.87938412,
       -1.55614691, -0.38168784, 7.50285659, -1.16710354, -5.02492712, -0.46196105,
       -0.64539455, -1.71297469, 0.25987852, -0.193401, -5.52022952, 0.0364453, -1.960039]
yhat = ensemble.predict([row])
print(f'{yhat[0]}')
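The hard-voting prediction above can be reproduced by hand: each fitted member predicts a class and the ensemble returns the majority label. A minimal sketch (ties aside), assuming the fitted ensemble and row variables from the cell above:

from collections import Counter
# collect each fitted member's class prediction for the new row
member_preds = [est.predict([row])[0] for est in ensemble.estimators_]
# the hard vote is the most common class label among the members
print(member_preds, Counter(member_preds).most_common(1)[0][0])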
Soft Voting Ensemble for Classification

from sklearn.svm import SVC

def get_models():
    # ensemble members: SVMs with polynomial kernels of increasing degree
    models = dict()
    for n in range(1, 6):
        key = 'svm' + str(n)
        models[key] = SVC(probability=True, kernel='poly', degree=n)
    # combine the members into a soft (probability-averaging) voting ensemble
    members = [(n, m) for n, m in models.items()]
    models['soft_voting'] = VotingClassifier(estimators=members, voting='soft')
    return models

X, y = make_clas_dataset()
models = get_models()
models

cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
show_results(X, y, models, 'accuracy', cv)

models = []
for n in range(1, 6):
    models.append(('svm' + str(n), SVC(probability=True, kernel='poly', degree=n)))
models

# fit a final soft voting ensemble on all data and make a prediction
ensemble = VotingClassifier(estimators=models, voting='soft')
ensemble.fit(X, y)
row = [5.88891819, 2.64867662, -0.42728226, -1.24988856, -0.00822, -3.57895574, 2.87938412,
       -1.55614691, -0.38168784, 7.50285659, -1.16710354, -5.02492712, -0.46196105, -0.64539455,
       -1.71297469, 0.25987852, -0.193401, -5.52022952, 0.0364453, -1.960039]
yhat = ensemble.predict([row])
print(f'{yhat[0]}')
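Soft voting can likewise be checked by hand: average the members' predicted class probabilities and take the most probable class. A minimal sketch, assuming the fitted ensemble and row variables from the cell above:

from numpy import argmax, mean
# average the class-probability estimates of the fitted members
probas = [est.predict_proba([row])[0] for est in ensemble.estimators_]
avg_proba = mean(probas, axis=0)
# the soft vote is the class with the largest average probability
print(ensemble.classes_[argmax(avg_proba)], avg_proba)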
Voting Ensemble for Regression

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor

def get_models():
    # ensemble members: decision trees of increasing depth
    models = dict()
    for n in range(1, 6):
        key = 'cart' + str(n)
        models[key] = DecisionTreeRegressor(max_depth=n)
    # combine the members into a voting (averaging) regressor
    members = [(n, m) for n, m in models.items()]
    models['voting'] = VotingRegressor(estimators=members)
    return models

X, y = make_reg_dataset()
models = get_models()
models

cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
show_results(X, y, models, 'neg_mean_absolute_error', cv)

models = []
for n in range(1, 6):
    models.append(('cart' + str(n), DecisionTreeRegressor(max_depth=n)))
models

# fit a final voting regressor on all data and make a prediction
ensemble = VotingRegressor(estimators=models)
ensemble.fit(X, y)
row = [0.59332206, -0.56637507, 1.34808718, -0.57054047, -0.72480487, 1.05648449,
       0.77744852, 0.07361796, 0.88398267, 2.02843157, 1.01902732, 0.11227799, 0.94218853,
       0.26741783, 0.91458143, -0.72759572, 1.08842814, -0.61450942, -0.69387293, 1.69169009]
yhat = ensemble.predict([row])
print(f'{yhat[0]}')
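With no weights specified, the voting regressor's prediction is simply the mean of the members' predictions. A minimal sketch, assuming the fitted ensemble and row variables from the cell above:

from numpy import mean
# each fitted member predicts the new row; the ensemble averages them
member_preds = [est.predict([row])[0] for est in ensemble.estimators_]
print(mean(member_preds))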
Weighted Average Ensemble
Weighted Average Ensemble for Classification
X, y = make_classification()
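A weighted average ensemble extends soft voting by giving each member a different say in the averaged probabilities. As a minimal sketch of the idea only (an assumption for illustration, with arbitrary untuned weights and member models, expressed through VotingClassifier's weights argument):

from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
# illustrative members and weights; a larger weight gives a member more influence
members = [('knn', KNeighborsClassifier(n_neighbors=5)),
           ('svm', SVC(probability=True, kernel='poly', degree=2))]
weighted = VotingClassifier(estimators=members, voting='soft', weights=[1.0, 2.0])
weighted.fit(X, y)
print(weighted.predict(X[:1]))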