# Tune tree depth (max_depth) for a gradient-boosting classifier: fit one
# model per depth in 1..4, record its accuracy on the held-out test set,
# then display the best-scoring row(s).
# NOTE: assumes X_train/y_train/X_test/y_test and the sklearn imports are
# defined earlier in the document.
result_1 = pd.DataFrame(columns=["决策树深度(max_depth)", "梯度提升回归树待测模型评分"])
for depth in range(1, 5):  # original had "inrange" (SyntaxError): space restored
    gbcf = GradientBoostingClassifier(max_depth=depth, random_state=1)
    gbcf.fit(X_train, y_train)
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # build a one-row frame and concatenate instead.
    row = pd.DataFrame([{
        "决策树深度(max_depth)": depth,
        "梯度提升回归树待测模型评分": accuracy_score(y_test, gbcf.predict(X_test)),
    }])
    result_1 = pd.concat([result_1, row], ignore_index=True)
# Show the row(s) with the highest test accuracy.
result_1[result_1["梯度提升回归树待测模型评分"] == result_1["梯度提升回归树待测模型评分"].max()]
# Tune the ensemble size (n_estimators) with max_depth fixed at 4 (the best
# value from the previous sweep): try 1, 11, 21, ..., 491, score each model
# on the test set, then display the best-scoring row(s).
# NOTE: assumes X_train/y_train/X_test/y_test and the sklearn imports are
# defined earlier in the document.
result_2 = pd.DataFrame(columns=["集成树的个数(n_estimators)", "梯度提升回归树待测模型评分"])
for n_trees in range(1, 500, 10):  # original had "inrange" (SyntaxError): space restored
    gbcf = GradientBoostingClassifier(max_depth=4, n_estimators=n_trees, random_state=1)
    gbcf.fit(X_train, y_train)
    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
    row = pd.DataFrame([{
        "集成树的个数(n_estimators)": n_trees,
        "梯度提升回归树待测模型评分": accuracy_score(y_test, gbcf.predict(X_test)),
    }])
    result_2 = pd.concat([result_2, row], ignore_index=True)
# Show the row(s) with the highest test accuracy.
result_2[result_2["梯度提升回归树待测模型评分"] == result_2["梯度提升回归树待测模型评分"].max()]
n_estimators的调节结果如上表所示,那么接下来我们在上面的参数基础上继续调节学习率:
1 2 3 4 5 6 7
# Coarse sweep over learning_rate (0.1 .. 0.9 in steps of 0.1) with the
# previously selected max_depth=4 and n_estimators=161, scoring each model
# on the test set and displaying the best-scoring row(s).
# NOTE: assumes X_train/y_train/X_test/y_test and the sklearn imports are
# defined earlier in the document.
result_3 = pd.DataFrame(columns=["学习率(learning_rate)", "梯度提升回归树待测模型评分"])
for step in range(1, 10):  # original had "inrange" (SyntaxError): space restored
    lr = step / 10  # 0.1, 0.2, ..., 0.9
    gbcf = GradientBoostingClassifier(max_depth=4, n_estimators=161,
                                      learning_rate=lr, random_state=1)
    gbcf.fit(X_train, y_train)
    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
    row = pd.DataFrame([{
        "学习率(learning_rate)": lr,
        "梯度提升回归树待测模型评分": accuracy_score(y_test, gbcf.predict(X_test)),
    }])
    result_3 = pd.concat([result_3, row], ignore_index=True)
# Show the row(s) with the highest test accuracy.
result_3[result_3["梯度提升回归树待测模型评分"] == result_3["梯度提升回归树待测模型评分"].max()]
接下来,我们还可以对学习率的参数调节进行进一步的区间划分,代码及结果如下所示:
1 2 3 4 5 6 7
# Fine sweep over learning_rate (0.01 .. 0.19 in steps of 0.01), refining
# the coarse sweep above, with max_depth=4 and n_estimators=161 fixed;
# scores each model on the test set and displays the best-scoring row(s).
# NOTE: assumes X_train/y_train/X_test/y_test and the sklearn imports are
# defined earlier in the document.
result_4 = pd.DataFrame(columns=["学习率(learning_rate)", "梯度提升回归树待测模型评分"])
for step in range(1, 20):  # original had "inrange" (SyntaxError): space restored
    lr = step / 100  # 0.01, 0.02, ..., 0.19
    gbcf = GradientBoostingClassifier(max_depth=4, n_estimators=161,
                                      learning_rate=lr, random_state=1)
    gbcf.fit(X_train, y_train)
    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
    row = pd.DataFrame([{
        "学习率(learning_rate)": lr,
        "梯度提升回归树待测模型评分": accuracy_score(y_test, gbcf.predict(X_test)),
    }])
    result_4 = pd.concat([result_4, row], ignore_index=True)
# Show the row(s) with the highest test accuracy.
result_4[result_4["梯度提升回归树待测模型评分"] == result_4["梯度提升回归树待测模型评分"].max()]