# Python machine learning: DecisionTreeClassifier
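# A decision tree works by applying a sequence of nested if/else tests to the features. It can be used for both classification and regression. Advantages: it places few requirements on the data itself and is intuitive and easy to understand. Disadvantages: it overfits easily and generalizes poorly. For regression, it cannot extrapolate beyond the range of the training data.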
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset bundled with scikit-learn.
cancer = load_breast_cancer()

# Split into training and test sets. `stratify` must be the array of class
# labels (not the whole dataset object) so that both splits keep the same
# class proportions as the full dataset.
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42
)

# Baseline: a tree with default settings (no depth limit). random_state is
# fixed so the run is reproducible and comparable with the pruned tree below.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train, y_train)
print("unpruned tree - training accuracy:", tree.score(x_train, y_train))
print("unpruned tree - test accuracy:", tree.score(x_test, y_test))

# A gap between training and test accuracy indicates overfitting;
# pre-prune the tree by limiting its depth with max_depth.
tree04 = DecisionTreeClassifier(max_depth=4, random_state=0)
tree04.fit(x_train, y_train)
print("max_depth=4 tree - training accuracy:", tree04.score(x_train, y_train))
print("max_depth=4 tree - test accuracy:", tree04.score(x_test, y_test))
# Ensembles that address the weaknesses of decision trees