"""Decision tree study notes (posted 2018-09-07, category: DeepLearning).

Build a decision tree from a CSV file and use it to make a prediction.

The script reads a CSV file whose first row holds the column headers.
For every data row, the first column is ignored, the last column is used
as the class label, and the columns in between are used as categorical
features. The features are one-hot encoded, an entropy-based decision
tree is fitted, the tree is exported as a Graphviz ``.dot`` file, and a
modified copy of the first sample is classified.
"""
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
# from sklearn.externals.six import StringIO

featureList = []
labelList = []

# Open the file in a ``with`` block so the handle is always closed;
# ``newline=''`` is what the csv module expects for files it reads.
with open(r'E:\Python\practice\Decision_Tree\Class_buys_computer.csv',
          'rt', newline='') as allElectronicsDate:
    # csv.reader yields the file content one row (a list of strings) at a time.
    reader = csv.reader(allElectronicsDate)
    # print('reader:'+ str(reader))
    headers = next(reader)

    print(headers)

    for row in reader:
        if not row:
            # Blank line in the CSV: there is no label to read, skip it.
            continue
        print(row)
        # The last column is the class label.
        labelList.append(row[-1])
        # Map header -> value for every column except the first and the last.
        rowDict = dict(zip(headers[1:-1], row[1:-1]))
        featureList.append(rowDict)

print(featureList)
print(labelList)

# DictVectorizer (scikit-learn) turns a list of {feature: value} dicts into
# a 0/1 matrix with one column per distinct "feature=value" pair.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()

# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(); fall back to the old name on old releases.
if hasattr(vec, "get_feature_names_out"):
    featureNames = vec.get_feature_names_out().tolist()
else:
    featureNames = vec.get_feature_names()

print("dummyX:"+str(dummyX))
# Names of the one-hot columns, in the same order as the columns of dummyX.
print(featureNames)

print("labellist:"+str(labelList))

# LabelBinarizer (scikit-learn) converts the label strings into 0/1 values.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("dummyY:"+str(dummyY))

# criterion='entropy' selects information entropy as the split measure.
clf = tree.DecisionTreeClassifier(criterion='entropy')
# Fit the classifier on the training data dummyX / dummyY.
clf = clf.fit(dummyX, dummyY)
print("clf:"+str(clf))

# Export the fitted tree in Graphviz .dot format. The .dot file can then be
# converted to a PDF from the command line, e.g.:
#     dot -Tpdf 1.dot -o 1.pdf
with open("allElectronicInformationGainOri.dot", 'w') as f:
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)

# Take the first row of the feature matrix.
oneRowX = dummyX[0, :]
print("oneRowX:" + str(oneRowX))

# Work on a copy: dummyX[0, :] is a view, so editing it in place would
# silently modify the training matrix as well.
newRowX = oneRowX.copy()
newRowX[0] = 1
newRowX[2] = 0
print("newRowX:" + str(newRowX))
# Example output: newRowX:[ 1. 0. 0. 0. 1. 1. 0. 0. 1. 0.]

# predict() needs a 2-D array (one row per sample), so reshape the single
# sample from a 1-D vector into a 1 x n_features matrix.
newRowX = newRowX.reshape(1, -1)
print("newRowX:" + str(newRowX))
# Example output: newRowX:[[ 1. 0. 0. 0. 1. 1. 0. 0. 1. 0.]]

# Classify the new sample with the previously fitted classifier.
predictedY = clf.predict(newRowX)
print("predictedY:" + str(predictedY))

# The exported .dot file can be converted to PDF with the Graphviz software.