Logistic regression gradient ascent optimization algorithm
# Author: Qian Chenglong

from numpy import *   # provides mat, shape, ones, exp, arange, random, array

def loadDataSet():
    dataMat = []
    labelMat = []
    fr = open('testSet.txt')
    for line in fr.readlines():
        lineArr = line.strip().split()
        # prepend a constant 1.0 as the x0 input for the intercept weight w0
        dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
        labelMat.append(int(lineArr[2]))
    fr.close()
    return dataMat, labelMat
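
loadDataSet assumes testSet.txt holds one sample per line: two whitespace-separated feature values followed by a 0/1 class label. The values below are purely illustrative, not the actual file contents:

    0.573012    3.059921    1
    -1.287123   12.430211   0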

# Sigmoid squashing function
# Input:  z = w1*x1 + w2*x2 + w3*x3 + ...
# Output: a value squashed into the range (0, 1)
def sigmoid(inX):
    return 1.0 / (1 + exp(-inX))
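
For large negative inputs, exp(-inX) can overflow float64 and NumPy will emit a runtime warning. If that matters, one optional guard (an addition, not part of the original code) clips the argument first:

    def sigmoidStable(inX):
        # clip keeps exp() within float64 range; sigmoid is saturated out there anyway
        return 1.0 / (1 + exp(-clip(inX, -500, 500)))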


'''
Logistic regression gradient ascent optimization.
param dataMatIn:   preprocessed data set (each row begins with the 1.0 intercept column)
param classLabels: class labels
return: the fitted weights
'''
def gradAscent(dataMatIn, classLabels):
    dataMatrix = mat(dataMatIn)              # convert to NumPy matrix
    labelMat = mat(classLabels).transpose()  # convert to NumPy column vector
    m, n = shape(dataMatrix)                 # m rows, n columns
    alpha = 0.001                            # step size
    maxCycles = 500
    weights = ones((n, 1))                   # weight vector
    for k in range(maxCycles):               # heavy on matrix operations
        h = sigmoid(dataMatrix * weights)    # matrix multiplication
        error = labelMat - h                 # vector subtraction
        weights = weights + alpha * dataMatrix.transpose() * error  # gradient ascent step
    return weights
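
Each pass updates every weight at once with the batch rule w <- w + alpha * X^T * (y - sigmoid(X*w)), i.e. a gradient ascent step on the log-likelihood. A minimal driver sketch, assuming testSet.txt sits in the working directory:

    dataArr, labelMat = loadDataSet()
    weights = gradAscent(dataArr, labelMat)
    print(weights)  # a 3x1 NumPy matrix: intercept w0, then w1 and w2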

'''
Plot the data set and the logistic regression best-fit line.
param weights: the fitted weights, as a plain NumPy array
The decision boundary is the line x2 = (-w0 - w1*x1) / w2.
'''
def plotBestFit(weights):
    import matplotlib.pyplot as plt
    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1])
            ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1])
            ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    # points on the boundary satisfy w0 + w1*x1 + w2*x2 = 0
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
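
Note that gradAscent returns a NumPy matrix, so weights[2] would itself be a 1x1 matrix while the plotting arithmetic above expects scalar-like entries; converting with the matrix's getA() method first avoids that:

    dataArr, labelMat = loadDataSet()
    weights = gradAscent(dataArr, labelMat)
    plotBestFit(weights.getA())  # getA() turns the 3x1 matrix into a plain ndarray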

'''Stochastic gradient ascent.
param dataMatrix:  preprocessed data set, as a NumPy array
param classLabels: class labels
return: the fitted weights'''
def stocGradAscent0(dataMatrix, classLabels):
    m, n = shape(dataMatrix)
    alpha = 0.01
    weights = ones(n)  # initialize to all ones
    for i in range(m):
        # update the weights from one sample at a time
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatrix[i]
    return weights
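
Unlike gradAscent, this version relies on element-wise NumPy array arithmetic, so the data set must be converted with array() before the call:

    dataArr, labelMat = loadDataSet()
    weights = stocGradAscent0(array(dataArr), labelMat)
    plotBestFit(weights)  # already a 1-D array, no getA() needed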

'''Improved stochastic gradient ascent.
param dataMatrix:  preprocessed data set, as a NumPy array
param classLabels: class labels
param numIter:     number of passes over the data
return: the fitted weights'''
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    m, n = shape(dataMatrix)
    weights = ones(n)  # initialize to all ones
    for j in range(numIter):
        dataIndex = list(range(m))  # list() so samples can be removed once used
        for i in range(m):
            # alpha decreases with iteration but never reaches 0
            # because of the 0.0001 constant
            alpha = 4 / (1.0 + j + i) + 0.0001
            # pick a random sample still in dataIndex to damp periodic fluctuations
            randIndex = int(random.uniform(0, len(dataIndex)))
            sample = dataIndex[randIndex]
            h = sigmoid(sum(dataMatrix[sample] * weights))
            error = classLabels[sample] - h
            weights = weights + alpha * error * dataMatrix[sample]
            del dataIndex[randIndex]
    return weights
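
A minimal driver, under the same assumptions as above:

    dataArr, labelMat = loadDataSet()
    weights = stocGradAscent1(array(dataArr), labelMat, numIter=150)
    plotBestFit(weights)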