# Author: Qian Chenglong
  2. 2
from numpy import *
from numpy.ma import arange
  5. 5
  6. 6
  7. 7 def loadDataSet():
  8. 8 dataMat = []
  9. 9 labelMat = []
  10. 10 fr = open(\'testSet.txt\')
  11. 11 for line in fr.readlines():
  12. 12 lineArr = line.strip().split()
  13. 13 dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
  14. 14 labelMat.append(int(lineArr[2]))
  15. 15 return dataMat, labelMat
  16. 16
  17. 17 #sigmoid归一化函数
  18. 18 #输入:z=w1x1+w2x2+w3x3......
  19. 19 #s输出:归一化结果
  20. 20 def sigmoid(inX):
  21. 21 return 1.0 / (1 + exp(-inX))
  22. 22
  23. 23
  24. 24 \'\'\'
  25. 25 logistic回归梯度上升优化算法
  26. 26 param dataMatIn: 处理后的数据集
  27. 27 param classLabels: 分类标签
  28. 28 return: 权重值
  29. 29 \'\'\'
  30. 30 def gradAscent(dataMatIn, classLabels):
  31. 31 dataMatrix = mat(dataMatIn) # convert to NumPy matrix(矩阵)
  32. 32 labelMat = mat(classLabels).transpose() # convert to NumPy matrix
  33. 33 m, n = shape(dataMatrix) #m行 n列
  34. 34 alpha = 0.001 #步长
  35. 35 maxCycles = 500
  36. 36 weights = ones((n, 1)) #系数,权重
  37. 37 for k in range(maxCycles): # heavy on matrix operations
  38. 38 h = sigmoid(dataMatrix * weights) # matrix mult
  39. 39 error = (labelMat - h) # vector subtraction
  40. 40 weights = weights + alpha * dataMatrix.transpose() * error # transpose()矩阵转置
  41. 41 return weights
  42. 42
  43. 43 \'\'\'
  44. 44 画出数据集和logisitic回归最佳拟合直线的函数
  45. 45 param weights:
  46. 46 return:
  47. 47 最后的分割方程是y=(-w0-w1*x)/w2
  48. 48 \'\'\'
  49. 49 def plotBestFit(weights):
  50. 50 import matplotlib.pyplot as plt
  51. 51 dataMat, labelMat = loadDataSet()
  52. 52 dataArr = array(dataMat)
  53. 53 n = shape(dataArr)[0]
  54. 54 xcord1 = []
  55. 55 ycord1 = []
  56. 56 xcord2 = []
  57. 57 ycord2 = []
  58. 58 for i in range(n):
  59. 59 if int(labelMat[i]) == 1:
  60. 60 xcord1.append(dataArr[i, 1]);
  61. 61 ycord1.append(dataArr[i, 2])
  62. 62 else:
  63. 63 xcord2.append(dataArr[i, 1]);
  64. 64 ycord2.append(dataArr[i, 2])
  65. 65 fig = plt.figure()
  66. 66 ax = fig.add_subplot(111)
  67. 67 ax.scatter(xcord1, ycord1, s=30, c=\'red\', marker=\'s\')
  68. 68 ax.scatter(xcord2, ycord2, s=30, c=\'green\')
  69. 69 x = arange(-3.0, 3.0, 0.1)
  70. 70 y = (-weights[0] - weights[1] * x) / weights[2]
  71. 71 ax.plot(x, y)
  72. 72 plt.xlabel(\'X1\')
  73. 73 plt.ylabel(\'X2\')
  74. 74 plt.show()
  75. 75
  76. 76 \'\'\'随机梯度上升
  77. 77 param dataMatIn: 处理后的数据集
  78. 78 param classLabels: 分类标签
  79. 79 return: 权重值\'\'\'
  80. 80 def stocGradAscent0(dataMatrix, classLabels):
  81. 81 m, n = shape(dataMatrix)
  82. 82 alpha = 0.01
  83. 83 weights = ones(n) # initialize to all ones
  84. 84 for i in range(m):
  85. 85 h = sigmoid(sum(dataMatrix[i] * weights))
  86. 86 error = classLabels[i] - h
  87. 87 weights = weights + alpha * error * dataMatrix[i]
  88. 88 return weights
  89. 89
  90. 90 \'\'\'改进的随机梯度上升
  91. 91 param dataMatIn: 处理后的数据集
  92. 92 param classLabels: 分类标签
  93. 93 return: 权重值\'\'\'
  94. 94 def stocGradAscent1(dataMatrix, classLabels, numIter=150):
  95. 95 m, n = shape(dataMatrix)
  96. 96 weights = ones(n) # initialize to all ones
  97. 97 for j in range(numIter):
  98. 98 dataIndex = range(m)
  99. 99 for i in range(m):
  100. 100 alpha = 4 / (1.0 + j + i) + 0.0001 # apha decreases with iteration, does not
  101. 101 randIndex = int(random.uniform(0, len(dataIndex))) # go to 0 because of the constant
  102. 102 h = sigmoid(sum(dataMatrix[randIndex] * weights))
  103. 103 error = classLabels[randIndex] - h
  104. 104 weights = weights + alpha * error * dataMatrix[randIndex]
  105. 105 del (dataIndex[randIndex])
  106. 106 return weights

 

# 版权声明: 本文为long5683原创文章, 遵循 CC 4.0 BY-SA 版权协议, 转载请附上原文出处链接和本声明。
# (Copyright notice: original article by long5683 under CC 4.0 BY-SA; keep this notice
#  and the source link when redistributing.)
# 本文链接 (source): https://www.cnblogs.com/long5683/p/9383574.html