模型简介
逻辑回归(logistic regression)即对数几率回归。它虽然被称作“回归”,但实际上是一种用于二分类的分类方法:先估计一个样本属于各个类别的概率,再比较这些概率,将样本判定为最有可能所属的类别。
逻辑回归公式推导
训练、测试过程简述
代码实现
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import math
import numpy as np
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of *x*.

    Works element-wise on scalars and on array-likes of any shape.

    Args:
        x: A real number or an array-like of real numbers.

    Returns:
        A NumPy float scalar when *x* is a scalar, otherwise an array with
        the same shape as *x*. The computation is done in float64.
    """
    z = np.asarray(x, dtype=float)
    # exp(-|z|) always lies in [0, 1], so neither branch below can overflow:
    #   z >  0: 1 / (1 + exp(-z))
    #   z <= 0: exp(z) / (1 + exp(z))
    decay = np.exp(-np.abs(z))
    result = np.where(z > 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Unwrap the 0-d array so scalar callers keep getting a scalar back.
    return result[()] if result.ndim == 0 else result
class LogisticRegression:
    """Binary logistic-regression classifier trained by batch gradient descent.

    ``weights`` (one coefficient per feature) and ``bias`` (the intercept)
    are ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        """Store the training hyper-parameters.

        Args:
            learning_rate: Step size applied to every gradient update.
            num_iterations: Number of full-batch gradient steps ``fit`` runs.
        """
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    @staticmethod
    def _sigmoid(z):
        """Element-wise logistic function written so that exp never overflows."""
        # exp(-|z|) lies in [0, 1]; pick the algebraically equivalent form
        # of the sigmoid that uses it for each sign of z.
        decay = np.exp(-np.abs(z))
        return np.where(z > 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def fit(self, X, y, weights=None, bias=0):
        """Fit the model to the training data.

        Args:
            X: 2-D array-like of shape (num_samples, num_features).
            y: Array-like of num_samples target labels (0 or 1).
            weights: Optional initial weight vector of length num_features.
                It is copied, so the caller's array is never modified.
                Defaults to all zeros.
            bias: Initial intercept.

        Raises:
            ZeroDivisionError: If X contains no samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        # num_samples is the number of rows, num_features the number of columns.
        num_samples, num_features = X.shape

        if weights is None:
            self.weights = np.zeros(num_features)
        else:
            # Take a float copy: the updates below are in-place, and must
            # neither touch the caller's array nor fail on integer dtypes.
            self.weights = np.array(weights, dtype=float)
        self.bias = bias

        inv_n = 1 / num_samples
        for _ in range(self.num_iterations):
            # Gradient of the mean log-loss with respect to the linear output.
            error = self.predict_prob(X) - y
            dw = inv_n * np.dot(X.T, error)
            db = inv_n * np.sum(error)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for every row of X.

        Args:
            X: 2-D array-like of shape (num_samples, num_features).

        Returns:
            1-D float array with one probability per sample.
        """
        X = np.asarray(X, dtype=float)
        if len(X) == 0:
            # No samples: nothing to multiply, return an empty result.
            return np.zeros(0)
        return self._sigmoid(np.dot(X, self.weights) + self.bias)

    def predict(self, X, threshold=0.5):
        """Return hard 0/1 predictions for every row of X.

        Args:
            X: 2-D array-like of shape (num_samples, num_features).
            threshold: Probabilities greater than or equal to this value
                are labelled 1, all others 0.

        Returns:
            1-D float array containing 0.0 or 1.0 per sample.
        """
        probabilities = self.predict_prob(X)
        return (probabilities >= threshold).astype(probabilities.dtype)

    def calculate_accuracy(self, y_pred, y_test):
        """Return the fraction of predictions that equal the true labels.

        Args:
            y_pred: Sequence of predicted labels.
            y_test: Sequence of true labels, compared pairwise with y_pred.

        Returns:
            Number of matching pairs divided by ``len(y_pred)``, as a float.

        Raises:
            ZeroDivisionError: If y_pred is empty.
        """
        matches = sum(1 for p, t in zip(y_pred, y_test) if p == t)
        return float(matches) / float(len(y_pred))
if __name__ == "__main__":
    # Load the breast-cancer dataset shipped with scikit-learn.
    dataset = load_breast_cancer()
    features, labels = dataset.data, dataset.target

    # Hold out 20% of the samples for testing; the fixed seed keeps the
    # split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=1
    )

    # Start gradient descent from an all-zero weight vector, one entry per
    # feature column.
    feature_count = features.shape[1]
    initial_weights = np.zeros(feature_count)

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train, initial_weights)

    # Show predictions next to the true labels, then the overall accuracy.
    predictions = classifier.predict(X_test)
    print(predictions)
    print(y_test)
    print("准确率:", classifier.calculate_accuracy(predictions, y_test))