# Logistic Regression implemented in Python (Python实现逻辑回归)


import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

## Load the iris dataset and wrap its feature matrix in a DataFrame whose
## columns carry the dataset's feature names.
data = load_iris()
iris_features = pd.DataFrame(data.data, columns=data.feature_names)
iris_target = data.target

## Keep only the first 100 samples, which are taken to be the class 0 and
## class 1 samples (this presumes the dataset is ordered by class -- verify
## if the data source ever changes).
iris_target_part = iris_target[:100]
iris_features_part = iris_features.head(100)

## Split the selected samples into a training set and a test set: 20% is
## held out for testing, and the fixed random_state keeps the split
## reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    iris_features_part,
    iris_target_part,
    test_size=0.2,
    random_state=2020,
)

## Build the model and fit it on the training split
## (fit() returns the fitted estimator itself).
model = LogisticRegression().fit(x_train, y_train)

## Report the learned intercept (w0), the learned weights (w), and the
## model's score on the training data.
intercept_w0 = model.intercept_
weights_w = model.coef_
train_score = model.score(x_train, y_train)
print("The intercept(w0) of Logistic Regression:", intercept_w0)
print("The weight of Logistic Regression:", weights_w)
print("The score of Logistic Regression:", train_score)

## Visual separator, padded with blank lines, between the training report
## and the evaluation output.
print("\n", "=*" * 30, "\n")

## Use the trained model to predict labels for the held-out test set.
y_predict = model.predict(x_test)

## Evaluate the model with accuracy: the number of correctly predicted
## samples divided by the total number of predicted samples.
print("The accuracy of the Logistic Regression:", metrics.accuracy_score(y_test, y_predict))
## An accuracy of 1 means every test sample was predicted correctly.

## Confusion matrix: counts of every (true label, predicted label) pair.
## scikit-learn's signature is confusion_matrix(y_true, y_pred), so the true
## labels go first; rows then correspond to true labels and columns to
## predicted labels, matching the heatmap axis labels set further down.
confusion_matrix_result = metrics.confusion_matrix(y_test, y_predict)
print("The confusion matrix result:\n", confusion_matrix_result)

## Per-sample comparison table: predicted value, actual value, and whether
## the two agree. The column names are runtime output and stay as-is.
df = pd.DataFrame({"预测值": list(y_predict), "实际值": list(y_test)})
df["是否预测正确"] = df["预测值"] == df["实际值"]
print(df)

## Visualize the confusion matrix as an annotated heatmap.
## Heatmap columns run along the x axis (predicted labels) and rows along
## the y axis (true labels).
plt.figure(figsize=(8, 6))
sns.heatmap(confusion_matrix_result, annot=True, cmap="Blues")
plt.xlabel("Predicted labels")
plt.ylabel("True labels")
plt.show()