# 共计 1430 个字符，预计需要花费 4 分钟才能阅读完成。
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
# Logistic-regression demo on the iris dataset: load the data, keep two
# classes, fit a model, report its parameters and accuracy, then show the
# confusion matrix both as text and as a heatmap.
data = load_iris()
iris_target = data.target
iris_features = pd.DataFrame(data=data.data, columns=data.feature_names)

# Select the samples belonging to class 0 and class 1 (the first 100 rows).
iris_features_part = iris_features.iloc[:100]
iris_target_part = iris_target[:100]

# Split the data into a training set and a test set (20% held out for testing;
# the fixed random_state keeps the split reproducible).
x_train, x_test, y_train, y_test = train_test_split(
    iris_features_part,
    iris_target_part,
    test_size=0.2,
    random_state=2020,
)

# Build and fit the model.
model = LogisticRegression()
model.fit(x_train, y_train)

# Inspect the intercept (w0).
print("The intercept(w0) of Logistic Regression:", model.intercept_)
# Inspect the feature weights (w).
print("The weight of Logistic Regression:", model.coef_)
print("The score of Logistic Regression:", model.score(x_train, y_train))
# Visual separator; the "\n" escapes had lost their backslashes and printed a
# literal "n" instead of a blank line.
print("\n", "=*" * 30, "\n")

# Predict on the held-out test set with the trained model.
y_predict = model.predict(x_test)

# Evaluate with accuracy: the fraction of test samples predicted correctly.
# An accuracy of 1 would mean every test sample was predicted correctly.
print("The accuracy of the Logistic Regression:", metrics.accuracy_score(y_test, y_predict))

# Confusion matrix (counts of each true-label / predicted-label combination).
# scikit-learn expects (y_true, y_pred): rows are the true labels and columns
# are the predicted labels, which is what the heatmap axis labels below assume.
confusion_matrix_result = metrics.confusion_matrix(y_test, y_predict)
print("The confusion matrix result:\n", confusion_matrix_result)

# Per-sample table: predicted value, actual value, and whether they match.
df = pd.DataFrame()
df[" 预测值 "] = list(y_predict)
df[" 实际值 "] = list(y_test)
df[" 是否预测正确 "] = df[" 预测值 "] == df[" 实际值 "]
print(df)

# Visualize the confusion matrix as a heatmap.
plt.figure(figsize=(8, 6))
sns.heatmap(confusion_matrix_result, annot=True, cmap="Blues")
plt.xlabel("Predicted labels")
plt.ylabel("True labels")
plt.show()
# 正文完