K-Means
# 使用K-means进行聚类,聚类结果与参数 random_state 相关:
kmeans = KMeans(n_clusters=4, random_state=42)
random_state 参数用于控制随机数生成器的种子(seed),决定初始质心的随机选择方式。通过指定 random_state 参数,可以使每次运行 KMeans 算法时得到相同的初始质心,从而得到可重复的聚类结果。
K-means准确率: 28.57%
K-means准确率: 19.05%
SVM
svm准确率: 100.00%
[0 0 0 0 1 1 2 2 2 2 2 2 2 2 2 2 2 3 3 3 3]
svm准确率: 90.48%
[0 0 0 2 1 1 2 0 2 2 2 2 2 2 2 2 2 3 3 3 3]
SVM[80%,20%]
svm_pre准确率: 40.00%
svm_pre准确率: 60.00%
Gauss
Gauss准确率: 23.81%
Gauss准确率: 19.05%
Bayes_Gauss
Bayes_Gauss准确率: 23.81%
Bayes_Gauss准确率: 14.29%
'''
Description: henggao_note
version: v1.0.0
Date: 2023-07-09 10:36:58
LastEditors: henggao
LastEditTime: 2023-07-09 17:11:02
'''
from sklearn.mixture import BayesianGaussianMixture
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import pandas as pd
import numpy as np
from scipy.optimize import linear_sum_assignment
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# 1. Read the Excel file; the data is assumed to live in the worksheet "Sheet1".
dataframe = pd.read_excel('data230709.xlsx', sheet_name='Sheet1')
# Convert the table into a NumPy matrix (one row per sample).
data = dataframe.to_numpy()
# print(data)

# 2. Z-score standardisation: fit a StandardScaler and transform the data.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)
# print(data_scaled)

# Reference class label of every sample, in row order.
true_labels = [1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5]
# Alternative labelling kept for experiments:
# true_labels = [1, 1, 1, 1, 2, 3, 5, 5, 5, 5, 4, 4, 4, 6, 6, 6, 6, 7, 7, 7, 7]


def _clustering_accuracy(true_labels, cluster_labels):
    """Return the accuracy of a clustering under the best cluster-to-class match.

    Cluster ids produced by an unsupervised model are arbitrary, so comparing
    them to the reference labels element by element is meaningless. This
    helper builds the cluster/class contingency table, finds the one-to-one
    assignment of clusters to classes that maximises the number of agreeing
    samples, and returns that number divided by the sample count.

    Args:
        true_labels: Reference class label of every sample.
        cluster_labels: Cluster id assigned to every sample.

    Returns:
        Fraction of samples (0.0-1.0) that agree with the reference labels
        under the optimal matching; 0.0 for empty input.

    Raises:
        ValueError: If the two label sequences have different lengths.
    """
    true_arr = np.asarray(true_labels).ravel()
    cluster_arr = np.asarray(cluster_labels).ravel()
    if true_arr.size != cluster_arr.size:
        raise ValueError(
            "true_labels and cluster_labels must have the same length "
            "({} != {})".format(true_arr.size, cluster_arr.size))
    if true_arr.size == 0:
        return 0.0

    classes, class_idx = np.unique(true_arr, return_inverse=True)
    clusters, cluster_idx = np.unique(cluster_arr, return_inverse=True)

    # contingency[i, j] = number of samples in cluster i whose true class is j.
    contingency = np.zeros((clusters.size, classes.size), dtype=np.int64)
    np.add.at(contingency, (cluster_idx, class_idx), 1)

    # linear_sum_assignment minimises cost, so negate to maximise agreement.
    # With a rectangular table only min(rows, cols) pairs are matched; samples
    # in unmatched clusters simply count as errors.
    rows, cols = linear_sum_assignment(-contingency)
    return float(contingency[rows, cols].sum()) / true_arr.size


def k_means(data, true_labels, n_clusters=4, random_state=42):
    """Cluster ``data`` with K-means, print its accuracy and plot the result.

    Args:
        data: 2-D array of samples; columns 0 and 1 are used for plotting.
        true_labels: Reference class label of every sample.
        n_clusters: Number of clusters to form.
        random_state: Seed for the centroid initialisation, so repeated runs
            give the same clustering.
    """
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(data)
    kmeans_labels = kmeans.labels_

    # Cluster ids are arbitrary, so score after matching clusters to classes.
    accuracy = _clustering_accuracy(true_labels, kmeans_labels)
    print("K-means准确率: {:.2f}%".format(accuracy * 100))

    plt.figure(figsize=(8, 6))
    # Both layers share the same coordinates: draw the true labels as large
    # markers and the clusters as small ones on top so neither hides the other.
    plt.scatter(data[:, 0], data[:, 1], c=true_labels,
                cmap='Set1', s=160, label='True Labels')
    plt.scatter(data[:, 0], data[:, 1], c=kmeans_labels,
                cmap='viridis', s=40, label='K-means Clustering')
    plt.title('Comparison of True Labels and K-means Clustering')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()
    plt.show()


# k_means(data, true_labels)
# k_means(data_scaled, true_labels)


def svm_demo(data, true_labels):
    """Fit a linear SVM on all samples and report its accuracy on the same data.

    The model is trained and evaluated on the full data set, so the printed
    figure is a training accuracy, not a generalisation estimate.

    Args:
        data: 2-D array of samples; columns 0 and 1 are used for plotting.
        true_labels: Class label of every sample.
    """
    # Multi-class SVC with a one-vs-rest shaped decision function.
    svm = SVC(kernel='linear', decision_function_shape='ovr')
    svm.fit(data, true_labels)

    predicted_labels = svm.predict(data)
    print(predicted_labels)

    accuracy = accuracy_score(true_labels, predicted_labels)
    print("svm准确率: {:.2f}%".format(accuracy * 100))

    plt.figure(figsize=(8, 6))
    scatter = plt.scatter(data[:, 0], data[:, 1],
                          c=predicted_labels, cmap='viridis')
    plt.title('Support Vector Clustering')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.colorbar(scatter, shrink=0.8)  # colour bar for the predicted classes
    plt.show()


# svm_demo(data, true_labels)
# svm_demo(data_scaled, true_labels)


def svm_demo_predict(data, true_labels):
    """Train a linear SVM on 80% of the samples and score it on the other 20%.

    Args:
        data: 2-D array of samples.
        true_labels: Class label of every sample.
    """
    # Fixed seed so the train/test split is the same on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        data, true_labels, test_size=0.2, random_state=42)

    svm = SVC(kernel='linear', decision_function_shape='ovr')
    svm.fit(X_train, y_train)

    y_pred = svm.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("svm准确率: {:.2f}%".format(accuracy * 100))


# svm_demo_predict(data, true_labels)
# svm_demo_predict(data_scaled, true_labels)


def gauss_demo(data, true_labels, n_components=4, random_state=42):
    """Cluster ``data`` with a Gaussian mixture, print accuracy and plot.

    Args:
        data: 2-D array of samples; columns 0 and 1 are used for plotting.
        true_labels: Reference class label of every sample.
        n_components: Number of mixture components.
        random_state: Seed for the model initialisation, so repeated runs
            give the same clustering.
    """
    gmm = GaussianMixture(n_components=n_components, random_state=random_state)
    gmm.fit(data)
    labels = gmm.predict(data)

    # Component ids are arbitrary, so score after matching them to classes.
    accuracy = _clustering_accuracy(true_labels, labels)
    print("Gauss准确率: {:.2f}%".format(accuracy * 100))

    plt.figure(figsize=(8, 6))
    plt.scatter(data[:, 0], data[:, 1], c=labels, cmap='viridis')
    plt.title('GMM Clustering')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.show()


# gauss_demo(data, true_labels)
# gauss_demo(data_scaled, true_labels)


def bayes_gauss(data, true_labels, n_components=5, random_state=42):
    """Cluster ``data`` with a Bayesian Gaussian mixture, print accuracy and plot.

    Args:
        data: 2-D array of samples; columns 0 and 1 are used for plotting.
        true_labels: Reference class label of every sample.
        n_components: Upper bound on the number of mixture components.
        random_state: Seed for the model initialisation, so repeated runs
            give the same clustering.
    """
    bgmm = BayesianGaussianMixture(
        n_components=n_components, covariance_type='full',
        random_state=random_state)
    bgmm.fit(data)
    labels = bgmm.predict(data)

    # Component ids are arbitrary, so score after matching them to classes.
    accuracy = _clustering_accuracy(true_labels, labels)
    print("Bayes_Gauss准确率: {:.2f}%".format(accuracy * 100))

    plt.figure(figsize=(8, 6))
    plt.scatter(data[:, 0], data[:, 1], c=labels, cmap='viridis')
    plt.title('BGMM Clustering')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.show()


bayes_gauss(data, true_labels)
bayes_gauss(data_scaled, true_labels)