Implementation of the Paper's Method (on the MNIST Dataset)
Randomly crop the training images: each image is cut into 20 random patches of size 6*6.
import numpy as np

def randomcut(x_train, k, size):  # randomly crop each image into k patches of size*size
    m, nx, ny = x_train.shape
    x = np.zeros((m, k, size, size))
    for i in range(m):
        for j in range(k):
            idx = np.random.randint(0, nx - size + 1)  # random top-left corner, kept inside the image
            idy = np.random.randint(0, ny - size + 1)
            x[i][j] = x_train[i][idx:idx + size, idy:idy + size]
    return x
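As a quick sanity check (assuming x_train holds the raw 28x28 MNIST training images), the cropping yields one 4-D patch array per call:

patches = randomcut(x_train, 20, 6)
print(patches.shape)  # expected: (m, 20, 6, 6)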
Preprocess the data (mean normalization) with $x=\frac{x-u}{s}$, where $u$ is the per-feature mean and $s$ is the per-feature standard deviation.
def init(x_train):  # standardize each feature to zero mean and unit variance
    u = np.mean(x_train, axis=0)  # per-feature mean
    s = np.std(x_train, axis=0)   # per-feature standard deviation
    s[s == 0] = 1                 # guard against constant (all-zero) pixels to avoid division by zero
    x_train = (x_train - u) / s
    return x_train
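For example, after flattening the random patches into a 2-D matrix (one row per 36-pixel patch), each column is standardized independently (a sketch, reusing randomcut and x_train from above):

x_cut = randomcut(x_train, 20, 6).reshape(-1, 36)  # (m*20, 36): one row per flattened 6x6 patch
x_cut_init = init(x_cut)
print(x_cut_init.mean(axis=0).round(6))            # each column mean is now approximately 0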
Cluster the randomly cropped patches with the k-means algorithm into 30 clusters.
def init_c(x, k):  # initialize the cluster centers by picking k distinct samples at random
    m, n = x.shape
    c = np.zeros((k, n))
    idx = np.random.choice(m, k, replace=False)  # sample without replacement to avoid duplicate centers
    for i in range(k):
        c[i, :] = x[idx[i], :]
    return c
For each sample, find the nearest cluster center, i.e. the index attaining $\min_{k}||x^{(i)}-c_k||^2$.
def compute_c(x, c):  # assign each sample to its nearest cluster center
    m, n = x.shape
    k = c.shape[0]
    idx = np.zeros(m)  # idx[i] holds the cluster index assigned to sample i
    for i in range(m):
        min_distance = np.inf
        for j in range(k):
            distance = np.sum((x[i, :] - c[j, :]) ** 2)
            if distance < min_distance:
                min_distance = distance
                idx[i] = j
    return idx
def updata_c(x, k, idx):  # update each cluster center as the mean of its assigned samples
    m, n = x.shape
    c = np.zeros((k, n))  # new cluster centers
    for i in range(k):
        indices = np.where(idx == i)[0]  # samples currently assigned to cluster i
        if len(indices) > 0:             # skip empty clusters to avoid division by zero
            c[i, :] = np.mean(x[indices, :], axis=0)
    return c
def run_one_kmeans(x, k, repeate):  # iterate assignment and update steps from one random initialization
    c = init_c(x, k)  # initialize the cluster-center matrix
    m, n = x.shape
    idx = np.zeros(m)
    for i in range(repeate):
        idx = compute_c(x, c)    # assignment step
        c = updata_c(x, k, idx)  # update step
    return idx, c
Across several random restarts, keep the clustering that minimizes the average distance from each sample to its assigned center, $\frac{1}{m}\sum_{i=1}^{m}||x^{(i)}-c_{idx(i)}||^2$.
def run_all_kmeans(x, k, repeate, r_min):  # restart k-means r_min times and keep the best run
    m, n = x.shape
    min_cost = np.inf
    result_idx = np.zeros(m)
    result_c = np.zeros((k, n))
    for i in range(r_min):
        idx, c = run_one_kmeans(x, k, repeate)
        cost = 0
        for j in range(m):
            cost += np.sum((x[j, :] - c[int(idx[j]), :]) ** 2)
        cost = cost / m  # average distortion of this run
        if cost < min_cost:
            min_cost = cost
            result_idx = idx
            result_c = c
    return result_idx, result_c
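Put together, the hand-written k-means can be run on the standardized patch matrix like this (a sketch; the iteration counts mirror the commented-out call in xtrain_work further below):

jnum = 30  # number of clusters
result_idx, result_c = run_all_kmeans(x_cut_init, jnum, repeate=30, r_min=10)
print(result_c.shape)  # (30, 36): one flattened 6x6 center per cluster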
Regularly crop the training images: each image is cut into 64 patches of size 6*6 on a fixed grid.
def rulecut(x_test, k, size):  # crop patches on a regular grid; assumes k=64, i.e. an 8x8 grid with stride 3
    m, nx, ny = x_test.shape
    x = np.zeros((m, k, size, size))
    for i in range(m):
        for j in range(k // 8):
            for t in range(8):
                x[i][j * 8 + t] = x_test[i][t * 3:t * 3 + size, j * 3:j * 3 + size]
    return x
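On 28x28 MNIST images with size=6 and stride 3, the 8 grid offsets are 0, 3, ..., 21, so the last patch ends at pixel 27 and every patch stays inside the image:

grid_patches = rulecut(x_train, 64, 6)
print(grid_patches.shape)  # (m, 64, 6, 6): an 8x8 grid of patches per image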
For each regularly cropped patch, compute its distance to every cluster center, set the entry of the nearest center to 1, and all others to 0 (one-hot encoding).
from sklearn.cluster import KMeans

def xtrain_work(x, m):  # m is the number of training samples; ac is the module defining randomcut/rulecut
    pool_param = {'pool_height': 4, 'pool_width': 4, 'stride': 4}
    x_cut = ac.randomcut(x, 20, 6)  # random crop: 20 patches of 6*6 per image
    X_cut = ac.rulecut(x, 64, 6)    # grid crop: 64 patches of 6*6 per image
    x_cut = x_cut.reshape(x_cut.shape[0] * x_cut.shape[1], x_cut.shape[2] * x_cut.shape[3])
    X_cut = X_cut.reshape(X_cut.shape[0] * X_cut.shape[1], X_cut.shape[2] * X_cut.shape[3])
    x_cut_init = init(x_cut)  # standardize the random patches
    X_cut_init = init(X_cut)  # standardize the grid patches
    jnum = 30                 # number of clusters
    # result_idx, result_c = kmeans.run_all_kmeans(x_cut_init, jnum, 30, 10)  # hand-written k-means
    clf_kmeans = KMeans(n_clusters=jnum).fit(x_cut_init)  # Scikit-Learn k-means on the random patches
    result_c = clf_kmeans.cluster_centers_
    result = np.zeros((m, jnum, 64))
    for i in range(m):  # one-hot encode each grid patch by its nearest cluster center
        for j in range(64):
            min_dist = np.inf
            index = -1
            for k in range(jnum):
                dist = np.sum((X_cut_init[i * 64 + j] - result_c[k]) ** 2)
                if dist < min_dist:
                    min_dist = dist
                    index = k
            result[i][index][j] = 1
    result = result.reshape(m, jnum, 8, 8)                   # arrange the 64 patches back onto the 8x8 grid
    x_result = average_pooling_forward(result, pool_param)   # 4x4 average pooling per cluster channel
    x_result = x_result.reshape(x_result.shape[0], x_result.shape[1] * x_result.shape[2] * x_result.shape[3])
    return x_result, result_c
Pool the encoded data with average pooling.
def average_pooling_forward(x, pool_param):  # forward pass of average pooling
    N, C, H, W = x.shape
    HH, WW, stride = pool_param['pool_height'], pool_param['pool_width'], pool_param['stride']
    H_out = int((H - HH) / stride + 1)
    W_out = int((W - WW) / stride + 1)
    out = np.zeros((N, C, H_out, W_out))
    for i in range(H_out):
        for j in range(W_out):
            x_mask = x[:, :, i * stride:i * stride + HH, j * stride:j * stride + WW]
            out[:, :, i, j] = np.mean(x_mask, axis=(2, 3))
    return out
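With the parameters used above (4x4 windows, stride 4), the 8x8 assignment maps shrink to 2x2, so each image ends up as a 30*2*2 = 120-dimensional feature vector (a shape check with a dummy input):

encoded = np.zeros((5, 30, 8, 8))  # hypothetical batch of 5 one-hot-encoded images
pooled = average_pooling_forward(encoded, {'pool_height': 4, 'pool_width': 4, 'stride': 4})
print(pooled.shape)                # (5, 30, 2, 2), flattened later to (5, 120)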
Apply the same processing to the test set: cut each image into 64 patches of size 6*6 on the grid, one-hot encode each patch by its nearest cluster center, then average-pool.
def xtest_work(x, m, result_c):  # m is the number of test samples; result_c are the centers learned on the training set
    pool_param = {'pool_height': 4, 'pool_width': 4, 'stride': 4}
    jnum = result_c.shape[0]
    X_cut = ac.rulecut(x, 64, 6)  # grid crop: 64 patches of 6*6 per image
    X_cut = X_cut.reshape(X_cut.shape[0] * X_cut.shape[1], X_cut.shape[2] * X_cut.shape[3])
    X_cut_init = init(X_cut)
    result = np.zeros((m, jnum, 64))
    for i in range(m):  # one-hot encode each grid patch by its nearest cluster center
        for j in range(64):
            min_dist = np.inf
            index = -1
            for k in range(jnum):
                dist = np.sum((X_cut_init[i * 64 + j] - result_c[k]) ** 2)
                if dist < min_dist:
                    min_dist = dist
                    index = k
            result[i][index][j] = 1
    result = result.reshape(m, jnum, 8, 8)
    x_result = average_pooling_forward(result, pool_param)
    x_result = x_result.reshape(x_result.shape[0], x_result.shape[1] * x_result.shape[2] * x_result.shape[3])
    return x_result
Classify with an SVM classifier (a decision tree is also tried for comparison).
from sklearn.svm import SVC
from sklearn import tree

x_train_result, result_c = xtrain_work(X_train, mt)  # mt: number of training images
clf = SVC(gamma='auto', kernel='rbf')
clf.fit(x_train_result, y_train)
x_test_result = xtest_work(X_test, ms, result_c)     # ms: number of test images
print("SVM accuracy: {}".format(clf.score(x_test_result, y_test)))
clf = tree.DecisionTreeClassifier()                  # decision tree for comparison
clf = clf.fit(x_train_result, y_train)
print("Decision tree accuracy: {}".format(clf.score(x_test_result, y_test)))
Experimental Results
The experiment used 20000 training images and 20000 test images. Different results were obtained by varying the number of cluster centers, using the k-means implementation from the Scikit-Learn framework, and trying different classifiers; the statistics from these runs were collected into a comparison table.
The experiments show that the accuracy varies with the number of cluster centers, the clustering algorithm, and the classifier; overall, the SVM classifier achieves the higher accuracy.