186 lines
6.1 KiB
Python
186 lines
6.1 KiB
Python
|
# -*- coding: UTF-8 -*-
|
|||
|
"""
|
|||
|
@Project:SalinityMain.py
|
|||
|
@File:MonteCarloSampling.py
|
|||
|
@Function:基于蒙特卡洛随机抽样的最优特征选择算法
|
|||
|
@Contact:
|
|||
|
@Author:SHJ
|
|||
|
@Date:2021/10/19 11:30
|
|||
|
@Version:1.0.0
|
|||
|
"""
|
|||
|
import numpy as np
|
|||
|
from numpy import random
|
|||
|
import matplotlib.pyplot as plt
|
|||
|
import seaborn as sns
|
|||
|
import logging
|
|||
|
# Module-level logger, looked up by the fixed name "mylog".
logger = logging.getLogger("mylog")
|
|||
|
|
|||
|
|
|||
|
def api_sel_feature(x_list, iter=100, alpha=0.5, ts=-0.5, iter_ratio=0.2):
    """
    Monte Carlo test of whether a single feature is related to the classes.

    The compactness-separation coefficient of the real class grouping is
    computed first.  Then, ``iter`` times, about one third of all samples are
    drawn at random (with replacement), their values are shuffled among the
    drawn positions, the data is split back into the original class sizes and
    the coefficient is recomputed.  The feature is rejected when the shuffled
    data scores less than or equal to the real grouping in more than
    ``iter * iter_ratio`` rounds.

    :para x_list: training samples of one feature for k classes
                  [X1, X2, ..., Xi, ..., Xk]; Xi is a 1-D sequence/array
                  holding the samples of class i
    :para iter: number of Monte Carlo iterations (name shadows the builtin
                but is kept for caller compatibility)
    :para alpha: adjustment factor forwarded to cal_com_sep_coef
    :para ts: threshold on the coefficient of the real grouping; a value
              below it rejects the feature without any resampling
    :para iter_ratio: largest tolerated fraction of rounds in which the
                      shuffled data scores <= the real grouping
    :return: tuple ``(selected, com_sep_coef_old)``; ``selected`` is True when
             the feature is highly related to the classes, False otherwise,
             and ``com_sep_coef_old`` is the coefficient of the real grouping
    :raises ValueError: if ``x_list`` contains no class at all
    """
    if len(x_list) == 0:
        raise ValueError('x_list must contain at least one class')

    com_sep_coef_old = cal_com_sep_coef(x_list, alpha)
    if com_sep_coef_old < ts:
        return False, com_sep_coef_old

    # Flatten all classes into one row vector; converting every class first
    # lets plain Python lists be used as input as well.
    class_sizes = [len(x) for x in x_list]
    X = np.hstack([np.asarray(x) for x in x_list])
    num_sampler = sum(class_sizes)  # total number of samples
    # Positions at which np.split cuts X back into the original classes.
    split_points = np.cumsum(class_sizes)[:-1]
    num = int(np.ceil(num_sampler / 3))  # samples shuffled per round

    flag = 0  # rounds in which the shuffled data scored <= the real grouping
    for _ in range(iter):
        # Random positions in [0, num_sampler - 1]; duplicates are possible.
        picks = np.ceil(np.random.rand(1, num) * num_sampler).astype(int)
        # rand() may return exactly 0.0, which would give index -1: clamp it.
        picks = np.maximum(np.sort(picks[0, :]) - 1, 0)

        # Shuffle the drawn values among the drawn positions.
        X_new_sel = X.copy()
        X_new_sel[picks] = np.random.permutation(X[picks])

        X_new_list = np.split(X_new_sel, split_points)
        com_sep_coef_new = cal_com_sep_coef(X_new_list, alpha)
        if com_sep_coef_new <= com_sep_coef_old:
            flag += 1

    # Guard the ratio so that iter == 0 does not raise ZeroDivisionError.
    ratio = flag / iter if iter else 0.0
    logger.info('flag:%s, iter:%s, falg/iter:%s', flag, iter, ratio)

    if flag > (iter * iter_ratio):
        return False, com_sep_coef_old
    return True, com_sep_coef_old
|
|||
|
|
|||
|
def cal_com_coef(x_list):
    """
    Compute the within-class compactness coefficient of one feature.

    For every class the mean absolute difference over all ordered pairs of
    distinct samples is computed; the coefficient is the average of these
    per-class values.

    :para x_list: training samples of one feature for k classes
                  [X1, X2, ..., Xi, ..., Xk]; Xi is a 1-D sequence/array
                  holding the n samples of class i
    :return com_coef: Compactness Coefficient; 0.0 when x_list is empty
    """
    class_num = len(x_list)
    if class_num == 0:
        # Nothing to average; mirrors cal_sep_coef, which also yields 0 here.
        return 0.0

    coef_sum = 0.0
    for samples in x_list:
        samples = np.asarray(samples)
        sample_num = len(samples)
        if sample_num < 2:
            # A class without a pair of samples has no spread.  The formula
            # below would divide by zero and turn the whole result into NaN.
            continue
        # Broadcasting builds the full matrix of pairwise differences; the
        # diagonal is zero, so only distinct pairs contribute to the sum.
        pair_diff = np.abs(samples[:, None] - samples)
        coef_sum += np.sum(pair_diff) / (sample_num * (sample_num - 1))

    return coef_sum / class_num
|
|||
|
|
|||
|
def cal_sep_coef(x_list):
    """
    Compute the between-class separation coefficient of one feature.

    For every pair of different classes the mean absolute difference between
    a sample of the first and a sample of the second class is computed; the
    coefficient is the average of these values over all class pairs.

    :para x_list: training samples of one feature for k classes
                  [X1, X2, ..., Xi, ..., Xk]; Xi is a 1-D sequence/array
                  holding the n samples of class i
    :return sep_coef: Separation Coefficient; 0.0 for fewer than two classes
    """
    class_num = len(x_list)
    if class_num < 2:
        # No pair of classes exists.
        return 0.0

    arrays = [np.asarray(x) for x in x_list]
    coef_sum = 0.0
    for m, xm in enumerate(arrays[:-1]):
        # Build the column vector once per class.  Re-expanding it for every
        # compared class would add one axis per comparison and eventually
        # exceed NumPy's maximum number of dimensions.
        xm_col = xm[:, None]
        for xn in arrays[m + 1:]:
            coef_sum += np.sum(np.abs(xm_col - xn)) / (len(xm) * len(xn))

    # The pair value is symmetric, so every unordered pair stands for the two
    # ordered pairs (m, n) and (n, m) of the original definition.
    return 2 * coef_sum / (class_num * (class_num - 1))
|
|||
|
|
|||
|
def cal_com_sep_coef(x_list, alpha = 0.5):
    """
    Combine within-class compactness and between-class separation into the
    Compactness-Separation Coefficient
    ``alpha * com_coef - (1 - alpha) * sep_coef``.

    :para x_list: training samples of one feature for k classes
                  [X1, X2, ..., Xi, ..., Xk]; Xi is a 1-D sequence/array
                  holding the n samples of class i
    :para alpha: adjustment factor weighting compactness against separation;
                 must lie in [0, 1]
    :return com_sep_coef: Compactness-Separation Coefficient
    :raises ValueError: if ``alpha`` lies outside [0, 1]
    """
    # A chained comparison checks both bounds; "not a >= 0 and a <= 1" only
    # caught negative values because "not" binds tighter than "and".
    if not 0 <= alpha <= 1:
        raise ValueError('input_para_alpha beyond (0,1)!')

    com_coef = cal_com_coef(x_list)
    sep_coef = cal_sep_coef(x_list)
    return alpha * com_coef - (1 - alpha) * sep_coef
|
|||
|
|
|||
|
def get_logistic_rand_number(num, u=0.4):  # deprecated
    """
    Generate ``num`` integer-valued numbers from a logistic-map sequence.

    The sequence starts at 0.5 and follows ``x[i] = u * x[i-1] * (1 - x[i-1])``.
    Every element is then multiplied by ``3 * num`` and rounded up.

    :para num: length of the sequence
    :para u: growth parameter of the logistic map
    :return: float array of shape (1, num) holding the rounded-up values
    """
    sequence = np.zeros((1, num))
    sequence[0, 0] = 0.5  # fixed start value

    for idx in range(num - 1):
        prev = sequence[0, idx]
        sequence[0, idx + 1] = u * prev * (1 - prev)

    scaled = sequence * 3 * num
    return np.ceil(scaled)
|
|||
|
|
|||
|
def test():
    """
    Manual demo of random-number generation: shuffle an array in place, then
    plot normal against logistic samples and a sorted logistic sample.
    """
    # Random permutation: numpy's shuffle works in place and needs the array
    # as its argument (calling it without one raises TypeError).
    demo = np.array([3.4, 2.5, 1.8, 4.7, 5.6, 2.1])
    random.shuffle(demo)

    # Density of normal vs. logistic random numbers.  kdeplot replaces the
    # deprecated distplot(..., hist=False).
    sns.kdeplot(random.normal(scale=2, size=1000), label='normal')
    sns.kdeplot(random.logistic(loc=2, scale=0.5, size=1000), label='logistic')
    plt.legend()
    plt.show()

    # Scatter plot of 100 sorted logistic random numbers against their rank.
    randmtx = random.logistic(loc=0.5, scale=0.5, size=100)
    randmtx.sort(axis=0)
    randmty = np.arange(0, 100, 1)
    fig, axes = plt.subplots(1, 1, figsize=(5, 5))
    axes.scatter(randmty, randmtx, alpha=.3, label='ground truth')
    axes.legend()
    plt.tight_layout()
    plt.show()
|
|||
|
|
|||
|
# if __name__ == '__main__':
|
|||
|
# 例子
|
|||
|
# x1 = np.array([1, 1.1])
|
|||
|
# x2 = np.array([2, 2.1, 2.2])
|
|||
|
# x3 = np.array([3, 3.4, 3.1])
|
|||
|
# x_list = [x1, x2, x3]
|
|||
|
# com_sep_coef = cal_com_sep_coef(x_list, 0.5)
|
|||
|
# flag = api_sel_feature(x_list)
|
|||
|
# print('done')
|
|||
|
|
|||
|
|