microproduct/atmosphericDelay-C-SAR/tool/algorithm/ml/MonteCarloSampling.py

186 lines
6.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: UTF-8 -*-
"""
@Project:SalinityMain.py
@File:MonteCarloSampling.py
@Function:基于蒙特卡洛随机抽样的最优特征选择算法
@Contact:
@Author:SHJ
@Date:2021/10/19 11:30
@Version:1.0.0
"""
import numpy as np
from numpy import random
import matplotlib.pyplot as plt
import seaborn as sns
import logging
logger = logging.getLogger("mylog")
def api_sel_feature(x_list, iter=100, alpha=0.5, ts=-0.5, iter_ratio=0.2):
    """
    Decide whether a single feature should be kept, using Monte Carlo
    resampling of the compactness-separation coefficient.

    In every iteration roughly one third of the pooled samples are drawn (with
    replacement), shuffled among their positions, and the coefficient is
    recomputed on the re-split classes. The feature is rejected when too many
    resampled coefficients are less than or equal to the original one.

    :para x_list: training samples of one feature grouped by class,
        [X1, X2, ..., Xk]; Xi = np.array([x1, x2, ..., xn]) holds the n
        samples of class i
    :para iter: number of Monte Carlo iterations (the name shadows the builtin
        but is kept so that keyword callers keep working)
    :para alpha: adjustment factor forwarded to cal_com_sep_coef
    :para ts: threshold on com_sep_coef; a coefficient below it rejects the
        feature without resampling
    :para iter_ratio: the feature is rejected when more than
        iter * iter_ratio resampled coefficients are <= the original one
    :return: tuple (selected, com_sep_coef_old); selected is True when the
        feature is kept and False when it is rejected, com_sep_coef_old is the
        coefficient of the unshuffled samples
    :raises ValueError: if x_list contains no class
    """
    if len(x_list) == 0:
        raise ValueError('x_list must contain at least one class')

    com_sep_coef_old = cal_com_sep_coef(x_list, alpha)
    # NOTE(review): the resampling test below treats a smaller coefficient as
    # better, yet a coefficient *below* ts is rejected here -- confirm that
    # the direction of this comparison is intended.
    if com_sep_coef_old < ts:
        return False, com_sep_coef_old

    # Pool all classes into one row vector and remember where each class ends
    # so the pooled vector can be split back into classes after shuffling.
    class_sizes = [len(x) for x in x_list]
    num_sampler = sum(class_sizes)  # total number of samples
    X = np.hstack(list(x_list))
    split_points = np.cumsum(class_sizes)[:-1].tolist()
    num = int(np.ceil(num_sampler / 3))  # positions resampled per iteration

    flag = 0  # resampled coefficients that are <= the original one
    for _ in range(iter):
        # Draw `num` positions in [0, num_sampler - 1]; duplicates are
        # possible, and a draw of exactly 0.0 maps to -1 (the last sample).
        randmtx = np.random.rand(1, num)
        sel_idx = np.sort(np.ceil(randmtx * num_sampler).astype(int)[0, :]) - 1

        # Shuffle the values found at the drawn positions among those
        # positions, then rebuild the per-class arrays.
        X_new_sel = X.copy()
        X_new_sel[sel_idx] = np.random.permutation(X[sel_idx])
        X_new_list = np.split(X_new_sel, split_points)

        if cal_com_sep_coef(X_new_list, alpha) <= com_sep_coef_old:
            flag += 1

    # Guard the ratio so that iter == 0 does not raise ZeroDivisionError.
    ratio = flag / iter if iter else float('nan')
    logger.info('flag:%s, iter:%s, falg/iter:%s', flag, iter, ratio)

    if flag > iter * iter_ratio:
        return False, com_sep_coef_old
    return True, com_sep_coef_old
def cal_com_coef(x_list):
    """
    Compute the intra-class compactness coefficient of one feature.

    For each class the absolute differences between all ordered pairs of
    samples are summed and divided by n * (n - 1); the coefficient is the
    average of these per-class values over all classes.

    :para x_list: training samples of one feature grouped by class,
        [X1, X2, ..., Xk]; Xi = np.array([x1, x2, ..., xn]) holds the n
        samples of class i
    :return com_coef: compactness coefficient; a class with fewer than two
        samples has no sample pairs and contributes 0.0, and an empty
        x_list yields 0.0
    """
    class_num = len(x_list)
    if class_num == 0:
        return 0.0

    coef_array = np.zeros(class_num)
    for m, x in enumerate(x_list):
        samples = np.asarray(x, dtype=float)
        sample_num = len(samples)
        if sample_num < 2:
            # n * (n - 1) would be zero: leave this class at 0.0 instead of
            # letting a NaN propagate into the final coefficient.
            continue
        # One row at a time keeps memory linear in the class size.
        row_sums = [np.sum(np.abs(samples - value)) for value in samples]
        coef_array[m] = np.sum(row_sums) / (sample_num * (sample_num - 1))

    com_coef = np.sum(coef_array) / class_num
    return com_coef
def cal_sep_coef(x_list):
    """
    Compute the inter-class separation coefficient of one feature.

    For every pair of distinct classes the mean absolute difference between
    each sample of one class and each sample of the other is computed; the
    coefficient is the average of these values over all class pairs.

    :para x_list: training samples of one feature grouped by class,
        [X1, X2, ..., Xk]; Xi = np.array([x1, x2, ..., xn]) holds the n
        samples of class i (classes are expected to be non-empty)
    :return sep_coef: separation coefficient; 0.0 when fewer than two classes
        are given
    """
    class_num = len(x_list)
    if class_num < 2:
        # No pair of classes exists, so there is nothing to separate.
        return 0.0

    pair_sum = 0.0
    for m in range(class_num):
        # Column vector, built once per class, so that `xm - xn` broadcasts
        # to the full (len(xm), len(xn)) difference matrix.
        xm = np.expand_dims(np.asarray(x_list[m]), 1)
        for n in range(m + 1, class_num):
            xn = np.asarray(x_list[n])
            pair_sum += np.sum(np.abs(xm - xn)) / (xm.shape[0] * len(xn))

    # The measure is symmetric, so each unordered pair stands for the two
    # ordered pairs (m, n) and (n, m) of the k * (k - 1) normalisation.
    sep_coef = 2 * pair_sum / (class_num * (class_num - 1))
    return sep_coef
def cal_com_sep_coef(x_list, alpha = 0.5):
    """
    Combine intra-class compactness and inter-class separation into a single
    compactness-separation coefficient.

    :para x_list: training samples of one feature grouped by class,
        [X1, X2, ..., Xk]; Xi = np.array([x1, x2, ..., xn]) holds the n
        samples of class i
    :para alpha: adjustment factor in [0, 1] weighting compactness against
        separation
    :return com_sep_coef: alpha * com_coef - (1 - alpha) * sep_coef
    :raises ValueError: if alpha lies outside [0, 1]
    """
    # A chained comparison avoids the precedence trap of
    # `not alpha >= 0 and alpha <= 1`, which never rejected alpha > 1.
    if not 0 <= alpha <= 1:
        raise ValueError('input_para_alpha beyond (0,1)!')
    com_coef = cal_com_coef(x_list)
    sep_coef = cal_sep_coef(x_list)
    com_sep_coef = alpha * com_coef - (1 - alpha) * sep_coef
    return com_sep_coef
def get_logistic_rand_number(num, u=0.4):  # deprecated
    """
    Generate a sequence with the logistic-map recurrence
    x[i] = u * x[i-1] * (1 - x[i-1]), starting from x[0] = 0.5, then scale it
    by 3 * num and round every value up.

    The sequence is fully deterministic: no random source is involved.

    :para num: length of the sequence
    :para u: growth parameter of the recurrence
    :return: float array of shape (1, num) holding the rounded-up values;
        an empty (1, 0) array when num is 0
    """
    randmtx = np.full((1, num), 0.0)
    if num > 0:
        randmtx[0, 0] = 0.5  # fixed initial value
    for i in range(1, num):
        prev = randmtx[0, i - 1]
        randmtx[0, i] = u * prev * (1 - prev)
    randmtx = randmtx * 3 * num
    randmtx_ceil = np.ceil(randmtx)
    return randmtx_ceil
def test():
    """Manual visual check of the random-number helpers (opens plot windows)."""
    # Random permutation: shuffle() works in place and needs the sequence to
    # shuffle as its argument.
    a = np.array([3.4, 2.5, 1.8, 4.7, 5.6, 2.1])
    random.shuffle(a)

    # Compare the density of normal and logistic random numbers.
    # NOTE(review): seaborn documents distplot as deprecated; consider
    # switching to kdeplot once the project's seaborn version is confirmed.
    sns.distplot(random.normal(scale=2, size=1000), hist=False, label='normal')
    sns.distplot(random.logistic(loc=2, scale=0.5, size=1000), hist=False, label='logistic')
    plt.show()

    # Plot sorted logistic random numbers against their rank.
    randmtx = random.logistic(loc=0.5, scale=0.5, size=100)
    randmtx.sort(axis=0)
    randmty = np.arange(0, 100, 1)
    fig, axes = plt.subplots(1, 1, figsize=(5, 5))
    axes.scatter(randmty, randmtx, alpha=.3, label='ground truth')
    axes.legend()
    plt.tight_layout()
    plt.show()
# if __name__ == '__main__':
# 例子
# x1 = np.array([1, 1.1])
# x2 = np.array([2, 2.1, 2.2])
# x3 = np.array([3, 3.4, 3.1])
# x_list = [x1, x2, x3]
# com_sep_coef = cal_com_sep_coef(x_list, 0.5)
# flag = api_sel_feature(x_list)
# print('done')