修复一维fftshift 函数
parent
7557d35968
commit
982922506c
|
@ -213,6 +213,45 @@ __global__ void CUDA_GridPoint_Linear_Interp1(float* v, float* q, float* qv, lon
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Batched, in-place 1-D FFT shift kernel.
//
// `data` is treated as `batch_size` consecutive rows of `signal_length`
// complex samples. After the kernel, every row satisfies
//     out[j] = in[(j + ceil(N/2)) % N],   N = signal_length,
// i.e. a circular shift by floor(N/2) that moves element 0 to the centre.
//
// Launch layout: 1-D grid of 1-D blocks. The loop below strides by the total
// number of launched threads, so any grid size yields the same result; one
// thread per element is the natural configuration. No shared memory is used.
//
// Even N: the shift is a set of disjoint swaps (j <-> j + N/2), done by the
//         threads that own the first half of each row.
// Odd N:  the shift is a single cycle of length N (gcd(ceil(N/2), N) == 1),
//         so pairwise swaps would race on shared elements and give a wrong
//         result. The thread that owns element 0 of the row walks the cycle
//         sequentially instead; this is correct but serial per row.
__global__ void fftshift_1d_kernel(cuComplex* data, int batch_size, int signal_length) {
    // Rows of length 0 or 1 are already "shifted"; also guards the modulo below.
    if (batch_size <= 0 || signal_length <= 1) return;

    // 64-bit indexing: batch_size * signal_length may exceed INT_MAX.
    const long long total = (long long)batch_size * (long long)signal_length;
    const long long stride = (long long)gridDim.x * (long long)blockDim.x;

    const bool is_odd = (signal_length & 1) != 0;
    // ceil(N/2), written so that it cannot overflow for N close to INT_MAX.
    const int half = signal_length / 2 + (is_odd ? 1 : 0);

    for (long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         idx < total;
         idx += stride) {
        const int signal_id = (int)(idx % signal_length);

        if (!is_odd) {
            // Only the first half of the row initiates a swap, so every pair
            // (j, j + N/2) is touched by exactly one thread.
            if (signal_id < half) {
                const long long partner = idx + half;
                const cuComplex temp = data[idx];
                data[idx] = data[partner];
                data[partner] = temp;
            }
            continue;
        }

        // Odd length: exactly one thread per row performs the rotation.
        if (signal_id != 0) continue;

        cuComplex* row = data + idx;  // idx is the first element of this row
        const cuComplex first = row[0];
        const int wrap = signal_length - half;  // src + half >= N  <=>  src >= wrap
        int dst = 0;
        int src = half;
        while (src != 0) {
            row[dst] = row[src];
            dst = src;
            // src = (src + half) % N without risking int overflow.
            src = (src >= wrap) ? (src - wrap) : (src + half);
        }
        row[dst] = first;
    }
}
|
||||
|
||||
// Batched in-place 1-D FFT shift (host entry point).
//
// d_data        : device pointer to batch_size * signal_length cuComplex
//                 samples, laid out as batch_size consecutive rows.
// batch_size    : number of rows; nothing is done when <= 0.
// signal_length : samples per row; nothing is done when <= 1.
//
// Launches fftshift_1d_kernel with one thread per element, waits for it to
// finish, and reports any CUDA error through PrintLasterError.
extern "C" void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length) {
    // Empty input or length-1 rows need no work. Returning here also avoids a
    // zero-block launch, which CUDA rejects as an invalid configuration.
    if (batch_size <= 0 || signal_length <= 1) return;

    // Compute the element count in 64 bits: the int product overflows once
    // the buffer holds more than INT_MAX samples.
    const long long total_elements = (long long)batch_size * (long long)signal_length;
    const int threads_per_block = 256;
    // Ceil-division. Reaching the 2^31-1 grid.x limit would take a multi-TiB
    // buffer, so the narrowing cast below is safe for any allocatable input.
    const long long blocks_per_grid =
        (total_elements + threads_per_block - 1) / threads_per_block;

    fftshift_1d_kernel << <(unsigned int)blocks_per_grid, threads_per_block >> > (d_data, batch_size, signal_length);

    // Synchronize first so that both launch-configuration errors and faults
    // raised while the kernel runs are visible to the error check.
    cudaDeviceSynchronize();
    PrintLasterError("FFTShift1D");
}
|
||||
|
||||
extern __global__ void CUDA_D_sin(double* y, double* X, int n) {
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (idx < n) {
|
||||
|
|
|
@ -108,6 +108,6 @@ extern "C" GPUBASELIBAPI void CUDAIFFTScale(cuComplex* inArr, cuComplex* outArr,
|
|||
|
||||
extern "C" GPUBASELIBAPI void CUDAIFFT(cuComplex* inArr, cuComplex* outArr, long InRowCount, long InColCount, long outColCount);
|
||||
|
||||
|
||||
extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length);
|
||||
#endif
|
||||
#endif
|
||||
|
|
|
@ -126,7 +126,7 @@ void CreatePixelXYZ(std::shared_ptr<EchoL0Dataset> echoL0ds, QString outPixelXYZ
|
|||
d_AntDirectX.get(), d_AntDirectY.get(), d_AntDirectZ.get(),
|
||||
d_demx.get(), d_demy.get(), d_demz.get(),
|
||||
prfcount, tempechocol, 1000,
|
||||
Rnear, dx, refRange
|
||||
Rnear+dx* startcolidx, dx, refRange
|
||||
);
|
||||
|
||||
DeviceToHost(h_demx.get(), d_demx.get(), sizeof(double) * prfcount * tempechocol);
|
||||
|
@ -467,6 +467,8 @@ void TBPImageAlgCls::EchoFreqToTime( )
|
|||
HostToDevice(host_IFFTechoArr.get(), device_IFFTechoArr.get(), sizeof(cuComplex) * tempechoBlockline * outColCount);
|
||||
CUDAIFFT(device_echoArr.get(), device_IFFTechoArr.get(), tempechoBlockline, outColCount, outColCount);
|
||||
|
||||
FFTShift1D(device_IFFTechoArr.get(), tempechoBlockline, outColCount);
|
||||
|
||||
DeviceToHost(host_IFFTechoArr.get(), device_IFFTechoArr.get(), sizeof(cuComplex) * tempechoBlockline * outColCount);
|
||||
|
||||
#pragma omp parallel for
|
||||
|
|
Loading…
Reference in New Issue