2025-03-03 08:25:50 +00:00
|
|
|
|
#include <cstdio>
|
|
|
|
|
#include <cufft.h>
|
|
|
|
|
#include <cmath>
|
|
|
|
|
#include <cuda_runtime.h>
|
|
|
|
|
#include <iostream>
|
|
|
|
|
#include <memory>
|
2025-03-03 09:50:28 +00:00
|
|
|
|
#include <vector>
|
2025-03-03 08:25:50 +00:00
|
|
|
|
#include <cmath>
|
|
|
|
|
#include <complex>
|
|
|
|
|
#include <device_launch_parameters.h>
|
|
|
|
|
#include <cuda_runtime.h>
|
|
|
|
|
#include <cublas_v2.h>
|
|
|
|
|
#include <cuComplex.h>
|
2025-03-03 09:50:28 +00:00
|
|
|
|
#include <cufft.h>
|
|
|
|
|
#include <cufftw.h>
|
|
|
|
|
#include <cufftXt.h>
|
|
|
|
|
#include <cublas_v2.h>
|
|
|
|
|
#include <cuComplex.h>
|
2025-03-03 08:25:50 +00:00
|
|
|
|
#include "BaseConstVariable.h"
|
|
|
|
|
#include "GPUTool.cuh"
|
|
|
|
|
#include "GPUBPTool.cuh"
|
|
|
|
|
#include "BPBasic0_CUDA.cuh"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-03-03 09:50:28 +00:00
|
|
|
|
__global__ void phaseCompensationKernel(cufftComplex* phdata, const double* Freq, double r, int K, int Na) {
|
2025-03-03 08:25:50 +00:00
|
|
|
|
int freqIdx = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
|
int pulseIdx = blockIdx.y * blockDim.y + threadIdx.y;
|
|
|
|
|
|
|
|
|
|
if (freqIdx >= K || pulseIdx >= Na) return;
|
|
|
|
|
|
|
|
|
|
int idx = pulseIdx * K + freqIdx;
|
2025-03-03 09:50:28 +00:00
|
|
|
|
double phase = 4 * PI * Freq[freqIdx] * r / c;
|
|
|
|
|
double cos_phase = cosf(phase);
|
|
|
|
|
double sin_phase = sinf(phase);
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
cufftComplex ph = phdata[idx];
|
2025-03-03 09:50:28 +00:00
|
|
|
|
double new_real = ph.x * cos_phase - ph.y * sin_phase;
|
|
|
|
|
double new_imag = ph.x * sin_phase + ph.y * cos_phase;
|
2025-03-03 08:25:50 +00:00
|
|
|
|
phdata[idx] = make_cuComplex(new_real, new_imag);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__global__ void fftshiftKernel(cufftComplex* data, int Nfft, int Np) {
|
|
|
|
|
int pulse = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
|
if (pulse >= Np) return;
|
|
|
|
|
|
|
|
|
|
int half = Nfft / 2;
|
|
|
|
|
for (int i = 0; i < half; ++i) {
|
|
|
|
|
cufftComplex temp = data[pulse * Nfft + i];
|
|
|
|
|
data[pulse * Nfft + i] = data[pulse * Nfft + i + half];
|
|
|
|
|
data[pulse * Nfft + i + half] = temp;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-03 09:50:28 +00:00
|
|
|
|
__global__ void processPulseKernel(int nx, int ny, const double* x_mat, const double* y_mat, const double* z_mat,
|
|
|
|
|
double AntX, double AntY, double AntZ, double R0, double minF,
|
|
|
|
|
const cufftComplex* rc_pulse, double r_start, double dr, int nR,
|
2025-03-03 08:25:50 +00:00
|
|
|
|
cufftComplex* im_final) {
|
|
|
|
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
|
|
|
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
|
|
|
|
|
|
|
|
if (x >= nx || y >= ny) return;
|
|
|
|
|
|
|
|
|
|
int idx = x * ny + y;
|
2025-03-03 09:50:28 +00:00
|
|
|
|
double dx = AntX - x_mat[idx];
|
|
|
|
|
double dy = AntY - y_mat[idx];
|
|
|
|
|
double dz = AntZ - z_mat[idx];
|
|
|
|
|
double dR = sqrtf(dx * dx + dy * dy + dz * dz) - R0;
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
// Range check
|
|
|
|
|
if (dR < r_start || dR >= (r_start + dr * (nR - 1))) return;
|
|
|
|
|
|
|
|
|
|
// Linear interpolation
|
2025-03-03 09:50:28 +00:00
|
|
|
|
double pos = (dR - r_start) / dr;
|
2025-03-03 08:25:50 +00:00
|
|
|
|
int index = (int)floorf(pos);
|
2025-03-03 09:50:28 +00:00
|
|
|
|
double weight = pos - index;
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
if (index < 0 || index >= nR - 1) return;
|
|
|
|
|
|
|
|
|
|
cufftComplex rc_low = rc_pulse[index];
|
|
|
|
|
cufftComplex rc_high = rc_pulse[index + 1];
|
|
|
|
|
cufftComplex rc_interp;
|
|
|
|
|
rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight;
|
|
|
|
|
rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight;
|
|
|
|
|
|
|
|
|
|
// Phase correction
|
2025-03-03 09:50:28 +00:00
|
|
|
|
double phase = 4 * PI * minF * dR / c;
|
|
|
|
|
double cos_phase = cosf(phase);
|
|
|
|
|
double sin_phase = sinf(phase);
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
cufftComplex phCorr;
|
|
|
|
|
phCorr.x = rc_interp.x * cos_phase - rc_interp.y * sin_phase;
|
|
|
|
|
phCorr.y = rc_interp.x * sin_phase + rc_interp.y * cos_phase;
|
|
|
|
|
|
|
|
|
|
// Accumulate
|
|
|
|
|
im_final[idx].x += phCorr.x;
|
|
|
|
|
im_final[idx].y += phCorr.y;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void bpBasic0CUDA(GPUDATA& data, int flag) {
|
|
|
|
|
// Phase compensation
|
|
|
|
|
if (flag == 1) {
|
|
|
|
|
dim3 block(16, 16);
|
|
|
|
|
dim3 grid((data.K + 15) / 16, (data.Np + 15) / 16);
|
|
|
|
|
phaseCompensationKernel << <grid, block >> > (data.phdata, data.Freq, data.R0, data.K, data.Np);
|
|
|
|
|
cudaCheckError(cudaDeviceSynchronize());
|
2025-03-03 09:50:28 +00:00
|
|
|
|
//data.R0 = data.r; // <20><><EFBFBD><EFBFBD>data.r<><72><EFBFBD><EFBFBD>ȷ<EFBFBD><C8B7><EFBFBD><EFBFBD>
|
2025-03-03 08:25:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// FFT<46><54><EFBFBD><EFBFBD>
|
|
|
|
|
cufftHandle plan;
|
|
|
|
|
cufftPlan1d(&plan, data.Nfft, CUFFT_C2C, data.Np);
|
|
|
|
|
cufftExecC2C(plan, data.phdata, data.phdata, CUFFT_INVERSE);
|
|
|
|
|
cufftDestroy(plan);
|
|
|
|
|
|
|
|
|
|
// FFT<46><54>λ
|
|
|
|
|
dim3 blockShift(256);
|
|
|
|
|
dim3 gridShift((data.Np + 255) / 256);
|
|
|
|
|
fftshiftKernel << <gridShift, blockShift >> > (data.phdata, data.Nfft, data.Np);
|
|
|
|
|
cudaCheckError(cudaDeviceSynchronize());
|
|
|
|
|
|
|
|
|
|
// ͼ<><CDBC><EFBFBD>ؽ<EFBFBD>
|
2025-03-03 09:50:28 +00:00
|
|
|
|
double r_start = data.r_vec[0];
|
|
|
|
|
double dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1);
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
dim3 block(16, 16);
|
|
|
|
|
dim3 grid((data.nx + 15) / 16, (data.ny + 15) / 16);
|
|
|
|
|
|
|
|
|
|
for (int ii = 0; ii < data.Np; ++ii) {
|
|
|
|
|
processPulseKernel << <grid, block >> > (
|
|
|
|
|
data.nx, data.ny,
|
|
|
|
|
data.x_mat, data.y_mat, data.z_mat,
|
|
|
|
|
data.AntX[ii], data.AntY[ii], data.AntZ[ii],
|
|
|
|
|
data.R0, data.minF[ii],
|
|
|
|
|
data.phdata + ii * data.Nfft,
|
|
|
|
|
r_start, dr, data.Nfft,
|
|
|
|
|
data.im_final
|
|
|
|
|
);
|
|
|
|
|
cudaCheckError(cudaPeekAtLastError());
|
|
|
|
|
}
|
|
|
|
|
cudaCheckError(cudaDeviceSynchronize());
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-03 09:50:28 +00:00
|
|
|
|
|
|
|
|
|
void computeRvec(GPUDATA& data) {
|
|
|
|
|
// <20><><EFBFBD><EFBFBD>maxWr<57><72><EFBFBD><EFBFBD>Ҫ<EFBFBD>ȼ<EFBFBD><C8BC><EFBFBD>deltaF<61><46>
|
|
|
|
|
double deltaF = data.deltaF; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ȡ
|
|
|
|
|
double maxWr = 299792458.0f / (2.0f * deltaF);
|
|
|
|
|
|
|
|
|
|
// <20><><EFBFBD><EFBFBD>r_vec<65><63><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ˣ<EFBFBD>
|
|
|
|
|
double* r_vec_host=new double[data.Nfft];
|
|
|
|
|
const double step = maxWr / data.Nfft;
|
|
|
|
|
const double start = -data.Nfft / 2.0f * step;
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < data.Nfft; ++i) {
|
|
|
|
|
r_vec_host[i] = start + i * step;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
data.r_vec = r_vec_host;
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
void initGPUData(GPUDATA& h_data, GPUDATA& d_data) {
|
2025-03-03 09:50:28 +00:00
|
|
|
|
d_data.AntX = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
|
|
|
|
d_data.AntY = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
|
|
|
|
d_data.AntZ = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
|
|
|
|
d_data.minF = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
|
|
|
|
d_data.x_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
|
|
|
|
d_data.y_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
|
|
|
|
d_data.z_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
|
|
|
|
d_data.r_vec = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
|
|
|
|
|
d_data.Freq = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
|
|
|
|
|
d_data.phdata = (cufftComplex*)mallocCUDADevice(sizeof(cufftComplex) * h_data.K * h_data.Np);
|
|
|
|
|
d_data.im_final = (cufftComplex*)mallocCUDADevice(sizeof(cufftComplex) * h_data.nx * h_data.ny);
|
|
|
|
|
|
|
|
|
|
HostToDevice(h_data.AntX, d_data.AntX,sizeof(double) * h_data.Np);
|
|
|
|
|
HostToDevice(h_data.AntY, d_data.AntY,sizeof(double) * h_data.Np);
|
|
|
|
|
HostToDevice(h_data.AntZ, d_data.AntZ,sizeof(double) * h_data.Np);
|
|
|
|
|
HostToDevice(h_data.minF, d_data.minF,sizeof(double) * h_data.Np);
|
|
|
|
|
HostToDevice(h_data.x_mat, d_data.x_mat,sizeof(double) * h_data.nx * h_data.ny);
|
|
|
|
|
HostToDevice(h_data.y_mat, d_data.y_mat,sizeof(double) * h_data.nx * h_data.ny);
|
|
|
|
|
HostToDevice(h_data.z_mat, d_data.z_mat,sizeof(double) * h_data.nx * h_data.ny);
|
|
|
|
|
HostToDevice(h_data.Freq, d_data.Freq, sizeof(double) * h_data.Nfft);
|
|
|
|
|
HostToDevice(h_data.r_vec, d_data.r_vec, sizeof(double) * h_data.Nfft);
|
|
|
|
|
HostToDevice(h_data.phdata, d_data.phdata, sizeof(cufftComplex) * h_data.K * h_data.Np);
|
|
|
|
|
HostToDevice(h_data.im_final, d_data.im_final, sizeof(cufftComplex) * h_data.nx * h_data.ny);
|
|
|
|
|
|
|
|
|
|
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
d_data.Nfft = h_data.Nfft;
|
|
|
|
|
d_data.K = h_data.K;
|
|
|
|
|
d_data.Np = h_data.Np;
|
|
|
|
|
d_data.nx = h_data.nx;
|
|
|
|
|
d_data.ny = h_data.ny;
|
|
|
|
|
d_data.R0 = h_data.R0;
|
|
|
|
|
d_data.deltaF = h_data.deltaF;
|
2025-03-03 08:25:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void freeGPUData(GPUDATA& d_data) {
|
2025-03-03 09:50:28 +00:00
|
|
|
|
|
|
|
|
|
FreeCUDADevice((d_data.AntX));
|
|
|
|
|
FreeCUDADevice((d_data.AntY));
|
|
|
|
|
FreeCUDADevice((d_data.AntZ));
|
|
|
|
|
FreeCUDADevice((d_data.minF));
|
|
|
|
|
FreeCUDADevice((d_data.x_mat));
|
|
|
|
|
FreeCUDADevice((d_data.y_mat));
|
|
|
|
|
FreeCUDADevice((d_data.z_mat));
|
|
|
|
|
FreeCUDADevice((d_data.r_vec));
|
|
|
|
|
FreeCUDADevice((d_data.Freq));
|
|
|
|
|
FreeCUDADevice((d_data.phdata));
|
|
|
|
|
FreeCUDADevice((d_data.im_final));
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
2025-03-03 09:50:28 +00:00
|
|
|
|
void freeHostData(GPUDATA& h_data) {
|
|
|
|
|
FreeCUDAHost((h_data.AntX));
|
|
|
|
|
FreeCUDAHost((h_data.AntY));
|
|
|
|
|
FreeCUDAHost((h_data.AntZ));
|
|
|
|
|
FreeCUDAHost((h_data.minF));
|
|
|
|
|
FreeCUDAHost((h_data.x_mat));
|
|
|
|
|
FreeCUDAHost((h_data.y_mat));
|
|
|
|
|
FreeCUDAHost((h_data.z_mat));
|
|
|
|
|
FreeCUDAHost((h_data.r_vec));
|
|
|
|
|
FreeCUDAHost((h_data.Freq));
|
|
|
|
|
FreeCUDAHost((h_data.phdata));
|
|
|
|
|
FreeCUDAHost((h_data.im_final));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void BPBasic0(GPUDATA& h_data)
|
|
|
|
|
{
|
|
|
|
|
GPUDATA d_data;
|
|
|
|
|
|
|
|
|
|
initGPUData(h_data, d_data);
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
2025-03-03 09:50:28 +00:00
|
|
|
|
bpBasic0CUDA(d_data, 0);
|
|
|
|
|
|
|
|
|
|
DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny);
|
|
|
|
|
freeGPUData(d_data);
|
|
|
|
|
}
|
2025-03-03 08:25:50 +00:00
|
|
|
|
|
|
|
|
|
//int main() {
|
|
|
|
|
// GPUDATA h_data, d_data;
|
|
|
|
|
//
|
|
|
|
|
// // <20><>ʼ<EFBFBD><CABC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
// h_data.Nfft = 1024;
|
|
|
|
|
// h_data.K = 512;
|
|
|
|
|
// // ... <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʼ<EFBFBD><CABC>
|
|
|
|
|
//
|
|
|
|
|
// // <20><>ʼ<EFBFBD><CABC><EFBFBD>豸<EFBFBD>ڴ<EFBFBD>
|
|
|
|
|
// initGPUData(h_data, d_data);
|
|
|
|
|
//
|
|
|
|
|
// // ִ<><D6B4><EFBFBD>㷨
|
|
|
|
|
// bpBasic0CUDA(d_data, 0);
|
|
|
|
|
//
|
|
|
|
|
// // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
// cudaCheckError(cudaMemcpy(h_data.im_final, d_data.im_final,
|
|
|
|
|
// sizeof(cufftComplex) * h_data.nx * h_data.ny, cudaMemcpyDeviceToHost));
|
|
|
|
|
//
|
|
|
|
|
// // <20>ͷ<EFBFBD><CDB7><EFBFBD>Դ
|
|
|
|
|
// freeGPUData(d_data);
|
|
|
|
|
//
|
|
|
|
|
// return 0;
|
|
|
|
|
//}
|