#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "BaseConstVariable.h" #include "GPUTool.cuh" #include "GPUBPTool.cuh" #include "BPBasic0_CUDA.cuh" #define c LIGHTSPEED __global__ void phaseCompensationKernel(cufftComplex* phdata, const double* Freq, double r, int K, int Na) { int freqIdx = blockIdx.x * blockDim.x + threadIdx.x; int pulseIdx = blockIdx.y * blockDim.y + threadIdx.y; if (freqIdx >= K || pulseIdx >= Na) return; int idx = pulseIdx * K + freqIdx; double phase = 4 * PI * Freq[freqIdx] * r / c; double cos_phase = cos(phase); double sin_phase = sin(phase); cufftComplex ph = phdata[idx]; double new_real = ph.x * cos_phase - ph.y * sin_phase; double new_imag = ph.x * sin_phase + ph.y * cos_phase; phdata[idx] = make_cuComplex(new_real, new_imag); } __global__ void fftshiftKernel(cufftComplex* data, int Nfft, int Np) { int pulse = blockIdx.x * blockDim.x + threadIdx.x; if (pulse >= Np) return; int half = Nfft / 2; for (int i = 0; i < half; ++i) { cufftComplex temp = data[pulse * Nfft + i]; data[pulse * Nfft + i] = data[pulse * Nfft + i + half]; data[pulse * Nfft + i + half] = temp; } } __global__ void processPulseKernel( long prfid, int nx, int ny, const double* x_mat, const double* y_mat, const double* z_mat, double AntX, double AntY, double AntZ, double R0, double minF, const cufftComplex* rc_pulse, const double r_start, const double dr, const int nR, cufftComplex* im_final ) { // long long idx = blockIdx.x * blockDim.x + threadIdx.x; long long pixelcount = nx * ny; if (idx >= pixelcount) return; //printf("processPulseKernel start!!\n"); //if (x >= nx || y >= ny) return; //int idx = x * ny + y; double dx = AntX - x_mat[idx]; double dy = AntY - y_mat[idx]; double dz = AntZ - z_mat[idx]; //printf("processPulseKernel xmat !!\n"); double R = sqrt(dx * dx + dy * dy + dz * dz); double dR = R - R0; if (dR < r_start || dR >= (r_start + dr * (nR - 1))) return; // Linear interpolation double pos = (dR - r_start) / dr; int index = (int)floor(pos); double weight = pos - index; if (index < 0 || index >= nR - 1) return; cufftComplex rc_low = rc_pulse[prfid * nR +index]; cufftComplex rc_high = rc_pulse[prfid * nR+index + 1]; cufftComplex rc_interp; rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight; rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight; // Phase correction double phase = 4 * PI * minF / c * dR; double cos_phase = cos(phase); double sin_phase = sin(phase); cufftComplex phCorr; phCorr.x = rc_interp.x * cos_phase - rc_interp.y * sin_phase; phCorr.y = rc_interp.x * sin_phase + rc_interp.y * cos_phase; // Accumulate im_final[idx].x += phCorr.x; im_final[idx].y += phCorr.y; //printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, nR); if (abs(phCorr.x) > 1e-100 || abs(phCorr.y > 1e-100)) { //printf( // "[DEBUG] prfid=%-4ld | idx=%-8lld\n" // " Ant: X=%-18.10e Y=%-18.10e Z=%-18.10e\n" // " Pix: X=%-18.10e Y=%-18.10e Z=%-18.10e\n" // " R=%-18.10e|dR=%-18.10e | pos=%-8.4f[%-6d+%-8.6f]\n" // " RC: low=(%-18.10e,%-18.10e) high=(%-18.10e,%-18.10e)\n" // " => interp=(%-18.10e,%-18.10e)\n" // " Phase: val=%-18.10e | corr=(%-18.10e,%-18.10e)\n" // " Final: im=(%-18.10e,%-18.10e)\n" // "----------------------------------------\n", // prfid, idx, // AntX, AntY, AntZ, // x_mat[idx], y_mat[idx], z_mat[idx], // R,dR, // pos, index, weight, // rc_low.x, rc_low.y, // rc_high.x, rc_high.y, // rc_interp.x, rc_interp.y, // phase, // phCorr.x, phCorr.y, // im_final[idx].x, im_final[idx].y //); } } void bpBasic0CUDA(GPUDATA& data, int flag,double* h_R) { // Phase compensation if (flag == 1) { dim3 block(16, 16); dim3 grid((data.K + 15) / 16, (data.Np + 15) / 16); phaseCompensationKernel << > > (data.phdata, data.Freq, data.R0, data.K, data.Np); PrintLasterError("bpBasic0CUDA Phase compensation"); //data.R0 = data.r; // 假设data.r已正确设置 } // FFT处理 cufftHandle plan; cufftPlan1d(&plan, data.Nfft, CUFFT_C2C, data.Np); cufftExecC2C(plan, data.phdata, data.phdata, CUFFT_INVERSE); cufftDestroy(plan); // FFT移位 dim3 blockShift(256); dim3 gridShift((data.Np + 255) / 256); fftshiftKernel << > > (data.phdata, data.Nfft, data.Np); PrintLasterError("bpBasic0CUDA Phase FFT Process"); printf("fft finished!!\n"); // 图像重建 double r_start = data.r_vec[0]; double dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1); printf("dr = %f\n",dr); long pixelcount = data.nx* data.ny; long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE; printf("grid finished!!\n"); //double* d_R = (double*)mallocCUDADevice(sizeof(double) * data.nx * data.ny); printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, data.Nfft); printf("BPimage .....\n"); for (long ii = 0; ii < data.Np; ++ii) { processPulseKernel << > > ( ii, data.nx, data.ny, data.x_mat, data.y_mat, data.z_mat, data.AntX[ii], data.AntY[ii], data.AntZ[ii], data.R0, data.minF[ii], data.phdata, r_start, dr, data.Nfft, data.im_final //,d_R ); PrintLasterError("processPulseKernel"); if (ii % 1000==0) { printf("\rPRF(%f %) %d / %d\t\t\t\t",(ii*100.0/data.Np), ii,data.Np); } } //FreeCUDADevice(d_R); PrintLasterError("bpBasic0CUDA Phase BPimage Process finished!!"); } void initGPUData(GPUDATA& h_data, GPUDATA& d_data) { d_data.AntX =h_data.AntX; //(double*)mallocCUDADevice(sizeof(double) * h_data.Np); d_data.AntY = h_data.AntY;//(double*)mallocCUDADevice(sizeof(double) * h_data.Np); d_data.AntZ = h_data.AntZ;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np); d_data.minF = h_data.minF;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np); d_data.x_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny); d_data.y_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny); d_data.z_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny); d_data.r_vec = h_data.r_vec;// (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft); d_data.Freq = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft); d_data.phdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.Nfft * h_data.Np); d_data.im_final = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.nx * h_data.ny); //HostToDevice(h_data.AntX, d_data.AntX,sizeof(double) * h_data.Np); //HostToDevice(h_data.AntY, d_data.AntY,sizeof(double) * h_data.Np); //HostToDevice(h_data.AntZ, d_data.AntZ,sizeof(double) * h_data.Np); //HostToDevice(h_data.minF, d_data.minF,sizeof(double) * h_data.Np); HostToDevice(h_data.x_mat, d_data.x_mat,sizeof(double) * h_data.nx * h_data.ny); printf("image X Copy finished!!!\n"); HostToDevice(h_data.y_mat, d_data.y_mat,sizeof(double) * h_data.nx * h_data.ny); printf("image Y Copy finished!!!\n"); HostToDevice(h_data.z_mat, d_data.z_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image Z Copy finished!!!\n"); HostToDevice(h_data.Freq, d_data.Freq, sizeof(double) * h_data.Nfft); //HostToDevice(h_data.r_vec, d_data.r_vec, sizeof(double) * h_data.Nfft); HostToDevice(h_data.phdata, d_data.phdata, sizeof(cuComplex) * h_data.Nfft * h_data.Np); printf("image echo Copy finished!!!\n"); HostToDevice(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); printf("image data Copy finished!!!\n"); // 拷贝标量参数 d_data.Nfft = h_data.Nfft; d_data.K = h_data.K; d_data.Np = h_data.Np; d_data.nx = h_data.nx; d_data.ny = h_data.ny; d_data.R0 = h_data.R0; d_data.deltaF = h_data.deltaF; } void freeGPUData(GPUDATA& d_data) { //FreeCUDADevice((d_data.AntX)); //FreeCUDADevice((d_data.AntY)); //FreeCUDADevice((d_data.AntZ)); //FreeCUDADevice((d_data.minF)); FreeCUDADevice((d_data.x_mat)); FreeCUDADevice((d_data.y_mat)); FreeCUDADevice((d_data.z_mat)); //FreeCUDADevice((d_data.r_vec)); FreeCUDADevice((d_data.Freq)); FreeCUDADevice((d_data.phdata)); FreeCUDADevice((d_data.im_final)); } void freeHostData(GPUDATA& h_data) { //FreeCUDAHost((h_data.AntX)); //FreeCUDAHost((h_data.AntY)); //FreeCUDAHost((h_data.AntZ)); FreeCUDAHost((h_data.minF)); //FreeCUDAHost((h_data.x_mat)); //FreeCUDAHost((h_data.y_mat)); //FreeCUDAHost((h_data.z_mat)); FreeCUDAHost((h_data.r_vec)); FreeCUDAHost((h_data.Freq)); FreeCUDAHost((h_data.phdata)); FreeCUDAHost((h_data.im_final)); } void BPBasic0(GPUDATA& h_data) { GPUDATA d_data; initGPUData(h_data, d_data); bpBasic0CUDA(d_data, 0); DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); freeGPUData(d_data); } //int main() { // GPUDATA h_data, d_data; // // // 初始化主机数据 // h_data.Nfft = 1024; // h_data.K = 512; // // ... 其他参数初始化 // // // 初始化设备内存 // initGPUData(h_data, d_data); // // // 执行算法 // bpBasic0CUDA(d_data, 0); // // // 拷贝结果回主机 // cudaCheckError(cudaMemcpy(h_data.im_final, d_data.im_final, // sizeof(cufftComplex) * h_data.nx * h_data.ny, cudaMemcpyDeviceToHost)); // // // 释放资源 // freeGPUData(d_data); // // return 0; //}