RasterProcessTool/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cu

313 lines
10 KiB
Plaintext
Raw Normal View History

2025-03-03 08:25:50 +00:00
#include <cstdio>
#include <cufft.h>
#include <cmath>
#include <cuda_runtime.h>
#include <iostream>
#include <memory>
2025-03-03 09:50:28 +00:00
#include <vector>
2025-03-03 08:25:50 +00:00
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
2025-03-03 09:50:28 +00:00
#include <cufft.h>
#include <cufftw.h>
#include <cufftXt.h>
#include <cublas_v2.h>
#include <cuComplex.h>
2025-03-03 08:25:50 +00:00
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#include "GPUBPTool.cuh"
#include "BPBasic0_CUDA.cuh"
2025-03-03 09:50:28 +00:00
/**
 * Rotates every phase-history sample in place by exp(j * phase), where
 * phase = 4 * PI * Freq[freqIdx] * r / c.
 *
 * Launch layout: 2D grid. The x dimension covers frequency samples [0, K),
 * the y dimension covers pulses [0, Na). Out-of-range threads exit early.
 *
 * @param phdata  Samples, indexed as pulseIdx * K + freqIdx; updated in place.
 * @param Freq    Per-sample frequency values; read at index freqIdx (< K).
 * @param r       Range term used in the phase expression.
 * @param K       Number of frequency samples per pulse.
 * @param Na      Number of pulses.
 */
__global__ void phaseCompensationKernel(cufftComplex* phdata, const double* Freq, double r, int K, int Na) {
    const int freqIdx = blockIdx.x * blockDim.x + threadIdx.x;
    const int pulseIdx = blockIdx.y * blockDim.y + threadIdx.y;
    if (freqIdx >= K || pulseIdx >= Na) return;

    // Widen before multiplying so pulseIdx * K cannot overflow a 32-bit int.
    const size_t idx = (size_t)pulseIdx * (size_t)K + (size_t)freqIdx;

    // The phase is a double and can span many radians; evaluate the
    // trigonometric functions in double precision (as processPulseKernel does)
    // instead of truncating the argument to float with cosf/sinf.
    const double phase = 4 * PI * Freq[freqIdx] * r / c;
    const double cos_phase = cos(phase);
    const double sin_phase = sin(phase);

    const cufftComplex ph = phdata[idx];

    // Complex multiply: ph * (cos_phase + j * sin_phase).
    const double new_real = ph.x * cos_phase - ph.y * sin_phase;
    const double new_imag = ph.x * sin_phase + ph.y * cos_phase;

    phdata[idx] = make_cuComplex((float)new_real, (float)new_imag);
}
/**
 * Swaps the first and second halves of every row of `data` in place.
 *
 * Launch layout: 1D grid, one thread per row (pulse); threads with
 * pulse >= Np exit early. Each thread walks its row serially.
 *
 * NOTE: the swap exchanges element i with element i + Nfft/2 for
 * i in [0, Nfft/2). For odd Nfft the last element of the row is left
 * untouched, so this is a true fftshift only when Nfft is even.
 *
 * @param data  Np rows of Nfft complex samples, row-major.
 * @param Nfft  Row length.
 * @param Np    Number of rows.
 */
__global__ void fftshiftKernel(cufftComplex* data, int Nfft, int Np) {
    const int pulse = blockIdx.x * blockDim.x + threadIdx.x;
    if (pulse >= Np) return;

    // Compute the row offset in size_t: pulse * Nfft can exceed INT_MAX.
    cufftComplex* row = data + (size_t)pulse * (size_t)Nfft;
    const int half = Nfft / 2;

    for (int i = 0; i < half; ++i) {
        const cufftComplex temp = row[i];
        row[i] = row[i + half];
        row[i + half] = temp;
    }
}
2025-03-03 16:52:41 +00:00
/**
 * Back-projects one pulse onto the image: for every pixel, computes the
 * differential range to the antenna, linearly interpolates the pulse's
 * range profile at that range, applies a phase correction and accumulates
 * the result into im_final.
 *
 * Launch layout: 1D grid, one thread per pixel (nx * ny pixels in total);
 * threads beyond the pixel count exit early. Each thread only touches
 * im_final[idx] for its own idx.
 *
 * @param prfid     Index of the pulse; selects row prfid of rc_pulse.
 * @param nx, ny    Image dimensions; only their product is used here.
 * @param x_mat, y_mat, z_mat  Per-pixel coordinates, indexed by flat pixel index.
 * @param AntX, AntY, AntZ     Antenna position for this pulse.
 * @param R0        Reference range subtracted from the antenna-pixel distance.
 * @param minF      Frequency used in the phase-correction term.
 * @param rc_pulse  Range profiles, nR samples per pulse, row-major.
 * @param r_start   Range of the first sample of a profile.
 * @param dr        Range spacing between consecutive samples.
 * @param nR        Number of samples per profile.
 * @param im_final  Output image accumulator, one complex value per pixel.
 */
__global__ void processPulseKernel(
    long prfid,
    int nx, int ny,
    const double* x_mat, const double* y_mat, const double* z_mat,
    double AntX, double AntY, double AntZ,
    double R0, double minF,
    const cufftComplex* rc_pulse,
    const double r_start, const double dr, const int nR,
    cufftComplex* im_final
) {
    // Widen before multiplying: both products are otherwise evaluated in
    // 32-bit arithmetic and can wrap for large images.
    const long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long pixelcount = (long long)nx * ny;
    if (idx >= pixelcount) return;

    const double dx = AntX - x_mat[idx];
    const double dy = AntY - y_mat[idx];
    const double dz = AntZ - z_mat[idx];

    // Double-precision sqrt: the result feeds a phase term, and sqrtf would
    // round the distance to float before R0 is subtracted.
    const double dR = sqrt(dx * dx + dy * dy + dz * dz) - R0;

    // Skip pixels whose range falls outside the sampled profile.
    if (dR < r_start || dR >= (r_start + dr * (nR - 1))) return;

    // Linear interpolation between the two neighbouring range samples.
    const double pos = (dR - r_start) / dr;
    const int index = (int)floor(pos);
    const double weight = pos - index;
    if (index < 0 || index >= nR - 1) return;

    // Row offset in 64-bit: `long` is 32-bit on some platforms.
    const long long rowBase = (long long)prfid * nR;
    const cufftComplex rc_low = rc_pulse[rowBase + index];
    const cufftComplex rc_high = rc_pulse[rowBase + index + 1];

    cufftComplex rc_interp;
    rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight;
    rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight;

    // Phase correction: multiply by exp(j * phase).
    const double phase = 4 * PI * minF / c * dR;
    const double cos_phase = cos(phase);
    const double sin_phase = sin(phase);

    cufftComplex phCorr;
    phCorr.x = rc_interp.x * cos_phase - rc_interp.y * sin_phase;
    phCorr.y = rc_interp.x * sin_phase + rc_interp.y * cos_phase;

    // Accumulate this pulse's contribution.
    im_final[idx].x += phCorr.x;
    im_final[idx].y += phCorr.y;

    // Debug trace for non-negligible contributions. The original condition
    // was `abs(phCorr.y > 1e-14)`, i.e. abs() of a boolean; the magnitude of
    // the imaginary part is what is meant.
    // NOTE(review): device printf is serialized and slow; consider removing
    // or compiling this out once debugging is finished.
    if (fabsf(phCorr.x) > 1e-14 || fabsf(phCorr.y) > 1e-14) {
        printf(
            "[DEBUG] prfid=%-4ld | idx=%-8lld\n"
            " Ant: X=%-18.10e Y=%-18.10e Z=%-18.10e\n"
            " Pix: X=%-18.10e Y=%-18.10e Z=%-18.10e\n"
            " dR=%-18.10e | pos=%-8.4f[%-6d+%-8.6f]\n"
            " RC: low=(%-18.10e,%-18.10e) high=(%-18.10e,%-18.10e)\n"
            " => interp=(%-18.10e,%-18.10e)\n"
            " Phase: val=%-18.10e | corr=(%-18.10e,%-18.10e)\n"
            " Final: im=(%-18.10e,%-18.10e)\n"
            "----------------------------------------\n",
            prfid, idx,
            AntX, AntY, AntZ,
            x_mat[idx], y_mat[idx], z_mat[idx],
            dR,
            pos, index, weight,
            rc_low.x, rc_low.y,
            rc_high.x, rc_high.y,
            rc_interp.x, rc_interp.y,
            phase,
            phCorr.x, phCorr.y,
            im_final[idx].x, im_final[idx].y
        );
    }
}
2025-03-04 08:18:35 +00:00
/**
 * Runs the back-projection pipeline on buffers prepared by initGPUData:
 * optional phase compensation, batched inverse FFT along each pulse,
 * half-swap (fftshift) of each pulse, then one processPulseKernel launch
 * per pulse accumulating into data.im_final.
 *
 * data.AntX / AntY / AntZ / minF and data.r_vec are dereferenced on the host
 * in this function, so they must be host-accessible pointers; the remaining
 * buffers are passed to kernels / cuFFT.
 *
 * @param data  Problem description and buffers.
 * @param flag  When 1, phaseCompensationKernel is applied before the FFT.
 * @param h_R   Unused; kept so the signature stays unchanged.
 */
void bpBasic0CUDA(GPUDATA& data, int flag,double* h_R) {
    (void)h_R; // only referenced by removed debugging code

    // dr below divides by (Nfft - 1); refuse degenerate range vectors.
    if (data.Nfft < 2) {
        fprintf(stderr, "bpBasic0CUDA: Nfft must be >= 2 (got %ld)\n", (long)data.Nfft);
        return;
    }

    // Phase compensation
    if (flag == 1) {
        dim3 block(16, 16);
        dim3 grid((data.K + 15) / 16, (data.Np + 15) / 16);
        phaseCompensationKernel << <grid, block >> > (data.phdata, data.Freq, data.R0, data.K, data.Np);
        PrintLasterError("phaseCompensationKernel");
        cudaCheckError(cudaDeviceSynchronize());
        //data.R0 = data.r; // (original note was garbled; presumably "assuming data.r is defined correctly")
    }

    // Inverse FFT, one transform of length Nfft per pulse.
    // NOTE(review): the plan reads Nfft samples per pulse, while the phase
    // compensation above indexes K samples per pulse — confirm K == Nfft
    // (or that phdata is sized for Nfft * Np).
    cufftHandle plan;
    cufftResult fftStatus = cufftPlan1d(&plan, data.Nfft, CUFFT_C2C, data.Np);
    if (fftStatus != CUFFT_SUCCESS) {
        fprintf(stderr, "bpBasic0CUDA: cufftPlan1d failed (cufftResult=%d)\n", (int)fftStatus);
        return;
    }
    fftStatus = cufftExecC2C(plan, data.phdata, data.phdata, CUFFT_INVERSE);
    cufftDestroy(plan);
    if (fftStatus != CUFFT_SUCCESS) {
        fprintf(stderr, "bpBasic0CUDA: cufftExecC2C failed (cufftResult=%d)\n", (int)fftStatus);
        return;
    }

    // FFT shift
    dim3 blockShift(256);
    dim3 gridShift((data.Np + 255) / 256);
    fftshiftKernel << <gridShift, blockShift >> > (data.phdata, data.Nfft, data.Np);
    PrintLasterError("fftshiftKernel");
    cudaCheckError(cudaDeviceSynchronize());
    printf("fft finished!!\n");

    // Image reconstruction
    const double r_start = data.r_vec[0];
    const double dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1);
    printf("dr = %f\n",dr);

    // 64-bit pixel count so nx * ny cannot overflow before the ceil-division.
    const long long pixelcount = (long long)data.nx * (long long)data.ny;
    const unsigned int grid_size = (unsigned int)((pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE);
    printf("grid finished!!\n");

    printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, (int)data.Nfft);
    printf("BPimage .....\n");

    const long pulseCount = (long)data.Np;
    for (long ii = 0; ii < pulseCount; ++ii) {
        // Launches are queued on the same stream, so pulses accumulate into
        // im_final one after another without an explicit sync per pulse.
        processPulseKernel << <grid_size, BLOCK_SIZE >> > (
            ii,
            data.nx, data.ny,
            data.x_mat, data.y_mat, data.z_mat,
            data.AntX[ii], data.AntY[ii], data.AntZ[ii],
            data.R0, data.minF[ii],
            data.phdata,
            r_start, dr, data.Nfft,
            data.im_final
            );
        PrintLasterError("processPulseKernel");
        if (ii % 1000 == 0) {
            // "%%" prints a literal percent sign; the counters are long.
            printf("\rPRF(%f %%) %ld / %ld\t\t\t\t", (ii * 100.0 / pulseCount), ii, pulseCount);
        }
    }

    cudaCheckError(cudaDeviceSynchronize());
}
2025-03-03 09:50:28 +00:00
2025-03-03 08:25:50 +00:00
/**
 * Fills d_data from h_data.
 *
 * - Scalars (Nfft, K, Np, nx, ny, R0, deltaF) are copied by value.
 * - AntX / AntY / AntZ / minF / r_vec are NOT duplicated: d_data simply
 *   reuses the pointers stored in h_data.
 * - x_mat / y_mat / z_mat / Freq / phdata / im_final are allocated with
 *   mallocCUDADevice and filled from the host arrays with HostToDevice.
 */
void initGPUData(GPUDATA& h_data, GPUDATA& d_data) {
    // Scalar parameters.
    d_data.Nfft = h_data.Nfft;
    d_data.K = h_data.K;
    d_data.Np = h_data.Np;
    d_data.nx = h_data.nx;
    d_data.ny = h_data.ny;
    d_data.R0 = h_data.R0;
    d_data.deltaF = h_data.deltaF;

    // Buffer sizes in bytes, computed once.
    const size_t coordBytes = sizeof(double) * h_data.nx * h_data.ny;
    const size_t freqBytes = sizeof(double) * h_data.Nfft;
    const size_t phaseBytes = sizeof(cufftComplex) * h_data.K * h_data.Np;
    const size_t imageBytes = sizeof(cufftComplex) * h_data.nx * h_data.ny;

    // Pointers shared with the host structure (no device copy is made).
    d_data.AntX = h_data.AntX;
    d_data.AntY = h_data.AntY;
    d_data.AntZ = h_data.AntZ;
    d_data.minF = h_data.minF;
    d_data.r_vec = h_data.r_vec;

    // Device allocations.
    d_data.x_mat = (double*)mallocCUDADevice(coordBytes);
    d_data.y_mat = (double*)mallocCUDADevice(coordBytes);
    d_data.z_mat = (double*)mallocCUDADevice(coordBytes);
    d_data.Freq = (double*)mallocCUDADevice(freqBytes);
    d_data.phdata = (cufftComplex*)mallocCUDADevice(phaseBytes);
    d_data.im_final = (cufftComplex*)mallocCUDADevice(imageBytes);

    // Host -> device uploads.
    HostToDevice(h_data.x_mat, d_data.x_mat, coordBytes);
    HostToDevice(h_data.y_mat, d_data.y_mat, coordBytes);
    HostToDevice(h_data.z_mat, d_data.z_mat, coordBytes);
    HostToDevice(h_data.Freq, d_data.Freq, freqBytes);
    HostToDevice(h_data.phdata, d_data.phdata, phaseBytes);
    HostToDevice(h_data.im_final, d_data.im_final, imageBytes);
}
/**
 * Releases the buffers that initGPUData allocated with mallocCUDADevice.
 *
 * AntX / AntY / AntZ / minF / r_vec are left alone: initGPUData only copies
 * those pointers from the host structure, so they are not released here.
 */
void freeGPUData(GPUDATA& d_data) {
    // Image and phase-history buffers.
    FreeCUDADevice(d_data.im_final);
    FreeCUDADevice(d_data.phdata);

    // Frequency axis.
    FreeCUDADevice(d_data.Freq);

    // Pixel coordinate grids.
    FreeCUDADevice(d_data.z_mat);
    FreeCUDADevice(d_data.y_mat);
    FreeCUDADevice(d_data.x_mat);
}
2025-03-03 09:50:28 +00:00
/**
 * Releases the host-side arrays minF, r_vec, Freq, phdata and im_final
 * with FreeCUDAHost.
 *
 * AntX / AntY / AntZ and x_mat / y_mat / z_mat are intentionally not
 * released here (their release calls were disabled in the original code).
 * NOTE(review): presumably those arrays are owned by the caller — confirm.
 */
void freeHostData(GPUDATA& h_data) {
    // Per-pulse frequency and range axis.
    FreeCUDAHost(h_data.minF);
    FreeCUDAHost(h_data.r_vec);

    // Frequency axis, phase history and output image.
    FreeCUDAHost(h_data.Freq);
    FreeCUDAHost(h_data.phdata);
    FreeCUDAHost(h_data.im_final);
}
/**
 * Host entry point: uploads the inputs described by h_data, runs the
 * back-projection without phase compensation (flag = 0), copies the
 * resulting image back into h_data.im_final and releases the device buffers.
 *
 * @param h_data  Host-side problem description; im_final is overwritten
 *                with the reconstructed image.
 */
void BPBasic0(GPUDATA& h_data)
{
    GPUDATA d_data;
    initGPUData(h_data, d_data);

    // bpBasic0CUDA is defined in this file with three parameters; pass the
    // unused range buffer explicitly rather than relying on a default
    // argument that is not visible here.
    bpBasic0CUDA(d_data, 0, nullptr);

    // Same element type as the allocation in initGPUData.
    DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cufftComplex) * h_data.nx * h_data.ny);
    freeGPUData(d_data);
}
2025-03-03 08:25:50 +00:00
//int main() {
// GPUDATA h_data, d_data;
//
// // Initialize the host-side data
// h_data.Nfft = 1024;
// h_data.K = 512;
// // ... initialize the remaining parameters
//
// // Initialize device memory
// initGPUData(h_data, d_data);
//
// // Run the algorithm
// bpBasic0CUDA(d_data, 0);
//
// // Copy the result back to the host
// cudaCheckError(cudaMemcpy(h_data.im_final, d_data.im_final,
// sizeof(cufftComplex) * h_data.nx * h_data.ny, cudaMemcpyDeviceToHost));
//
// // Release resources
// freeGPUData(d_data);
//
// return 0;
//}