RasterProcessTool/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cu

635 lines
21 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include <cstdio>
#include <cufft.h>
#include <cmath>
#include <cuda_runtime.h>
#include <iostream>
#include <memory>
#include <vector>
#include <cmath>
#include <complex>
#include <device_launch_parameters.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include <cufft.h>
#include <cufftw.h>
#include <cufftXt.h>
#include <cublas_v2.h>
#include <cuComplex.h>
#include "BaseConstVariable.h"
#include "GPUTool.cuh"
#include "GPUBPTool.cuh"
#include "BPBasic0_CUDA.cuh"
#include <PrintMsgToQDebug.h>
#define c LIGHTSPEED
__global__ void phaseCompensationKernel(cufftComplex* phdata, const double* Freq, double r, int K, int Na) {
int freqIdx = blockIdx.x * blockDim.x + threadIdx.x;
int pulseIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (freqIdx >= K || pulseIdx >= Na) return;
int idx = pulseIdx * K + freqIdx;
double phase = 4 * PI * Freq[freqIdx] * r / c;
double cos_phase = cos(phase);
double sin_phase = sin(phase);
cufftComplex ph = phdata[idx];
double new_real = ph.x * cos_phase - ph.y * sin_phase;
double new_imag = ph.x * sin_phase + ph.y * cos_phase;
phdata[idx] = make_cuComplex(new_real, new_imag);
}
__global__ void fftshiftKernel(cufftComplex* data, int Nfft, int Np) {
int pulse = blockIdx.x * blockDim.x + threadIdx.x;
if (pulse >= Np) return;
int half = Nfft / 2;
for (int i = 0; i < half; ++i) {
cufftComplex temp = data[pulse * Nfft + i];
data[pulse * Nfft + i] = data[pulse * Nfft + i + half];
data[pulse * Nfft + i + half] = temp;
}
}
__global__ void processPulseKernel(
long prfid,
int nx, int ny,
const double* x_mat, const double* y_mat, const double* z_mat,
double AntX, double AntY, double AntZ,
double R0, double minF,
const cufftComplex* rc_pulse,
const double r_start, const double dr, const int nR,
cufftComplex* im_final
) {
//
long long idx = blockIdx.x * blockDim.x + threadIdx.x;
long long pixelcount = nx * ny;
if (idx >= pixelcount) return;
//printf("processPulseKernel start!!\n");
//if (x >= nx || y >= ny) return;
//int idx = x * ny + y;
double dx = AntX - x_mat[idx];
double dy = AntY - y_mat[idx];
double dz = AntZ - z_mat[idx];
//printf("processPulseKernel xmat !!\n");
double R = sqrt(dx * dx + dy * dy + dz * dz);
double dR = R - R0;
if (dR < r_start || dR >= (r_start + dr * (nR - 1))) return;
// Linear interpolation
double pos = (dR - r_start) / dr;
int index = (int)floor(pos);
double weight = pos - index;
if (index < 0 || index >= nR - 1) return;
cufftComplex rc_low = rc_pulse[prfid * nR +index];
cufftComplex rc_high = rc_pulse[prfid * nR+index + 1];
cufftComplex rc_interp;
rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight;
rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight;
// Phase correction
double phase = 4 * PI * minF / c * dR;
double cos_phase = cos(phase);
double sin_phase = sin(phase);
cufftComplex phCorr;
phCorr.x = rc_interp.x * cos_phase - rc_interp.y * sin_phase;
phCorr.y = rc_interp.x * sin_phase + rc_interp.y * cos_phase;
// Accumulate
im_final[idx].x += phCorr.x;
im_final[idx].y += phCorr.y;
//printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, nR);
if (abs(phCorr.x) > 1e-100 || abs(phCorr.y > 1e-100)) {
//printf(
// "[DEBUG] prfid=%-4ld | idx=%-8lld\n"
// " Ant: X=%-18.10e Y=%-18.10e Z=%-18.10e\n"
// " Pix: X=%-18.10e Y=%-18.10e Z=%-18.10e\n"
// " R=%-18.10e|dR=%-18.10e | pos=%-8.4f[%-6d+%-8.6f]\n"
// " RC: low=(%-18.10e,%-18.10e) high=(%-18.10e,%-18.10e)\n"
// " => interp=(%-18.10e,%-18.10e)\n"
// " Phase: val=%-18.10e | corr=(%-18.10e,%-18.10e)\n"
// " Final: im=(%-18.10e,%-18.10e)\n"
// "----------------------------------------\n",
// prfid, idx,
// AntX, AntY, AntZ,
// x_mat[idx], y_mat[idx], z_mat[idx],
// R,dR,
// pos, index, weight,
// rc_low.x, rc_low.y,
// rc_high.x, rc_high.y,
// rc_interp.x, rc_interp.y,
// phase,
// phCorr.x, phCorr.y,
// im_final[idx].x, im_final[idx].y
//);
}
}
void bpBasic0CUDA(GPUDATA& data, int flag,double* h_R) {
// Phase compensation
if (flag == 1) {
dim3 block(16, 16);
dim3 grid((data.K + 15) / 16, (data.Np + 15) / 16);
phaseCompensationKernel << <grid, block >> > (data.phdata, data.Freq, data.R0, data.K, data.Np);
PrintLasterError("bpBasic0CUDA Phase compensation");
//data.R0 = data.r; // <20><><EFBFBD><EFBFBD>data.r<><72><EFBFBD><EFBFBD>ȷ<EFBFBD><C8B7><EFBFBD><EFBFBD>
}
// FFT<46><54><EFBFBD><EFBFBD>
cufftHandle plan;
cufftPlan1d(&plan, data.Nfft, CUFFT_C2C, data.Np);
cufftExecC2C(plan, data.phdata, data.phdata, CUFFT_INVERSE);
cufftDestroy(plan);
// FFT<46><54>λ
dim3 blockShift(256);
dim3 gridShift((data.Np + 255) / 256);
fftshiftKernel << <gridShift, blockShift >> > (data.phdata, data.Nfft, data.Np);
PrintLasterError("bpBasic0CUDA Phase FFT Process");
printfinfo("fft finished!!\n");
// ͼ<><CDBC><EFBFBD>ؽ<EFBFBD>
double r_start = data.r_vec[0];
double dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1);
printfinfo("dr = %f\n",dr);
long pixelcount = data.nx* data.ny;
long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
printfinfo("grid finished!!\n");
//double* d_R = (double*)mallocCUDADevice(sizeof(double) * data.nx * data.ny);
printfinfo("r_start=%e;dr=%e;nR=%d\n", r_start, dr, data.Nfft);
printfinfo("BPimage .....\n");
for (long ii = 0; ii < data.Np; ++ii) {
processPulseKernel << <grid_size, BLOCK_SIZE >> > (
ii,
data.nx, data.ny,
data.x_mat, data.y_mat, data.z_mat,
data.AntX[ii], data.AntY[ii], data.AntZ[ii],
data.R0, data.minF[ii],
data.phdata,
r_start, dr, data.Nfft,
data.im_final
//,d_R
);
if (ii % 10000 == 0) {
printfinfo("PRF[%f]: %d / %d", ii * 100.0 / data.Np, ii, data.Np);
}
}
//FreeCUDADevice(d_R);
PrintLasterError("bpBasic0CUDA Phase BPimage Process finished!!");
}
void initGPUData(GPUDATA& h_data, GPUDATA& d_data) {
d_data.AntX =h_data.AntX; //(double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.AntY = h_data.AntY;//(double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.AntZ = h_data.AntZ;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.minF = h_data.minF;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.x_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
d_data.y_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
d_data.z_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
d_data.r_vec = h_data.r_vec;// (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
d_data.Freq = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
d_data.phdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.Nfft * h_data.Np);
d_data.im_final = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.nx * h_data.ny);
//HostToDevice(h_data.AntX, d_data.AntX,sizeof(double) * h_data.Np);
//HostToDevice(h_data.AntY, d_data.AntY,sizeof(double) * h_data.Np);
//HostToDevice(h_data.AntZ, d_data.AntZ,sizeof(double) * h_data.Np);
//HostToDevice(h_data.minF, d_data.minF,sizeof(double) * h_data.Np);
HostToDevice(h_data.x_mat, d_data.x_mat,sizeof(double) * h_data.nx * h_data.ny); printf("image X Copy finished!!!\n");
HostToDevice(h_data.y_mat, d_data.y_mat,sizeof(double) * h_data.nx * h_data.ny); printf("image Y Copy finished!!!\n");
HostToDevice(h_data.z_mat, d_data.z_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image Z Copy finished!!!\n");
HostToDevice(h_data.Freq, d_data.Freq, sizeof(double) * h_data.Nfft);
//HostToDevice(h_data.r_vec, d_data.r_vec, sizeof(double) * h_data.Nfft);
HostToDevice(h_data.phdata, d_data.phdata, sizeof(cuComplex) * h_data.Nfft * h_data.Np); printf("image echo Copy finished!!!\n");
HostToDevice(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); printf("image data Copy finished!!!\n");
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
d_data.Nfft = h_data.Nfft;
d_data.K = h_data.K;
d_data.Np = h_data.Np;
d_data.nx = h_data.nx;
d_data.ny = h_data.ny;
d_data.R0 = h_data.R0;
d_data.deltaF = h_data.deltaF;
}
void freeGPUData(GPUDATA& d_data) {
//FreeCUDADevice((d_data.AntX));
//FreeCUDADevice((d_data.AntY));
//FreeCUDADevice((d_data.AntZ));
//FreeCUDADevice((d_data.minF));
FreeCUDADevice((d_data.x_mat));
FreeCUDADevice((d_data.y_mat));
FreeCUDADevice((d_data.z_mat));
//FreeCUDADevice((d_data.r_vec));
FreeCUDADevice((d_data.Freq));
FreeCUDADevice((d_data.phdata));
FreeCUDADevice((d_data.im_final));
}
void freeHostData(GPUDATA& h_data) {
//FreeCUDAHost((h_data.AntX));
//FreeCUDAHost((h_data.AntY));
//FreeCUDAHost((h_data.AntZ));
FreeCUDAHost((h_data.minF));
//FreeCUDAHost((h_data.x_mat));
//FreeCUDAHost((h_data.y_mat));
//FreeCUDAHost((h_data.z_mat));
FreeCUDAHost((h_data.r_vec));
FreeCUDAHost((h_data.Freq));
FreeCUDAHost((h_data.phdata));
FreeCUDAHost((h_data.im_final));
}
void BPBasic0(GPUDATA& h_data)
{
GPUDATA d_data;
initGPUData(h_data, d_data);
bpBasic0CUDA(d_data, 0);
DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny);
freeGPUData(d_data);
}
/** <20><><EFBFBD><EFBFBD><EFBFBD>ȴ<EFBFBD><C8B4><EFBFBD>**********************************************************************************************/
__global__ void phaseCompensationKernel_single(cufftComplex* phdata, const float* Freq, float r, int K, int Na) {
int freqIdx = blockIdx.x * blockDim.x + threadIdx.x;
int pulseIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (freqIdx >= K || pulseIdx >= Na) return;
int idx = pulseIdx * K + freqIdx;
float phase = 4 * PI * Freq[freqIdx] * r / c;
float cos_phase = cosf(phase);
float sin_phase = sinf(phase);
cufftComplex ph = phdata[idx];
float new_real = ph.x * cos_phase - ph.y * sin_phase;
float new_imag = ph.x * sin_phase + ph.y * cos_phase;
phdata[idx] = make_cuComplex(new_real, new_imag);
}
__global__ void fftshiftKernel_single(cufftComplex* data, int Nfft, int Np) {
int pulse = blockIdx.x * blockDim.x + threadIdx.x;
if (pulse >= Np) return;
int half = Nfft / 2;
for (int i = 0; i < half; ++i) {
cufftComplex temp = data[pulse * Nfft + i];
data[pulse * Nfft + i] = data[pulse * Nfft + i + half];
data[pulse * Nfft + i + half] = temp;
}
}
__global__ void processPulseKernel_single(
long prfid,
int nx, int ny,
const float* x_mat, const float* y_mat, const float* z_mat,
float AntX, float AntY, float AntZ,
float R0, float minF,
const cufftComplex* rc_pulse,
const float r_start, const float dr, const int nR,
cufftComplex* im_final
) {
//
long long idx = blockIdx.x * blockDim.x + threadIdx.x;
long long pixelcount = nx * ny;
if (idx >= pixelcount) return;
//printf("processPulseKernel start!!\n");
//if (x >= nx || y >= ny) return;
//int idx = x * ny + y;
float dx = AntX - x_mat[idx];
float dy = AntY - y_mat[idx];
float dz = AntZ - z_mat[idx];
//printf("processPulseKernel xmat !!\n");
float R = sqrtf(dx * dx + dy * dy + dz * dz);
float dR = R - R0;
if (dR < r_start || dR >= (r_start + dr * (nR - 1))) return;
// Linear interpolation
float pos = (dR - r_start) / dr;
int index = (int)floorf(pos);
float weight = pos - index;
if (index < 0 || index >= nR - 1) return;
cufftComplex rc_low = rc_pulse[prfid * nR + index];
cufftComplex rc_high = rc_pulse[prfid * nR + index + 1];
cufftComplex rc_interp;
rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight;
rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight;
// Phase correction
float phase = 4 * PI * minF / c * dR;
float cos_phase = cosf(phase);
float sin_phase = sinf(phase);
cufftComplex phCorr;
phCorr.x = rc_interp.x * cos_phase - rc_interp.y * sin_phase;
phCorr.y = rc_interp.x * sin_phase + rc_interp.y * cos_phase;
// Accumulate
im_final[idx].x += phCorr.x;
im_final[idx].y += phCorr.y;
//printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, nR);
}
void bpBasic0CUDA_single(GPUDATA_single& data, int flag, float* h_R)
{
// Phase compensation
if (flag == 1) {
dim3 block(16, 16);
dim3 grid((data.K + 15) / 16, (data.Np + 15) / 16);
phaseCompensationKernel_single << <grid, block >> > (data.phdata, data.Freq, data.R0, data.K, data.Np);
PrintLasterError("bpBasic0CUDA Phase compensation");
//data.R0 = data.r; // <20><><EFBFBD><EFBFBD>data.r<><72><EFBFBD><EFBFBD>ȷ<EFBFBD><C8B7><EFBFBD><EFBFBD>
}
// FFT<46><54><EFBFBD><EFBFBD>
cufftHandle plan;
cufftPlan1d(&plan, data.Nfft, CUFFT_C2C, data.Np);
cufftExecC2C(plan, data.phdata, data.phdata, CUFFT_INVERSE);
cufftDestroy(plan);
// FFT<46><54>λ
dim3 blockShift(256);
dim3 gridShift((data.Np + 255) / 256);
fftshiftKernel_single << <gridShift, blockShift >> > (data.phdata, data.Nfft, data.Np);
PrintLasterError("bpBasic0CUDA Phase FFT Process");
printfinfo("fft finished!!\n");
// ͼ<><CDBC><EFBFBD>ؽ<EFBFBD>
float r_start = data.r_vec[0];
float dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1);
printfinfo("dr = %f\n", dr);
long pixelcount = data.nx * data.ny;
long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
printfinfo("grid finished!!\n");
//float* d_R = (float*)mallocCUDADevice(sizeof(float) * data.nx * data.ny);
printfinfo("r_start=%e;dr=%e;nR=%d\n", r_start, dr, data.Nfft);
printfinfo("BPimage .....\n");
for (long ii = 0; ii < data.Np; ++ii) {
processPulseKernel_single << <grid_size, BLOCK_SIZE >> > (
ii,
data.nx, data.ny,
data.x_mat, data.y_mat, data.z_mat,
data.AntX[ii], data.AntY[ii], data.AntZ[ii],
data.R0, data.minF[ii],
data.phdata,
r_start, dr, data.Nfft,
data.im_final
//,d_R
);
PrintLasterError("processPulseKernel");
if (ii % 1000 == 0) {
printfinfo("\rPRF(%f %) %d / %d\t\t\t\t", (ii * 100.0 / data.Np), ii, data.Np);
}
}
//FreeCUDADevice(d_R);
PrintLasterError("bpBasic0CUDA Phase BPimage Process finished!!");
}
void initGPUData_single(GPUDATA_single& h_data, GPUDATA_single& d_data)
{
d_data.AntX = h_data.AntX; //(double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.AntY = h_data.AntY;//(double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.AntZ = h_data.AntZ;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.minF = h_data.minF;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
d_data.x_mat = (float*)mallocCUDADevice(sizeof(float) * h_data.nx * h_data.ny);
d_data.y_mat = (float*)mallocCUDADevice(sizeof(float) * h_data.nx * h_data.ny);
d_data.z_mat = (float*)mallocCUDADevice(sizeof(float) * h_data.nx * h_data.ny);
d_data.r_vec = h_data.r_vec;// (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
d_data.Freq = (float*)mallocCUDADevice(sizeof(float) * h_data.Nfft);
d_data.phdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.Nfft * h_data.Np);
d_data.im_final = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.nx * h_data.ny);
//HostToDevice(h_data.AntX, d_data.AntX,sizeof(double) * h_data.Np);
//HostToDevice(h_data.AntY, d_data.AntY,sizeof(double) * h_data.Np);
//HostToDevice(h_data.AntZ, d_data.AntZ,sizeof(double) * h_data.Np);
//HostToDevice(h_data.minF, d_data.minF,sizeof(double) * h_data.Np);
HostToDevice(h_data.x_mat, d_data.x_mat, sizeof(float) * h_data.nx * h_data.ny); printf("image X Copy finished!!!\n");
HostToDevice(h_data.y_mat, d_data.y_mat, sizeof(float) * h_data.nx * h_data.ny); printf("image Y Copy finished!!!\n");
HostToDevice(h_data.z_mat, d_data.z_mat, sizeof(float) * h_data.nx * h_data.ny); printf("image Z Copy finished!!!\n");
HostToDevice(h_data.Freq, d_data.Freq, sizeof(float) * h_data.Nfft);
//HostToDevice(h_data.r_vec, d_data.r_vec, sizeof(double) * h_data.Nfft);
HostToDevice(h_data.phdata, d_data.phdata, sizeof(cuComplex) * h_data.Nfft * h_data.Np); printf("image echo Copy finished!!!\n");
HostToDevice(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); printf("image data Copy finished!!!\n");
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
d_data.Nfft = h_data.Nfft;
d_data.K = h_data.K;
d_data.Np = h_data.Np;
d_data.nx = h_data.nx;
d_data.ny = h_data.ny;
d_data.R0 = h_data.R0;
d_data.deltaF = h_data.deltaF;
}
void freeGPUData_single(GPUDATA_single& d_data)
{
//FreeCUDADevice((d_data.AntX));
//FreeCUDADevice((d_data.AntY));
//FreeCUDADevice((d_data.AntZ));
//FreeCUDADevice((d_data.minF));
FreeCUDADevice((d_data.x_mat));
FreeCUDADevice((d_data.y_mat));
FreeCUDADevice((d_data.z_mat));
//FreeCUDADevice((d_data.r_vec));
FreeCUDADevice((d_data.Freq));
FreeCUDADevice((d_data.phdata));
FreeCUDADevice((d_data.im_final));
}
void freeHostData_single(GPUDATA_single& h_data)
{
//FreeCUDAHost((h_data.AntX));
//FreeCUDAHost((h_data.AntY));
//FreeCUDAHost((h_data.AntZ));
FreeCUDAHost((h_data.minF));
//FreeCUDAHost((h_data.x_mat));
//FreeCUDAHost((h_data.y_mat));
//FreeCUDAHost((h_data.z_mat));
FreeCUDAHost((h_data.r_vec));
FreeCUDAHost((h_data.Freq));
FreeCUDAHost((h_data.phdata));
FreeCUDAHost((h_data.im_final));
}
void BPBasic0_single(GPUDATA_single& h_data)
{
GPUDATA_single d_data;
initGPUData_single(h_data, d_data);
bpBasic0CUDA_single(d_data, 0);
DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny);
freeGPUData_single(d_data);
}
void copy_Host_Single_GPUData(GPUDATA_single& h_data, GPUDATA& d_data)
{
d_data.Nfft = h_data.Nfft;
d_data.K = h_data.K;
d_data.Np = h_data.Np;
d_data.nx = h_data.nx;
d_data.ny = h_data.ny;
d_data.R0 = h_data.R0;
d_data.deltaF = h_data.deltaF;
d_data.AntX = (double*)mallocCUDAHost(sizeof(double) * h_data.Np);
d_data.AntY = (double*)mallocCUDAHost(sizeof(double) * h_data.Np);
d_data.AntZ = (double*)mallocCUDAHost(sizeof(double) * h_data.Np);
d_data.r_vec = (double*)mallocCUDAHost(sizeof(double) * h_data.Nfft);
d_data.minF = (double*)mallocCUDAHost(sizeof(double) * h_data.Np);
d_data.x_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
d_data.y_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
d_data.z_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
d_data.Freq = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
d_data.phdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.Nfft * h_data.Np);
d_data.im_final = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.nx * h_data.ny);
for (long i = 0; i < h_data.Np; i++) {
d_data.AntX[i] = h_data.AntX[i];
d_data.AntY[i] = h_data.AntY[i];
d_data.AntZ[i] = h_data.AntZ[i];
d_data.minF[i] = h_data.minF[i];
}
for (long i = 0; i < h_data.Nfft; i++) {
d_data.r_vec[i] = h_data.r_vec[i];
}
double* x_mat = (double*)mallocCUDAHost(sizeof(double) * h_data.nx * h_data.ny);
double* y_mat = (double*)mallocCUDAHost(sizeof(double) * h_data.nx * h_data.ny);
double* z_mat = (double*)mallocCUDAHost(sizeof(double) * h_data.nx * h_data.ny);
for (long i = 0; i < h_data.ny; i++) {
for (long j = 0; j < h_data.nx; j++) {
x_mat[i * h_data.nx + j] = h_data.x_mat[i * h_data.nx + j];
y_mat[i * h_data.nx + j] = h_data.y_mat[i * h_data.nx + j];
z_mat[i * h_data.nx + j] = h_data.z_mat[i * h_data.nx + j];
}
}
double* h_freq = (double*)mallocCUDAHost(sizeof(double) * h_data.Nfft);
for (long i = 0; i < h_data.Nfft; i++) {
h_freq[i] = h_data.Freq[i];
}
HostToDevice(h_freq, d_data.Freq, sizeof(double) * h_data.Nfft);
HostToDevice(x_mat, d_data.x_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image X Copy finished!!!\n");
HostToDevice(y_mat, d_data.y_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image Y Copy finished!!!\n");
HostToDevice(z_mat, d_data.z_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image Z Copy finished!!!\n");
HostToDevice(h_data.phdata, d_data.phdata, sizeof(cuComplex) * h_data.Nfft * h_data.Np); printf("image echo Copy finished!!!\n");
HostToDevice(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); printf("image data Copy finished!!!\n");
FreeCUDAHost(h_freq);
FreeCUDAHost(x_mat);
FreeCUDAHost(y_mat);
FreeCUDAHost(z_mat);
}
//int main() {
// GPUDATA h_data, d_data;
//
// // <20><>ʼ<EFBFBD><CABC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// h_data.Nfft = 1024;
// h_data.K = 512;
// // ... <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʼ<EFBFBD><CABC>
//
// // <20><>ʼ<EFBFBD><CABC><EFBFBD><EFBFBD>ڴ<EFBFBD>
// initGPUData(h_data, d_data);
//
// // ִ<><D6B4><EFBFBD>
// bpBasic0CUDA(d_data, 0);
//
// // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// cudaCheckError(cudaMemcpy(h_data.im_final, d_data.im_final,
// sizeof(cufftComplex) * h_data.nx * h_data.ny, cudaMemcpyDeviceToHost));
//
// // <20>ͷ<EFBFBD><CDB7><EFBFBD>Դ
// freeGPUData(d_data);
//
// return 0;
//}