检查了fft变换的结果
parent
c9cd0c3ee1
commit
bdb256ff88
|
|
@ -252,7 +252,7 @@ extern "C" void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length)
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, long len)
|
extern "C" void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, long len)
|
||||||
{
|
{
|
||||||
for (long i = 0; i < len; i++) {
|
for (long i = 0; i < len; i++) {
|
||||||
dst[i] = make_cuComplex(src[i].real(), src[i].imag());
|
dst[i] = make_cuComplex(src[i].real(), src[i].imag());
|
||||||
|
|
@ -260,6 +260,23 @@ extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<dou
|
||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern "C" void HostCuComplexToshared_complexPtr( cuComplex* src, std::complex<double>* dst, long len)
|
||||||
|
{
|
||||||
|
double maxvalue = src[0].x;
|
||||||
|
for (long i = 0; i < len; i++) {
|
||||||
|
dst[i] = std::complex<double>(src[i].x, src[i].y);
|
||||||
|
if (maxvalue < src[i].x) {
|
||||||
|
maxvalue = src[i].x;
|
||||||
|
}
|
||||||
|
if (maxvalue < src[i].y) {
|
||||||
|
maxvalue = src[i].y;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("max value %e\n", maxvalue);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
extern __global__ void CUDA_D_sin(double* y, double* X, int n) {
|
extern __global__ void CUDA_D_sin(double* y, double* X, int n) {
|
||||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
if (idx < n) {
|
if (idx < n) {
|
||||||
|
|
@ -318,6 +335,11 @@ extern "C" void* mallocCUDAHost(long memsize) {
|
||||||
|
|
||||||
// 主机参数内存释放
|
// 主机参数内存释放
|
||||||
extern "C" void FreeCUDAHost(void* ptr) {
|
extern "C" void FreeCUDAHost(void* ptr) {
|
||||||
|
|
||||||
|
if (nullptr == ptr||NULL==ptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
cudaFreeHost(ptr);
|
cudaFreeHost(ptr);
|
||||||
#ifdef __CUDADEBUG__
|
#ifdef __CUDADEBUG__
|
||||||
cudaError_t err = cudaGetLastError();
|
cudaError_t err = cudaGetLastError();
|
||||||
|
|
@ -327,6 +349,8 @@ extern "C" void FreeCUDAHost(void* ptr) {
|
||||||
}
|
}
|
||||||
#endif // __CUDADEBUG__
|
#endif // __CUDADEBUG__
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
|
ptr = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPU参数内存声明
|
// GPU参数内存声明
|
||||||
|
|
@ -346,6 +370,9 @@ extern "C" void* mallocCUDADevice(long memsize) {
|
||||||
|
|
||||||
// GPU参数内存释放
|
// GPU参数内存释放
|
||||||
extern "C" void FreeCUDADevice(void* ptr) {
|
extern "C" void FreeCUDADevice(void* ptr) {
|
||||||
|
if (nullptr == ptr || NULL == ptr) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
cudaFree(ptr);
|
cudaFree(ptr);
|
||||||
#ifdef __CUDADEBUG__
|
#ifdef __CUDADEBUG__
|
||||||
cudaError_t err = cudaGetLastError();
|
cudaError_t err = cudaGetLastError();
|
||||||
|
|
@ -355,6 +382,7 @@ extern "C" void FreeCUDADevice(void* ptr) {
|
||||||
}
|
}
|
||||||
#endif // __CUDADEBUG__
|
#endif // __CUDADEBUG__
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
ptr = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// GPU 内存数据转移
|
// GPU 内存数据转移
|
||||||
|
|
|
||||||
|
|
@ -110,5 +110,6 @@ extern "C" GPUBASELIBAPI void CUDAIFFT(cuComplex* inArr, cuComplex* outArr, long
|
||||||
|
|
||||||
extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length);
|
extern "C" GPUBASELIBAPI void FFTShift1D(cuComplex* d_data, int batch_size, int signal_length);
|
||||||
extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, long len);
|
extern "C" GPUBASELIBAPI void shared_complexPtrToHostCuComplex(std::complex<double>* src, cuComplex* dst, long len);
|
||||||
|
extern "C" GPUBASELIBAPI void HostCuComplexToshared_complexPtr(cuComplex* src, std::complex<double>* dst, long len);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -52,19 +52,40 @@ __global__ void fftshiftKernel(cufftComplex* data, int Nfft, int Np) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void processPulseKernel(int nx, int ny, const double* x_mat, const double* y_mat, const double* z_mat,
|
__global__ void processPulseKernel(
|
||||||
|
long prfid,
|
||||||
|
int nx, int ny, const double* x_mat, const double* y_mat, const double* z_mat,
|
||||||
double AntX, double AntY, double AntZ, double R0, double minF,
|
double AntX, double AntY, double AntZ, double R0, double minF,
|
||||||
const cufftComplex* rc_pulse, double r_start, double dr, int nR,
|
const cufftComplex* rc_pulse, double r_start, double dr, int nR,
|
||||||
cufftComplex* im_final) {
|
cufftComplex* im_final) {
|
||||||
|
//int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
//int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
if (x >= nx || y >= ny) return;
|
if (x >= nx || y >= ny) return;
|
||||||
|
|
||||||
int idx = x * ny + y;
|
long long idx = x * ny + y;
|
||||||
|
|
||||||
|
//long long idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
//long long pixelcount = nx * ny;
|
||||||
|
//if (idx >= pixelcount) return;
|
||||||
|
|
||||||
|
//printf("processPulseKernel start!!\n");
|
||||||
|
|
||||||
|
//if (x >= nx || y >= ny) return;
|
||||||
|
//int idx = x * ny + y;
|
||||||
|
|
||||||
|
|
||||||
double dx = AntX - x_mat[idx];
|
double dx = AntX - x_mat[idx];
|
||||||
double dy = AntY - y_mat[idx];
|
double dy = AntY - y_mat[idx];
|
||||||
double dz = AntZ - z_mat[idx];
|
double dz = AntZ - z_mat[idx];
|
||||||
|
|
||||||
|
//printf("processPulseKernel xmat !!\n");
|
||||||
|
|
||||||
double dR = sqrtf(dx * dx + dy * dy + dz * dz) - R0;
|
double dR = sqrtf(dx * dx + dy * dy + dz * dz) - R0;
|
||||||
|
|
||||||
// Range check
|
// Range check
|
||||||
|
|
@ -77,8 +98,9 @@ __global__ void processPulseKernel(int nx, int ny, const double* x_mat, const do
|
||||||
|
|
||||||
if (index < 0 || index >= nR - 1) return;
|
if (index < 0 || index >= nR - 1) return;
|
||||||
|
|
||||||
cufftComplex rc_low = rc_pulse[index];
|
|
||||||
cufftComplex rc_high = rc_pulse[index + 1];
|
cufftComplex rc_low = rc_pulse[prfid * nR +index];
|
||||||
|
cufftComplex rc_high = rc_pulse[prfid * nR+index + 1];
|
||||||
cufftComplex rc_interp;
|
cufftComplex rc_interp;
|
||||||
rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight;
|
rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight;
|
||||||
rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight;
|
rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight;
|
||||||
|
|
@ -95,6 +117,8 @@ __global__ void processPulseKernel(int nx, int ny, const double* x_mat, const do
|
||||||
// Accumulate
|
// Accumulate
|
||||||
im_final[idx].x += phCorr.x;
|
im_final[idx].x += phCorr.x;
|
||||||
im_final[idx].y += phCorr.y;
|
im_final[idx].y += phCorr.y;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void bpBasic0CUDA(GPUDATA& data, int flag) {
|
void bpBasic0CUDA(GPUDATA& data, int flag) {
|
||||||
|
|
@ -119,25 +143,34 @@ void bpBasic0CUDA(GPUDATA& data, int flag) {
|
||||||
fftshiftKernel << <gridShift, blockShift >> > (data.phdata, data.Nfft, data.Np);
|
fftshiftKernel << <gridShift, blockShift >> > (data.phdata, data.Nfft, data.Np);
|
||||||
cudaCheckError(cudaDeviceSynchronize());
|
cudaCheckError(cudaDeviceSynchronize());
|
||||||
|
|
||||||
|
printf("fft finished!!\n");
|
||||||
// 图像重建
|
// 图像重建
|
||||||
double r_start = data.r_vec[0];
|
double r_start = data.r_vec[0];
|
||||||
double dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1);
|
double dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1);
|
||||||
|
printf("dr finished!!\n");
|
||||||
|
size_t pixelcount = data.nx* data.ny;
|
||||||
|
size_t grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||||
|
printf("grid finished!!\n");
|
||||||
|
|
||||||
dim3 block(16, 16);
|
for (long ii = 0; ii < data.Np; ++ii) {
|
||||||
dim3 grid((data.nx + 15) / 16, (data.ny + 15) / 16);
|
|
||||||
|
|
||||||
for (int ii = 0; ii < data.Np; ++ii) {
|
processPulseKernel << <grid_size, BLOCK_SIZE >> > (
|
||||||
processPulseKernel << <grid, block >> > (
|
ii,
|
||||||
data.nx, data.ny,
|
data.nx, data.ny,
|
||||||
data.x_mat, data.y_mat, data.z_mat,
|
data.x_mat, data.y_mat, data.z_mat,
|
||||||
data.AntX[ii], data.AntY[ii], data.AntZ[ii],
|
data.AntX[ii], data.AntY[ii], data.AntZ[ii],
|
||||||
data.R0, data.minF[ii],
|
data.R0, data.minF[ii],
|
||||||
data.phdata + ii * data.Nfft,
|
data.phdata,
|
||||||
r_start, dr, data.Nfft,
|
r_start, dr, data.Nfft,
|
||||||
data.im_final
|
data.im_final
|
||||||
);
|
);
|
||||||
cudaCheckError(cudaPeekAtLastError());
|
PrintLasterError("processPulseKernel");
|
||||||
|
if (ii % 1000==0) {
|
||||||
|
printf("\rPRF(%f %) %d / %d\t\t\t\t",(ii*100.0/data.Np), ii,data.Np);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
cudaCheckError(cudaDeviceSynchronize());
|
cudaCheckError(cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -148,7 +181,7 @@ void computeRvec(GPUDATA& data) {
|
||||||
double maxWr = 299792458.0f / (2.0f * deltaF);
|
double maxWr = 299792458.0f / (2.0f * deltaF);
|
||||||
|
|
||||||
// 生成r_vec(主机端)
|
// 生成r_vec(主机端)
|
||||||
double* r_vec_host=new double[data.Nfft];
|
double* r_vec_host = (double*)mallocCUDAHost(sizeof(double) * data.Nfft);// new double[data.Nfft];
|
||||||
const double step = maxWr / data.Nfft;
|
const double step = maxWr / data.Nfft;
|
||||||
const double start = -data.Nfft / 2.0f * step;
|
const double start = -data.Nfft / 2.0f * step;
|
||||||
|
|
||||||
|
|
@ -163,27 +196,27 @@ void computeRvec(GPUDATA& data) {
|
||||||
|
|
||||||
|
|
||||||
void initGPUData(GPUDATA& h_data, GPUDATA& d_data) {
|
void initGPUData(GPUDATA& h_data, GPUDATA& d_data) {
|
||||||
d_data.AntX = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
d_data.AntX =h_data.AntX; //(double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
||||||
d_data.AntY = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
d_data.AntY = h_data.AntY;//(double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
||||||
d_data.AntZ = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
d_data.AntZ = h_data.AntZ;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
||||||
d_data.minF = (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
d_data.minF = h_data.minF;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np);
|
||||||
d_data.x_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
d_data.x_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
||||||
d_data.y_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
d_data.y_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
||||||
d_data.z_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
d_data.z_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny);
|
||||||
d_data.r_vec = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
|
d_data.r_vec = h_data.r_vec;// (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
|
||||||
d_data.Freq = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
|
d_data.Freq = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft);
|
||||||
d_data.phdata = (cufftComplex*)mallocCUDADevice(sizeof(cufftComplex) * h_data.K * h_data.Np);
|
d_data.phdata = (cufftComplex*)mallocCUDADevice(sizeof(cufftComplex) * h_data.K * h_data.Np);
|
||||||
d_data.im_final = (cufftComplex*)mallocCUDADevice(sizeof(cufftComplex) * h_data.nx * h_data.ny);
|
d_data.im_final = (cufftComplex*)mallocCUDADevice(sizeof(cufftComplex) * h_data.nx * h_data.ny);
|
||||||
|
|
||||||
HostToDevice(h_data.AntX, d_data.AntX,sizeof(double) * h_data.Np);
|
//HostToDevice(h_data.AntX, d_data.AntX,sizeof(double) * h_data.Np);
|
||||||
HostToDevice(h_data.AntY, d_data.AntY,sizeof(double) * h_data.Np);
|
//HostToDevice(h_data.AntY, d_data.AntY,sizeof(double) * h_data.Np);
|
||||||
HostToDevice(h_data.AntZ, d_data.AntZ,sizeof(double) * h_data.Np);
|
//HostToDevice(h_data.AntZ, d_data.AntZ,sizeof(double) * h_data.Np);
|
||||||
HostToDevice(h_data.minF, d_data.minF,sizeof(double) * h_data.Np);
|
//HostToDevice(h_data.minF, d_data.minF,sizeof(double) * h_data.Np);
|
||||||
HostToDevice(h_data.x_mat, d_data.x_mat,sizeof(double) * h_data.nx * h_data.ny);
|
HostToDevice(h_data.x_mat, d_data.x_mat,sizeof(double) * h_data.nx * h_data.ny);
|
||||||
HostToDevice(h_data.y_mat, d_data.y_mat,sizeof(double) * h_data.nx * h_data.ny);
|
HostToDevice(h_data.y_mat, d_data.y_mat,sizeof(double) * h_data.nx * h_data.ny);
|
||||||
HostToDevice(h_data.z_mat, d_data.z_mat,sizeof(double) * h_data.nx * h_data.ny);
|
HostToDevice(h_data.z_mat, d_data.z_mat,sizeof(double) * h_data.nx * h_data.ny);
|
||||||
HostToDevice(h_data.Freq, d_data.Freq, sizeof(double) * h_data.Nfft);
|
HostToDevice(h_data.Freq, d_data.Freq, sizeof(double) * h_data.Nfft);
|
||||||
HostToDevice(h_data.r_vec, d_data.r_vec, sizeof(double) * h_data.Nfft);
|
//HostToDevice(h_data.r_vec, d_data.r_vec, sizeof(double) * h_data.Nfft);
|
||||||
HostToDevice(h_data.phdata, d_data.phdata, sizeof(cufftComplex) * h_data.K * h_data.Np);
|
HostToDevice(h_data.phdata, d_data.phdata, sizeof(cufftComplex) * h_data.K * h_data.Np);
|
||||||
HostToDevice(h_data.im_final, d_data.im_final, sizeof(cufftComplex) * h_data.nx * h_data.ny);
|
HostToDevice(h_data.im_final, d_data.im_final, sizeof(cufftComplex) * h_data.nx * h_data.ny);
|
||||||
|
|
||||||
|
|
@ -199,14 +232,14 @@ void initGPUData(GPUDATA& h_data, GPUDATA& d_data) {
|
||||||
|
|
||||||
void freeGPUData(GPUDATA& d_data) {
|
void freeGPUData(GPUDATA& d_data) {
|
||||||
|
|
||||||
FreeCUDADevice((d_data.AntX));
|
//FreeCUDADevice((d_data.AntX));
|
||||||
FreeCUDADevice((d_data.AntY));
|
//FreeCUDADevice((d_data.AntY));
|
||||||
FreeCUDADevice((d_data.AntZ));
|
//FreeCUDADevice((d_data.AntZ));
|
||||||
FreeCUDADevice((d_data.minF));
|
//FreeCUDADevice((d_data.minF));
|
||||||
FreeCUDADevice((d_data.x_mat));
|
FreeCUDADevice((d_data.x_mat));
|
||||||
FreeCUDADevice((d_data.y_mat));
|
FreeCUDADevice((d_data.y_mat));
|
||||||
FreeCUDADevice((d_data.z_mat));
|
FreeCUDADevice((d_data.z_mat));
|
||||||
FreeCUDADevice((d_data.r_vec));
|
//FreeCUDADevice((d_data.r_vec));
|
||||||
FreeCUDADevice((d_data.Freq));
|
FreeCUDADevice((d_data.Freq));
|
||||||
FreeCUDADevice((d_data.phdata));
|
FreeCUDADevice((d_data.phdata));
|
||||||
FreeCUDADevice((d_data.im_final));
|
FreeCUDADevice((d_data.im_final));
|
||||||
|
|
@ -214,13 +247,13 @@ void freeGPUData(GPUDATA& d_data) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void freeHostData(GPUDATA& h_data) {
|
void freeHostData(GPUDATA& h_data) {
|
||||||
FreeCUDAHost((h_data.AntX));
|
//FreeCUDAHost((h_data.AntX));
|
||||||
FreeCUDAHost((h_data.AntY));
|
//FreeCUDAHost((h_data.AntY));
|
||||||
FreeCUDAHost((h_data.AntZ));
|
//FreeCUDAHost((h_data.AntZ));
|
||||||
FreeCUDAHost((h_data.minF));
|
FreeCUDAHost((h_data.minF));
|
||||||
FreeCUDAHost((h_data.x_mat));
|
//FreeCUDAHost((h_data.x_mat));
|
||||||
FreeCUDAHost((h_data.y_mat));
|
//FreeCUDAHost((h_data.y_mat));
|
||||||
FreeCUDAHost((h_data.z_mat));
|
//FreeCUDAHost((h_data.z_mat));
|
||||||
FreeCUDAHost((h_data.r_vec));
|
FreeCUDAHost((h_data.r_vec));
|
||||||
FreeCUDAHost((h_data.Freq));
|
FreeCUDAHost((h_data.Freq));
|
||||||
FreeCUDAHost((h_data.phdata));
|
FreeCUDAHost((h_data.phdata));
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,7 @@ inline void gpuAssert(cudaError_t code, const char* file, int line) {
|
||||||
|
|
||||||
|
|
||||||
struct GPUDATA {
|
struct GPUDATA {
|
||||||
int Nfft, K, Np, nx, ny; // 傅里叶点数、频点数、脉冲数、图像列、图像行
|
size_t Nfft, K, Np, nx, ny; // 傅里叶点数、频点数、脉冲数、图像列、图像行
|
||||||
double* AntX, * AntY, * AntZ, * minF; // 天线坐标、起始频率
|
double* AntX, * AntY, * AntZ, * minF; // 天线坐标、起始频率
|
||||||
double* x_mat, * y_mat, * z_mat;// 地面坐标
|
double* x_mat, * y_mat, * z_mat;// 地面坐标
|
||||||
double* r_vec; // 坐标范围
|
double* r_vec; // 坐标范围
|
||||||
|
|
@ -28,6 +28,7 @@ struct GPUDATA {
|
||||||
};
|
};
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
void bpBasic0CUDA(GPUDATA& data, int flag);
|
||||||
void computeRvec(GPUDATA& data);
|
void computeRvec(GPUDATA& data);
|
||||||
void initGPUData(GPUDATA& h_data, GPUDATA& d_data);
|
void initGPUData(GPUDATA& h_data, GPUDATA& d_data);
|
||||||
void freeGPUData(GPUDATA& d_data);
|
void freeGPUData(GPUDATA& d_data);
|
||||||
|
|
|
||||||
|
|
@ -292,6 +292,8 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
|
||||||
long imageBlockline = Memory1GB / 8 / 2 / imHeight * 4; //2GB
|
long imageBlockline = Memory1GB / 8 / 2 / imHeight * 4; //2GB
|
||||||
imageBlockline = imageBlockline < 1 ? 1 : imageBlockline;
|
imageBlockline = imageBlockline < 1 ? 1 : imageBlockline;
|
||||||
|
|
||||||
|
qDebug() << "echo block rows: " << echoBlockline;
|
||||||
|
qDebug() << "image block rows: " << imageBlockline;
|
||||||
|
|
||||||
// 天线坐标
|
// 天线坐标
|
||||||
std::shared_ptr<double> Pxs(new double[this->L0ds->getPluseCount()], delArrPtr);
|
std::shared_ptr<double> Pxs(new double[this->L0ds->getPluseCount()], delArrPtr);
|
||||||
|
|
@ -356,6 +358,9 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
|
||||||
h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * imrowcount * imcolcount);
|
h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * imrowcount * imcolcount);
|
||||||
shared_complexPtrToHostCuComplex(imgArr.get(), h_data.im_final, imrowcount * imcolcount);
|
shared_complexPtrToHostCuComplex(imgArr.get(), h_data.im_final, imrowcount * imcolcount);
|
||||||
|
|
||||||
|
// 保存fft 结果
|
||||||
|
this->TimeEchoDataPath = JoinPath(this->L1ds->getoutFolderPath(), this->L0ds->getSimulationTaskName() + "_Timeecho.bin");
|
||||||
|
gdalImageComplex outTimeEchoImg = CreategdalImageComplexNoProj(this->TimeEchoDataPath,PRFCount, freqpoint, 1);
|
||||||
|
|
||||||
for (long prfid = 0; prfid < PRFCount; prfid = prfid + echoBlockline) {
|
for (long prfid = 0; prfid < PRFCount; prfid = prfid + echoBlockline) {
|
||||||
long ehcoprfcount = echoBlockline;
|
long ehcoprfcount = echoBlockline;
|
||||||
|
|
@ -366,25 +371,24 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
|
||||||
std::shared_ptr<double> antpx(new double[ehcoprfcount], delArrPtr);
|
std::shared_ptr<double> antpx(new double[ehcoprfcount], delArrPtr);
|
||||||
std::shared_ptr<double> antpy(new double[ehcoprfcount], delArrPtr);
|
std::shared_ptr<double> antpy(new double[ehcoprfcount], delArrPtr);
|
||||||
std::shared_ptr<double> antpz(new double[ehcoprfcount], delArrPtr);
|
std::shared_ptr<double> antpz(new double[ehcoprfcount], delArrPtr);
|
||||||
for (long anti = 0; anti < ehcoprfcount; anti++) { // ÌìÏß×ø±ê
|
for (long anti = 0; anti < ehcoprfcount; anti++) {
|
||||||
antpx.get()[anti] = Pxs.get()[anti + prfid];
|
if (anti + prfid < PRFCount) {
|
||||||
antpy.get()[anti] = Pys.get()[anti + prfid];
|
antpx.get()[anti] = Pxs.get()[anti + prfid];
|
||||||
antpz.get()[anti] = Pzs.get()[anti + prfid];
|
antpy.get()[anti] = Pys.get()[anti + prfid];
|
||||||
if (abs(antpx.get()[anti]) < 10 || abs(antpy.get()[anti]) < 10 || abs(antpz.get()[anti]) < 10) {
|
antpz.get()[anti] = Pzs.get()[anti + prfid];
|
||||||
qDebug() << anti << ":" << antpx.get()[anti] << "," << antpy.get()[anti] << "," << antpz.get()[anti];
|
//if (abs(antpx.get()[anti]) < 10 || abs(antpy.get()[anti]) < 10 || abs(antpz.get()[anti]) < 10) {
|
||||||
|
// qDebug() << anti << ":" << antpx.get()[anti] << "," << antpy.get()[anti] << "," << antpz.get()[anti];
|
||||||
|
//}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 起始频率
|
// 起始频率
|
||||||
|
|
||||||
h_data.minF = (double*)mallocCUDAHost(sizeof(double) * ehcoprfcount);
|
h_data.minF = (double*)mallocCUDAHost(sizeof(double) * ehcoprfcount);
|
||||||
|
h_data.Freq = (double*)mallocCUDAHost(sizeof(double) * ehcoprfcount);
|
||||||
for (long anti = 0; anti < ehcoprfcount; anti++) {
|
for (long anti = 0; anti < ehcoprfcount; anti++) {
|
||||||
h_data.minF[anti] = startFreq;
|
h_data.minF[anti] = startFreq;
|
||||||
}
|
h_data.Freq[anti] = startFreq;
|
||||||
|
|
||||||
h_data.Freq = (double*)mallocCUDAHost(sizeof(double) * freqpoint);
|
|
||||||
for (long fid = 0; fid < freqpoint; fid++) {
|
|
||||||
h_data.Freq[fid] = startFreq + fid * h_data.deltaF;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
h_data.AntX = antpx.get(); // 天线
|
h_data.AntX = antpx.get(); // 天线
|
||||||
|
|
@ -392,11 +396,27 @@ ErrorCode TBPImageAlgCls::ProcessGPU()
|
||||||
h_data.AntZ = antpz.get();
|
h_data.AntZ = antpz.get();
|
||||||
h_data.Np = ehcoprfcount;
|
h_data.Np = ehcoprfcount;
|
||||||
h_data.phdata= (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * ehcoprfcount * echofreqpoint);
|
h_data.phdata= (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * ehcoprfcount * echofreqpoint);
|
||||||
|
|
||||||
|
qDebug() << ehcoprfcount <<":\t"<<h_data.Np;
|
||||||
|
|
||||||
shared_complexPtrToHostCuComplex(echoArr.get(), h_data.phdata, ehcoprfcount * echofreqpoint);
|
shared_complexPtrToHostCuComplex(echoArr.get(), h_data.phdata, ehcoprfcount * echofreqpoint);
|
||||||
|
|
||||||
BPBasic0(h_data); //BP
|
// BP
|
||||||
|
GPUDATA d_data;
|
||||||
|
initGPUData(h_data, d_data);
|
||||||
|
bpBasic0CUDA(d_data, 0);
|
||||||
|
printf("BP finished!!!\n");
|
||||||
|
DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny);
|
||||||
|
DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * ehcoprfcount* echofreqpoint);
|
||||||
|
freeGPUData(d_data);
|
||||||
|
|
||||||
|
std::shared_ptr < std::complex<double>> echofftdata(new std::complex<double>[ehcoprfcount * echofreqpoint], delArrPtr);
|
||||||
qDebug() << QString(" image block PRF:[%1] \t").arg((img_rid + imrowcount) * 100.0 / imHeight) << QString(" echo [%1] prfidx:\t ").arg((prfid + ehcoprfcount)*100.0/ PRFCount)<< prfid << "\t-\t" << prfid + ehcoprfcount;
|
qDebug() << QString(" image block PRF:[%1] \t").arg((img_rid + imrowcount) * 100.0 / imHeight) << QString(" echo [%1] prfidx:\t ").arg((prfid + ehcoprfcount)*100.0/ PRFCount)<< prfid << "\t-\t" << prfid + ehcoprfcount;
|
||||||
|
HostCuComplexToshared_complexPtr(h_data.phdata, echofftdata.get(), ehcoprfcount* echofreqpoint);
|
||||||
|
outTimeEchoImg.saveImage(echofftdata, prfid, 0, ehcoprfcount, echofreqpoint,1);
|
||||||
|
|
||||||
|
|
||||||
|
HostCuComplexToshared_complexPtr(h_data.im_final, imgArr.get(), imrowcount* imcolcount);
|
||||||
this->L1ds->saveImageRaster(imgArr, img_rid, imrowcount);
|
this->L1ds->saveImageRaster(imgArr, img_rid, imrowcount);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue