From 3b300af71c773a870c741d12eab3b4e1324b1917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E5=A2=9E=E8=BE=89?= <3045316072@qq.com> Date: Tue, 11 Mar 2025 09:31:03 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E5=9F=BA=E4=BA=8Efp?= =?UTF-8?q?32=E7=9A=84fp64=E6=A8=A1=E6=8B=9F=E8=AE=A1=E7=AE=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../BaseTool/ImageOperatorBase.cpp | 3 +- GPUBaseLib/GPUBaseLib.vcxproj | 5 + GPUBaseLib/GPUBaseLib.vcxproj.filters | 6 + GPUBaseLib/GPUTool/GPUDouble32.cu | 560 +++++++++ GPUBaseLib/GPUTool/GPUDouble32.cuh | 76 ++ Toolbox/SimulationSARTool/GPUBpSimulation.cu | 6 +- .../SimulationSAR/BPBasic0_CUDA.cu | 335 +++++- .../SimulationSAR/BPBasic0_CUDA.cuh | 14 +- .../SimulationSARTool.vcxproj | 2 +- Toolbox/SimulationSARTool/UnitTestMain.cpp | 1045 ++++++++++------- 10 files changed, 1640 insertions(+), 412 deletions(-) create mode 100644 GPUBaseLib/GPUTool/GPUDouble32.cu create mode 100644 GPUBaseLib/GPUTool/GPUDouble32.cuh diff --git a/BaseCommonLibrary/BaseTool/ImageOperatorBase.cpp b/BaseCommonLibrary/BaseTool/ImageOperatorBase.cpp index b3c419e..4ffc877 100644 --- a/BaseCommonLibrary/BaseTool/ImageOperatorBase.cpp +++ b/BaseCommonLibrary/BaseTool/ImageOperatorBase.cpp @@ -3613,7 +3613,8 @@ void testOutClsArr(QString filename, long* amp, long rowcount, long colcount) { void BASECONSTVARIABLEAPI testOutComplexDoubleArr(QString filename, std::complex* data, long rowcount, long colcount) { - gdalImageComplex compleximg= CreateEchoComplex(filename, rowcount, colcount, 1); + QString ampPath = getDebugDataPath(filename); + gdalImageComplex compleximg= CreateEchoComplex(ampPath, rowcount, colcount, 1); compleximg.saveImage( data, 0, 0, rowcount, colcount, 1); return void BASECONSTVARIABLEAPI(); diff --git a/GPUBaseLib/GPUBaseLib.vcxproj b/GPUBaseLib/GPUBaseLib.vcxproj index 103a178..0e21f32 100644 --- a/GPUBaseLib/GPUBaseLib.vcxproj +++ b/GPUBaseLib/GPUBaseLib.vcxproj @@ -27,10 +27,12 @@ + + @@ -193,6 +195,9 @@ true compute_86,sm_86 + true + compile + O3 diff --git a/GPUBaseLib/GPUBaseLib.vcxproj.filters b/GPUBaseLib/GPUBaseLib.vcxproj.filters index a0ea2cd..1d8fae5 100644 --- a/GPUBaseLib/GPUBaseLib.vcxproj.filters +++ b/GPUBaseLib/GPUBaseLib.vcxproj.filters @@ -24,10 +24,16 @@ GPUTool + + GPUTool + GPUTool + + GPUTool + \ No newline at end of file diff --git a/GPUBaseLib/GPUTool/GPUDouble32.cu b/GPUBaseLib/GPUTool/GPUDouble32.cu new file mode 100644 index 0000000..7343486 --- /dev/null +++ b/GPUBaseLib/GPUTool/GPUDouble32.cu @@ -0,0 +1,560 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "BaseConstVariable.h" +#include "GPUTool.cuh" +#include "GPUDouble32.cuh" +#include "PrintMsgToQDebug.h" + + +// 将 double 转换为 double32 + + +// 方法一:使用CUDA内置快速除法指令 +__device__ float fast_reciprocal(float x) { + return __fdividef(1.0f, x); // 精确度约21位 +} + + +// 方法二:PTX指令级实现(更快但精度略低) +__device__ float ptx_reciprocal(float x) { + float r; + asm("rcp.approx.ftz.f32 %0, %1;" : "=f"(r) : "f"(x)); + return r; // 精确度约20位 +} + + +// 快速双32位运算核心算法 +__device__ __host__ double32 two_sum(float a, float b) { + float s = a + b; + float v = s - a; + float e = (a - (s - v)) + (b - v); + return double32(s, e); +} + +__device__ __host__ double32 quick_two_sum(float a, float b) { + float s = a + b; + float e = b - (s - a); + return double32(s, e); +} + + + + + + +__device__ double32 double_to_double32(double value) { + const float SCALE_FACTOR = 1u << 12 + 1; // 4097.0f + const float INV_SCALE = 1.0f / SCALE_FACTOR; + + // Step 1: 分割双精度值为高低两部分 + float high = __double2float_rd(value); + double residual = value - __double2float_rd(high); + + // Step 2: 使用Dekker算法精确分解余数 + float temp = SCALE_FACTOR * __double2float_ru(residual); + float res_hi = temp - (temp - __double2float_ru(residual)); + float res_lo = __double2float_ru(residual) - res_hi; + + // Step 3: 合并结果 + float low = (res_hi + (res_lo + (residual - __double2float_rd(residual)))) * INV_SCALE; + + // Step 4: 规范化处理 + float s = high + low; + float e = low - (s - high); + return double32(s, e); +} + +// 将 double32 转换为 double +__device__ __host__ double double32_to_double(const double32& value) { + // 分步转换确保精度不丢失 + const double high_part = static_cast(value.high); + const double low_part = static_cast(value.low); + + // Kahan式加法补偿舍入误差 + const double sum = high_part + low_part; + const double err = (high_part - sum) + low_part; + + return sum + err; +} + + + +// 使用加法函数 +__device__ double32 double32_add(const double32& a, const double32& b) { + double32 s = two_sum(a.high, b.high); + s.low += a.low + b.low; + return quick_two_sum(s.high, s.low); +} + +// 使用减法函数 +__device__ double32 double32_sub(const double32& a, const double32& b) { + // 步骤1:高位部分相减(核心计算) + float high_diff = a.high - b.high; + + // 步骤2:误差补偿计算(参考Dekker算法) + float temp = (a.high - (high_diff + b.high)) + (b.high - (a.high - high_diff)); + + // 步骤3:低位综合计算(结合参考的精度保护机制) + float low_diff = (a.low - b.low) + temp; + + // 步骤4:重正规化处理(关键精度保障,参考) + float sum = high_diff + low_diff; + float residual = low_diff - (sum - high_diff); + + return { sum, residual }; +} + +// 使用乘法函数 +__device__ double32 double32_mul(const double32& a, const double32& b) { + const float split = 4097.0f; // 2^12 + 1 + float c = split * a.high; + float a_hi = c - (c - a.high); + float a_lo = a.high - a_hi; + + c = split * b.high; + float b_hi = c - (c - b.high); + float b_lo = b.high - b_hi; + + float p = a.high * b.high; + float e = (((a_hi * b_hi - p) + a_hi * b_lo) + a_lo * b_hi) + a_lo * b_lo; + e += a.high * b.low + a.low * b.high; + return quick_two_sum(p, e); +} + +// 使用除法函数 +__device__ double32 double32_div(const double32& x, const double32& y) { + // 步骤1:使用改进的牛顿迭代法 + float y_hi = y.high; + float inv_hi = fast_reciprocal(y_hi); + + // 一次牛顿迭代提升精度(需要2次FMA) + inv_hi = __fmaf_rn(inv_hi, __fmaf_rn(-y_hi, inv_hi, 1.0f), inv_hi); + + // 步骤2:计算商的高位部分 + float q_hi = x.high * inv_hi; + + // 步骤3:误差补偿计算 + double32 p = double32_mul(y, double32(q_hi, 0)); + double32 r = double32_sub(x, p); + + // 步骤4:精确计算低位部分 + float q_lo = (r.high + r.low) * inv_hi; + q_lo = __fmaf_rn(-q_hi, y.low, q_lo) * inv_hi; + + // 步骤5:规范化结果 + return quick_two_sum(q_hi, q_lo); +} + +// 使用 sin 函数 +__device__ double32 double32_sin(const double32& a) { + double32 result; + result.high = sinf(a.high); + result.low = sinf(a.low); + return result; +} + +// 使用 cos 函数 +__device__ double32 double32_cos(const double32& a) { + double32 result; + result.high = cosf(a.high); + result.low = cosf(a.low); + return result; +} + +// 使用 log2 函数 +__device__ double32 double32_log2(const double32& a) { + double32 result; + result.high = log2f(a.high); + result.low = log2f(a.low); + return result; +} + +// 使用 log10 函数 +__device__ double32 double32_log10(const double32& a) { + double32 result; + result.high = log10f(a.high); + result.low = log10f(a.low); + return result; +} + +// 使用 ln 函数 +__device__ double32 double32_ln(const double32& a) { + double32 result; + result.high = logf(a.high); + result.low = logf(a.low); + return result; +} + +// 使用 exp 函数 +__device__ double32 double32_exp(const double32& a) { + double32 result; + result.high = expf(a.high); + result.low = expf(a.low); + return result; +} + +// 使用 pow 函数 +__device__ double32 double32_pow(const double32& a, const double32& b) { + double32 result; + result.high = powf(a.high, b.high); + result.low = powf(a.low, b.low); + return result; +} + +// 使用 sqrt 函数 +__device__ double32 double32_sqrt(const double32& a) { + double32 result; + result.high = sqrtf(a.high); + result.low = sqrtf(a.low); + return result; +} + + + + +__global__ void test_double_to_double32_kernel(double* d_input, double* d_output, int size) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + double value = d_input[idx]; + d_output[idx] = double32_to_double(double_to_double32(value))-value; + } +} + + + + +__global__ void test_kernel(double* d_input, double* d_output, int size, int operation) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + double va = d_input[idx]; + double vb = va + 1.0; + double32 a = double_to_double32(va); + double32 b = double_to_double32(vb); // 用于二元操作的第二个操作数 + switch (operation) { + case 0: d_output[idx] = double32_to_double(double32_add(a, b)) - (va + vb); break; + case 1: d_output[idx] = double32_to_double(double32_sub(a, b)) - (va - vb); break; + case 2: d_output[idx] = double32_to_double(double32_mul(a, b)) - (va * vb); break; + case 3: d_output[idx] = double32_to_double(double32_div(a, b)) - (va / vb); break; + case 4: d_output[idx] = double32_to_double(double32_sin(a))-sin(va); break; + case 5: d_output[idx] = double32_to_double(double32_cos(a))-cos(va); break; + case 6: d_output[idx] = double32_to_double(double32_log2(a))-log2(va); break; + case 7: d_output[idx] = double32_to_double(double32_log10(a))-log10(va); break; + case 8: d_output[idx] = double32_to_double(double32_ln(a))-log(va); break; + case 9: d_output[idx] = double32_to_double(double32_exp(a))-exp(va); break; + case 10: d_output[idx] = double32_to_double(double32_pow(a, b))-pow(va,vb); break; + case 11: d_output[idx] = double32_to_double(double32_sqrt(a))-sqrt(va); break; + } + if (idx == 1) { + switch (operation) { + case 0: d_output[idx] = printf("add, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 1: d_output[idx] = printf("sub, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 2: d_output[idx] = printf("mul, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 3: d_output[idx] = printf("div, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 4: d_output[idx] = printf("sin, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 5: d_output[idx] = printf("cos, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 6: d_output[idx] = printf("log2, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 7: d_output[idx] = printf("log10, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 8: d_output[idx] = printf("ln, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 9: d_output[idx] = printf("exp, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 10: d_output[idx] = printf("pow, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + case 11: d_output[idx] = printf("sqrt, \tva=%f,vb=%f,d_output=%e\n", va, vb, d_output[idx]);; break; + } + + } + + } +} + +void test_function(int operation, const char* operation_name) { + const int size = 1024; + double* h_input = new double[size]; + double* h_output = new double[size]; + double* d_input; + double* d_output; + + // 初始化数据 + for (int i = 0; i < size; ++i) { + h_input[i] = static_cast(i)*100000.0 + 0.1234564324324232421421421421* 100000.0; + } + + // 分配设备内存 + cudaMalloc(&d_input, size * sizeof(double)); + cudaMalloc(&d_output, size * sizeof(double)); + cudaMemcpy(d_input, h_input, size * sizeof(double), cudaMemcpyHostToDevice); + + // 启动 CUDA 内核 + auto start = std::chrono::high_resolution_clock::now(); + test_kernel << <(size + 255) / 256, 256 >> > (d_input, d_output, size, operation); + cudaDeviceSynchronize(); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + std::cout << operation_name << " time: " << elapsed.count() << " seconds" << std::endl; + + // 复制结果回主机 + cudaMemcpy(h_output, d_output, size * sizeof(double), cudaMemcpyDeviceToHost); + + // 计算精度 + double total_error = 0.0; + for (int i = 0; i < size; ++i) { + + double error = std::abs(h_output[i]); + total_error += error; + } + double average_error = total_error / size; + std::cout << operation_name << " average error: " << average_error << std::endl; + std::cout << operation_name << " average error: " << average_error << std::endl; + + // 释放内存 + delete[] h_input; + delete[] h_output; + cudaFree(d_input); + cudaFree(d_output); +} + + +__global__ void time_test_kernel(double* d_input, double* d_output, int size, int operation) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + double va = d_input[idx]; + double vb = va + 1.0; + double32 a = double_to_double32(va); + double32 b = double_to_double32(vb); // 用于二元操作的第二个操作数 + double result = 0.0; + for (long i = 0; i< 1000000; i++) { + switch (operation) { + case 0: result+= (double32_add(a, b).high); break; + case 1: result+= (double32_sub(a, b).high); break; + case 2: result+= (double32_mul(a, b).high); break; + case 3: result+= (double32_div(a, b).high); break; + case 4: result+= (double32_sin(a).high); break; + case 5: result+= (double32_cos(a).high); break; + case 6: result+= (double32_log2(a).high); break; + case 7: result+= (double32_log10(a).high); break; + case 8: result+= (double32_ln(a).high); break; + case 9: result+= (double32_exp(a).high); break; + case 10: result+= (double32_pow(a, b).high); break; + case 11: result+= (double32_sqrt(a).high); break; + } + } + d_output[idx]=result; + } +} + +__global__ void time_test_double_kernel(double* d_input, double* d_output, int size, int operation) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < size) { + double va = d_input[idx]; + double vb = va + 1.0; + double32 a = double_to_double32(va); + double32 b = double_to_double32(vb); // 用于二元操作的第二个操作数 + double result = 0.0; + for (long i = 0; i < 1000000; i++) { + switch (operation) { + case 0: result+= (va + vb); break; + case 1: result+= (va - vb); break; + case 2: result+= (va * vb); break; + case 3: result+= (va / vb); break; + case 4: result+= sin(va); break; + case 5: result+= cos(va); break; + case 6: result+= log2(va); break; + case 7: result+= log10(va); break; + case 8: result+= log(va); break; + case 9: result+= exp(va); break; + case 10:result+= pow(va, vb); break; + case 11:result+= sqrt(va); break; + } + } + d_output[idx] = result; + } +} + + + +void test_double_to_double32() { + const int size = 1024; + double* h_input = new double[size]; + double* h_output = new double[size]; + double* d_input; + double* d_output; + + // 初始化数据 + for (int i = 0; i < size; ++i) { + h_input[i] = static_cast(i) * 1000000.0 + 0.123456789011 * 1000000.0; + } + + // 分配设备内存 + cudaMalloc(&d_input, size * sizeof(double)); + cudaMalloc(&d_output, size * sizeof(double)); + cudaMemcpy(d_input, h_input, size * sizeof(double), cudaMemcpyHostToDevice); + + // 启动 CUDA 内核 + auto start = std::chrono::high_resolution_clock::now(); + test_double_to_double32_kernel << <(size + 255) / 256, 256 >> > (d_input, d_output, size); + cudaDeviceSynchronize(); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + std::cout << "double_to_double32 conversion time: " << elapsed.count() << " seconds" << std::endl; + + // 复制结果回主机 + cudaMemcpy(h_output, d_output, size * sizeof(double), cudaMemcpyDeviceToHost); + + // 计算精度 + double total_error = 0.0; + for (int i = 0; i < size; ++i) { + double error = std::abs(h_output[i]); + total_error += error; + } + double average_error = total_error / size; + std::cout << "double_to_double32 average error: " << average_error << std::endl; + + // 计算有效位数 + double effective_digits = -std::log10(average_error); + std::cout << "double_to_double32 effective digits: " << effective_digits << std::endl; + + // 释放内存 + delete[] h_input; + delete[] h_output; + cudaFree(d_input); + cudaFree(d_output); +} + + +void time_test_function(int operation, const char* operation_name) { + const int size = 1024; + double* h_input = new double[size]; + double* h_output = new double[size]; + double* d_input; + double* d_output; + + // 初始化数据 + for (int i = 0; i < size; ++i) { + h_input[i] = static_cast(i) * 100000.0 + 0.1234564324324232421421421421 * 100000.0; + } + + // 分配设备内存 + cudaMalloc(&d_input, size * sizeof(double)); + cudaMalloc(&d_output, size * sizeof(double)); + cudaMemcpy(d_input, h_input, size * sizeof(double), cudaMemcpyHostToDevice); + + // 启动 CUDA 内核 (double32) + auto start = std::chrono::high_resolution_clock::now(); + time_test_kernel << <(size + 255) / 256, 256 >> > (d_input, d_output, size, operation); + cudaDeviceSynchronize(); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + std::cout << operation_name << " (double32) time: " << elapsed.count() << " seconds" << std::endl; + + // 复制结果回主机 + cudaMemcpy(h_output, d_output, size * sizeof(double), cudaMemcpyDeviceToHost); + + // 计算精度 + double total_error = 0.0; + for (int i = 0; i < size; ++i) { + double error = std::abs(h_output[i]); + total_error += error; + } + double average_error = total_error / size; + std::cout << operation_name << " (double32) average error: " << average_error << std::endl; + + // 启动 CUDA 内核 (double) + start = std::chrono::high_resolution_clock::now(); + time_test_double_kernel << <(size + 255) / 256, 256 >> > (d_input, d_output, size, operation); + cudaDeviceSynchronize(); + end = std::chrono::high_resolution_clock::now(); + elapsed = end - start; + std::cout << operation_name << " (double) time: " << elapsed.count() << " seconds" << std::endl; + + // 复制结果回主机 + cudaMemcpy(h_output, d_output, size * sizeof(double), cudaMemcpyDeviceToHost); + + // 计算精度 + total_error = 0.0; + for (int i = 0; i < size; ++i) { + double error = std::abs(h_output[i]); + total_error += error; + } + average_error = total_error / size; + std::cout << operation_name << " (double) average error: " << average_error << std::endl; + + // 释放内存 + delete[] h_input; + delete[] h_output; + cudaFree(d_input); + cudaFree(d_output); +} + +void time_test_double_to_double32() { + const int size = 1024; + double* h_input = new double[size]; + double* h_output = new double[size]; + double* d_input; + double* d_output; + + // 初始化数据 + for (int i = 0; i < size; ++i) { + h_input[i] = static_cast(i) * 1000000.0 + 0.123456789011 * 1000000.0; + } + + // 分配设备内存 + cudaMalloc(&d_input, size * sizeof(double)); + cudaMalloc(&d_output, size * sizeof(double)); + cudaMemcpy(d_input, h_input, size * sizeof(double), cudaMemcpyHostToDevice); + + // 启动 CUDA 内核 + auto start = std::chrono::high_resolution_clock::now(); + test_double_to_double32_kernel << <(size + 255) / 256, 256 >> > (d_input, d_output, size); + cudaDeviceSynchronize(); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + std::cout << "double_to_double32 conversion time: " << elapsed.count() << " seconds" << std::endl; + + // 复制结果回主机 + cudaMemcpy(h_output, d_output, size * sizeof(double), cudaMemcpyDeviceToHost); + + // 计算精度 + double total_error = 0.0; + for (int i = 0; i < size; ++i) { + double error = std::abs(h_output[i]); + total_error += error; + } + double average_error = total_error / size; + std::cout << "double_to_double32 average error: " << average_error << std::endl; + + // 计算有效位数 + double effective_digits = -std::log10(average_error); + std::cout << "double_to_double32 effective digits: " << effective_digits << std::endl; + + // 释放内存 + delete[] h_input; + delete[] h_output; + cudaFree(d_input); + cudaFree(d_output); +} + + + +void time_test_add() { time_test_function(0, "Addition"); } +void time_test_sub() { time_test_function(1, "Subtraction"); } +void time_test_mul() { time_test_function(2, "Multiplication"); } +void time_test_div() { time_test_function(3, "Division"); } +void time_test_sin() { time_test_function(4, "Sine"); } +void time_test_cos() { time_test_function(5, "Cosine"); } +void time_test_log2() { time_test_function(6, "Log2"); } +void time_test_log10() { time_test_function(7, "Log10"); } +void time_test_ln() { time_test_function(8, "Natural Logarithm"); } +void time_test_exp() { time_test_function(9, "Exponential"); } +void time_test_pow() { time_test_function(10, "Power"); } +void time_test_sqrt() { time_test_function(11, "Square Root"); } diff --git a/GPUBaseLib/GPUTool/GPUDouble32.cuh b/GPUBaseLib/GPUTool/GPUDouble32.cuh new file mode 100644 index 0000000..b1e571d --- /dev/null +++ b/GPUBaseLib/GPUTool/GPUDouble32.cuh @@ -0,0 +1,76 @@ +#ifndef __GPUDOUBLE32__H__ +#define __GPUDOUBLE32__H__ + +// 定义double32 struct ,使用fp32模拟double +struct double32{ + float high; + float low; + __device__ __host__ double32(float h = 0, float l = 0) : high(h), low(l) {} +}; + +extern __device__ double32 double_to_double32(double value); +extern __device__ __host__ double double32_to_double(const double32& value); +// 使用 PTX 优化的加法函数 +extern __device__ double32 double32_add(const double32& a, const double32& b); + +// 使用 PTX 优化的减法函数 +extern __device__ double32 double32_sub(const double32& a, const double32& b); + +// 使用 PTX 优化的乘法函数 +extern __device__ double32 double32_mul(const double32& a, const double32& b); + +// 使用 PTX 优化的除法函数 +extern __device__ double32 double32_div(const double32& a, const double32& b); + +// 使用 PTX 优化的 sin 函数 +extern __device__ double32 double32_sin(const double32& a); + +// 使用 PTX 优化的 cos 函数 +extern __device__ double32 double32_cos(const double32& a); + +// 使用 PTX 优化的 log2 函数 +extern __device__ double32 double32_log2(const double32& a); + +// 使用 PTX 优化的 log10 函数 +extern __device__ double32 double32_log10(const double32& a); + +// 使用 PTX 优化的 ln 函数 +extern __device__ double32 double32_ln(const double32& a); + +// 使用 PTX 优化的 exp 函数 +extern __device__ double32 double32_exp(const double32& a); + +// 使用 PTX 优化的 pow 函数 +extern __device__ double32 double32_pow(const double32& a, const double32& b); + +// 使用 PTX 优化的 sqrt 函数 +extern __device__ double32 double32_sqrt(const double32& a); + + +extern "C" GPUBASELIBAPI void test_double_to_double32(); +extern "C" GPUBASELIBAPI void test_function(int operation, const char* operation_name); + + + + + +extern "C" GPUBASELIBAPI void time_test_add(); +extern "C" GPUBASELIBAPI void time_test_sub(); +extern "C" GPUBASELIBAPI void time_test_mul(); +extern "C" GPUBASELIBAPI void time_test_div(); +extern "C" GPUBASELIBAPI void time_test_sin(); +extern "C" GPUBASELIBAPI void time_test_cos(); +extern "C" GPUBASELIBAPI void time_test_log2(); +extern "C" GPUBASELIBAPI void time_test_log10(); +extern "C" GPUBASELIBAPI void time_test_ln(); +extern "C" GPUBASELIBAPI void time_test_exp(); +extern "C" GPUBASELIBAPI void time_test_pow(); +extern "C" GPUBASELIBAPI void time_test_sqrt(); + + + + + + + +#endif // !__GPUDOUBLE32__H__ diff --git a/Toolbox/SimulationSARTool/GPUBpSimulation.cu b/Toolbox/SimulationSARTool/GPUBpSimulation.cu index 4c66f27..03648eb 100644 --- a/Toolbox/SimulationSARTool/GPUBpSimulation.cu +++ b/Toolbox/SimulationSARTool/GPUBpSimulation.cu @@ -99,9 +99,9 @@ void RFPCProcess(double Tx, double Ty, double Tz, double* AntY = (double*)mallocCUDADevice(sizeof(double) * d_data.Np); double* AntZ = (double*)mallocCUDADevice(sizeof(double) * d_data.Np); - HostToDevice(d_data.AntX, AntX, sizeof(double) * d_data.Np); printf("antX host to device finished!!\n"); - HostToDevice(d_data.AntY, AntY, sizeof(double) * d_data.Np); printf("antY host to device finished!!\n"); - HostToDevice(d_data.AntZ, AntZ, sizeof(double) * d_data.Np); printf("antZ host to device finished!!\n"); + HostToDevice(d_data.AntX, AntX, sizeof(double) * d_data.Np); //printf("antX host to device finished!!\n"); + HostToDevice(d_data.AntY, AntY, sizeof(double) * d_data.Np); //printf("antY host to device finished!!\n"); + HostToDevice(d_data.AntZ, AntZ, sizeof(double) * d_data.Np); //printf("antZ host to device finished!!\n"); double minF = d_data.minF[0]; long grid_size = (d_data.Np + BLOCK_SIZE - 1) / BLOCK_SIZE; double dfreq = d_data.deltaF; diff --git a/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cu b/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cu index 0156e2b..cdf1705 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cu +++ b/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cu @@ -278,7 +278,337 @@ void BPBasic0(GPUDATA& h_data) - + + +/** 单精度代码**********************************************************************************************/ + + + + + + + +__global__ void phaseCompensationKernel_single(cufftComplex* phdata, const float* Freq, float r, int K, int Na) { + int freqIdx = blockIdx.x * blockDim.x + threadIdx.x; + int pulseIdx = blockIdx.y * blockDim.y + threadIdx.y; + + if (freqIdx >= K || pulseIdx >= Na) return; + + int idx = pulseIdx * K + freqIdx; + float phase = 4 * PI * Freq[freqIdx] * r / c; + float cos_phase = cosf(phase); + float sin_phase = sinf(phase); + + cufftComplex ph = phdata[idx]; + float new_real = ph.x * cos_phase - ph.y * sin_phase; + float new_imag = ph.x * sin_phase + ph.y * cos_phase; + phdata[idx] = make_cuComplex(new_real, new_imag); +} + +__global__ void fftshiftKernel_single(cufftComplex* data, int Nfft, int Np) { + int pulse = blockIdx.x * blockDim.x + threadIdx.x; + if (pulse >= Np) return; + + int half = Nfft / 2; + for (int i = 0; i < half; ++i) { + cufftComplex temp = data[pulse * Nfft + i]; + data[pulse * Nfft + i] = data[pulse * Nfft + i + half]; + data[pulse * Nfft + i + half] = temp; + } +} + +__global__ void processPulseKernel_single( + long prfid, + int nx, int ny, + const float* x_mat, const float* y_mat, const float* z_mat, + float AntX, float AntY, float AntZ, + float R0, float minF, + const cufftComplex* rc_pulse, + const float r_start, const float dr, const int nR, + cufftComplex* im_final +) { + // + + long long idx = blockIdx.x * blockDim.x + threadIdx.x; + long long pixelcount = nx * ny; + if (idx >= pixelcount) return; + + //printf("processPulseKernel start!!\n"); + + //if (x >= nx || y >= ny) return; + //int idx = x * ny + y; + + + float dx = AntX - x_mat[idx]; + float dy = AntY - y_mat[idx]; + float dz = AntZ - z_mat[idx]; + + //printf("processPulseKernel xmat !!\n"); + float R = sqrtf(dx * dx + dy * dy + dz * dz); + float dR = R - R0; + + if (dR < r_start || dR >= (r_start + dr * (nR - 1))) return; + // Linear interpolation + float pos = (dR - r_start) / dr; + int index = (int)floorf(pos); + float weight = pos - index; + + if (index < 0 || index >= nR - 1) return; + + cufftComplex rc_low = rc_pulse[prfid * nR + index]; + cufftComplex rc_high = rc_pulse[prfid * nR + index + 1]; + cufftComplex rc_interp; + rc_interp.x = rc_low.x * (1 - weight) + rc_high.x * weight; + rc_interp.y = rc_low.y * (1 - weight) + rc_high.y * weight; + + // Phase correction + float phase = 4 * PI * minF / c * dR; + float cos_phase = cosf(phase); + float sin_phase = sinf(phase); + + cufftComplex phCorr; + phCorr.x = rc_interp.x * cos_phase - rc_interp.y * sin_phase; + phCorr.y = rc_interp.x * sin_phase + rc_interp.y * cos_phase; + + // Accumulate + im_final[idx].x += phCorr.x; + im_final[idx].y += phCorr.y; + //printf("r_start=%e;dr=%e;nR=%d\n", r_start, dr, nR); + +} + + + + + + + + + + + + + + + + + + + + + + + + + + +void bpBasic0CUDA_single(GPUDATA_single& data, int flag, float* h_R) +{ + // Phase compensation + if (flag == 1) { + dim3 block(16, 16); + dim3 grid((data.K + 15) / 16, (data.Np + 15) / 16); + phaseCompensationKernel_single << > > (data.phdata, data.Freq, data.R0, data.K, data.Np); + PrintLasterError("bpBasic0CUDA Phase compensation"); + //data.R0 = data.r; // 假设data.r已正确设置 + } + + // FFT处理 + cufftHandle plan; + cufftPlan1d(&plan, data.Nfft, CUFFT_C2C, data.Np); + cufftExecC2C(plan, data.phdata, data.phdata, CUFFT_INVERSE); + cufftDestroy(plan); + + // FFT移位 + dim3 blockShift(256); + dim3 gridShift((data.Np + 255) / 256); + fftshiftKernel_single << > > (data.phdata, data.Nfft, data.Np); + PrintLasterError("bpBasic0CUDA Phase FFT Process"); + + printfinfo("fft finished!!\n"); + // 图像重建 + + + + float r_start = data.r_vec[0]; + float dr = (data.r_vec[data.Nfft - 1] - r_start) / (data.Nfft - 1); + printfinfo("dr = %f\n", dr); + long pixelcount = data.nx * data.ny; + long grid_size = (pixelcount + BLOCK_SIZE - 1) / BLOCK_SIZE; + printfinfo("grid finished!!\n"); + + //float* d_R = (float*)mallocCUDADevice(sizeof(float) * data.nx * data.ny); + printfinfo("r_start=%e;dr=%e;nR=%d\n", r_start, dr, data.Nfft); + printfinfo("BPimage .....\n"); + for (long ii = 0; ii < data.Np; ++ii) { + processPulseKernel_single << > > ( + ii, + data.nx, data.ny, + data.x_mat, data.y_mat, data.z_mat, + data.AntX[ii], data.AntY[ii], data.AntZ[ii], + data.R0, data.minF[ii], + data.phdata, + r_start, dr, data.Nfft, + data.im_final + //,d_R + ); + PrintLasterError("processPulseKernel"); + if (ii % 1000 == 0) { + printfinfo("\rPRF(%f %) %d / %d\t\t\t\t", (ii * 100.0 / data.Np), ii, data.Np); + } + } + //FreeCUDADevice(d_R); + + PrintLasterError("bpBasic0CUDA Phase BPimage Process finished!!"); +} + +void initGPUData_single(GPUDATA_single& h_data, GPUDATA_single& d_data) +{ + d_data.AntX = h_data.AntX; //(double*)mallocCUDADevice(sizeof(double) * h_data.Np); + d_data.AntY = h_data.AntY;//(double*)mallocCUDADevice(sizeof(double) * h_data.Np); + d_data.AntZ = h_data.AntZ;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np); + d_data.minF = h_data.minF;// (double*)mallocCUDADevice(sizeof(double) * h_data.Np); + d_data.x_mat = (float*)mallocCUDADevice(sizeof(float) * h_data.nx * h_data.ny); + d_data.y_mat = (float*)mallocCUDADevice(sizeof(float) * h_data.nx * h_data.ny); + d_data.z_mat = (float*)mallocCUDADevice(sizeof(float) * h_data.nx * h_data.ny); + d_data.r_vec = h_data.r_vec;// (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft); + d_data.Freq = (float*)mallocCUDADevice(sizeof(float) * h_data.Nfft); + d_data.phdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.Nfft * h_data.Np); + d_data.im_final = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.nx * h_data.ny); + + //HostToDevice(h_data.AntX, d_data.AntX,sizeof(double) * h_data.Np); + //HostToDevice(h_data.AntY, d_data.AntY,sizeof(double) * h_data.Np); + //HostToDevice(h_data.AntZ, d_data.AntZ,sizeof(double) * h_data.Np); + //HostToDevice(h_data.minF, d_data.minF,sizeof(double) * h_data.Np); + HostToDevice(h_data.x_mat, d_data.x_mat, sizeof(float) * h_data.nx * h_data.ny); printf("image X Copy finished!!!\n"); + HostToDevice(h_data.y_mat, d_data.y_mat, sizeof(float) * h_data.nx * h_data.ny); printf("image Y Copy finished!!!\n"); + HostToDevice(h_data.z_mat, d_data.z_mat, sizeof(float) * h_data.nx * h_data.ny); printf("image Z Copy finished!!!\n"); + HostToDevice(h_data.Freq, d_data.Freq, sizeof(float) * h_data.Nfft); + //HostToDevice(h_data.r_vec, d_data.r_vec, sizeof(double) * h_data.Nfft); + HostToDevice(h_data.phdata, d_data.phdata, sizeof(cuComplex) * h_data.Nfft * h_data.Np); printf("image echo Copy finished!!!\n"); + HostToDevice(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); printf("image data Copy finished!!!\n"); + + // 拷贝标量参数 + d_data.Nfft = h_data.Nfft; + d_data.K = h_data.K; + d_data.Np = h_data.Np; + d_data.nx = h_data.nx; + d_data.ny = h_data.ny; + d_data.R0 = h_data.R0; + d_data.deltaF = h_data.deltaF; +} + +void freeGPUData_single(GPUDATA_single& d_data) +{ + //FreeCUDADevice((d_data.AntX)); +//FreeCUDADevice((d_data.AntY)); +//FreeCUDADevice((d_data.AntZ)); +//FreeCUDADevice((d_data.minF)); + FreeCUDADevice((d_data.x_mat)); + FreeCUDADevice((d_data.y_mat)); + FreeCUDADevice((d_data.z_mat)); + //FreeCUDADevice((d_data.r_vec)); + FreeCUDADevice((d_data.Freq)); + FreeCUDADevice((d_data.phdata)); + FreeCUDADevice((d_data.im_final)); +} + +void freeHostData_single(GPUDATA_single& h_data) +{ + //FreeCUDAHost((h_data.AntX)); +//FreeCUDAHost((h_data.AntY)); +//FreeCUDAHost((h_data.AntZ)); + FreeCUDAHost((h_data.minF)); + //FreeCUDAHost((h_data.x_mat)); + //FreeCUDAHost((h_data.y_mat)); + //FreeCUDAHost((h_data.z_mat)); + FreeCUDAHost((h_data.r_vec)); + FreeCUDAHost((h_data.Freq)); + FreeCUDAHost((h_data.phdata)); + FreeCUDAHost((h_data.im_final)); +} + +void BPBasic0_single(GPUDATA_single& h_data) +{ + GPUDATA_single d_data; + initGPUData_single(h_data, d_data); + bpBasic0CUDA_single(d_data, 0); + DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); + freeGPUData_single(d_data); +} + +void copy_Host_Single_GPUData(GPUDATA_single& h_data, GPUDATA& d_data) +{ + d_data.Nfft = h_data.Nfft; + d_data.K = h_data.K; + d_data.Np = h_data.Np; + d_data.nx = h_data.nx; + d_data.ny = h_data.ny; + d_data.R0 = h_data.R0; + d_data.deltaF = h_data.deltaF; + + + d_data.AntX = (double*)mallocCUDAHost(sizeof(double) * h_data.Np); + d_data.AntY = (double*)mallocCUDAHost(sizeof(double) * h_data.Np); + d_data.AntZ = (double*)mallocCUDAHost(sizeof(double) * h_data.Np); + d_data.r_vec = (double*)mallocCUDAHost(sizeof(double) * h_data.Nfft); + d_data.minF = (double*)mallocCUDAHost(sizeof(double) * h_data.Np); + + d_data.x_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny); + d_data.y_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny); + d_data.z_mat = (double*)mallocCUDADevice(sizeof(double) * h_data.nx * h_data.ny); + d_data.Freq = (double*)mallocCUDADevice(sizeof(double) * h_data.Nfft); + + d_data.phdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.Nfft * h_data.Np); + d_data.im_final = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * h_data.nx * h_data.ny); + + + for (long i = 0; i < h_data.Np; i++) { + d_data.AntX[i] = h_data.AntX[i]; + d_data.AntY[i] = h_data.AntY[i]; + d_data.AntZ[i] = h_data.AntZ[i]; + d_data.minF[i] = h_data.minF[i]; + } + + + + for (long i = 0; i < h_data.Nfft; i++) { + d_data.r_vec[i] = h_data.r_vec[i]; + + } + + double* x_mat = (double*)mallocCUDAHost(sizeof(double) * h_data.nx * h_data.ny); + double* y_mat = (double*)mallocCUDAHost(sizeof(double) * h_data.nx * h_data.ny); + double* z_mat = (double*)mallocCUDAHost(sizeof(double) * h_data.nx * h_data.ny); + + + for (long i = 0; i < h_data.ny; i++) { + for (long j = 0; j < h_data.nx; j++) { + x_mat[i * h_data.nx + j] = h_data.x_mat[i * h_data.nx + j]; + y_mat[i * h_data.nx + j] = h_data.y_mat[i * h_data.nx + j]; + z_mat[i * h_data.nx + j] = h_data.z_mat[i * h_data.nx + j]; + } + } + + double* h_freq = (double*)mallocCUDAHost(sizeof(double) * h_data.Nfft); + for (long i = 0; i < h_data.Nfft; i++) { + h_freq[i] = h_data.Freq[i]; + } + HostToDevice(h_freq, d_data.Freq, sizeof(double) * h_data.Nfft); + HostToDevice(x_mat, d_data.x_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image X Copy finished!!!\n"); + HostToDevice(y_mat, d_data.y_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image Y Copy finished!!!\n"); + HostToDevice(z_mat, d_data.z_mat, sizeof(double) * h_data.nx * h_data.ny); printf("image Z Copy finished!!!\n"); + + HostToDevice(h_data.phdata, d_data.phdata, sizeof(cuComplex) * h_data.Nfft * h_data.Np); printf("image echo Copy finished!!!\n"); + HostToDevice(h_data.im_final, d_data.im_final, sizeof(cuComplex) * h_data.nx * h_data.ny); printf("image data Copy finished!!!\n"); + FreeCUDAHost(h_freq); + FreeCUDAHost(x_mat); + FreeCUDAHost(y_mat); + FreeCUDAHost(z_mat); + + +} + //int main() { // GPUDATA h_data, d_data; // @@ -301,4 +631,5 @@ void BPBasic0(GPUDATA& h_data) // freeGPUData(d_data); // // return 0; -//} \ No newline at end of file +//} + \ No newline at end of file diff --git a/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cuh b/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cuh index 47857e2..c971640 100644 --- a/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cuh +++ b/Toolbox/SimulationSARTool/SimulationSAR/BPBasic0_CUDA.cuh @@ -56,11 +56,15 @@ extern "C" { extern "C" { // 函数废弃,因为这个成像精度不够 - //void bpBasic0CUDA_single(GPUDATA_single& data, int flag, float* h_R = nullptr); - //void initGPUData_single(GPUDATA_single& h_data, GPUDATA_single& d_data); - //void freeGPUData_single(GPUDATA_single& d_data); - //void freeHostData_single(GPUDATA_single& d_data); - //void BPBasic0_single(GPUDATA_single& h_data); + void bpBasic0CUDA_single(GPUDATA_single& data, int flag, float* h_R = nullptr); + void initGPUData_single(GPUDATA_single& h_data, GPUDATA_single& d_data); + void freeGPUData_single(GPUDATA_single& d_data); + void freeHostData_single(GPUDATA_single& d_data); + void BPBasic0_single(GPUDATA_single& h_data); + + void copy_Host_Single_GPUData(GPUDATA_single& h_data, GPUDATA& d_data); + + }; diff --git a/Toolbox/SimulationSARTool/SimulationSARTool.vcxproj b/Toolbox/SimulationSARTool/SimulationSARTool.vcxproj index 455e87e..d538eab 100644 --- a/Toolbox/SimulationSARTool/SimulationSARTool.vcxproj +++ b/Toolbox/SimulationSARTool/SimulationSARTool.vcxproj @@ -41,7 +41,7 @@ Unicode - DynamicLibrary + Application v143 false true diff --git a/Toolbox/SimulationSARTool/UnitTestMain.cpp b/Toolbox/SimulationSARTool/UnitTestMain.cpp index 7ed4763..1d1c4e6 100644 --- a/Toolbox/SimulationSARTool/UnitTestMain.cpp +++ b/Toolbox/SimulationSARTool/UnitTestMain.cpp @@ -15,6 +15,7 @@ #include "BPBasic0_CUDA.cuh" #include "ImageOperatorBase.h" #include "GPUBpSimulation.cuh" +#include "GPUDouble32.cuh" #include @@ -120,11 +121,19 @@ void testSimualtionEchoPoint() { /** 7. 构建回波 **************************************************************************************************/ GPUDATA d_data; initGPUData(h_data, d_data); + + long targetStep = dem_rowCount * dem_ColCount / 100; + for (long i = 0; i < dem_rowCount * dem_ColCount; i = i + targetStep) { + float Tx = h_data.x_mat[i], + Ty = h_data.y_mat[i], + Tz = h_data.z_mat[i]; + float Tslx = h_data.x_mat[i], Tsly = h_data.y_mat[i], Tslz = h_data.z_mat[i]; - RFPCProcess(Tx, Ty, Tz, - Tslx, Tsly, Tslz, // 目标的坡面向量 - p1, p2, p3, p4, p5, p6, - d_data); + RFPCProcess(Tx, Ty, Tz, + Tslx, Tsly, Tslz, // 目标的坡面向量 + p1, p2, p3, p4, p5, p6, + d_data); + } /** 8. 展示回波 **************************************************************************************************/ @@ -404,405 +413,641 @@ void testBpImage() { } -// -// -//void testSimualtionEchoPoint_single() { -// GPUDATA_single h_data{}; -// /** 1. 轨道 **************************************************************************************************/ -// qDebug() << u8"1.轨道文件读取中。。。"; -// QString inGPSPath = u8"C:\\Users\\30453\\Desktop\\script\\data\\GF3_Simulation.gpspos.data"; -// long gpspoints = gdalImage(inGPSPath).height; -// std::shared_ptr antpos = SatelliteAntPosOperator::readAntPosFile(inGPSPath, gpspoints); -// h_data.AntX = (float*)mallocCUDAHost(sizeof(float) * gpspoints); -// h_data.AntY = (float*)mallocCUDAHost(sizeof(float) * gpspoints); -// h_data.AntZ = (float*)mallocCUDAHost(sizeof(float) * gpspoints); -// -// for (long i = 0; i < gpspoints; i = i + 1) { -// h_data.AntX[i] = antpos.get()[i].Px; -// h_data.AntY[i] = antpos.get()[i].Py; -// h_data.AntZ[i] = antpos.get()[i].Pz; -// } -// //gpspoints = gpspoints / 2; -// qDebug() << "GPS points Count:\t" << gpspoints; -// qDebug() << "PRF 0:\t " << QString("%1,%2,%3").arg(h_data.AntX[0]).arg(h_data.AntY[0]).arg(h_data.AntZ[0]); -// qDebug() << "PRF " << gpspoints - 1 << ":\t " << QString("%1,%2,%3") -// .arg(h_data.AntX[gpspoints - 1]) -// .arg(h_data.AntY[gpspoints - 1]) -// .arg(h_data.AntZ[gpspoints - 1]); -// /** 2. 成像网格 **************************************************************************************************/ -// qDebug() << u8"轨道文件读取结束\n2.成像网格读取。。。"; -// QString demxyzPath = u8"C:\\Users\\30453\\Desktop\\script\\已修改GF3场景\\data\\demxyz.bin"; -// gdalImage demgridimg(demxyzPath); -// long dem_rowCount = demgridimg.height; -// long dem_ColCount = demgridimg.width; -// std::shared_ptr demX = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); -// std::shared_ptr demY = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); -// std::shared_ptr demZ = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); -// -// h_data.x_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); -// h_data.y_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); -// h_data.z_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); -// -// for (long i = 0; i < dem_rowCount; i++) { -// for (long j = 0; j < dem_ColCount; j++) { -// h_data.x_mat[i * dem_ColCount + j] = demX.get()[i * dem_ColCount + j]; -// h_data.y_mat[i * dem_ColCount + j] = demY.get()[i * dem_ColCount + j]; -// h_data.z_mat[i * dem_ColCount + j] = demZ.get()[i * dem_ColCount + j]; -// } -// } -// qDebug() << "dem row Count:\t" << dem_rowCount; -// qDebug() << "dem col Count:\t" << dem_ColCount; -// -// qDebug() << u8"成像网格读取结束"; -// /** 3. 频率网格 **************************************************************************************************/ -// qDebug() << u8"3.处理频率"; -// float centerFreq = 5.3e9; -// float bandwidth = 40e6; -// long freqpoints = 2048; -// std::shared_ptr freqlist(getFreqPoints_mallocHost_single(centerFreq - bandwidth / 2, centerFreq + bandwidth / 2, freqpoints), FreeCUDAHost); -// std::shared_ptr minF(new float[gpspoints], delArrPtr); -// for (long i = 0; i < gpspoints; i++) { -// minF.get()[i] = freqlist.get()[0]; -// } -// h_data.deltaF = bandwidth / (freqpoints - 1); -// qDebug() << "start Freq:\t" << centerFreq - bandwidth / 2; -// qDebug() << "end Freq:\t" << centerFreq + bandwidth / 2; -// qDebug() << "freq points:\t" << freqpoints; -// qDebug() << "delta freq:\t" << freqlist.get()[1] - freqlist.get()[0]; -// qDebug() << u8"频率结束"; -// /** 4. 初始化回波 **************************************************************************************************/ -// qDebug() << u8"4.初始化回波"; -// std::shared_ptr phdata(createEchoPhase_mallocHost(gpspoints, freqpoints), FreeCUDAHost); -// qDebug() << "Azimuth Points:\t" << gpspoints; -// qDebug() << "Range Points:\t" << freqpoints; -// qDebug() << u8"初始化回波结束"; -// /** 5. 初始化图像 **************************************************************************************************/ -// qDebug() << u8"5.初始化图像"; -// h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * dem_rowCount * dem_ColCount); -// qDebug() << "Azimuth Points:\t" << gpspoints; -// qDebug() << "Range Points:\t" << freqpoints; -// qDebug() << u8"初始化图像结束"; -// -// /** 6. 模型参数初始化 **************************************************************************************************/ -// qDebug() << u8"6.模型参数初始化"; -// -// h_data.nx = dem_ColCount; -// h_data.ny = dem_rowCount; -// -// h_data.Np = gpspoints; -// h_data.Freq = freqlist.get(); -// h_data.minF = minF.get(); -// h_data.Nfft = freqpoints; -// h_data.K = h_data.Nfft; -// h_data.phdata = phdata.get(); -// h_data.R0 = 900000; -// qDebug() << u8"模型参数结束"; -// -// -// /** 7. 目标 **************************************************************************************************/ -// float Tx = -2029086.618142, Ty = 4139594.934504, Tz = 4392846.782027; -// float Tslx = -2029086.618142, Tsly = 4139594.934504, Tslz = 4392846.782027; -// float p1 = 1, p2 = 0, p3 = 0, p4 = 0, p5 = 0, p6 = 0; -// -// /** 7. 构建回波 **************************************************************************************************/ -// GPUDATA_single d_data; -// initGPUData_single(h_data, d_data); -// -// RFPCProcess_single(Tx, Ty, Tz, -// Tslx, Tsly, Tslz, // 目标的坡面向量 -// p1, p2, p3, p4, p5, p6, -// d_data); -// -// -// /** 8. 展示回波 **************************************************************************************************/ -// { -// DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// cuComplex* h_ifftphdata = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// cuComplex* d_ifftphdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// CUDAIFFT(d_data.phdata, d_ifftphdata, d_data.Np, d_data.Nfft, d_data.Nfft); -// FFTShift1D(d_ifftphdata, d_data.Np, d_data.Nfft); -// DeviceToHost(h_ifftphdata, d_ifftphdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); -// { -// for (long i = 0; i < d_data.Np; i++) { -// for (long j = 0; j < d_data.Nfft; j++) { -// ifftdata.get()[i * d_data.Nfft + j] = -// std::complex( -// h_ifftphdata[i * d_data.Nfft + j].x, -// h_ifftphdata[i * d_data.Nfft + j].y); -// } -// } -// } -// testOutComplexDoubleArr(QString("echo_ifft_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); -// -// FreeCUDADevice(d_ifftphdata); -// FreeCUDAHost(h_ifftphdata); -// -// } -// /** 9. 成像 **************************************************************************************************/ -// -// // 计算maxWr(需要先计算deltaF) -// float deltaF = h_data.deltaF; // 从输入参数获取 -// float maxWr = 299792458.0f / (2.0f * deltaF); -// qDebug() << "maxWr :\t" << maxWr; -// float* r_vec_host = (float*)mallocCUDAHost(sizeof(float) * h_data.Nfft);// new float[data.Nfft]; -// const float step = maxWr / h_data.Nfft; -// const float start = -1 * h_data.Nfft / 2.0f * step; -// printf("nfft=%d\n", h_data.Nfft); -// -// for (int i = 0; i < h_data.Nfft; ++i) { -// r_vec_host[i] = start + i * step; -// } -// -// h_data.r_vec = r_vec_host; -// d_data.r_vec = h_data.r_vec; -// -// qDebug() << "r_vec [0]:\t" << h_data.r_vec[0]; -// qDebug() << "r_vec step:\t" << step; -// -// bpBasic0CUDA_single(d_data, 0); -// -// DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * d_data.nx * d_data.ny); -// -// { -// DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); -// { -// for (long i = 0; i < d_data.Np; i++) { -// for (long j = 0; j < d_data.Nfft; j++) { -// ifftdata.get()[i * d_data.Nfft + j] = -// std::complex( -// h_data.phdata[i * d_data.Nfft + j].x, -// h_data.phdata[i * d_data.Nfft + j].y); -// } -// } -// } -// testOutComplexDoubleArr(QString("echo_ifft_BPBasic_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); -// -// -// -// std::shared_ptr> im_finals(new std::complex[d_data.nx * d_data.ny], delArrPtr); -// { -// for (long i = 0; i < d_data.ny; i++) { -// for (long j = 0; j < d_data.nx; j++) { -// im_finals.get()[i * d_data.nx + j] = std::complex( -// h_data.im_final[i * d_data.nx + j].x, -// h_data.im_final[i * d_data.nx + j].y); -// } -// } -// } -// testOutComplexDoubleArr(QString("im_finals_single.bin"), im_finals.get(), d_data.ny, d_data.nx); -// } -//} -// -// -//void testBpImage_single() { -// -// GPUDATA_single h_data{}; -// /** 0. 轨道 **************************************************************************************************/ -// QString echoPath = "D:\\Programme\\vs2022\\RasterMergeTest\\LAMPCAE_SCANE-all-scane\\GF3_Simulation.xml"; -// std::shared_ptr echoL0ds(new EchoL0Dataset); -// echoL0ds->Open(echoPath); -// qDebug() << u8"加载回拨文件:\t" << echoPath; -// -// /** 1. 轨道 **************************************************************************************************/ -// qDebug() << u8"1.轨道文件读取中。。。"; -// QString inGPSPath = echoL0ds->getGPSPointFilePath(); -// long gpspoints = gdalImage(inGPSPath).height; -// std::shared_ptr antpos = SatelliteAntPosOperator::readAntPosFile(inGPSPath, gpspoints); -// h_data.AntX = (float*)mallocCUDAHost(sizeof(float) * gpspoints); -// h_data.AntY = (float*)mallocCUDAHost(sizeof(float) * gpspoints); -// h_data.AntZ = (float*)mallocCUDAHost(sizeof(float) * gpspoints); -// -// for (long i = 0; i < gpspoints; i = i + 1) { -// h_data.AntX[i] = antpos.get()[i].Px; -// h_data.AntY[i] = antpos.get()[i].Py; -// h_data.AntZ[i] = antpos.get()[i].Pz; -// } -// //gpspoints = gpspoints / 2; -// qDebug() << "GPS points Count:\t" << gpspoints; -// qDebug() << "PRF 0:\t " << QString("%1,%2,%3").arg(h_data.AntX[0]).arg(h_data.AntY[0]).arg(h_data.AntZ[0]); -// qDebug() << "PRF " << gpspoints - 1 << ":\t " << QString("%1,%2,%3") -// .arg(h_data.AntX[gpspoints - 1]) -// .arg(h_data.AntY[gpspoints - 1]) -// .arg(h_data.AntZ[gpspoints - 1]); -// /** 2. 成像网格 **************************************************************************************************/ -// qDebug() << u8"轨道文件读取结束\n2.成像网格读取。。。"; -// QString demxyzPath = u8"D:\\Programme\\vs2022\\RasterMergeTest\\simulationData\\demdataset\\demxyz.bin"; -// gdalImage demgridimg(demxyzPath); -// long dem_rowCount = demgridimg.height; -// long dem_ColCount = demgridimg.width; -// std::shared_ptr demX = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); -// std::shared_ptr demY = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); -// std::shared_ptr demZ = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); -// -// h_data.x_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); -// h_data.y_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); -// h_data.z_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); -// -// for (long i = 0; i < dem_rowCount; i++) { -// for (long j = 0; j < dem_ColCount; j++) { -// h_data.x_mat[i * dem_ColCount + j] = demX.get()[i * dem_ColCount + j]; -// h_data.y_mat[i * dem_ColCount + j] = demY.get()[i * dem_ColCount + j]; -// h_data.z_mat[i * dem_ColCount + j] = demZ.get()[i * dem_ColCount + j]; -// } -// } -// qDebug() << "dem row Count:\t" << dem_rowCount; -// qDebug() << "dem col Count:\t" << dem_ColCount; -// -// qDebug() << u8"成像网格读取结束"; -// /** 3. 频率网格 **************************************************************************************************/ -// qDebug() << u8"3.处理频率"; -// float centerFreq = echoL0ds->getCenterFreq(); -// float bandwidth = echoL0ds->getBandwidth(); -// size_t freqpoints = echoL0ds->getPlusePoints(); -// std::shared_ptr freqlist(getFreqPoints_mallocHost_single(centerFreq - bandwidth / 2, centerFreq + bandwidth / 2, freqpoints), FreeCUDAHost); -// std::shared_ptr minF(new float[gpspoints], delArrPtr); -// for (long i = 0; i < gpspoints; i++) { -// minF.get()[i] = freqlist.get()[0]; -// } -// h_data.deltaF = bandwidth / (freqpoints - 1); -// qDebug() << "start Freq:\t" << centerFreq - bandwidth / 2; -// qDebug() << "end Freq:\t" << centerFreq + bandwidth / 2; -// qDebug() << "freq points:\t" << freqpoints; -// qDebug() << "delta freq:\t" << freqlist.get()[1] - freqlist.get()[0]; -// qDebug() << u8"频率结束"; -// /** 4. 初始化回波 **************************************************************************************************/ -// qDebug() << u8"4.初始化回波"; -// std::shared_ptr> echoData = echoL0ds->getEchoArr(); -// size_t echosize = sizeof(cuComplex) * gpspoints * freqpoints; -// qDebug() << "echo data size (byte) :\t" << echosize; -// h_data.phdata = (cuComplex*)mallocCUDAHost(echosize); -// shared_complexPtrToHostCuComplex(echoData.get(), h_data.phdata, gpspoints * freqpoints); -// -// qDebug() << "Azimuth Points:\t" << gpspoints; -// qDebug() << "Range Points:\t" << freqpoints; -// qDebug() << u8"初始化回波结束"; -// /** 5. 初始化图像 **************************************************************************************************/ -// qDebug() << u8"5.初始化图像"; -// h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * dem_rowCount * dem_ColCount); -// qDebug() << "Azimuth Points:\t" << gpspoints; -// qDebug() << "Range Points:\t" << freqpoints; -// qDebug() << u8"初始化图像结束"; -// -// /** 6. 模型参数初始化 **************************************************************************************************/ -// qDebug() << u8"6.模型参数初始化"; -// -// h_data.nx = dem_ColCount; -// h_data.ny = dem_rowCount; -// -// h_data.Np = gpspoints; -// h_data.Freq = freqlist.get(); -// h_data.minF = minF.get(); -// h_data.Nfft = freqpoints; -// h_data.K = h_data.Nfft; -// -// h_data.R0 = echoL0ds->getRefPhaseRange(); -// qDebug() << u8"模型参数结束"; -// -// -// /** 7. 目标 **************************************************************************************************/ -// double Tx = -2029086.618142, Ty = 4139594.934504, Tz = 4392846.782027; -// double Tslx = -2029086.618142, Tsly = 4139594.934504, Tslz = 4392846.782027; -// double p1 = 33.3, p2 = 0, p3 = 0, p4 = 0, p5 = 0, p6 = 0; -// -// /** 7. 构建回波 **************************************************************************************************/ -// GPUDATA_single d_data; -// initGPUData_single(h_data, d_data); -// -// /** 8. 展示回波 **************************************************************************************************/ -// { -// DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// cuComplex* h_ifftphdata = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// cuComplex* d_ifftphdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// CUDAIFFT(d_data.phdata, d_ifftphdata, d_data.Np, d_data.Nfft, d_data.Nfft); -// FFTShift1D(d_ifftphdata, d_data.Np, d_data.Nfft); -// DeviceToHost(h_ifftphdata, d_ifftphdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); -// { -// for (long i = 0; i < d_data.Np; i++) { -// for (long j = 0; j < d_data.Nfft; j++) { -// ifftdata.get()[i * d_data.Nfft + j] = -// std::complex( -// h_ifftphdata[i * d_data.Nfft + j].x, -// h_ifftphdata[i * d_data.Nfft + j].y); -// } -// } -// } -// testOutComplexDoubleArr(QString("echo_ifft_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); -// -// FreeCUDADevice(d_ifftphdata); -// FreeCUDAHost(h_ifftphdata); -// -// } -// /** 9. 成像 **************************************************************************************************/ -// -// // 计算maxWr(需要先计算deltaF) -// double deltaF = h_data.deltaF; // 从输入参数获取 -// double maxWr = 299792458.0f / (2.0f * deltaF); -// qDebug() << "maxWr :\t" << maxWr; -// float* r_vec_host = (float*)mallocCUDAHost(sizeof(float) * h_data.Nfft);// new double[data.Nfft]; -// const double step = maxWr / h_data.Nfft; -// const double start = -1 * h_data.Nfft / 2.0f * step; -// printf("nfft=%d\n", h_data.Nfft); -// -// for (int i = 0; i < h_data.Nfft; ++i) { -// r_vec_host[i] = start + i * step; -// } -// -// h_data.r_vec = r_vec_host; -// d_data.r_vec = h_data.r_vec; -// -// qDebug() << "r_vec [0]:\t" << h_data.r_vec[0]; -// qDebug() << "r_vec step:\t" << step; -// -// bpBasic0CUDA_single(d_data, 0); -// -// DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * d_data.nx * d_data.ny); -// { -// DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); -// std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); -// { -// for (long i = 0; i < d_data.Np; i++) { -// for (long j = 0; j < d_data.Nfft; j++) { -// ifftdata.get()[i * d_data.Nfft + j] = -// std::complex( -// h_data.phdata[i * d_data.Nfft + j].x, -// h_data.phdata[i * d_data.Nfft + j].y); -// } -// } -// } -// testOutComplexDoubleArr(QString("echo_ifft_BPBasic_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); -// -// std::shared_ptr> im_finals(new std::complex[d_data.nx * d_data.ny], delArrPtr); -// { -// for (long i = 0; i < d_data.ny; i++) { -// for (long j = 0; j < d_data.nx; j++) { -// im_finals.get()[i * d_data.nx + j] = std::complex( -// h_data.im_final[i * d_data.nx + j].x, -// h_data.im_final[i * d_data.nx + j].y); -// } -// } -// } -// testOutComplexDoubleArr(QString("im_finals_single.bin"), im_finals.get(), d_data.ny, d_data.nx); -// } -// -// -// -//} -// + + +void testSimualtionEchoPoint_single() { + GPUDATA_single h_data{}; + /** 1. 轨道 **************************************************************************************************/ + qDebug() << u8"1.轨道文件读取中。。。"; + QString inGPSPath = u8"C:\\Users\\30453\\Desktop\\script\\data\\GF3_Simulation.gpspos.data"; + long gpspoints = gdalImage(inGPSPath).height; + std::shared_ptr antpos = SatelliteAntPosOperator::readAntPosFile(inGPSPath, gpspoints); + h_data.AntX = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + h_data.AntY = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + h_data.AntZ = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + + for (long i = 0; i < gpspoints; i = i + 1) { + h_data.AntX[i] = antpos.get()[i].Px; + h_data.AntY[i] = antpos.get()[i].Py; + h_data.AntZ[i] = antpos.get()[i].Pz; + } + //gpspoints = gpspoints / 2; + qDebug() << "GPS points Count:\t" << gpspoints; + qDebug() << "PRF 0:\t " << QString("%1,%2,%3").arg(h_data.AntX[0]).arg(h_data.AntY[0]).arg(h_data.AntZ[0]); + qDebug() << "PRF " << gpspoints - 1 << ":\t " << QString("%1,%2,%3") + .arg(h_data.AntX[gpspoints - 1]) + .arg(h_data.AntY[gpspoints - 1]) + .arg(h_data.AntZ[gpspoints - 1]); + /** 2. 成像网格 **************************************************************************************************/ + qDebug() << u8"轨道文件读取结束\n2.成像网格读取。。。"; + QString demxyzPath = u8"C:\\Users\\30453\\Desktop\\script\\已修改GF3场景\\data\\demxyz.bin"; + gdalImage demgridimg(demxyzPath); + long dem_rowCount = demgridimg.height; + long dem_ColCount = demgridimg.width; + std::shared_ptr demX = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + std::shared_ptr demY = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + std::shared_ptr demZ = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + + h_data.x_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + h_data.y_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + h_data.z_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + + for (long i = 0; i < dem_rowCount; i++) { + for (long j = 0; j < dem_ColCount; j++) { + h_data.x_mat[i * dem_ColCount + j] = demX.get()[i * dem_ColCount + j]; + h_data.y_mat[i * dem_ColCount + j] = demY.get()[i * dem_ColCount + j]; + h_data.z_mat[i * dem_ColCount + j] = demZ.get()[i * dem_ColCount + j]; + } + } + qDebug() << "dem row Count:\t" << dem_rowCount; + qDebug() << "dem col Count:\t" << dem_ColCount; + + qDebug() << u8"成像网格读取结束"; + /** 3. 频率网格 **************************************************************************************************/ + qDebug() << u8"3.处理频率"; + float centerFreq = 5.3e9; + float bandwidth = 40e6; + long freqpoints = 2048; + std::shared_ptr freqlist(getFreqPoints_mallocHost_single(centerFreq - bandwidth / 2, centerFreq + bandwidth / 2, freqpoints), FreeCUDAHost); + std::shared_ptr minF(new float[gpspoints], delArrPtr); + for (long i = 0; i < gpspoints; i++) { + minF.get()[i] = freqlist.get()[0]; + } + h_data.deltaF = bandwidth / (freqpoints - 1); + qDebug() << "start Freq:\t" << centerFreq - bandwidth / 2; + qDebug() << "end Freq:\t" << centerFreq + bandwidth / 2; + qDebug() << "freq points:\t" << freqpoints; + qDebug() << "delta freq:\t" << freqlist.get()[1] - freqlist.get()[0]; + qDebug() << u8"频率结束"; + /** 4. 初始化回波 **************************************************************************************************/ + qDebug() << u8"4.初始化回波"; + std::shared_ptr phdata(createEchoPhase_mallocHost(gpspoints, freqpoints), FreeCUDAHost); + qDebug() << "Azimuth Points:\t" << gpspoints; + qDebug() << "Range Points:\t" << freqpoints; + qDebug() << u8"初始化回波结束"; + /** 5. 初始化图像 **************************************************************************************************/ + qDebug() << u8"5.初始化图像"; + h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * dem_rowCount * dem_ColCount); + qDebug() << "Azimuth Points:\t" << gpspoints; + qDebug() << "Range Points:\t" << freqpoints; + qDebug() << u8"初始化图像结束"; + + /** 6. 模型参数初始化 **************************************************************************************************/ + qDebug() << u8"6.模型参数初始化"; + + h_data.nx = dem_ColCount; + h_data.ny = dem_rowCount; + + h_data.Np = gpspoints; + h_data.Freq = freqlist.get(); + h_data.minF = minF.get(); + h_data.Nfft = freqpoints; + h_data.K = h_data.Nfft; + h_data.phdata = phdata.get(); + h_data.R0 = 900000; + qDebug() << u8"模型参数结束"; + + + /** 7. 目标 **************************************************************************************************/ + float Tx = -2029086.618142, Ty = 4139594.934504, Tz = 4392846.782027; + float Tslx = -2029086.618142, Tsly = 4139594.934504, Tslz = 4392846.782027; + float p1 = 1, p2 = 0, p3 = 0, p4 = 0, p5 = 0, p6 = 0; + + /** 7. 构建回波 **************************************************************************************************/ + GPUDATA_single d_data; + initGPUData_single(h_data, d_data); + + RFPCProcess_single(Tx, Ty, Tz, + Tslx, Tsly, Tslz, // 目标的坡面向量 + p1, p2, p3, p4, p5, p6, + d_data); + + + /** 8. 展示回波 **************************************************************************************************/ + { + DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + cuComplex* h_ifftphdata = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * d_data.Np * d_data.Nfft); + cuComplex* d_ifftphdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * d_data.Np * d_data.Nfft); + CUDAIFFT(d_data.phdata, d_ifftphdata, d_data.Np, d_data.Nfft, d_data.Nfft); + FFTShift1D(d_ifftphdata, d_data.Np, d_data.Nfft); + DeviceToHost(h_ifftphdata, d_ifftphdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); + { + for (long i = 0; i < d_data.Np; i++) { + for (long j = 0; j < d_data.Nfft; j++) { + ifftdata.get()[i * d_data.Nfft + j] = + std::complex( + h_ifftphdata[i * d_data.Nfft + j].x, + h_ifftphdata[i * d_data.Nfft + j].y); + } + } + } + testOutComplexDoubleArr(QString("echo_ifft_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); + + FreeCUDADevice(d_ifftphdata); + FreeCUDAHost(h_ifftphdata); + + } + /** 9. 成像 **************************************************************************************************/ + + // 计算maxWr(需要先计算deltaF) + float deltaF = h_data.deltaF; // 从输入参数获取 + float maxWr = 299792458.0f / (2.0f * deltaF); + qDebug() << "maxWr :\t" << maxWr; + float* r_vec_host = (float*)mallocCUDAHost(sizeof(float) * h_data.Nfft);// new float[data.Nfft]; + const float step = maxWr / h_data.Nfft; + const float start = -1 * h_data.Nfft / 2.0f * step; + printf("nfft=%d\n", h_data.Nfft); + + for (int i = 0; i < h_data.Nfft; ++i) { + r_vec_host[i] = start + i * step; + } + + h_data.r_vec = r_vec_host; + d_data.r_vec = h_data.r_vec; + + qDebug() << "r_vec [0]:\t" << h_data.r_vec[0]; + qDebug() << "r_vec step:\t" << step; + + bpBasic0CUDA_single(d_data, 0); + + DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * d_data.nx * d_data.ny); + + { + DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); + { + for (long i = 0; i < d_data.Np; i++) { + for (long j = 0; j < d_data.Nfft; j++) { + ifftdata.get()[i * d_data.Nfft + j] = + std::complex( + h_data.phdata[i * d_data.Nfft + j].x, + h_data.phdata[i * d_data.Nfft + j].y); + } + } + } + testOutComplexDoubleArr(QString("echo_ifft_BPBasic_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); -// -//int main(int argc, char* argv[]) { -// -// QApplication a(argc, argv); -// -// return 0; -//} -// -// + std::shared_ptr> im_finals(new std::complex[d_data.nx * d_data.ny], delArrPtr); + { + for (long i = 0; i < d_data.ny; i++) { + for (long j = 0; j < d_data.nx; j++) { + im_finals.get()[i * d_data.nx + j] = std::complex( + h_data.im_final[i * d_data.nx + j].x, + h_data.im_final[i * d_data.nx + j].y); + } + } + } + testOutComplexDoubleArr(QString("im_finals_single.bin"), im_finals.get(), d_data.ny, d_data.nx); + } +} + + +void testBpImage_single() { + + GPUDATA_single h_data{}; + /** 0. 轨道 **************************************************************************************************/ + QString echoPath = "D:\\Programme\\vs2022\\RasterMergeTest\\LAMPCAE_SCANE-all-scane\\GF3_Simulation.xml"; + std::shared_ptr echoL0ds(new EchoL0Dataset); + echoL0ds->Open(echoPath); + qDebug() << u8"加载回拨文件:\t" << echoPath; + + /** 1. 轨道 **************************************************************************************************/ + qDebug() << u8"1.轨道文件读取中。。。"; + QString inGPSPath = echoL0ds->getGPSPointFilePath(); + long gpspoints = gdalImage(inGPSPath).height; + std::shared_ptr antpos = SatelliteAntPosOperator::readAntPosFile(inGPSPath, gpspoints); + h_data.AntX = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + h_data.AntY = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + h_data.AntZ = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + + for (long i = 0; i < gpspoints; i = i + 1) { + h_data.AntX[i] = antpos.get()[i].Px; + h_data.AntY[i] = antpos.get()[i].Py; + h_data.AntZ[i] = antpos.get()[i].Pz; + } + //gpspoints = gpspoints / 2; + qDebug() << "GPS points Count:\t" << gpspoints; + qDebug() << "PRF 0:\t " << QString("%1,%2,%3").arg(h_data.AntX[0]).arg(h_data.AntY[0]).arg(h_data.AntZ[0]); + qDebug() << "PRF " << gpspoints - 1 << ":\t " << QString("%1,%2,%3") + .arg(h_data.AntX[gpspoints - 1]) + .arg(h_data.AntY[gpspoints - 1]) + .arg(h_data.AntZ[gpspoints - 1]); + /** 2. 成像网格 **************************************************************************************************/ + qDebug() << u8"轨道文件读取结束\n2.成像网格读取。。。"; + QString demxyzPath = u8"D:\\Programme\\vs2022\\RasterMergeTest\\simulationData\\demdataset\\demxyz.bin"; + gdalImage demgridimg(demxyzPath); + long dem_rowCount = demgridimg.height; + long dem_ColCount = demgridimg.width; + std::shared_ptr demX = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + std::shared_ptr demY = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + std::shared_ptr demZ = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + + h_data.x_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + h_data.y_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + h_data.z_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + + for (long i = 0; i < dem_rowCount; i++) { + for (long j = 0; j < dem_ColCount; j++) { + h_data.x_mat[i * dem_ColCount + j] = demX.get()[i * dem_ColCount + j]; + h_data.y_mat[i * dem_ColCount + j] = demY.get()[i * dem_ColCount + j]; + h_data.z_mat[i * dem_ColCount + j] = demZ.get()[i * dem_ColCount + j]; + } + } + qDebug() << "dem row Count:\t" << dem_rowCount; + qDebug() << "dem col Count:\t" << dem_ColCount; + + qDebug() << u8"成像网格读取结束"; + /** 3. 频率网格 **************************************************************************************************/ + qDebug() << u8"3.处理频率"; + float centerFreq = echoL0ds->getCenterFreq(); + float bandwidth = echoL0ds->getBandwidth(); + size_t freqpoints = echoL0ds->getPlusePoints(); + std::shared_ptr freqlist(getFreqPoints_mallocHost_single(centerFreq - bandwidth / 2, centerFreq + bandwidth / 2, freqpoints), FreeCUDAHost); + std::shared_ptr minF(new float[gpspoints], delArrPtr); + for (long i = 0; i < gpspoints; i++) { + minF.get()[i] = freqlist.get()[0]; + } + h_data.deltaF = bandwidth / (freqpoints - 1); + qDebug() << "start Freq:\t" << centerFreq - bandwidth / 2; + qDebug() << "end Freq:\t" << centerFreq + bandwidth / 2; + qDebug() << "freq points:\t" << freqpoints; + qDebug() << "delta freq:\t" << freqlist.get()[1] - freqlist.get()[0]; + qDebug() << u8"频率结束"; + /** 4. 初始化回波 **************************************************************************************************/ + qDebug() << u8"4.初始化回波"; + std::shared_ptr> echoData = echoL0ds->getEchoArr(); + size_t echosize = sizeof(cuComplex) * gpspoints * freqpoints; + qDebug() << "echo data size (byte) :\t" << echosize; + h_data.phdata = (cuComplex*)mallocCUDAHost(echosize); + shared_complexPtrToHostCuComplex(echoData.get(), h_data.phdata, gpspoints * freqpoints); + + qDebug() << "Azimuth Points:\t" << gpspoints; + qDebug() << "Range Points:\t" << freqpoints; + qDebug() << u8"初始化回波结束"; + /** 5. 初始化图像 **************************************************************************************************/ + qDebug() << u8"5.初始化图像"; + h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * dem_rowCount * dem_ColCount); + qDebug() << "Azimuth Points:\t" << gpspoints; + qDebug() << "Range Points:\t" << freqpoints; + qDebug() << u8"初始化图像结束"; + + /** 6. 模型参数初始化 **************************************************************************************************/ + qDebug() << u8"6.模型参数初始化"; + + h_data.nx = dem_ColCount; + h_data.ny = dem_rowCount; + + h_data.Np = gpspoints; + h_data.Freq = freqlist.get(); + h_data.minF = minF.get(); + h_data.Nfft = freqpoints; + h_data.K = h_data.Nfft; + + h_data.R0 = echoL0ds->getRefPhaseRange(); + qDebug() << u8"模型参数结束"; + + + /** 7. 目标 **************************************************************************************************/ + double Tx = -2029086.618142, Ty = 4139594.934504, Tz = 4392846.782027; + double Tslx = -2029086.618142, Tsly = 4139594.934504, Tslz = 4392846.782027; + double p1 = 33.3, p2 = 0, p3 = 0, p4 = 0, p5 = 0, p6 = 0; + + /** 7. 构建回波 **************************************************************************************************/ + GPUDATA_single d_data; + initGPUData_single(h_data, d_data); + + /** 8. 展示回波 **************************************************************************************************/ + { + DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + cuComplex* h_ifftphdata = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * d_data.Np * d_data.Nfft); + cuComplex* d_ifftphdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * d_data.Np * d_data.Nfft); + CUDAIFFT(d_data.phdata, d_ifftphdata, d_data.Np, d_data.Nfft, d_data.Nfft); + FFTShift1D(d_ifftphdata, d_data.Np, d_data.Nfft); + DeviceToHost(h_ifftphdata, d_ifftphdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); + { + for (long i = 0; i < d_data.Np; i++) { + for (long j = 0; j < d_data.Nfft; j++) { + ifftdata.get()[i * d_data.Nfft + j] = + std::complex( + h_ifftphdata[i * d_data.Nfft + j].x, + h_ifftphdata[i * d_data.Nfft + j].y); + } + } + } + testOutComplexDoubleArr(QString("echo_ifft_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); + + FreeCUDADevice(d_ifftphdata); + FreeCUDAHost(h_ifftphdata); + + } + /** 9. 成像 **************************************************************************************************/ + + // 计算maxWr(需要先计算deltaF) + double deltaF = h_data.deltaF; // 从输入参数获取 + double maxWr = 299792458.0f / (2.0f * deltaF); + qDebug() << "maxWr :\t" << maxWr; + float* r_vec_host = (float*)mallocCUDAHost(sizeof(float) * h_data.Nfft);// new double[data.Nfft]; + const double step = maxWr / h_data.Nfft; + const double start = -1 * h_data.Nfft / 2.0f * step; + printf("nfft=%d\n", h_data.Nfft); + + for (int i = 0; i < h_data.Nfft; ++i) { + r_vec_host[i] = start + i * step; + } + + h_data.r_vec = r_vec_host; + d_data.r_vec = h_data.r_vec; + + qDebug() << "r_vec [0]:\t" << h_data.r_vec[0]; + qDebug() << "r_vec step:\t" << step; + + bpBasic0CUDA_single(d_data, 0); + + DeviceToHost(h_data.im_final, d_data.im_final, sizeof(cuComplex) * d_data.nx * d_data.ny); + { + DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); + { + for (long i = 0; i < d_data.Np; i++) { + for (long j = 0; j < d_data.Nfft; j++) { + ifftdata.get()[i * d_data.Nfft + j] = + std::complex( + h_data.phdata[i * d_data.Nfft + j].x, + h_data.phdata[i * d_data.Nfft + j].y); + } + } + } + testOutComplexDoubleArr(QString("echo_ifft_BPBasic_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); + + std::shared_ptr> im_finals(new std::complex[d_data.nx * d_data.ny], delArrPtr); + { + for (long i = 0; i < d_data.ny; i++) { + for (long j = 0; j < d_data.nx; j++) { + im_finals.get()[i * d_data.nx + j] = std::complex( + h_data.im_final[i * d_data.nx + j].x, + h_data.im_final[i * d_data.nx + j].y); + } + } + } + testOutComplexDoubleArr(QString("im_finals_single.bin"), im_finals.get(), d_data.ny, d_data.nx); + } +} + + + +void testSimualtionEchoPoint_singleRFPC_doubleImage() { + GPUDATA_single h_data{}; + /** 1. 轨道 **************************************************************************************************/ + qDebug() << u8"1.轨道文件读取中。。。"; + QString inGPSPath = u8"C:\\Users\\30453\\Desktop\\script\\data\\GF3_Simulation.gpspos.data"; + long gpspoints = gdalImage(inGPSPath).height; + std::shared_ptr antpos = SatelliteAntPosOperator::readAntPosFile(inGPSPath, gpspoints); + h_data.AntX = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + h_data.AntY = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + h_data.AntZ = (float*)mallocCUDAHost(sizeof(float) * gpspoints); + + for (long i = 0; i < gpspoints; i = i + 1) { + h_data.AntX[i] = antpos.get()[i].Px; + h_data.AntY[i] = antpos.get()[i].Py; + h_data.AntZ[i] = antpos.get()[i].Pz; + } + //gpspoints = gpspoints / 2; + qDebug() << "GPS points Count:\t" << gpspoints; + qDebug() << "PRF 0:\t " << QString("%1,%2,%3").arg(h_data.AntX[0]).arg(h_data.AntY[0]).arg(h_data.AntZ[0]); + qDebug() << "PRF " << gpspoints - 1 << ":\t " << QString("%1,%2,%3") + .arg(h_data.AntX[gpspoints - 1]) + .arg(h_data.AntY[gpspoints - 1]) + .arg(h_data.AntZ[gpspoints - 1]); + /** 2. 成像网格 **************************************************************************************************/ + qDebug() << u8"轨道文件读取结束\n2.成像网格读取。。。"; + QString demxyzPath = u8"C:\\Users\\30453\\Desktop\\script\\已修改GF3场景\\data\\demxyz.bin"; + gdalImage demgridimg(demxyzPath); + long dem_rowCount = demgridimg.height; + long dem_ColCount = demgridimg.width; + std::shared_ptr demX = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 1, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + std::shared_ptr demY = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 2, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + std::shared_ptr demZ = readDataArr(demgridimg, 0, 0, dem_rowCount, dem_ColCount, 3, GDALREADARRCOPYMETHOD::VARIABLEMETHOD); + + h_data.x_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + h_data.y_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + h_data.z_mat = (float*)mallocCUDAHost(sizeof(float) * dem_rowCount * dem_ColCount); + + for (long i = 0; i < dem_rowCount; i++) { + for (long j = 0; j < dem_ColCount; j++) { + h_data.x_mat[i * dem_ColCount + j] = demX.get()[i * dem_ColCount + j]; + h_data.y_mat[i * dem_ColCount + j] = demY.get()[i * dem_ColCount + j]; + h_data.z_mat[i * dem_ColCount + j] = demZ.get()[i * dem_ColCount + j]; + } + } + qDebug() << "dem row Count:\t" << dem_rowCount; + qDebug() << "dem col Count:\t" << dem_ColCount; + + qDebug() << u8"成像网格读取结束"; + /** 3. 频率网格 **************************************************************************************************/ + qDebug() << u8"3.处理频率"; + float centerFreq = 5.3e9; + float bandwidth = 40e6; + long freqpoints = 2048; + std::shared_ptr freqlist(getFreqPoints_mallocHost_single(centerFreq - bandwidth / 2, centerFreq + bandwidth / 2, freqpoints), FreeCUDAHost); + std::shared_ptr minF(new float[gpspoints], delArrPtr); + for (long i = 0; i < gpspoints; i++) { + minF.get()[i] = freqlist.get()[0]; + } + h_data.deltaF = bandwidth / (freqpoints - 1); + qDebug() << "start Freq:\t" << centerFreq - bandwidth / 2; + qDebug() << "end Freq:\t" << centerFreq + bandwidth / 2; + qDebug() << "freq points:\t" << freqpoints; + qDebug() << "delta freq:\t" << freqlist.get()[1] - freqlist.get()[0]; + qDebug() << u8"频率结束"; + /** 4. 初始化回波 **************************************************************************************************/ + qDebug() << u8"4.初始化回波"; + std::shared_ptr phdata(createEchoPhase_mallocHost(gpspoints, freqpoints), FreeCUDAHost); + qDebug() << "Azimuth Points:\t" << gpspoints; + qDebug() << "Range Points:\t" << freqpoints; + qDebug() << u8"初始化回波结束"; + /** 5. 初始化图像 **************************************************************************************************/ + qDebug() << u8"5.初始化图像"; + h_data.im_final = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * dem_rowCount * dem_ColCount); + qDebug() << "Azimuth Points:\t" << gpspoints; + qDebug() << "Range Points:\t" << freqpoints; + qDebug() << u8"初始化图像结束"; + + /** 6. 模型参数初始化 **************************************************************************************************/ + qDebug() << u8"6.模型参数初始化"; + + h_data.nx = dem_ColCount; + h_data.ny = dem_rowCount; + + h_data.Np = gpspoints; + h_data.Freq = freqlist.get(); + h_data.minF = minF.get(); + h_data.Nfft = freqpoints; + h_data.K = h_data.Nfft; + h_data.phdata = phdata.get(); + h_data.R0 = 900000; + qDebug() << u8"模型参数结束"; + + + /** 7. 目标 **************************************************************************************************/ + + + float p1 = 1, p2 = 0, p3 = 0, p4 = 0, p5 = 0, p6 = 0; + + /** 7. 构建回波 **************************************************************************************************/ + GPUDATA_single d_data; + initGPUData_single(h_data, d_data); + long targetStep = dem_rowCount * dem_ColCount/100; + for (long i = 0; i < dem_rowCount*dem_ColCount; i=i+ targetStep) { + float Tx = h_data.x_mat[i], + Ty = h_data.y_mat[i], + Tz = h_data.z_mat[i]; + float Tslx = h_data.x_mat[i], Tsly = h_data.y_mat[i], Tslz = h_data.z_mat[i]; + + RFPCProcess_single(Tx, Ty, Tz, + Tslx, Tsly, Tslz, // 目标的坡面向量 + p1, p2, p3, p4, p5, p6, + d_data); + } + + /** 8. 展示回波 **************************************************************************************************/ + { + DeviceToHost(h_data.phdata, d_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + cuComplex* h_ifftphdata = (cuComplex*)mallocCUDAHost(sizeof(cuComplex) * d_data.Np * d_data.Nfft); + cuComplex* d_ifftphdata = (cuComplex*)mallocCUDADevice(sizeof(cuComplex) * d_data.Np * d_data.Nfft); + CUDAIFFT(d_data.phdata, d_ifftphdata, d_data.Np, d_data.Nfft, d_data.Nfft); + FFTShift1D(d_ifftphdata, d_data.Np, d_data.Nfft); + DeviceToHost(h_ifftphdata, d_ifftphdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); + { + for (long i = 0; i < d_data.Np; i++) { + for (long j = 0; j < d_data.Nfft; j++) { + ifftdata.get()[i * d_data.Nfft + j] = + std::complex( + h_ifftphdata[i * d_data.Nfft + j].x, + h_ifftphdata[i * d_data.Nfft + j].y); + } + } + } + testOutComplexDoubleArr(QString("echo_ifft_single.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); + + FreeCUDADevice(d_ifftphdata); + FreeCUDAHost(h_ifftphdata); + + } + /** 9. 成像 **************************************************************************************************/ + + // 计算maxWr(需要先计算deltaF) + float deltaF = h_data.deltaF; // 从输入参数获取 + float maxWr = 299792458.0f / (2.0f * deltaF); + qDebug() << "maxWr :\t" << maxWr; + float* r_vec_host = (float*)mallocCUDAHost(sizeof(float) * h_data.Nfft);// new float[data.Nfft]; + const float step = maxWr / h_data.Nfft; + const float start = -1 * h_data.Nfft / 2.0f * step; + printf("nfft=%d\n", h_data.Nfft); + + for (int i = 0; i < h_data.Nfft; ++i) { + r_vec_host[i] = start + i * step; + } + + h_data.r_vec = r_vec_host; + d_data.r_vec = h_data.r_vec; + + qDebug() << "r_vec [0]:\t" << h_data.r_vec[0]; + qDebug() << "r_vec step:\t" << step; + + + // 处理double 类型成像 + GPUDATA g_data{}; + + + copy_Host_Single_GPUData(h_data, g_data); + bpBasic0CUDA(g_data, 0); + + DeviceToHost(h_data.im_final, g_data.im_final, sizeof(cuComplex) * d_data.nx * d_data.ny); + + { + DeviceToHost(h_data.phdata, g_data.phdata, sizeof(cuComplex) * d_data.Np * d_data.Nfft); + std::shared_ptr> ifftdata(new std::complex[d_data.Np * d_data.Nfft], delArrPtr); + { + for (long i = 0; i < d_data.Np; i++) { + for (long j = 0; j < d_data.Nfft; j++) { + ifftdata.get()[i * d_data.Nfft + j] = + std::complex( + h_data.phdata[i * d_data.Nfft + j].x, + h_data.phdata[i * d_data.Nfft + j].y); + } + } + } + testOutComplexDoubleArr(QString("echo_ifft_BPBasic_double.bin"), ifftdata.get(), d_data.Np, d_data.Nfft); + + + + std::shared_ptr> im_finals(new std::complex[d_data.nx * d_data.ny], delArrPtr); + { + for (long i = 0; i < d_data.ny; i++) { + for (long j = 0; j < d_data.nx; j++) { + im_finals.get()[i * d_data.nx + j] = std::complex( + h_data.im_final[i * d_data.nx + j].x, + h_data.im_final[i * d_data.nx + j].y); + } + } + } + testOutComplexDoubleArr(QString("im_finals_double.bin"), im_finals.get(), d_data.ny, d_data.nx); + } +} + + +void test_double32() { + test_double_to_double32(); + test_function(0, "Addition"); + test_function(1, "Subtraction"); + test_function(2, "Multiplication"); + test_function(3, "Division"); + test_function(4, "Sine"); + test_function(5, "Cosine"); + test_function(6, "Log2"); + test_function(7, "Log10"); + test_function(8, "Natural Logarithm"); + test_function(9, "Exponential"); + test_function(10, "Power"); + test_function(11, "Square Root"); + + + + + + time_test_add(); + time_test_sub(); + time_test_mul(); + time_test_div(); + time_test_sin(); + time_test_cos(); + time_test_log2(); + time_test_log10(); + time_test_ln(); + time_test_exp(); + time_test_pow(); + time_test_sqrt(); +} + +/** 性能测试************************************************************************/ + +int main(int argc, char* argv[]) { + + QApplication a(argc, argv); + + + + //testSimualtionEchoPoint(); + //testSimualtionEchoPoint_singleRFPC_doubleImage(); + return 0; +} + +