2024-11-29 15:32:50 +00:00
|
|
|
# RasterProcessTool
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Intended as an element-wise add over `member` elements of the device
// buffers aPtr/bptr/cptr, dispatched on element type `datetype`.
// NOTE(review): this is a stub -- it computes a launch configuration but
// never launches any kernel, so calling it currently has no effect.
// TODO: implement the per-datetype add kernel launch, or remove.
extern "C" void AddCUDA(void* aPtr, void* bptr, void* cptr, long member, LAMPGPUDATETYPE datetype) {
int blockSize = 256; // threads per block
int numBlocks = (member + blockSize - 1) / blockSize; // ceil-div grid size over `member` elements (currently unused)
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Back-projection accumulation kernel: one thread per image pixel.
//
// For pixel `pix`, computes the slant range from the antenna position at
// pulse `prfid` to the pixel, maps that range to an echo sample index via
// the sampling rate `fs`, and accumulates the phase-corrected echo sample
// (multiplied by exp(j * range * factorj)) into imgarr[pix].
//
// Launch: 1-D grid covering at least rowcount*colcount threads; excess
// tail threads exit immediately.
// NOTE(review): the echo sample index is bounded by `colcount` -- this
// assumes each echo pulse holds exactly colcount samples; confirm against
// the caller's plusecount.
__global__ void computeDistanceAndEchoID(float* antPx, float* antPy, float* antPz,
    float* img_x, float* img_y, float* img_z,
    complexfloat* echopluse, complexfloat* imgarr,
    long rowcount, long colcount, long prfid,
    float Rnear, float fs, float factorj) {
    long pix = blockIdx.x * blockDim.x + threadIdx.x;
    if (pix >= rowcount * colcount) {
        return; // tail thread past the last pixel
    }

    // Antenna-to-pixel vector for this pulse.
    float dx = antPx[prfid] - img_x[pix];
    float dy = antPy[prfid] - img_y[pix];
    float dz = antPz[prfid] - img_z[pix];
    float imgR = sqrt(dx * dx + dy * dy + dz * dz);

    // Two-way travel time -> echo sample index.
    long sampleIdx = floor(((imgR - Rnear) * 2 / LIGHTSPEED) * fs);
    float phase = imgR * factorj; // range-dependent phase correction

    // Accumulate only when the range maps to a valid echo sample.
    if (sampleIdx >= 0 && sampleIdx < colcount) {
        complexfloat phaseTerm = expComplex(complexfloat{ 0, phase });
        imgarr[pix] = addComplex(imgarr[pix], mulComplex(echopluse[sampleIdx], phaseTerm));
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Back-projects every pulse onto one horizontal block of the output image.
//
// Handles image rows [startline, startline + stepline) of a rowcount x
// colcount image: stages the current image block, antenna track and pixel
// coordinates into pinned host memory, uploads them, then for each pulse
// uploads its echo line and launches computeDistanceAndEchoID to accumulate
// the phase-corrected contribution into every pixel of the block. The
// accumulated block is copied back into imageArr before returning.
//
// Parameters:
//   antPx/antPy/antPz   antenna position per pulse (prfcount entries each)
//   img_x/img_y/img_z   pixel coordinates of the FULL image (rowcount*colcount)
//   echoArr             echo matrix, prfcount x plusecount complex samples
//   imageArr            full output image (read-modify-write for this block)
//   freq, Rfar,
//   rowcount            unused here; kept for interface compatibility
//   startline/stepline  row range of the block processed by this call
//
// BUGFIX: all block-sized buffers and the kernel launch now use
// stepline*colcount pixels. The previous version allocated d_img_* for the
// full image, copied rowcount*colcount floats out of host staging buffers
// that only held stepline*colcount (host heap over-read), and launched the
// kernel over rowcount*colcount threads writing into a d_imgarr of only
// stepline*colcount entries (device out-of-bounds write) whenever
// stepline < rowcount. A duplicated d_antPx upload was also removed, and a
// launch-error check was added.
void TBPImageGPUBlock(float* antPx, float* antPy, float* antPz, float* img_x, float* img_y, float* img_z,
    std::shared_ptr<std::complex<double>> echoArr, long prfcount, long plusecount,
    std::shared_ptr<std::complex<double>> imageArr,
    float freq, float fs, float Rnear, float Rfar, float factorj,
    long startline, long stepline,
    long rowcount, long colcount) {

    long pixelcount = stepline * colcount; // pixels in this image block

    // ---- pinned host staging buffers -------------------------------------
    complexfloat* h_echopluse = nullptr; // one pulse of echo samples
    complexfloat* h_imgarr = nullptr;    // image-block accumulator
    cudaMallocHost(&h_echopluse, sizeof(float) * 2 * plusecount);
    cudaMallocHost(&h_imgarr, sizeof(float) * 2 * pixelcount);

    // Seed the accumulator with the current image content of this block
    // (double complex -> interleaved float).
    for (long i = 0; i < stepline; i++) {
        long rid = startline + i;
        for (long j = 0; j < colcount; j++) {
            h_imgarr[i * colcount + j].real = imageArr.get()[rid * colcount + j].real();
            h_imgarr[i * colcount + j].imag = imageArr.get()[rid * colcount + j].imag();
        }
    }
    std::cout << "h_imgarr init finished!!" << std::endl;

    // Antenna track: one position per pulse.
    float* h_antPx = nullptr, * h_antPy = nullptr, * h_antPz = nullptr;
    cudaMallocHost(&h_antPx, sizeof(float) * prfcount);
    cudaMallocHost(&h_antPy, sizeof(float) * prfcount);
    cudaMallocHost(&h_antPz, sizeof(float) * prfcount);
    for (long i = 0; i < prfcount; i++) {
        h_antPx[i] = antPx[i];
        h_antPy[i] = antPy[i];
        h_antPz[i] = antPz[i];
    }

    // Pixel coordinates for this block only.
    float* h_img_x = nullptr, * h_img_y = nullptr, * h_img_z = nullptr;
    cudaMallocHost(&h_img_x, sizeof(float) * pixelcount);
    cudaMallocHost(&h_img_y, sizeof(float) * pixelcount);
    cudaMallocHost(&h_img_z, sizeof(float) * pixelcount);
    for (long i = 0; i < stepline; i++) {
        long rid = startline + i;
        for (long j = 0; j < colcount; j++) {
            h_img_x[i * colcount + j] = img_x[rid * colcount + j];
            h_img_y[i * colcount + j] = img_y[rid * colcount + j];
            h_img_z[i * colcount + j] = img_z[rid * colcount + j];
        }
    }
    std::cout << "h_img_x init finished!!" << std::endl;

    // ---- device buffers ---------------------------------------------------
    float* d_antPx = nullptr, * d_antPy = nullptr, * d_antPz = nullptr;
    float* d_img_x = nullptr, * d_img_y = nullptr, * d_img_z = nullptr;
    complexfloat* d_echopluse = nullptr;
    complexfloat* d_imgarr = nullptr;
    cudaMalloc(&d_echopluse, sizeof(float) * 2 * plusecount);
    cudaMalloc(&d_imgarr, sizeof(float) * 2 * pixelcount);
    cudaMalloc(&d_antPx, sizeof(float) * prfcount);
    cudaMalloc(&d_antPy, sizeof(float) * prfcount);
    cudaMalloc(&d_antPz, sizeof(float) * prfcount);
    cudaMalloc(&d_img_x, sizeof(float) * pixelcount);
    cudaMalloc(&d_img_y, sizeof(float) * pixelcount);
    cudaMalloc(&d_img_z, sizeof(float) * pixelcount);

    // ---- host -> device ---------------------------------------------------
    cudaMemcpy(d_antPx, h_antPx, sizeof(float) * prfcount, cudaMemcpyHostToDevice);
    cudaMemcpy(d_antPy, h_antPy, sizeof(float) * prfcount, cudaMemcpyHostToDevice);
    cudaMemcpy(d_antPz, h_antPz, sizeof(float) * prfcount, cudaMemcpyHostToDevice);
    cudaMemcpy(d_img_x, h_img_x, sizeof(float) * pixelcount, cudaMemcpyHostToDevice);
    cudaMemcpy(d_img_y, h_img_y, sizeof(float) * pixelcount, cudaMemcpyHostToDevice);
    cudaMemcpy(d_img_z, h_img_z, sizeof(float) * pixelcount, cudaMemcpyHostToDevice);
    cudaMemcpy(d_imgarr, h_imgarr, sizeof(float) * 2 * pixelcount, cudaMemcpyHostToDevice);

    int blockSize = 256;                                      // threads per block
    int numBlocks = (pixelcount + blockSize - 1) / blockSize; // ceil-div over block pixels

    // ---- accumulate every pulse into the image block ----------------------
    for (long prfid = 0; prfid < prfcount; prfid++) {
        // Stage this pulse's echo line (double complex -> interleaved float).
        for (long i = 0; i < plusecount; i++) {
            h_echopluse[i].real = echoArr.get()[prfid * plusecount + i].real();
            h_echopluse[i].imag = echoArr.get()[prfid * plusecount + i].imag();
        }
        cudaMemcpy(d_echopluse, h_echopluse, sizeof(float) * 2 * plusecount, cudaMemcpyHostToDevice);

        // stepline (not rowcount) is the row count of the buffers the kernel
        // indexes -- see BUGFIX note in the header comment.
        computeDistanceAndEchoID << <numBlocks, blockSize >> > (d_antPx, d_antPy, d_antPz,
            d_img_x, d_img_y, d_img_z,
            d_echopluse, d_imgarr,
            stepline, colcount, prfid,
            Rnear, fs, factorj);
        cudaError_t launchErr = cudaGetLastError();
        if (launchErr != cudaSuccess) {
            std::cout << "computeDistanceAndEchoID launch failed: "
                << cudaGetErrorString(launchErr) << std::endl;
            break;
        }
        cudaDeviceSynchronize(); // surface async kernel errors before the next upload
    }
    std::cout << std::endl;

    // ---- device -> host: fold the block back into the full image ----------
    cudaMemcpy(h_imgarr, d_imgarr, sizeof(float) * 2 * pixelcount, cudaMemcpyDeviceToHost);
    for (long i = 0; i < stepline; i++) {
        long rid = startline + i;
        for (long j = 0; j < colcount; j++) {
            imageArr.get()[rid * colcount + j] = std::complex<double>(h_imgarr[i * colcount + j].real, h_imgarr[i * colcount + j].imag);
        }
    }

    // ---- cleanup ----------------------------------------------------------
    cudaFree(d_antPx);
    cudaFree(d_antPy);
    cudaFree(d_antPz);
    cudaFree(d_img_x);
    cudaFree(d_img_y);
    cudaFree(d_img_z);
    cudaFree(d_echopluse);
    cudaFree(d_imgarr);
    cudaFreeHost(h_antPx);
    cudaFreeHost(h_antPy);
    cudaFreeHost(h_antPz);
    cudaFreeHost(h_img_x);
    cudaFreeHost(h_img_y);
    cudaFreeHost(h_img_z);
    cudaFreeHost(h_echopluse);
    cudaFreeHost(h_imgarr);
    std::cout << "end GPU" << std::endl;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// NOTE(review): unimplemented stub -- the body is empty; all parameters
// (antenna positions antx/anty/antz, DEM coordinates demx/demy/demz, and
// DEM slope components demslopex/demslopey/demslopez) are ignored.
// TODO: implement or remove.
void RTPC(float* antx, float* anty, float* antz,
float* demx, float* demy, float* demz,
float* demslopex, float* demslopey, float* demslopez
) {}
|
|
|
|
|
|
|
|
|
|
|
|
|