RasterProcessTool/README.md

239 lines
6.5 KiB
Markdown
Raw Normal View History

# RasterProcessTool
// Host entry point with C linkage. At present it only derives a 1-D launch
// geometry for `member` elements; no kernel is launched and none of the
// pointer arguments is touched.
extern "C" void AddCUDA(void* aPtr, void* bptr, void* cptr, long member, LAMPGPUDATETYPE datetype) {
	// Threads per block.
	int threadsPerBlock = 256;
	// Number of blocks needed to cover `member` elements (ceiling division).
	int blockCount = (member + threadsPerBlock - 1) / threadsPerBlock;
}
// CUDA kernel: adds the contribution of one pulse (index `prfid`) to every image pixel.
//
// Launch layout: 1-D grid of 1-D blocks, one thread per pixel. Threads whose
// global index is >= rowcount * colcount do nothing, so the grid may be
// rounded up to a whole number of blocks. No shared memory is used.
//
// Parameters:
//   antPx/antPy/antPz  per-pulse position components; only element [prfid] is read.
//   img_x/img_y/img_z  per-pixel position components, rowcount * colcount elements each.
//   echopluse          echo samples of the current pulse, indexed by the computed echo index.
//   imgarr             per-pixel complex accumulator, rowcount * colcount elements (read-modify-write).
//   rowcount, colcount pixel grid dimensions.
//   prfid              index of the pulse being processed.
//   Rnear, fs          range offset and scale used to turn a distance into an echo index.
//   factorj            multiplier applied to the distance to obtain the phase-correction term.
//
// NOTE(review): the echo index is range-checked against `colcount`, yet it
// indexes `echopluse`. The caller must therefore provide at least `colcount`
// echo samples - confirm whether the bound was meant to be the sample count.
__global__ void computeDistanceAndEchoID(float* antPx, float* antPy, float* antPz,
	float* img_x, float* img_y, float* img_z,
	complexfloat* echopluse, complexfloat* imgarr,
	long rowcount, long colcount, long prfid,
	float Rnear, float fs, float factorj) {
	// Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
	// in 32-bit unsigned arithmetic and wraps for very large grids. `long long`
	// is used because `long` is only 32 bits wide on some platforms.
	const long long idx = static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
	const long long pixelTotal = static_cast<long long>(rowcount) * colcount;

	// Make sure the thread maps to a real pixel.
	if (idx >= pixelTotal) {
		return;
	}

	// Distance between the pulse position and this pixel.
	const float dx = antPx[prfid] - img_x[idx];
	const float dy = antPy[prfid] - img_y[idx];
	const float dz = antPz[prfid] - img_z[idx];
	const float imgR = sqrtf(dx * dx + dy * dy + dz * dz);

	// Echo sample index that corresponds to this distance.
	const long echoID = floor(((imgR - Rnear) * 2 / LIGHTSPEED) * fs);

	// Pixels whose echo index falls outside the accepted window get no contribution.
	if (echoID < 0 || echoID >= colcount) {
		return;
	}

	// Correction term: the echo sample is multiplied by expComplex(0 + j * Rftj)
	// before being accumulated into the pixel.
	const float Rftj = imgR * factorj;
	complexfloat Rphi{ 0,Rftj };
	Rphi = expComplex(Rphi);
	imgarr[idx] = addComplex(imgarr[idx], mulComplex(echopluse[echoID], Rphi));
}
/**
 * Accumulates every pulse of `echoArr` into one strip of image rows on the GPU.
 *
 * The strip consists of rows [startline, startline + stepline) of the image.
 * For each pulse the echo samples are converted to single precision, uploaded,
 * and computeDistanceAndEchoID is launched over the strip. The accumulated
 * strip is finally written back into `imageArr`.
 *
 * @param antPx,antPy,antPz  host arrays with `prfcount` elements each.
 * @param img_x,img_y,img_z  host arrays indexed as [row * colcount + col]; the
 *                           rows of the strip are read.
 * @param echoArr            host array indexed as [prf * plusecount + sample].
 * @param prfcount           number of pulses.
 * @param plusecount         number of echo samples per pulse.
 * @param imageArr           host image indexed as [row * colcount + col]; the
 *                           strip rows are read and then overwritten.
 * @param freq               unused.
 * @param fs,Rnear,factorj   forwarded unchanged to the kernel.
 * @param Rfar               unused.
 * @param startline          first image row of the strip.
 * @param stepline           number of rows in the strip.
 * @param rowcount           unused; all buffers are sized by `stepline`, which
 *                           is what the staging buffers always held.
 * @param colcount           number of columns per image row.
 *
 * The image values take a round trip through single precision. On any CUDA
 * failure a message is written to std::cerr, all buffers are released and
 * `imageArr` is left untouched. When the strip, the pulse count or the sample
 * count is empty the function returns without doing anything.
 *
 * NOTE(review): the kernel bounds the echo index by `colcount`, not by
 * `plusecount`. The device echo buffer is therefore padded with zeros up to
 * `colcount` samples so that indices past `plusecount` add zero instead of
 * reading out of bounds. Samples at indices >= colcount are still ignored by
 * the kernel - confirm whether that is intended.
 */
void TBPImageGPUBlock(float* antPx, float* antPy, float* antPz, float* img_x, float* img_y, float* img_z,
	std::shared_ptr<std::complex<double>> echoArr, long prfcount, long plusecount,
	std::shared_ptr<std::complex<double>> imageArr,
	float freq, float fs, float Rnear, float Rfar, float factorj,
	long startline, long stepline,
	long rowcount, long colcount) {
	(void)freq;
	(void)Rfar;
	(void)rowcount;

	const long pixelcount = stepline * colcount;
	if (pixelcount <= 0 || prfcount <= 0 || plusecount <= 0) {
		return; // nothing to accumulate; also avoids a zero-block kernel launch
	}

	// Element offset of the strip's first pixel inside the full-image arrays.
	// The strip rows are contiguous there, so they can be copied in one piece.
	const long stripOffset = startline * colcount;

	const size_t antBytes = sizeof(float) * static_cast<size_t>(prfcount);
	const size_t gridBytes = sizeof(float) * static_cast<size_t>(pixelcount);
	const size_t imgBytes = sizeof(complexfloat) * static_cast<size_t>(pixelcount);
	const size_t echoBytes = sizeof(complexfloat) * static_cast<size_t>(plusecount);
	// The kernel may read echo indices up to colcount - 1 (see NOTE above).
	const long echoSlots = plusecount > colcount ? plusecount : colcount;
	const size_t echoDevBytes = sizeof(complexfloat) * static_cast<size_t>(echoSlots);

	// Pinned staging buffers (needed for the double <-> float conversion).
	complexfloat* h_echopluse = nullptr;
	complexfloat* h_imgarr = nullptr;
	// Device buffers.
	float* d_antPx = nullptr, * d_antPy = nullptr, * d_antPz = nullptr;
	float* d_img_x = nullptr, * d_img_y = nullptr, * d_img_z = nullptr;
	complexfloat* d_echopluse = nullptr;
	complexfloat* d_imgarr = nullptr;

	// Releases everything acquired so far; safe to call with null pointers.
	auto release = [&]() {
		cudaFree(d_antPx);
		cudaFree(d_antPy);
		cudaFree(d_antPz);
		cudaFree(d_img_x);
		cudaFree(d_img_y);
		cudaFree(d_img_z);
		cudaFree(d_echopluse);
		cudaFree(d_imgarr);
		if (h_echopluse != nullptr) { cudaFreeHost(h_echopluse); }
		if (h_imgarr != nullptr) { cudaFreeHost(h_imgarr); }
	};

	// Returns true (after reporting and cleaning up) when `status` is an error.
	auto failed = [&](cudaError_t status, const char* step) -> bool {
		if (status == cudaSuccess) {
			return false;
		}
		std::cerr << "TBPImageGPUBlock: " << step << " failed: "
			<< cudaGetErrorString(status) << std::endl;
		release();
		return true;
	};

	// Allocate host staging and device memory; stop at the first failure.
	if (failed(cudaMallocHost(&h_echopluse, echoBytes), "cudaMallocHost(h_echopluse)") ||
		failed(cudaMallocHost(&h_imgarr, imgBytes), "cudaMallocHost(h_imgarr)") ||
		failed(cudaMalloc(&d_echopluse, echoDevBytes), "cudaMalloc(d_echopluse)") ||
		failed(cudaMalloc(&d_imgarr, imgBytes), "cudaMalloc(d_imgarr)") ||
		failed(cudaMalloc(&d_antPx, antBytes), "cudaMalloc(d_antPx)") ||
		failed(cudaMalloc(&d_antPy, antBytes), "cudaMalloc(d_antPy)") ||
		failed(cudaMalloc(&d_antPz, antBytes), "cudaMalloc(d_antPz)") ||
		failed(cudaMalloc(&d_img_x, gridBytes), "cudaMalloc(d_img_x)") ||
		failed(cudaMalloc(&d_img_y, gridBytes), "cudaMalloc(d_img_y)") ||
		failed(cudaMalloc(&d_img_z, gridBytes), "cudaMalloc(d_img_z)")) {
		return;
	}

	// Stage the current strip of the image in single precision.
	std::complex<double>* imageStrip = imageArr.get() + stripOffset;
	for (long k = 0; k < pixelcount; k++) {
		h_imgarr[k].real = static_cast<float>(imageStrip[k].real());
		h_imgarr[k].imag = static_cast<float>(imageStrip[k].imag());
	}
	std::cout << "h_imgarr init finished!!" << std::endl;

	// Host -> device. The float arrays are copied straight from the caller's
	// memory; the echo buffer is zeroed once so its padding stays zero.
	if (failed(cudaMemcpy(d_antPx, antPx, antBytes, cudaMemcpyHostToDevice), "upload antPx") ||
		failed(cudaMemcpy(d_antPy, antPy, antBytes, cudaMemcpyHostToDevice), "upload antPy") ||
		failed(cudaMemcpy(d_antPz, antPz, antBytes, cudaMemcpyHostToDevice), "upload antPz") ||
		failed(cudaMemcpy(d_img_x, img_x + stripOffset, gridBytes, cudaMemcpyHostToDevice), "upload img_x") ||
		failed(cudaMemcpy(d_img_y, img_y + stripOffset, gridBytes, cudaMemcpyHostToDevice), "upload img_y") ||
		failed(cudaMemcpy(d_img_z, img_z + stripOffset, gridBytes, cudaMemcpyHostToDevice), "upload img_z") ||
		failed(cudaMemcpy(d_imgarr, h_imgarr, imgBytes, cudaMemcpyHostToDevice), "upload image strip") ||
		failed(cudaMemset(d_echopluse, 0, echoDevBytes), "cudaMemset(d_echopluse)")) {
		return;
	}
	std::cout << "h_img_x init finished!!" << std::endl;

	const int blockSize = 256; // threads per block
	const int numBlocks = static_cast<int>((pixelcount + blockSize - 1) / blockSize); // ceil(pixelcount / blockSize)

	const std::complex<double>* echo = echoArr.get();
	for (long prfid = 0; prfid < prfcount; prfid++) {
		// Convert this pulse's samples to single precision.
		const std::complex<double>* pulse = echo + prfid * plusecount;
		for (long i = 0; i < plusecount; i++) {
			h_echopluse[i].real = static_cast<float>(pulse[i].real());
			h_echopluse[i].imag = static_cast<float>(pulse[i].imag());
		}
		// This copy is ordered after the previous kernel on the same stream and
		// has finished reading h_echopluse when it returns, so no explicit
		// per-pulse device synchronisation is required.
		if (failed(cudaMemcpy(d_echopluse, h_echopluse, echoBytes, cudaMemcpyHostToDevice), "upload echo pulse")) {
			return;
		}
		// The kernel's row count is the strip height: every device buffer holds
		// exactly stepline * colcount pixels.
		computeDistanceAndEchoID<<<numBlocks, blockSize>>>(d_antPx, d_antPy, d_antPz,
			d_img_x, d_img_y, d_img_z,
			d_echopluse, d_imgarr,
			stepline, colcount, prfid,
			Rnear, fs, factorj);
		if (failed(cudaGetLastError(), "computeDistanceAndEchoID launch")) {
			return;
		}
	}
	// Wait for all device work and surface any asynchronous kernel error.
	if (failed(cudaDeviceSynchronize(), "computeDistanceAndEchoID execution")) {
		return;
	}
	std::cout << std::endl;

	// GPU -> CPU
	if (failed(cudaMemcpy(h_imgarr, d_imgarr, imgBytes, cudaMemcpyDeviceToHost), "download image strip")) {
		return;
	}
	for (long k = 0; k < pixelcount; k++) {
		imageStrip[k] = std::complex<double>(h_imgarr[k].real, h_imgarr[k].imag);
	}

	// Release resources.
	release();
	std::cout << "end GPU" << std::endl;
}
// Placeholder with an empty body: none of the nine arrays is read or written.
void RTPC(float* antx, float* anty, float* antz, float* demx, float* demy, float* demz,
	float* demslopex, float* demslopey, float* demslopez)
{
	// Intentionally left blank.
}