// // Author: Joshua Cohen // Copyright 2017 // // Note the algorithm has been updated to both tile the input image processing, as well as switch // from column-major Fortran ordering to row-major C++ ordering. For the purposes of this algorithm, // the "image" refers to the full input or output image, whereas the "tile" refers to a block of // between 1 and LINES_PER_TILE output image lines. #include #include #include #include #include #include #include "DataAccessor.h" #include "Constants.h" #include "Poly2d.h" #include "ResampMethods.h" #include "ResampSlc.h" #ifdef GPU_ACC_ENABLED #include "GPUresamp.h" #endif #define LINES_PER_TILE 1000 using std::complex; using std::max; using std::min; using std::vector; ResampSlc::ResampSlc() { rgCarrier = new Poly2d(); azCarrier = new Poly2d(); rgOffsetsPoly = new Poly2d(); azOffsetsPoly = new Poly2d(); dopplerPoly = new Poly2d(); slcInAccessor = 0; slcOutAccessor = 0; residRgAccessor = 0; residAzAccessor = 0; usr_enable_gpu = true; } ResampSlc::ResampSlc(const ResampSlc &rsmp) { rgCarrier = new Poly2d(*rsmp.rgCarrier); azCarrier = new Poly2d(*rsmp.azCarrier); rgOffsetsPoly = new Poly2d(*rsmp.rgOffsetsPoly); azOffsetsPoly = new Poly2d(*rsmp.azOffsetsPoly); dopplerPoly = new Poly2d(*rsmp.dopplerPoly); slcInAccessor = rsmp.slcInAccessor; slcOutAccessor = rsmp.slcOutAccessor; residRgAccessor = rsmp.residRgAccessor; residAzAccessor = rsmp.residAzAccessor; usr_enable_gpu = rsmp.usr_enable_gpu; } ResampSlc::~ResampSlc() { clearPolys(); } void ResampSlc::setRgCarrier(Poly2d *poly) { if (rgCarrier != NULL) delete rgCarrier; rgCarrier = poly; } void ResampSlc::setAzCarrier(Poly2d *poly) { if (azCarrier != NULL) delete azCarrier; azCarrier = poly; } void ResampSlc::setRgOffsets(Poly2d *poly) { if (rgOffsetsPoly != NULL) delete rgOffsetsPoly; rgOffsetsPoly = poly; } void ResampSlc::setAzOffsets(Poly2d *poly) { if (azOffsetsPoly != NULL) delete azOffsetsPoly; azOffsetsPoly = poly; } void ResampSlc::setDoppler(Poly2d *poly) { if (dopplerPoly != NULL) delete dopplerPoly; dopplerPoly = poly; } // * * * * * * * * * NOTE: THESE SHOULD BE USED WITH EXTREME PREJUDICE * * * * * * * * * Poly2d* ResampSlc::releaseRgCarrier() { Poly2d *tmp = rgCarrier; rgCarrier = NULL; return tmp; } Poly2d* ResampSlc::releaseAzCarrier() { Poly2d *tmp = azCarrier; azCarrier = NULL; return tmp; } Poly2d* ResampSlc::releaseRgOffsets() { Poly2d *tmp = rgOffsetsPoly; rgOffsetsPoly = NULL; return tmp; } Poly2d* ResampSlc::releaseAzOffsets() { Poly2d *tmp = azOffsetsPoly; azOffsetsPoly = NULL; return tmp; } Poly2d* ResampSlc::releaseDoppler() { Poly2d *tmp = dopplerPoly; dopplerPoly = NULL; return tmp; } // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * void ResampSlc::clearPolys() { if (rgCarrier != NULL) delete rgCarrier; if (azCarrier != NULL) delete azCarrier; if (rgOffsetsPoly != NULL) delete rgOffsetsPoly; if (azOffsetsPoly != NULL) delete azOffsetsPoly; if (dopplerPoly != NULL) delete dopplerPoly; } void ResampSlc::resetPolys() { clearPolys(); rgCarrier = new Poly2d(); azCarrier = new Poly2d(); rgOffsetsPoly = new Poly2d(); azOffsetsPoly = new Poly2d(); dopplerPoly = new Poly2d(); } void copyPolyToArr(Poly2d *poly, vector &destArr) { // Len of destArr is at least 7 destArr[0] = poly->azimuthOrder; destArr[1] = poly->rangeOrder; destArr[2] = poly->azimuthMean; destArr[3] = poly->rangeMean; destArr[4] = poly->azimuthNorm; destArr[5] = poly->rangeNorm; for (int i=0; i<((destArr[0]+1)*(destArr[1]+1)); i++) destArr[6+i] = poly->coeffs[i]; } void ResampSlc::resamp() { vector residAz(outWidth,0.), residRg(outWidth,0.); double ro, ao, ph, dop, fracr, fraca, t0, k, kk; vector > > chip(SINC_ONE, vector >(SINC_ONE)); vector > imgIn(0); // Linearizing the image so it's easier to pass around vector > imgOut(outWidth,complex(0.,0.)); complex cval; int chipi, chipj, nTiles, lastLines, firstImageRow, lastImageRow, firstTileRow; int imgLine, nRowsInTile, nRowsInBlock; ResampMethods rMethods; DataAccessor *slcInAccObj = (DataAccessor*)slcInAccessor; DataAccessor *slcOutAccObj = (DataAccessor*)slcOutAccessor; DataAccessor *residRgAccObj, *residAzAccObj; if (residRgAccessor != 0) residRgAccObj = (DataAccessor*)residRgAccessor; else residRgAccObj = NULL; if (residAzAccessor != 0) residAzAccObj = (DataAccessor*)residAzAccessor; else residAzAccObj = NULL; #ifndef GPU_ACC_ENABLED usr_enable_gpu = false; #endif // Moving this here so we don't waste any time if (!isComplex) { printf("Real data interpolation not implemented yet.\n"); return; } t0 = omp_get_wtime(); printf("\n << Resample one image to another image coordinates >> \n\n"); printf("Input Image Dimensions: %6d lines, %6d pixels\n\n", inLength, inWidth); printf("Output Image Dimensions: %6d lines, %6d pixels\n\n", outLength, outWidth); printf("Number of threads: %d\n", omp_get_max_threads()); printf("Complex data interpolation\n"); rMethods.prepareMethods(SINC_METHOD); printf("Azimuth Carrier Poly\n"); azCarrier->printPoly(); printf("Range Carrier Poly\n"); rgCarrier->printPoly(); printf("Range Offsets Poly\n"); rgOffsetsPoly->printPoly(); printf("Azimuth Offsets Poly\n"); azOffsetsPoly->printPoly(); // Determine number of tiles needed to process image nTiles = outLength / LINES_PER_TILE; lastLines = outLength - (nTiles * LINES_PER_TILE); printf("Resampling in %d tile(s) of %d line(s)", nTiles, LINES_PER_TILE); if (lastLines > 0) { printf(", with a final tile containing %d line(s)", lastLines); } printf("\n"); // For each full tile of LINES_PER_TILE lines... for (int tile=0; tilegetLine((char *)&residAz[0], i); // Read in azimuth residual if it exists for (int j=0; jeval(i+1, j+1) + residAz[j]; // Evaluate net azimuth offset of each pixel in row //imgLine = int(i + ao + 1) - SINC_HALF; // Calculate corresponding minimum line idx of input image imgLine = int(i+ao) - SINC_HALF; firstImageRow = min(firstImageRow, imgLine); // Set the first input image line idx to the smallest value } } firstImageRow = max(firstImageRow, 0); // firstImageRow now has the lowest image row called in the tile processing lastImageRow = 0; // Initialize to first image row for (int i=(firstTileRow+LINES_PER_TILE-40); i<(firstTileRow+LINES_PER_TILE); i++) { // Iterate over last 40 lines of tile if (residAzAccessor != 0) residAzAccObj->getLine((char *)&residAz[0], i); // Read in azimuth residual for (int j=0; jeval(i+1, j+1) + residAz[j]; // Evaluate net azimuth offset of each pixel in row //imgLine = int(i + ao + 1) + SINC_LEN - SINC_HALF; // Calculate corresponding maximum line idx of input image // (note includes the SINC_LEN added later) imgLine = int(i+ao) + SINC_HALF; lastImageRow = max(lastImageRow, imgLine); // Set last input image line idx to the largest value } } lastImageRow = min(lastImageRow, inLength-1); // lastImageRow now has the highest image row called in the tile processing nRowsInBlock = lastImageRow - firstImageRow + 1; // Number of rows in imgIn (NOT TILE) // Resize the image tile to the necessary number of lines if necessary using value-initialization resizing (automatically resizes/initializes new rows) if (imgIn.size() < size_t(nRowsInBlock*inWidth)) imgIn.resize(nRowsInBlock*inWidth); for (int i=0; igetLine((char *)&(imgIn[IDX1D(i,0,inWidth)]), firstImageRow+i); // Sets imgIn[0] == master_image[firstImageRow] // Remove the carriers using OpenMP acceleration #pragma omp parallel for private(ph) for (int j=0; jeval(firstImageRow+i+1,j+1) + azCarrier->eval(firstImageRow+i+1,j+1), 2.*M_PI); // Evaluate the pixel's carrier imgIn[IDX1D(i,j,inWidth)] = imgIn[IDX1D(i,j,inWidth)] * complex(cos(ph), -sin(ph)); // Remove the carrier } } // Loop over lines printf("Interpolating tile %d\n", tile); if (usr_enable_gpu) { #ifdef GPU_ACC_ENABLED vector > imgOutBlock(LINES_PER_TILE*outWidth); vector residAzBlock(1,0.); // Dummy length to use for GPU double *residAzPtr = 0; if (residAzAccessor != 0) { residAzBlock.resize(LINES_PER_TILE*outWidth); for (int i=0; igetLineSequential((char *)&residAzBlock[i*LINES_PER_TILE]); residAzPtr = &residAzBlock[0]; } vector residRgBlock(1,0.); double *residRgPtr = 0; if (residRgAccessor != 0) { residRgBlock.resize(LINES_PER_TILE*outWidth); for (int i=0; igetLineSequential((char *)&residRgBlock[i*LINES_PER_TILE]); residRgPtr = &residRgBlock[0]; } vector azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6); vector rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6); vector dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6); vector azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6); vector rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6); copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...] copyPolyToArr(rgOffsetsPoly, rgOffPolyArr); copyPolyToArr(dopplerPoly, dopPolyArr); copyPolyToArr(azCarrier, azCarPolyArr); copyPolyToArr(rgCarrier, rgCarPolyArr); double gpu_inputs_d[6]; int gpu_inputs_i[8]; gpu_inputs_d[0] = wvl; gpu_inputs_d[1] = refwvl; gpu_inputs_d[2] = r0; gpu_inputs_d[3] = refr0; gpu_inputs_d[4] = slr; gpu_inputs_d[5] = refslr; gpu_inputs_i[0] = inLength; gpu_inputs_i[1] = inWidth; gpu_inputs_i[2] = outWidth; gpu_inputs_i[3] = firstImageRow; gpu_inputs_i[4] = firstTileRow; gpu_inputs_i[5] = nRowsInBlock; gpu_inputs_i[6] = LINES_PER_TILE; gpu_inputs_i[7] = int(flatten); // 1000 lines per tile is peanuts for this algorithm to run (for test set is only 20M pixels) runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOutBlock[0], residAzPtr, residRgPtr, &azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0], &(rMethods.fintp[0])); for (int i=0; isetLineSequential((char *)&imgOutBlock[i*outWidth]); #endif } else { // Interpolation of the complex image. Note that we don't need to make very many changes to the original code in this loop // since the i-index should numerically match the original i-index for (int i=firstTileRow; i<(firstTileRow+LINES_PER_TILE); i++) { // GetLineSequential is fine here, we don't need specific lines, just continue grabbing them if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]); if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]); #pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \ firstprivate(chip) for (int j=0; jeval(i+1,j+1) + residAz[j]; ro = rgOffsetsPoly->eval(i+1,j+1) + residRg[j]; fraca = modf(i+ao, &k); if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue; fracr = modf(j+ro, &kk); if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue; dop = dopplerPoly->eval(i+1,j+1); // Data chip without the carriers for (int ii=0; ii(cos((ii-4.)*dop), -sin((ii-4.)*dop)); for (int jj=0; jjeval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro); // Flatten the carrier if the user wants to if (flatten) { ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) + ((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl))); } ph = modulo_f(ph, 2.*M_PI); cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD); imgOut[j] = cval * complex(cos(ph), sin(ph)); } slcOutAccObj->setLineSequential((char *)&imgOut[0]); } } } // And if there is a final partial tile... if (lastLines > 0) { firstTileRow = nTiles * LINES_PER_TILE; nRowsInTile = outLength - firstTileRow + 1; // NOT EQUIVALENT TO NUMBER OF ROWS IN IMAGE BLOCK printf("Reading in image data for final partial tile\n"); firstImageRow = outLength - 1; for (int i=firstTileRow; igetLine((char *)&residAz[0], i); for (int j=0; jeval(i+1, j+1) + residAz[j]; imgLine = int(i+ao) - SINC_HALF; firstImageRow = min(firstImageRow, imgLine); } } firstImageRow = max(firstImageRow, 0); lastImageRow = 0; for (int i=max(firstTileRow,firstTileRow+nRowsInTile-40); i<(firstTileRow+nRowsInTile); i++) { // Make sure if nRowsInTile < 40 to not read too many lines if (residAzAccessor != 0) residAzAccObj->getLine((char *)&residAz[0], i); for (int j=0; jeval(i+1, j+1) + residAz[j]; imgLine = int(i+ao) + SINC_HALF; lastImageRow = max(lastImageRow, imgLine); } } lastImageRow = min(lastImageRow, inLength-1); nRowsInBlock = lastImageRow - firstImageRow + 1; if (imgIn.size() < size_t(nRowsInBlock*inWidth)) imgIn.resize(nRowsInBlock*inWidth); for (int i=0; igetLine((char *)&(imgIn[IDX1D(i,0,inWidth)]), firstImageRow+i); #pragma omp parallel for private(ph) for (int j=0; jeval(firstImageRow+i+1,j+1) + azCarrier->eval(firstImageRow+i+1,j+1), 2.*M_PI); imgIn[IDX1D(i,j,inWidth)] = imgIn[IDX1D(i,j,inWidth)] * complex(cos(ph), -sin(ph)); } } printf("Interpolating final partial tile\n"); if (usr_enable_gpu) { #ifdef GPU_ACC_ENABLED vector > imgOutBlock(nRowsInTile*outWidth); vector residAzBlock(1,0.); // Dummy length to use for GPU double *residAzPtr = 0; if (residAzAccessor != 0) { residAzBlock.resize(nRowsInTile*outWidth); for (int i=0; igetLineSequential((char *)&residAzBlock[i*nRowsInTile]); residAzPtr = &residAzBlock[0]; } vector residRgBlock(1,0.); double *residRgPtr = 0; if (residRgAccessor != 0) { residRgBlock.resize(nRowsInTile*outWidth); for (int i=0; igetLineSequential((char *)&residRgBlock[i*nRowsInTile]); residRgPtr = &residRgBlock[0]; } vector azOffPolyArr(((azOffsetsPoly->azimuthOrder+1)*(azOffsetsPoly->rangeOrder+1))+6); vector rgOffPolyArr(((rgOffsetsPoly->azimuthOrder+1)*(rgOffsetsPoly->rangeOrder+1))+6); vector dopPolyArr(((dopplerPoly->azimuthOrder+1)*(dopplerPoly->rangeOrder+1))+6); vector azCarPolyArr(((azCarrier->azimuthOrder+1)*(azCarrier->rangeOrder+1))+6); vector rgCarPolyArr(((rgCarrier->azimuthOrder+1)*(rgCarrier->rangeOrder+1))+6); copyPolyToArr(azOffsetsPoly, azOffPolyArr); // arrs are: [azord, rgord, azmean, rgmean, aznorm, rgnorm, coeffs...] copyPolyToArr(rgOffsetsPoly, rgOffPolyArr); copyPolyToArr(dopplerPoly, dopPolyArr); copyPolyToArr(azCarrier, azCarPolyArr); copyPolyToArr(rgCarrier, rgCarPolyArr); double gpu_inputs_d[6]; int gpu_inputs_i[8]; gpu_inputs_d[0] = wvl; gpu_inputs_d[1] = refwvl; gpu_inputs_d[2] = r0; gpu_inputs_d[3] = refr0; gpu_inputs_d[4] = slr; gpu_inputs_d[5] = refslr; gpu_inputs_i[0] = inLength; gpu_inputs_i[1] = inWidth; gpu_inputs_i[2] = outWidth; gpu_inputs_i[3] = firstImageRow; gpu_inputs_i[4] = firstTileRow; gpu_inputs_i[5] = nRowsInBlock; gpu_inputs_i[6] = nRowsInTile; gpu_inputs_i[7] = int(flatten); // 1000 lines per tile is peanuts for this algorithm to run (for test set is only 20M pixels) runGPUResamp(gpu_inputs_d, gpu_inputs_i, (void*)&imgIn[0], (void*)&imgOutBlock[0], residAzPtr, residRgPtr, &azOffPolyArr[0], &rgOffPolyArr[0], &dopPolyArr[0], &azCarPolyArr[0], &rgCarPolyArr[0], &(rMethods.fintp[0])); for (int i=0; isetLineSequential((char *)&imgOutBlock[i*outWidth]); #endif } else { for (int i=firstTileRow; i<(firstTileRow+nRowsInTile); i++) { if (residAzAccessor != 0) residAzAccObj->getLineSequential((char *)&residAz[0]); if (residRgAccessor != 0) residRgAccObj->getLineSequential((char *)&residRg[0]); #pragma omp parallel for private(ro,ao,fracr,fraca,ph,cval,dop,chipi,chipj,k,kk) \ firstprivate(chip) for (int j=0; jeval(i+1,j+1) + residRg[j]; ao = azOffsetsPoly->eval(i+1,j+1) + residAz[j]; fraca = modf(i+ao, &k); if ((k < SINC_HALF) || (k >= (inLength-SINC_HALF))) continue; fracr = modf(j+ro, &kk); if ((kk < SINC_HALF) || (kk >= (inWidth-SINC_HALF))) continue; dop = dopplerPoly->eval(i+1,j+1); // Data chip without the carriers for (int ii=0; ii(cos((ii-4.)*dop), -sin((ii-4.)*dop)); for (int jj=0; jjeval(i+ao,j+ro) + azCarrier->eval(i+ao,j+ro); // Flatten the carrier if the user wants to if (flatten) { ph = ph + ((4. * (M_PI / wvl)) * ((r0 - refr0) + (j * (slr - refslr)) + (ro * slr))) + ((4. * M_PI * (refr0 + (j * refslr))) * ((1. / refwvl) - (1. / wvl))); } ph = modulo_f(ph, 2.*M_PI); cval = rMethods.interpolate_cx(chip,(SINC_HALF+1),(SINC_HALF+1),fraca,fracr,SINC_ONE,SINC_ONE,SINC_METHOD); imgOut[j] = cval * complex(cos(ph), sin(ph)); } slcOutAccObj->setLineSequential((char *)&imgOut[0]); } } } printf("Elapsed time: %f\n", (omp_get_wtime()-t0)); }