From 8673550293009ba357785b97682631d1daea12a1 Mon Sep 17 00:00:00 2001 From: Zhang Yunjun Date: Sat, 9 Mar 2019 17:43:19 -0500 Subject: [PATCH] support PIXEL/ALOS1 .gz files uncompressFile.py: support multiple sub-layers of folders uncompressing prepRawALOS.py: 1. add get_ALOS_ALP_name() to support the compressed ALOS file that is not named with "ALP*", such as the ones from PIXEL 2. remove duplicated inputDir, outputDir and rmfile, as they are in the Namespace inps object already. 3. move the inputDir and outputDir abspath from main() to cmdLineParse() 4. basic formatting adjustment --- .gitignore | 1 + contrib/stack/stripmapStack/prepRawALOS.py | 100 ++++++++++++------ contrib/stack/stripmapStack/uncompressFile.py | 25 +++-- 3 files changed, 82 insertions(+), 44 deletions(-) diff --git a/.gitignore b/.gitignore index 04bb3a3..a9a21d1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.pyc *~ *.swp +*.DS_Store __pycache__ .sconf_temp .sconsign.dblite diff --git a/contrib/stack/stripmapStack/prepRawALOS.py b/contrib/stack/stripmapStack/prepRawALOS.py index 49eba28..5269e30 100755 --- a/contrib/stack/stripmapStack/prepRawALOS.py +++ b/contrib/stack/stripmapStack/prepRawALOS.py @@ -1,25 +1,32 @@ #!/usr/bin/env python3 # David Bekaert + + import os import glob import argparse -from uncompressFile import uncompressfile import shutil +import tarfile +import zipfile +from uncompressFile import uncompressfile + def createParser(): ''' Create command line parser. 
''' - parser = argparse.ArgumentParser(description='Prepare ALOS raw processing (unzip/untar files, organize in date folders, generate script to unpack into isce formats).') - parser.add_argument('-i', '--input', dest='input', type=str, required=True, + parser = argparse.ArgumentParser(description='Prepare ALOS raw processing (unzip/untar files, ' + 'organize in date folders, generate script to unpack into isce formats).') + parser.add_argument('-i', '--input', dest='inputDir', type=str, required=True, help='directory with the raw data') parser.add_argument('-rmfile', '--rmfile', dest='rmfile',action='store_true', default=False, - help='Optional: remove zip/tar/compressed files after unpacking into date structure (default is to keep in archive fo lder)') - parser.add_argument('-o', '--output', dest='output', type=str, required=False, + help='Optional: remove zip/tar/compressed files after unpacking into date structure ' + '(default is to keep in archive fo lder)') + parser.add_argument('-o', '--output', dest='outputDir', type=str, required=False, help='output directory where data needs to be unpacked into isce format (for script generation).') - parser.add_argument('-t', '--text_cmd', dest='text_cmd', type=str, default='source ~/.bash_profile;' - , help='text command to be added to the beginning of each line of the run files. Default: source ~/.bash_profile;') + parser.add_argument('-t', '--text_cmd', dest='text_cmd', type=str, default='source ~/.bash_profile;', + help='text command to be added to the beginning of each line of the run files. 
Default: source ~/.bash_profile;') return parser @@ -29,7 +36,15 @@ def cmdLineParse(iargs=None): ''' parser = createParser() - return parser.parse_args(args = iargs) + inps = parser.parse_args(args = iargs) + + # parsing required inputs + inps.inputDir = os.path.abspath(inps.inputDir) + # parsing optional inputs + if inps.outputDir: + inps.outputDir = os.path.abspath(inps.outputDir) + return inps + def get_Date(ALOSfolder): @@ -56,46 +71,60 @@ def get_Date(ALOSfolder): acquisitionDate = 'FAIL' return successflag, acquisitionDate + +def get_ALOS_ALP_name(infile): + """Get the ALPSRP075780620 name from compress file in various format.""" + outname = None + fbase = os.path.basename(infile) + if fbase.startswith("ALP"): + outname = fbase.split("-")[0] + else: + fext = os.path.splitext(infile)[1] + if fext in ['.tar', '.gz']: + with tarfile.open(infile, 'r') as tar: + file_list = tar.getnames() + elif fext in ['.zip']: + with zipfile.ZipFile(infile, 'r') as z: + file_list = z.namelist() + else: + raise ValueError('unrecognized file extension: {}'.format(fext)) + led_file = [i for i in file_list if 'LED' in i][0] + led_file = os.path.basename(led_file) + outname = [i for i in led_file.split("-") if 'ALP' in i][0] + return outname + + def main(iargs=None): ''' The main driver. 
''' inps = cmdLineParse(iargs) - # parsing required inputs - inputDir = os.path.abspath(inps.input) - # parsing optional inputs - if inps.output: - outputDir = os.path.abspath(inps.output) - else: - outputDir = None - rmfile = inps.rmfile # filename of the runfile run_unPack = 'run_unPackALOS' # loop over the different folder, ALOS zip/tar files and unzip them, make the names consistent - ALOS_extensions = (os.path.join(inputDir, 'ALP*.zip'),os.path.join(inputDir, 'ALP*.tar'),os.path.join(inputDir, 'ALP*.gz')) + ALOS_extensions = (os.path.join(inps.inputDir, '*.zip'), + os.path.join(inps.inputDir, '*.tar'), + os.path.join(inps.inputDir, '*.gz')) for ALOS_extension in ALOS_extensions: + # loop over zip/tar files ALOS_filesfolders = glob.glob(ALOS_extension) for ALOS_infilefolder in ALOS_filesfolders: ## the path to the folder/zip workdir = os.path.dirname(ALOS_infilefolder) - + ## get the output name folder without any extensions - temp = os.path.basename(ALOS_infilefolder) - # trim the extensions and keep only very first part - parts = temp.split(".") - parts = parts[0].split('-') - ALOS_outfolder = parts[0] + ALOS_outfolder = get_ALOS_ALP_name(ALOS_infilefolder) # add the path back in - ALOS_outfolder = os.path.join(workdir,ALOS_outfolder) - + ALOS_outfolder = os.path.join(workdir, ALOS_outfolder) + # loop over two cases (either file or folder): ### this is a file, try to unzip/untar it if os.path.isfile(ALOS_infilefolder): # unzip the file in the outfolder - successflag_unzip = uncompressfile(ALOS_infilefolder,ALOS_outfolder) + successflag_unzip = uncompressfile(ALOS_infilefolder, ALOS_outfolder) # put failed files in a seperate directory if not successflag_unzip: @@ -104,7 +133,7 @@ def main(iargs=None): os.rename(ALOS_infilefolder,os.path.join(workdir,'FAILED_FILES','.')) else: # check if file needs to be removed or put in archive folder - if rmfile: + if inps.rmfile: os.remove(ALOS_infilefolder) print('Deleting: ' + ALOS_infilefolder) else: @@ -116,9 +145,10 
@@ def main(iargs=None): # loop over the different ALOS folders and make sure the folder names are consistent. # this step is not needed unless the user has manually unzipped data before. - ALOS_folders = glob.glob(os.path.join(inputDir, 'ALP*')) + ALOS_folders = glob.glob(os.path.join(inps.inputDir, 'ALP*')) for ALOS_folder in ALOS_folders: - # in case the user has already unzipped some files, make sure they are unzipped similar like the uncompressfile code + # in case the user has already unzipped some files + # make sure they are unzipped similar like the uncompressfile code temp = os.path.basename(ALOS_folder) parts = temp.split(".") parts = parts[0].split('-') @@ -134,7 +164,7 @@ def main(iargs=None): # loop over the different ALOS folders and organize in date folders - ALOS_folders = glob.glob(os.path.join(inputDir, 'ALP*')) + ALOS_folders = glob.glob(os.path.join(inps.inputDir, 'ALP*')) for ALOS_folder in ALOS_folders: # get the date successflag, imgDate = get_Date(ALOS_folder) @@ -160,14 +190,14 @@ def main(iargs=None): # now generate the unpacking script for all the date dirs - dateDirs = glob.glob(os.path.join(inputDir,'2*')) - if outputDir is not None: + dateDirs = glob.glob(os.path.join(inps.inputDir,'2*')) + if inps.outputDir is not None: f = open(run_unPack,'w') for dataDir in dateDirs: AlosFiles = glob.glob(os.path.join(dataDir, 'ALP*')) if len(AlosFiles)>0: acquisitionDate = os.path.basename(dataDir) - slcDir = os.path.join(outputDir, acquisitionDate) + slcDir = os.path.join(inps.outputDir, acquisitionDate) if not os.path.exists(slcDir): os.makedirs(slcDir) cmd = 'unpackFrame_ALOS_raw.py -i ' + os.path.abspath(dataDir) + ' -o ' + slcDir @@ -179,9 +209,9 @@ def main(iargs=None): print (cmd) f.write(inps.text_cmd + cmd+'\n') f.close() + return + if __name__ == '__main__': main() - - diff --git a/contrib/stack/stripmapStack/uncompressFile.py b/contrib/stack/stripmapStack/uncompressFile.py index b25a2a5..bbb51c4 100755 --- 
a/contrib/stack/stripmapStack/uncompressFile.py +++ b/contrib/stack/stripmapStack/uncompressFile.py @@ -2,12 +2,14 @@ # David Bekaert -import zipfile + import os import glob import argparse -import tarfile import shutil +import tarfile +import zipfile + def createParser(): ''' @@ -50,6 +52,8 @@ def main(iargs=None): print('Done') elif completeFlag == False: print('Failed') + return + def uncompressfile(inputFile,outputDir): @@ -89,7 +93,7 @@ def uncompressfile(inputFile,outputDir): temp, extension = os.path.splitext(inputFile) # File update - print('File: ', inputFile, ' to ', outputDir) + print('File: ', inputFile, ' to ', outputDir) if extension == '.zip': ZIP = zipfile.ZipFile(inputFile) @@ -105,7 +109,7 @@ def uncompressfile(inputFile,outputDir): # Check if the data is unpacked in its own folder folderfiles = glob.glob(os.path.join(outputDir,'*')) - if len(folderfiles)==1: + while len(folderfiles)==1: # get the sub-folder name only tempdir = os.path.basename(folderfiles[0]) if os.path.isdir(folderfiles[0]): @@ -114,27 +118,30 @@ def uncompressfile(inputFile,outputDir): os.rename(folderfiles[0],tempdir2) os.rmdir(outputDir) os.rename(tempdir2,outputDir) + folderfiles = glob.glob(os.path.join(outputDir,'*')) return completeFlag + elif extension == '.tar' or extension == '.gz': TAR = tarfile.open(inputFile) - + # first test the tar is in good condition try: TAR.extractall(outputDir) TAR.close() completeFlag = True - # Check if the data is unpacked in its own folder + # Check if the data is unpacked in its own folder or its sub-folders folderfiles = glob.glob(os.path.join(outputDir,'*')) - if len(folderfiles)==1: - # get the sub-folder name only + while len(folderfiles) == 1: + # get the sub-folder name only tempdir = os.path.basename(folderfiles[0]) if os.path.isdir(folderfiles[0]): # it seems there is a subfolder, will copy the content in the parent - tempdir2=os.path.join(workdir,tempdir + '.temp') + tempdir2 = os.path.join(workdir, tempdir + '.temp') 
os.rename(folderfiles[0],tempdir2) os.rmdir(outputDir) os.rename(tempdir2,outputDir) + folderfiles = glob.glob(os.path.join(outputDir,'*')) return completeFlag except: print('Tar file seems to be corrupted, abord...')