support PIXEL/ALOS1 .gz files

uncompressFile.py: support flattening multiple nested levels of sub-folders when uncompressing

prepRawALOS.py:
1. add get_ALOS_ALP_name() to support compressed ALOS files whose names do not start with "ALP*", such as the ones from PIXEL
2. remove duplicated inputDir, outputDir and rmfile, as they are in the Namespace inps object already.
3. move the inputDir and outputDir abspath from main() to cmdLineParse()
4. basic formatting adjustment
LT1AB
Zhang Yunjun 2019-03-09 17:43:19 -05:00
parent 5e1ca080eb
commit 8673550293
3 changed files with 82 additions and 44 deletions

1
.gitignore vendored
View File

@ -1,6 +1,7 @@
*.pyc *.pyc
*~ *~
*.swp *.swp
*.DS_Store
__pycache__ __pycache__
.sconf_temp .sconf_temp
.sconsign.dblite .sconsign.dblite

View File

@ -1,25 +1,32 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# David Bekaert # David Bekaert
import os import os
import glob import glob
import argparse import argparse
from uncompressFile import uncompressfile
import shutil import shutil
import tarfile
import zipfile
from uncompressFile import uncompressfile
def createParser(): def createParser():
''' '''
Create command line parser. Create command line parser.
''' '''
parser = argparse.ArgumentParser(description='Prepare ALOS raw processing (unzip/untar files, organize in date folders, generate script to unpack into isce formats).') parser = argparse.ArgumentParser(description='Prepare ALOS raw processing (unzip/untar files, '
parser.add_argument('-i', '--input', dest='input', type=str, required=True, 'organize in date folders, generate script to unpack into isce formats).')
parser.add_argument('-i', '--input', dest='inputDir', type=str, required=True,
help='directory with the raw data') help='directory with the raw data')
parser.add_argument('-rmfile', '--rmfile', dest='rmfile',action='store_true', default=False, parser.add_argument('-rmfile', '--rmfile', dest='rmfile',action='store_true', default=False,
help='Optional: remove zip/tar/compressed files after unpacking into date structure (default is to keep in archive fo lder)') help='Optional: remove zip/tar/compressed files after unpacking into date structure '
parser.add_argument('-o', '--output', dest='output', type=str, required=False, '(default is to keep in archive fo lder)')
parser.add_argument('-o', '--output', dest='outputDir', type=str, required=False,
help='output directory where data needs to be unpacked into isce format (for script generation).') help='output directory where data needs to be unpacked into isce format (for script generation).')
parser.add_argument('-t', '--text_cmd', dest='text_cmd', type=str, default='source ~/.bash_profile;' parser.add_argument('-t', '--text_cmd', dest='text_cmd', type=str, default='source ~/.bash_profile;',
, help='text command to be added to the beginning of each line of the run files. Default: source ~/.bash_profile;') help='text command to be added to the beginning of each line of the run files. Default: source ~/.bash_profile;')
return parser return parser
@ -29,7 +36,15 @@ def cmdLineParse(iargs=None):
''' '''
parser = createParser() parser = createParser()
return parser.parse_args(args = iargs) inps = parser.parse_args(args = iargs)
# parsing required inputs
inps.inputDir = os.path.abspath(inps.inputDir)
# parsing optional inputs
if inps.outputDir:
inps.outputDir = os.path.abspath(inps.outputDir)
return inps
def get_Date(ALOSfolder): def get_Date(ALOSfolder):
@ -56,46 +71,60 @@ def get_Date(ALOSfolder):
acquisitionDate = 'FAIL' acquisitionDate = 'FAIL'
return successflag, acquisitionDate return successflag, acquisitionDate
def get_ALOS_ALP_name(infile):
    """Get the ALOS scene name (e.g. ALPSRP075780620) of a compressed file.

    Two cases are handled:

    1. The archive itself is named "ALP*": the scene name is the leading
       part of the file name, before the first "." or "-".
    2. Any other archive name: the archive is opened and the scene name is
       read from the name of a member file containing "LED", e.g.
       "LED-ALPSRP075780620-H1.0__A" gives "ALPSRP075780620".

    Parameters: infile - str, path to the .zip / .tar / .gz file
    Returns:    outname - str, scene name starting with "ALP"
    Raises:     ValueError if the file extension is not recognized, or if
                no "LED" member carrying an "ALP" name is found in the archive.
    """
    fbase = os.path.basename(infile)

    if fbase.startswith("ALP"):
        # Drop the extension(s) before cutting at "-": otherwise a name
        # without a dash (ALPSRP075780620.zip) would be returned unchanged,
        # and the output folder would collide with the archive itself.
        return fbase.split(".")[0].split("-")[0]

    fext = os.path.splitext(infile)[1]
    if fext in ['.tar', '.gz']:
        # mode 'r' lets tarfile detect the compression transparently
        with tarfile.open(infile, 'r') as tar:
            file_list = tar.getnames()
    elif fext in ['.zip']:
        with zipfile.ZipFile(infile, 'r') as z:
            file_list = z.namelist()
    else:
        raise ValueError('unrecognized file extension: {}'.format(fext))

    # Look at the member file name only, so that a parent folder containing
    # "LED" in its name cannot be mistaken for the leader file.
    for member in file_list:
        member_base = os.path.basename(member)
        if 'LED' not in member_base:
            continue
        for part in member_base.split("-"):
            if 'ALP' in part:
                return part

    raise ValueError('no LED file with an ALP* name found in: {}'.format(infile))
def main(iargs=None): def main(iargs=None):
''' '''
The main driver. The main driver.
''' '''
inps = cmdLineParse(iargs) inps = cmdLineParse(iargs)
# parsing required inputs
inputDir = os.path.abspath(inps.input)
# parsing optional inputs
if inps.output:
outputDir = os.path.abspath(inps.output)
else:
outputDir = None
rmfile = inps.rmfile
# filename of the runfile # filename of the runfile
run_unPack = 'run_unPackALOS' run_unPack = 'run_unPackALOS'
# loop over the different folder, ALOS zip/tar files and unzip them, make the names consistent # loop over the different folder, ALOS zip/tar files and unzip them, make the names consistent
ALOS_extensions = (os.path.join(inputDir, 'ALP*.zip'),os.path.join(inputDir, 'ALP*.tar'),os.path.join(inputDir, 'ALP*.gz')) ALOS_extensions = (os.path.join(inps.inputDir, '*.zip'),
os.path.join(inps.inputDir, '*.tar'),
os.path.join(inps.inputDir, '*.gz'))
for ALOS_extension in ALOS_extensions: for ALOS_extension in ALOS_extensions:
# loop over zip/tar files
ALOS_filesfolders = glob.glob(ALOS_extension) ALOS_filesfolders = glob.glob(ALOS_extension)
for ALOS_infilefolder in ALOS_filesfolders: for ALOS_infilefolder in ALOS_filesfolders:
## the path to the folder/zip ## the path to the folder/zip
workdir = os.path.dirname(ALOS_infilefolder) workdir = os.path.dirname(ALOS_infilefolder)
## get the output name folder without any extensions ## get the output name folder without any extensions
temp = os.path.basename(ALOS_infilefolder) ALOS_outfolder = get_ALOS_ALP_name(ALOS_infilefolder)
# trim the extensions and keep only very first part
parts = temp.split(".")
parts = parts[0].split('-')
ALOS_outfolder = parts[0]
# add the path back in # add the path back in
ALOS_outfolder = os.path.join(workdir,ALOS_outfolder) ALOS_outfolder = os.path.join(workdir, ALOS_outfolder)
# loop over two cases (either file or folder): # loop over two cases (either file or folder):
### this is a file, try to unzip/untar it ### this is a file, try to unzip/untar it
if os.path.isfile(ALOS_infilefolder): if os.path.isfile(ALOS_infilefolder):
# unzip the file in the outfolder # unzip the file in the outfolder
successflag_unzip = uncompressfile(ALOS_infilefolder,ALOS_outfolder) successflag_unzip = uncompressfile(ALOS_infilefolder, ALOS_outfolder)
# put failed files in a seperate directory # put failed files in a seperate directory
if not successflag_unzip: if not successflag_unzip:
@ -104,7 +133,7 @@ def main(iargs=None):
os.rename(ALOS_infilefolder,os.path.join(workdir,'FAILED_FILES','.')) os.rename(ALOS_infilefolder,os.path.join(workdir,'FAILED_FILES','.'))
else: else:
# check if file needs to be removed or put in archive folder # check if file needs to be removed or put in archive folder
if rmfile: if inps.rmfile:
os.remove(ALOS_infilefolder) os.remove(ALOS_infilefolder)
print('Deleting: ' + ALOS_infilefolder) print('Deleting: ' + ALOS_infilefolder)
else: else:
@ -116,9 +145,10 @@ def main(iargs=None):
# loop over the different ALOS folders and make sure the folder names are consistent. # loop over the different ALOS folders and make sure the folder names are consistent.
# this step is not needed unless the user has manually unzipped data before. # this step is not needed unless the user has manually unzipped data before.
ALOS_folders = glob.glob(os.path.join(inputDir, 'ALP*')) ALOS_folders = glob.glob(os.path.join(inps.inputDir, 'ALP*'))
for ALOS_folder in ALOS_folders: for ALOS_folder in ALOS_folders:
# in case the user has already unzipped some files, make sure they are unzipped similar like the uncompressfile code # in case the user has already unzipped some files
# make sure they are unzipped similar like the uncompressfile code
temp = os.path.basename(ALOS_folder) temp = os.path.basename(ALOS_folder)
parts = temp.split(".") parts = temp.split(".")
parts = parts[0].split('-') parts = parts[0].split('-')
@ -134,7 +164,7 @@ def main(iargs=None):
# loop over the different ALOS folders and organize in date folders # loop over the different ALOS folders and organize in date folders
ALOS_folders = glob.glob(os.path.join(inputDir, 'ALP*')) ALOS_folders = glob.glob(os.path.join(inps.inputDir, 'ALP*'))
for ALOS_folder in ALOS_folders: for ALOS_folder in ALOS_folders:
# get the date # get the date
successflag, imgDate = get_Date(ALOS_folder) successflag, imgDate = get_Date(ALOS_folder)
@ -160,14 +190,14 @@ def main(iargs=None):
# now generate the unpacking script for all the date dirs # now generate the unpacking script for all the date dirs
dateDirs = glob.glob(os.path.join(inputDir,'2*')) dateDirs = glob.glob(os.path.join(inps.inputDir,'2*'))
if outputDir is not None: if inps.outputDir is not None:
f = open(run_unPack,'w') f = open(run_unPack,'w')
for dataDir in dateDirs: for dataDir in dateDirs:
AlosFiles = glob.glob(os.path.join(dataDir, 'ALP*')) AlosFiles = glob.glob(os.path.join(dataDir, 'ALP*'))
if len(AlosFiles)>0: if len(AlosFiles)>0:
acquisitionDate = os.path.basename(dataDir) acquisitionDate = os.path.basename(dataDir)
slcDir = os.path.join(outputDir, acquisitionDate) slcDir = os.path.join(inps.outputDir, acquisitionDate)
if not os.path.exists(slcDir): if not os.path.exists(slcDir):
os.makedirs(slcDir) os.makedirs(slcDir)
cmd = 'unpackFrame_ALOS_raw.py -i ' + os.path.abspath(dataDir) + ' -o ' + slcDir cmd = 'unpackFrame_ALOS_raw.py -i ' + os.path.abspath(dataDir) + ' -o ' + slcDir
@ -179,9 +209,9 @@ def main(iargs=None):
print (cmd) print (cmd)
f.write(inps.text_cmd + cmd+'\n') f.write(inps.text_cmd + cmd+'\n')
f.close() f.close()
return
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -2,12 +2,14 @@
# David Bekaert # David Bekaert
import zipfile
import os import os
import glob import glob
import argparse import argparse
import tarfile
import shutil import shutil
import tarfile
import zipfile
def createParser(): def createParser():
''' '''
@ -50,6 +52,8 @@ def main(iargs=None):
print('Done') print('Done')
elif completeFlag == False: elif completeFlag == False:
print('Failed') print('Failed')
return
def uncompressfile(inputFile,outputDir): def uncompressfile(inputFile,outputDir):
@ -105,7 +109,7 @@ def uncompressfile(inputFile,outputDir):
# Check if the data is unpacked in its own folder # Check if the data is unpacked in its own folder
folderfiles = glob.glob(os.path.join(outputDir,'*')) folderfiles = glob.glob(os.path.join(outputDir,'*'))
if len(folderfiles)==1: while len(folderfiles)==1:
# get the sub-folder name only # get the sub-folder name only
tempdir = os.path.basename(folderfiles[0]) tempdir = os.path.basename(folderfiles[0])
if os.path.isdir(folderfiles[0]): if os.path.isdir(folderfiles[0]):
@ -114,7 +118,9 @@ def uncompressfile(inputFile,outputDir):
os.rename(folderfiles[0],tempdir2) os.rename(folderfiles[0],tempdir2)
os.rmdir(outputDir) os.rmdir(outputDir)
os.rename(tempdir2,outputDir) os.rename(tempdir2,outputDir)
folderfiles = glob.glob(os.path.join(outputDir,'*'))
return completeFlag return completeFlag
elif extension == '.tar' or extension == '.gz': elif extension == '.tar' or extension == '.gz':
TAR = tarfile.open(inputFile) TAR = tarfile.open(inputFile)
@ -124,17 +130,18 @@ def uncompressfile(inputFile,outputDir):
TAR.close() TAR.close()
completeFlag = True completeFlag = True
# Check if the data is unpacked in its own folder # Check if the data is unpacked in its own folder or its sub-folders
folderfiles = glob.glob(os.path.join(outputDir,'*')) folderfiles = glob.glob(os.path.join(outputDir,'*'))
if len(folderfiles)==1: while len(folderfiles) == 1:
# get the sub-folder name only # get the sub-folder name only
tempdir = os.path.basename(folderfiles[0]) tempdir = os.path.basename(folderfiles[0])
if os.path.isdir(folderfiles[0]): if os.path.isdir(folderfiles[0]):
# it seems there is a subfolder, will copy the content in the parent # it seems there is a subfolder, will copy the content in the parent
tempdir2=os.path.join(workdir,tempdir + '.temp') tempdir2 = os.path.join(workdir, tempdir + '.temp')
os.rename(folderfiles[0],tempdir2) os.rename(folderfiles[0],tempdir2)
os.rmdir(outputDir) os.rmdir(outputDir)
os.rename(tempdir2,outputDir) os.rename(tempdir2,outputDir)
folderfiles = glob.glob(os.path.join(outputDir,'*'))
return completeFlag return completeFlag
except: except:
print('Tar file seems to be corrupted, abord...') print('Tar file seems to be corrupted, abord...')