fix url change

LT1AB
Sara Mirzaee 2021-03-09 13:43:13 -06:00
parent e40f70593f
commit 1a86bca5c4
2 changed files with 48 additions and 105 deletions

View File

@ -8,15 +8,15 @@ import argparse
import datetime
from html.parser import HTMLParser
# NOTE(review): this span looks like a rendered diff hunk in which each replaced
# line is immediately followed by its replacement; confirm against the real file
# which of the duplicated assignments is actually present.
# Base URL(s) that orbit listing / download paths are built from.
server = 'https://qc.sentinel1.eo.esa.int/'
server2 = 'http://aux.sentinel1.eo.esa.int/'
server = 'http://aux.sentinel1.eo.esa.int/'
# Pairs of (orbit type label, path segment appended to the server URL).
orbitMap = [('precise','aux_poeorb'),
('restituted','aux_resorb')]
orbitMap = [('precise', 'POEORB/'),
('restituted', 'RESORB/')]
# strptime format used when parsing timestamp fields of a SAFE name.
datefmt = "%Y%m%dT%H%M%S"
# strftime format used for the date bounds placed in the query string.
queryfmt = "%Y-%m-%d"
# strftime format producing a "year/month/day/" path component.
queryfmt2= "%Y/%m/%d/"
queryfmt2 = "%Y/%m/%d/"
def cmdLineParse():
'''
@ -25,9 +25,9 @@ def cmdLineParse():
parser = argparse.ArgumentParser(description='Fetch orbits corresponding to given SAFE package')
parser.add_argument('-i', '--input', dest='input', type=str, required=True,
help='Path to SAFE package of interest')
help='Path to SAFE package of interest')
parser.add_argument('-o', '--output', dest='outdir', type=str, default='.',
help='Path to output directory')
help='Path to output directory')
return parser.parse_args()
@ -43,9 +43,9 @@ def FileToTimeStamp(safename):
try:
tstamp = datetime.datetime.strptime(fields[-4], datefmt)
sstamp = datetime.datetime.strptime(fields[-5], datefmt)
except:
p = re.compile(r'(?<=_)\d{8}')
dt2 = p.search(safename).group()
except:
p = re.compile(r'(?<=_)\d{8}')
dt2 = p.search(safename).group()
tstamp = datetime.datetime.strptime(dt2, '%Y%m%d')
satName = fields[0]
@ -55,47 +55,37 @@ def FileToTimeStamp(safename):
# HTMLParser subclass that scans a listing page and records, in self.fileList,
# the text of links whose text contains the satellite name.
# NOTE(review): this span appears to be a rendered diff hunk - replaced lines
# and their replacements sit back to back (e.g. the two __init__ signatures)
# and indentation has been stripped; verify nesting against the real file.
class MyHTMLParser(HTMLParser):
def __init__(self, satName):
def __init__(self, satName, url):
HTMLParser.__init__(self)
# Collected results; see handle_data for the element shape.
self.fileList = []
self.pages = 0
# State flags toggled by the tag callbacks below.
self.in_td = False
self.in_a = False
self.in_ul = False
self.in_table = False
# URL paired with each collected name; overwritten when an absolute href is seen.
self._url = url
self.satName = satName
# Callback invoked by HTMLParser for every opening tag.
def handle_starttag(self, tag, attrs):
if tag == 'td':
self.in_td = True
elif tag == 'a' and self.in_td:
elif tag == 'a':
self.in_a = True
elif tag == 'ul':
for k,v in attrs:
if k == 'class' and v.startswith('pagination'):
self.in_ul = True
elif tag == 'li' and self.in_ul:
self.pages += 1
# Only hrefs that start with "http" replace the stored URL; other hrefs
# leave self._url unchanged.
for name, val in attrs:
if name == "href":
if val.startswith("http"):
self._url = val.strip()
# Callback invoked by HTMLParser for text between tags.
def handle_data(self,data):
def handle_data(self, data):
if self.in_td and self.in_a:
#if 'S1A_OPER' in data:
#if 'S1B_OPER' in data:
# NOTE(review): the bare name satName is neither a parameter nor an
# attribute of this method; the self.satName variant below reads the value
# stored by __init__.
if satName in data:
self.fileList.append(data.strip())
if self.satName in data:
# Entries appended here are (url, stripped link text) tuples.
self.fileList.append((self._url, data.strip()))
def handle_endtag(self, tag):
# NOTE(review): html.parser.HTMLParser dispatches closing tags to
# handle_endtag; it has no handle_tag hook, so a method with this name is not
# called by feed() and the flag resets below would never run - confirm intent.
def handle_tag(self, tag):
if tag == 'td':
self.in_td = False
self.in_a = False
elif tag == 'a' and self.in_td:
elif tag == 'a':
self.in_a = False
elif tag == 'ul' and self.in_ul:
self.in_ul = False
elif tag == 'html':
if self.pages == 0:
self.pages = 1
else:
# decrement page back and page forward list items
self.pages -= 2
self._url = None
def download_file(url, outdir='.', session=None):
@ -117,7 +107,7 @@ def download_file(url, outdir='.', session=None):
success = False
if success:
with open(path,'wb') as f:
with open(path, 'wb') as f:
for chunk in request.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
@ -149,82 +139,35 @@ if __name__ == '__main__':
fileTS, satName, fileTSStart = FileToTimeStamp(inps.input)
print('Reference time: ', fileTS)
print('Satellite name: ', satName)
match = None
session = requests.Session()
for spec in orbitMap:
oType = spec[0]
if oType == 'precise':
delta =datetime.timedelta(days=2)
elif oType == 'restituted':
delta = datetime.timedelta(days=1)
url = server + spec[1] + str(fileTS.year).zfill(2) + '/' + str(fileTS.month).zfill(2) + \
'/' + str(fileTS.day).zfill(2) + '/'
timebef = (fileTS - delta).strftime(queryfmt)
timeaft = (fileTS + delta).strftime(queryfmt)
url = server + spec[1]
query = (url +
'/?validity_start={0}..{1}&sentinel1__mission={2}'
.format(timebef, timeaft,satName))
print(query)
success = False
match = None
try:
print('Querying for {0} orbits'.format(oType))
r = session.get(query, verify=False)
r = session.get(url, verify=False)
r.raise_for_status()
parser = MyHTMLParser(satName)
parser = MyHTMLParser(satName, url)
parser.feed(r.text)
print("Found {} pages".format(parser.pages))
# get results from first page
results = parser.fileList
# page through and get more results
for page in range(2, parser.pages + 1):
page_parser = MyHTMLParser(satName)
page_query = "{}&page={}".format(query, page)
print(page_query)
r = session.get(page_query, verify=False)
r.raise_for_status()
page_parser.feed(r.text)
results.extend(page_parser.fileList)
# run through all results and pull the orbit files
if results:
for result in results:
tbef, taft, mission = fileToRange(os.path.basename(result))
if (tbef <= fileTSStart) and (taft >= fileTS):
datestr2 = FileToTimeStamp(result)[0].strftime(queryfmt2)
match = (server2 + spec[1].replace('aux_', '').upper() +
'/' +datestr2+ result + '.EOF')
break
if match is not None:
success = True
except Exception as e:
print('Exception - something went wrong with the web scraper:')
print('Exception: {}'.format(e))
print('Continuing process')
for resulturl, result in parser.fileList:
match = os.path.join(resulturl, result)
if match is not None:
success = True
except:
pass
if match is None:
print('Failed to find {0} orbits for Time {1}'.format(oType, fileTS))
if success:
break
if match:
res = download_file(match, inps.outdir, session=session)
if res is False:
print('Failed to download URL: ', match)
session.close()
if match is not None:
res = download_file(match, inps.outdir, session)
if res is False:
print('Failed to download URL: ', match)
else:
print('Failed to find {1} orbits for tref {0}'.format(fileTS, satName))

View File

@ -24,7 +24,7 @@ All file paths in the input files should either be absolute paths or relative to
For more details on what these corrections are and where to get the aux files, see: https://sentinel.esa.int/documents/247904/1653440/Sentinel-1-IPF_EAP_Phase_correction
Aux data can be accessed here:
https://qc.sentinel1.eo.esa.int/aux_cal/?instrument_configuration_id=3
https://aux.sentinel1.eo.esa.int/AUX_CAL/
Note 3: Precise orbits
@ -33,10 +33,10 @@ All file paths in the input files should either be absolute paths or relative to
Use of precise / restituted orbits is highly recommended for precise processing of Sentinel-1A interferograms. You can dump all the orbits in a common folder and ISCE will automatically identify the right orbit file to use with your data.
Precise orbit data can be accessed here and are typically available 3 weeks after the SLCs are available:
https://qc.sentinel1.eo.esa.int/aux_poeorb/
https://aux.sentinel1.eo.esa.int/POEORB/
Restituted orbits can be accessed here and are available at the same time as the SLCs:
https://qc.sentinel1.eo.esa.int/aux_resorb/
https://aux.sentinel1.eo.esa.int/RESORB/
Note 3: Multiple slices