fix url change
parent e40f70593f
commit 1a86bca5c4
@@ -8,15 +8,15 @@ import argparse
 import datetime
 from html.parser import HTMLParser
 
-server = 'https://qc.sentinel1.eo.esa.int/'
-server2 = 'http://aux.sentinel1.eo.esa.int/'
+server = 'http://aux.sentinel1.eo.esa.int/'
 
-orbitMap = [('precise','aux_poeorb'),
-            ('restituted','aux_resorb')]
+orbitMap = [('precise', 'POEORB/'),
+            ('restituted', 'RESORB/')]
 
 datefmt = "%Y%m%dT%H%M%S"
 queryfmt = "%Y-%m-%d"
-queryfmt2= "%Y/%m/%d/"
+queryfmt2 = "%Y/%m/%d/"
 
 
 def cmdLineParse():
     '''
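This first hunk replaces the qc.sentinel1.eo.esa.int query URL with the date-organized archive on aux.sentinel1.eo.esa.int, so `orbitMap` now carries directory suffixes (`POEORB/`, `RESORB/`) rather than query endpoints. A minimal sketch of how these constants compose into a listing URL in the patched script; the acquisition timestamp below is made up for illustration:

import datetime

server = 'http://aux.sentinel1.eo.esa.int/'
orbitMap = [('precise', 'POEORB/'),
            ('restituted', 'RESORB/')]

# Hypothetical acquisition start time, standing in for fileTS in the script.
fileTS = datetime.datetime(2021, 2, 27, 5, 30, 21)

for oType, suffix in orbitMap:
    # Mirrors the URL construction added to the main loop further down.
    url = (server + suffix + str(fileTS.year).zfill(2) + '/' +
           str(fileTS.month).zfill(2) + '/' + str(fileTS.day).zfill(2) + '/')
    print(oType, url)
    # precise http://aux.sentinel1.eo.esa.int/POEORB/2021/02/27/
    # restituted http://aux.sentinel1.eo.esa.int/RESORB/2021/02/27/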
@@ -55,47 +55,37 @@ def FileToTimeStamp(safename):
 
 class MyHTMLParser(HTMLParser):
 
-    def __init__(self, satName):
+    def __init__(self, satName, url):
         HTMLParser.__init__(self)
         self.fileList = []
-        self.pages = 0
         self.in_td = False
         self.in_a = False
-        self.in_ul = False
+        self.in_table = False
+        self._url = url
         self.satName = satName
 
     def handle_starttag(self, tag, attrs):
         if tag == 'td':
             self.in_td = True
-        elif tag == 'a' and self.in_td:
+        elif tag == 'a':
             self.in_a = True
-        elif tag == 'ul':
-            for k,v in attrs:
-                if k == 'class' and v.startswith('pagination'):
-                    self.in_ul = True
-        elif tag == 'li' and self.in_ul:
-            self.pages += 1
+            for name, val in attrs:
+                if name == "href":
+                    if val.startswith("http"):
+                        self._url = val.strip()
 
-    def handle_data(self,data):
+    def handle_data(self, data):
         if self.in_td and self.in_a:
-            #if 'S1A_OPER' in data:
-            #if 'S1B_OPER' in data:
-            if satName in data:
-                self.fileList.append(data.strip())
+            if self.satName in data:
+                self.fileList.append((self._url, data.strip()))
 
-    def handle_endtag(self, tag):
+    def handle_tag(self, tag):
         if tag == 'td':
             self.in_td = False
             self.in_a = False
-        elif tag == 'a' and self.in_td:
+        elif tag == 'a':
             self.in_a = False
-        elif tag == 'ul' and self.in_ul:
-            self.in_ul = False
-        elif tag == 'html':
-            if self.pages == 0:
-                self.pages = 1
-            else:
-                # decrement page back and page forward list items
-                self.pages -= 2
+            self._url = None
 
 
 def download_file(url, outdir='.', session=None):
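The rewritten MyHTMLParser no longer counts pagination elements; it walks a directory-style listing and, for each anchor whose text contains the requested satellite name, records the most recent absolute href it has seen, falling back to the listing URL passed to the constructor. The stripped-down, self-contained sketch below only illustrates that approach: ListingParser and the embedded HTML fragment are stand-ins for illustration, not the script's own class or a real listing page, and the td/a tracking is omitted for brevity.

from html.parser import HTMLParser

class ListingParser(HTMLParser):
    """Condensed illustration of the new scraping approach."""
    def __init__(self, satName, url):
        HTMLParser.__init__(self)
        self.satName = satName
        self._url = url          # fallback: the listing URL itself
        self.fileList = []       # collected (href, filename) tuples

    def handle_starttag(self, tag, attrs):
        # Remember the last absolute href encountered on an anchor.
        if tag == 'a':
            for name, val in attrs:
                if name == 'href' and val.startswith('http'):
                    self._url = val.strip()

    def handle_data(self, data):
        # Keep any text node that mentions the requested satellite.
        if self.satName in data:
            self.fileList.append((self._url, data.strip()))

# Hypothetical fragment of a listing page; the script feeds r.text from session.get(url).
html = ('<table><tr><td><a href="http://aux.sentinel1.eo.esa.int/POEORB/2021/02/27/'
        'S1A_OPER_AUX_POEORB_EXAMPLE.EOF">S1A_OPER_AUX_POEORB_EXAMPLE.EOF</a></td></tr></table>')

parser = ListingParser('S1A', 'http://aux.sentinel1.eo.esa.int/POEORB/2021/02/27/')
parser.feed(html)
print(parser.fileList)
# [('http://aux.sentinel1.eo.esa.int/POEORB/2021/02/27/S1A_OPER_AUX_POEORB_EXAMPLE.EOF',
#   'S1A_OPER_AUX_POEORB_EXAMPLE.EOF')]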
@@ -117,7 +107,7 @@ def download_file(url, outdir='.', session=None):
         success = False
 
     if success:
-        with open(path,'wb') as f:
+        with open(path, 'wb') as f:
             for chunk in request.iter_content(chunk_size=1024):
                 if chunk:
                     f.write(chunk)
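For reference, download_file streams the response and writes it out in 1 KiB chunks; this hunk only fixes the spacing in the open() call. A standalone sketch of the same streaming pattern with requests (fetch is a hypothetical helper, not part of the script):

import os
import requests

def fetch(url, outdir='.'):
    # Stream the response instead of loading it into memory, then write it
    # out chunk by chunk, mirroring download_file in this script.
    path = os.path.join(outdir, os.path.basename(url))
    r = requests.get(url, stream=True, verify=False)
    r.raise_for_status()
    with open(path, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
    return path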
@@ -156,75 +146,28 @@ if __name__ == '__main__':
     for spec in orbitMap:
         oType = spec[0]
 
-        if oType == 'precise':
-            delta =datetime.timedelta(days=2)
-        elif oType == 'restituted':
-            delta = datetime.timedelta(days=1)
-
-        timebef = (fileTS - delta).strftime(queryfmt)
-        timeaft = (fileTS + delta).strftime(queryfmt)
-
-        url = server + spec[1]
-
-        query = (url +
-                 '/?validity_start={0}..{1}&sentinel1__mission={2}'
-                 .format(timebef, timeaft,satName))
-
-        print(query)
+        url = server + spec[1] + str(fileTS.year).zfill(2) + '/' + str(fileTS.month).zfill(2) + \
+            '/' + str(fileTS.day).zfill(2) + '/'
 
         success = False
         match = None
 
         try:
-            print('Querying for {0} orbits'.format(oType))
-            r = session.get(query, verify=False)
+            r = session.get(url, verify=False)
             r.raise_for_status()
-            parser = MyHTMLParser(satName)
+            parser = MyHTMLParser(satName, url)
             parser.feed(r.text)
-            print("Found {} pages".format(parser.pages))
-
-            # get results from first page
-            results = parser.fileList
-
-            # page through and get more results
-            for page in range(2, parser.pages + 1):
-                page_parser = MyHTMLParser(satName)
-                page_query = "{}&page={}".format(query, page)
-                print(page_query)
-                r = session.get(page_query, verify=False)
-                r.raise_for_status()
-
-                page_parser.feed(r.text)
-                results.extend(page_parser.fileList)
-
-            # run through all results and pull the orbit files
-            if results:
-                for result in results:
-                    tbef, taft, mission = fileToRange(os.path.basename(result))
-
-                    if (tbef <= fileTSStart) and (taft >= fileTS):
-                        datestr2 = FileToTimeStamp(result)[0].strftime(queryfmt2)
-                        match = (server2 + spec[1].replace('aux_', '').upper() +
-                                 '/' +datestr2+ result + '.EOF')
-                        break
+            for resulturl, result in parser.fileList:
+                match = os.path.join(resulturl, result)
 
             if match is not None:
                 success = True
-        except Exception as e:
-            print('Exception - something went wrong with the web scraper:')
-            print('Exception: {}'.format(e))
-            print('Continuing process')
+        except:
             pass
 
-        if match is None:
-            print('Failed to find {0} orbits for Time {1}'.format(oType, fileTS))
+        if match is not None:
+            res = download_file(match, inps.outdir, session)
 
-        if success:
-            break
 
-        if match:
-            res = download_file(match, inps.outdir, session=session)
 
             if res is False:
                 print('Failed to download URL: ', match)
-
-        session.close()
+        else:
+            print('Failed to find {1} orbits for tref {0}'.format(fileTS, satName))
 
 
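In the rewritten main loop, the parser returns (url, filename) pairs and the final download target is composed with os.path.join. When the listing only exposes relative hrefs, `_url` keeps the listing URL handed to the constructor, so the join falls back to listing URL plus filename. A quick runnable check of that composition; the orbit file name below is a made-up placeholder:

import os

listing = 'http://aux.sentinel1.eo.esa.int/POEORB/2021/02/27/'
fname = 'S1A_OPER_AUX_POEORB_EXAMPLE.EOF'  # placeholder file name

# Fallback case: no absolute href was captured, so the listing URL is reused.
print(os.path.join(listing, fname))
# http://aux.sentinel1.eo.esa.int/POEORB/2021/02/27/S1A_OPER_AUX_POEORB_EXAMPLE.EOF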
@@ -24,7 +24,7 @@ All file paths in the input files should either be absolute paths or relative to
 For more details on what these corrections are and where to get the aux files, see: https://sentinel.esa.int/documents/247904/1653440/Sentinel-1-IPF_EAP_Phase_correction
 
 Aux data can be accessed here:
-https://qc.sentinel1.eo.esa.int/aux_cal/?instrument_configuration_id=3
+https://aux.sentinel1.eo.esa.int/AUX_CAL/
 
 
 Note 3: Precise orbits
@@ -33,10 +33,10 @@ All file paths in the input files should either be absolute paths or relative to
 Use of precise / restituted orbits are highly recommend for precise processing of Sentinel-1A interferograms. You can dump all the orbits in a common folder and ISCE will automatically identify the right orbit file to use with your data.
 
 Precise orbit data can be accessed here and are typically available 3 weeks after the SLCs are available:
-https://qc.sentinel1.eo.esa.int/aux_poeorb/
+https://aux.sentinel1.eo.esa.int/POEORB/
 
 Restituted orbits can be accessed here and are available at the same time as the SLCs:
-https://qc.sentinel1.eo.esa.int/aux_resorb/
+https://aux.sentinel1.eo.esa.int/RESORB/
 
 
 Note 3: Multiple slices