Accessing Datasets under an Access Control List (ACL)¶

NASA Earthdata API Client 🌍¶

Note: Before we can use earthaccess we need an account with NASA Earthdata Login (EDL).

In [1]:

Copied!

from earthaccess import Auth, DataCollections, DataGranules, Store

auth = Auth()
from earthaccess import Auth, DataCollections, DataGranules, Store

auth = Auth()

Auth()¶

earthaccess's Auth class provides 3 different strategies to authenticate ourselves with NASA EDL.

netrc: Do we have a .netrc file with our EDL credentials? If so, we can use it with earthaccess. If we don't have one and want to create it, earthaccess allows users to type their credentials and persist them into a .netrc file.
environment: If we have our EDL credentials set as the following environment variables, earthaccess can use them:
- EARTHDATA_USERNAME
- EARTHDATA_PASSWORD
interactive: We will be asked for our EDL credentials with optional persistence to .netrc

To persist our credentials to a .netrc file we have to do the following:

auth.login(strategy="interactive", persist=True)

In this notebook we'll use the environment strategy followed by the netrc strategy. You can of course use the interactive strategy if you don't have a .netrc file.

In [2]:

Copied!





auth.login(strategy="environment")
# are we authenticated?
if not auth.authenticated:
    auth.login(strategy="netrc")
auth.login(strategy="environment")
# are we authenticated?
if not auth.authenticated:
    auth.login(strategy="netrc")

Authentication with Earthdata Login failed with:
{"error":"invalid_credentials","error_description":"Invalid user credentials"}
NoneType: None

---------------------------------------------------------------------------
LoginAttemptFailure                       Traceback (most recent call last)
Cell In[2], line 1
----> 1 auth.login(strategy="environment")
      2 # are we authenticated?
      3 if not auth.authenticated:

File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1362/earthaccess/auth.py:152, in Auth.login(self, strategy, persist, system)
    150     self._netrc()
    151 elif strategy == "environment":
--> 152     self._environment()
    154 return self

File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1362/earthaccess/auth.py:305, in Auth._environment(self)
    302     raise LoginStrategyUnavailable(msg)
    304 logger.debug("Using environment variables for EDL")
--> 305 return self._get_credentials(username, password, token)

File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1362/earthaccess/auth.py:324, in Auth._get_credentials(self, username, password, user_token)
    322     msg = f"Authentication with Earthdata Login failed with:\n{token_resp.text}"
    323     logger.exception(msg)
--> 324     raise LoginAttemptFailure(msg)
    326 logger.info("You're now authenticated with NASA Earthdata Login")
    328 token = token_resp.json()

LoginAttemptFailure: Authentication with Earthdata Login failed with:
{"error":"invalid_credentials","error_description":"Invalid user credentials"}

Querying for restricted datasets¶

The DataCollections client can query CMR for any collection (dataset) using all of CMR's query parameters and has built-in functions to extract useful information from the response.

auth.refresh_tokens()

If we belong to an early adopter group within NASA we can pass the Auth object to the other classes when we instantiate them.

# An anonymous query to CMR
Query = DataCollections().keyword('elevation')
# An authenticated query to CMR
Query = DataCollections(auth).keyword('elevation')

and it's the same with DataGranules

# An anonymous query to CMR
Query = DataGranules().keyword('elevation')
# An authenticated query to CMR
Query = DataGranules(auth).keyword('elevation')

Note: Some collections under an access control list are flagged by CMR and won't count when asking about results with hits().

In [3]:

Copied!





# The first step is to create a DataCollections query
Query = DataCollections()

# Use chain methods to customize our query
Query.short_name("ATL06").version("006")

print(f"Collections found: {Query.hits()}")

# filtering what UMM fields to print, to see the full record we omit the fields filters
# meta is always included as
collections = Query.fields(["ShortName", "Version"]).get(5)
# Inspect some results printing just the ShortName and Abstract
collections
# The first step is to create a DataCollections query
Query = DataCollections()

# Use chain methods to customize our query
Query.short_name("ATL06").version("006")

print(f"Collections found: {Query.hits()}")

# filtering what UMM fields to print, to see the full record we omit the fields filters
# meta is always included as
collections = Query.fields(["ShortName", "Version"]).get(5)
# Inspect some results printing just the ShortName and Abstract
collections

Collections found: 1

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/core/formatters.py:770, in PlainTextFormatter.__call__(self, obj)
    763 stream = StringIO()
    764 printer = pretty.RepresentationPrinter(stream, self.verbose,
    765     self.max_width, self.newline,
    766     max_seq_length=self.max_seq_length,
    767     singleton_pprinters=self.singleton_printers,
    768     type_pprinters=self.type_printers,
    769     deferred_pprinters=self.deferred_printers)
--> 770 printer.pretty(obj)
    771 printer.flush()
    772 return stream.getvalue()

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:386, in RepresentationPrinter.pretty(self, obj)
    383 for cls in _get_mro(obj_class):
    384     if cls in self.type_pprinters:
    385         # printer registered in self.type_pprinters
--> 386         return self.type_pprinters[cls](obj, self, cycle)
    387     else:
    388         # deferred printer
    389         printer = self._in_deferred_types(cls)

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:641, in _seq_pprinter_factory.<locals>.inner(obj, p, cycle)
    639         p.text(',')
    640         p.breakable()
--> 641     p.pretty(x)
    642 if len(obj) == 1 and isinstance(obj, tuple):
    643     # Special case for 1-item tuples.
    644     p.text(',')

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:411, in RepresentationPrinter.pretty(self, obj)
    400                         return meth(obj, self, cycle)
    401                 if (
    402                     cls is not object
    403                     # check if cls defines __repr__
   (...)    409                     and callable(_safe_getattr(cls, "__repr__", None))
    410                 ):
--> 411                     return _repr_pprint(obj, self, cycle)
    413     return _default_pprint(obj, self, cycle)
    414 finally:

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:786, in _repr_pprint(obj, p, cycle)
    784 """A pprint that just redirects to the normal repr function."""
    785 # Find newlines and replace them with p.break_()
--> 786 output = repr(obj)
    787 lines = output.splitlines()
    788 with p.group():

File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1362/earthaccess/results.py:315, in DataCollection.__repr__(self)
    314 def __repr__(self) -> str:
--> 315     return json.dumps(
    316         self.render_dict,
    317         sort_keys=False,
    318         indent=2,
    319         separators=(",", ": "),
    320     )

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/__init__.py:238, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    232 if cls is None:
    233     cls = JSONEncoder
    234 return cls(
    235     skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    236     check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    237     separators=separators, default=default, sort_keys=sort_keys,
--> 238     **kw).encode(obj)

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:202, in JSONEncoder.encode(self, o)
    200 chunks = self.iterencode(o, _one_shot=True)
    201 if not isinstance(chunks, (list, tuple)):
--> 202     chunks = list(chunks)
    203 return ''.join(chunks)

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:432, in _make_iterencode.<locals>._iterencode(o, _current_indent_level)
    430     yield from _iterencode_list(o, _current_indent_level)
    431 elif isinstance(o, dict):
--> 432     yield from _iterencode_dict(o, _current_indent_level)
    433 else:
    434     if markers is not None:

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:406, in _make_iterencode.<locals>._iterencode_dict(dct, _current_indent_level)
    404         else:
    405             chunks = _iterencode(value, _current_indent_level)
--> 406         yield from chunks
    407 if newline_indent is not None:
    408     _current_indent_level -= 1

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:439, in _make_iterencode.<locals>._iterencode(o, _current_indent_level)
    437         raise ValueError("Circular reference detected")
    438     markers[markerid] = o
--> 439 o = _default(o)
    440 yield from _iterencode(o, _current_indent_level)
    441 if markers is not None:

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:180, in JSONEncoder.default(self, o)
    161 def default(self, o):
    162     """Implement this method in a subclass such that it returns
    163     a serializable object for ``o``, or calls the base implementation
    164     (to raise a ``TypeError``).
   (...)    178 
    179     """
--> 180     raise TypeError(f'Object of type {o.__class__.__name__} '
    181                     f'is not JSON serializable')

TypeError: Object of type set is not JSON serializable

In [4]:

Copied!

if not auth.refresh_tokens():
    print("Something went wrong, we may need to regenerate our tokens manually")
if not auth.refresh_tokens():
    print("Something went wrong, we may need to regenerate our tokens manually")

Something went wrong, we may need to regenerate our tokens manually

/tmp/ipykernel_1622/2137380767.py:1: DeprecationWarning: No replacement, as tokens are now refreshed automatically.
  if not auth.refresh_tokens():

In [5]:

Copied!





Query = DataCollections(auth)

# Use chain methods to customize our query
Query.short_name("ATL06").version("006")

# This will say 1, even though we get 2 back.
print(f"Collections found: {Query.hits()}")

collections = Query.fields(["ShortName", "Version"]).get()
# Inspect some results printing just the ShortName and Abstract
collections
Query = DataCollections(auth)

# Use chain methods to customize our query
Query.short_name("ATL06").version("006")

# This will say 1, even though we get 2 back.
print(f"Collections found: {Query.hits()}")

collections = Query.fields(["ShortName", "Version"]).get()
# Inspect some results printing just the ShortName and Abstract
collections

Collections found: 1

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/core/formatters.py:770, in PlainTextFormatter.__call__(self, obj)
    763 stream = StringIO()
    764 printer = pretty.RepresentationPrinter(stream, self.verbose,
    765     self.max_width, self.newline,
    766     max_seq_length=self.max_seq_length,
    767     singleton_pprinters=self.singleton_printers,
    768     type_pprinters=self.type_printers,
    769     deferred_pprinters=self.deferred_printers)
--> 770 printer.pretty(obj)
    771 printer.flush()
    772 return stream.getvalue()

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:386, in RepresentationPrinter.pretty(self, obj)
    383 for cls in _get_mro(obj_class):
    384     if cls in self.type_pprinters:
    385         # printer registered in self.type_pprinters
--> 386         return self.type_pprinters[cls](obj, self, cycle)
    387     else:
    388         # deferred printer
    389         printer = self._in_deferred_types(cls)

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:641, in _seq_pprinter_factory.<locals>.inner(obj, p, cycle)
    639         p.text(',')
    640         p.breakable()
--> 641     p.pretty(x)
    642 if len(obj) == 1 and isinstance(obj, tuple):
    643     # Special case for 1-item tuples.
    644     p.text(',')

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:411, in RepresentationPrinter.pretty(self, obj)
    400                         return meth(obj, self, cycle)
    401                 if (
    402                     cls is not object
    403                     # check if cls defines __repr__
   (...)    409                     and callable(_safe_getattr(cls, "__repr__", None))
    410                 ):
--> 411                     return _repr_pprint(obj, self, cycle)
    413     return _default_pprint(obj, self, cycle)
    414 finally:

File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1362/lib/python3.12/site-packages/IPython/lib/pretty.py:786, in _repr_pprint(obj, p, cycle)
    784 """A pprint that just redirects to the normal repr function."""
    785 # Find newlines and replace them with p.break_()
--> 786 output = repr(obj)
    787 lines = output.splitlines()
    788 with p.group():

File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1362/earthaccess/results.py:315, in DataCollection.__repr__(self)
    314 def __repr__(self) -> str:
--> 315     return json.dumps(
    316         self.render_dict,
    317         sort_keys=False,
    318         indent=2,
    319         separators=(",", ": "),
    320     )

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/__init__.py:238, in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    232 if cls is None:
    233     cls = JSONEncoder
    234 return cls(
    235     skipkeys=skipkeys, ensure_ascii=ensure_ascii,
    236     check_circular=check_circular, allow_nan=allow_nan, indent=indent,
    237     separators=separators, default=default, sort_keys=sort_keys,
--> 238     **kw).encode(obj)

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:202, in JSONEncoder.encode(self, o)
    200 chunks = self.iterencode(o, _one_shot=True)
    201 if not isinstance(chunks, (list, tuple)):
--> 202     chunks = list(chunks)
    203 return ''.join(chunks)

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:432, in _make_iterencode.<locals>._iterencode(o, _current_indent_level)
    430     yield from _iterencode_list(o, _current_indent_level)
    431 elif isinstance(o, dict):
--> 432     yield from _iterencode_dict(o, _current_indent_level)
    433 else:
    434     if markers is not None:

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:406, in _make_iterencode.<locals>._iterencode_dict(dct, _current_indent_level)
    404         else:
    405             chunks = _iterencode(value, _current_indent_level)
--> 406         yield from chunks
    407 if newline_indent is not None:
    408     _current_indent_level -= 1

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:439, in _make_iterencode.<locals>._iterencode(o, _current_indent_level)
    437         raise ValueError("Circular reference detected")
    438     markers[markerid] = o
--> 439 o = _default(o)
    440 yield from _iterencode(o, _current_indent_level)
    441 if markers is not None:

File ~/.asdf/installs/python/3.12.12/lib/python3.12/json/encoder.py:180, in JSONEncoder.default(self, o)
    161 def default(self, o):
    162     """Implement this method in a subclass such that it returns
    163     a serializable object for ``o``, or calls the base implementation
    164     (to raise a ``TypeError``).
   (...)    178 
    179     """
--> 180     raise TypeError(f'Object of type {o.__class__.__name__} '
    181                     f'is not JSON serializable')

TypeError: Object of type set is not JSON serializable

Oh no! What!? Only 1 collection found even though we got 2 results back?!

Interpreting the results¶

The hits() method above will tell you the number of query hits, but only for publicly available data sets. In this case, because cloud-hosted ICESat-2 data are not yet publicly available, CMR will return “1” hit; if you filtered DataCollections by provider = NSIDC_CPRD you would get 0 hits. For now we need an alternative method of seeing how many cloud data sets are available at NSIDC. This is only temporary until cloud-hosted ICESat-2 data become publicly available. We can create a collections object (we’re going to want one of these soon anyhow) and print the len() of the collections object to see the true number of hits.

Note: Since we cannot rely on hits() we need to be aware that get() may get us too many metadata records depending on the dataset and how broad our query is.

In [6]:

Copied!





Query = (
    DataGranules(auth)
    .concept_id("C2153572614-NSIDC_CPRD")
    .bounding_box(-134.7, 58.9, -133.9, 59.2)
    .temporal("2020-03-01", "2020-03-30")
)

# Unfortunately the hits() methods will behave the same for granule queries
print(f"Granules found with hits(): {Query.hits()}")

cloud_granules = Query.get()

print(f"Actual number found: {len(cloud_granules)}")
Query = (
    DataGranules(auth)
    .concept_id("C2153572614-NSIDC_CPRD")
    .bounding_box(-134.7, 58.9, -133.9, 59.2)
    .temporal("2020-03-01", "2020-03-30")
)

# Unfortunately the hits() methods will behave the same for granule queries
print(f"Granules found with hits(): {Query.hits()}")

cloud_granules = Query.get()

print(f"Actual number found: {len(cloud_granules)}")

Granules found with hits(): 0
Actual number found: 0

In [7]:

Copied!

store = Store(auth)
files = store.get(cloud_granules, "./data/C2153572614-NSIDC_CPRD/")
store = Store(auth)
files = store.get(cloud_granules, "./data/C2153572614-NSIDC_CPRD/")

The current session is not authenticated with NASA

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[7], line 1
----> 1 store = Store(auth)
      2 files = store.get(cloud_granules, "./data/C2153572614-NSIDC_CPRD/")

File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1362/earthaccess/store.py:264, in Store.__init__(self, auth, pre_authorize)
    262     logger.warning("The current session is not authenticated with NASA")
    263     self.auth = None
--> 264 self.in_region = self._running_in_us_west_2()

File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1362/earthaccess/store.py:285, in Store._running_in_us_west_2(self)
    284 def _running_in_us_west_2(self) -> bool:
--> 285     session = self.auth.get_session()
    286     try:
    287         # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
    288         token_ = session.put(
    289             "http://169.254.169.254/latest/api/token",
    290             headers={"X-aws-ec2-metadata-token-ttl-seconds": "21600"},
    291             timeout=1,
    292         )

AttributeError: 'NoneType' object has no attribute 'get_session'

In [ ]: