Computing monthly averages from a large dataset poses challenges - but GES DISC has several tools to help make this as easy (and computationally efficient) as possible. I would recommend using the
OPeNDAP in the Cloud Service, which allows you to subset a dataset on a remote server, so you avoid having to download and open all the granules (files) in the collection (GLDAS_CLSM025_DA1_D_2.2).
The following Python code will resample GLDAS data from daily to monthly. You may want to modify the lat/lon bounds to home in on New Mexico.
Code: Select all
import xarray as xr
import earthaccess
import getpass
import requests
from pydap.net import create_session
import matplotlib.pyplot as plt
import geopandas
# Define latitude and longitude bounds for area of interest
lat_min, lat_max = 24.5, 49.4 # Latitude bounds
lon_min, lon_max = -124.7, -66.9 # Longitude bounds
# Search for GLDAS granules and obtain the corresponding Cloud OPeNDAP URLs
results = earthaccess.search_data(
    doi="10.5067/TXBMLX370XX8",  # This is the DOI for the GLDAS_CLSM025_DA1_D data collection
    temporal=('2023-01-01 00:00', '2023-01-31 23:00'),
    bounding_box=(lon_min, lat_min, lon_max, lat_max)
)
# Construct OPeNDAP URLs with constraint expressions for subsetting
opendap_urls = []
for item in results:
    for urls in item['umm']['RelatedUrls']:  # Iterate over the RelatedUrls in each search result
        if 'OPENDAP' in urls.get('Description', '').upper():  # Check if 'OPENDAP' is in the Description
            # Extract the OPeNDAP URL and switch the protocol prefix from https to dap4
            url = urls['URL'].replace('https', 'dap4')
            # Select groundwater storage, lat, lon, and time (these will be subset out of the larger file)
            # To view all variables, comment out these two lines
            ce = "?dap4.ce=/{}%3B/{}%3B/{}%3B/{}".format("GWS_tavg", "lat", "lon", "time")
            url = url + ce
            # Add URL to list
            opendap_urls.append(url)
# Create an authenticated session (this will pick up the Earthdata Login credentials in your .netrc file)
my_session = create_session()
# Load the dataset objects and metadata, but don't read the data values yet
# (You can remove the session parameter if you are only using a .netrc file to authenticate)
ds = xr.open_mfdataset(opendap_urls, engine="pydap", session=my_session)
ds_sub = ds.sel(lat=slice(lat_min, lat_max), lon=slice(lon_min, lon_max)) # subset to lat/lon bounds
monthly_groundwater = ds_sub['GWS_tavg'].resample(time='1M').mean() # resample to monthly
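Once the monthly average is defined, you may want to pull the values down and take a quick look. Here is a minimal follow-up sketch, assuming an approximate New Mexico bounding box (roughly 31.3 to 37.0 N and -109.05 to -103.0 E) and a placeholder output filename; adjust both to match your exact area of interest. It reuses the matplotlib import from the script above.
Code: Select all
# Approximate New Mexico bounding box (assumed values; adjust as needed)
nm_lat_min, nm_lat_max = 31.3, 37.0
nm_lon_min, nm_lon_max = -109.05, -103.0
nm_monthly = monthly_groundwater.sel(lat=slice(nm_lat_min, nm_lat_max),
                                     lon=slice(nm_lon_min, nm_lon_max))
# Trigger the remote reads and pull the subset values into memory
nm_jan = nm_monthly.isel(time=0).compute()
# Quick-look map of the monthly mean groundwater storage
nm_jan.plot()
plt.title('GLDAS CLSM Groundwater Storage, Jan 2023 monthly mean')
plt.show()
# Save the monthly average to a local file (placeholder filename)
nm_jan.to_netcdf('GLDAS_GWS_monthly_NM_202301.nc')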
Some other links that might be helpful: