Extending Many Variables To Cover The Same Time Domain ¶

In this tutorial we create two variables over two different but overlapping time domains. We then grow both variables so that they span the same (union) time domain, filling the added time steps with missing data.

The CDAT software was developed by LLNL. This tutorial was written by Charles Doutriaux. This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344.

Download the Jupyter Notebook

import cdms2
import numpy
import MV2
import cdtime
import cdutil

def createTimeAxis(start, end):
    """Create a monthly time axis going from a start date to an end date.

    An integer ``start`` is converted with ``cdtime.comptime(start)`` and an
    integer ``end`` with ``cdtime.comptime(end, 12, 31)``. Any other value is
    used as given; ``end`` must then provide ``torelative`` (presumably a
    cdtime component time).

    Returns a cdms2 axis with id "time", units "months since <start>",
    values ``numpy.arange(n)`` and monthly bounds set through cdutil.
    """
    first = cdtime.comptime(start) if isinstance(start, int) else start
    last = cdtime.comptime(end, 12, 31) if isinstance(end, int) else end

    # Figure out how many months there are: the offset of the last date
    # relative to the first one, plus one to count the first month itself.
    month_units = "months since {}".format(first)
    count = last.torelative(month_units).value + 1

    # Build the axis and flag it as the time dimension.
    axis = cdms2.createAxis(numpy.arange(count))
    axis.id = "time"
    axis.units = month_units
    axis.designateTime()
    cdutil.setTimeBoundsMonthly(axis)
    return axis

def createData(start, end, shape=()):
    """Create random monthly data going from ``start`` to ``end``.

    ``start`` and ``end`` are forwarded to ``createTimeAxis``. ``shape`` is a
    tuple of extra dimension lengths appended after the time dimension.

    Returns an MV2 array named "data" filled with ``numpy.random.randn``
    samples, whose first axis is the generated time axis.
    """
    time_axis = createTimeAxis(start, end)
    # Time is the leading dimension; `shape` supplies any trailing ones.
    full_shape = (len(time_axis),) + shape
    values = numpy.random.randn(*full_shape)
    variable = MV2.array(values)
    variable.setAxis(0, time_axis)
    variable.id = "data"
    return variable

# Two sample variables whose time ranges overlap but do not coincide.
data1, data2 = createData(1989, 2010), createData(2000, 2017)

Function Create "Grown" Data ¶

Back to Top

def missingMonths(time, start, end):
    """Given time values and a start and end date, return how many months are missing before and after them.

    ``time`` is an indexable sequence whose first and last elements provide
    ``cmp`` and ``torelative`` (``grow`` passes the component-time list of a
    time axis). ``start`` and ``end`` are the dates to compare against;
    ``end`` must provide ``torelative`` as well.

    Returns a tuple ``(before, after)`` of integers.
    """
    first, last = time[0], time[-1]

    # Months missing before the first time value: none when it is already
    # at or before `start`.
    if first.cmp(start) > 0:
        before = first.torelative("months since {}".format(start)).value
    else:
        before = 0

    # Months missing after the last time value: none when it is already at
    # or after `end`.
    if last.cmp(end) < 0:
        after = end.torelative("months since {}".format(last)).value
    else:
        after = 0

    return int(before), int(after)

def grow(data, start, end):
    """Given an array and a start and end date, grow the array to fill the full time range.

    The data is reordered so that time is the first dimension, copied into a
    fully masked array long enough to cover ``start`` through ``end``, and
    given a new time axis with units "months since <start>". The remaining
    axes are carried over from ``data`` and the result is returned in the
    original axis order with the original ``id``.

    ``data`` must have a time axis; ``start`` and ``end`` are the dates handed
    to ``missingMonths`` (``growDatasets`` passes component times).
    """
    # Remember the caller's axis order, then work with time leading.
    order = data.getOrder(ids=True)
    data = data(order=('t...'))
    tc = data.getTime().asComponentTime()
    b, e = missingMonths(tc, start, end)

    # Prepare the new data: same shape, with the time dimension extended by
    # the months missing before (b) and after (e).
    n_times = data.shape[0]
    sh = list(data.shape)
    sh[0] = n_times + b + e
    new = MV2.ones(sh)
    new = MV2.masked_greater(new, 0.)  # mask everywhere

    # An explicit stop index avoids special-casing e == 0, where the slice
    # b:-e would be empty.
    new[b:b + n_times] = data[:]

    new_time = cdms2.createAxis(numpy.arange(sh[0]))
    new_time.units = "months since {}".format(start)
    new_time.id = "time"
    new_time.designateTime()
    cdutil.setAxisTimeBoundsMonthly(new_time)
    new.setAxis(0, new_time)

    # Set the old (non-time) axes. The loop variable must be the one passed
    # to setAxis: the previous code referenced an undefined name here, which
    # raised NameError for any data with more than one dimension.
    for i, axis in enumerate(data.getAxisList()[1:], 1):
        new.setAxis(i, axis)

    new.id = data.id
    return new(order=order)
    
        
def growDatasets(*arrays):
    """Given N cdms2 transient variables, grow them all to start and end at the same time, filling the rest with missing values.

    This is a generator: it yields one grown variable per input, in input
    order, and does no work until it is iterated.
    """
    earliest = None
    latest = None

    # First pass: find the earliest start and the latest end over all inputs.
    for variable in arrays:
        stamps = variable.getTime().asComponentTime()
        first, last = stamps[0], stamps[-1]
        if earliest is None or first.cmp(earliest) <= 0:
            earliest = first
        if latest is None or last.cmp(latest) >= 0:
            latest = last

    # Second pass: extend every input to that common range.
    yield from (grow(variable, earliest, latest) for variable in arrays)

Use the function¶

Back to Top

data1, data2 = growDatasets(data1, data2)

print(data1.shape, data1[0], data1[-1], data2[0], data2[-1])

(348,) 0.13940522192757862 -- -- -0.7736267051786273

/software/anaconda53/envs/cdms2/lib/python3.7/site-packages/numpy/ma/core.py:3174: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  dout = self.data[indx]
/software/anaconda53/envs/cdms2/lib/python3.7/site-packages/numpy/ma/core.py:3206: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  mout = _mask[indx]

Extending Many Variables To Cover The Same Time Domain ¶

Table of Contents

Creating the Variables ¶

Function Create "Grown" Data ¶

Use the function¶

Extending Many Variables To Cover The Same Time Domain ¶

Table of Contents

Creating the Variables¶

Function Create "Grown" Data¶

Use the function¶

Creating the Variables ¶

Function Create "Grown" Data ¶