Measuring the water level of a Theewaterskloof Dam in South Africa

Natebook showcases an example of Earth observation processing chain that determines water levels of any water body (dam, reservoir, lake, …) from satellite imagery. The entire processing chain is performed using the eo-learn package. The user simply needs to provide a polygon with water body’s nominal water extent.

%reload_ext autoreload
%autoreload 2
%matplotlib inline


# For manipulating geo-spatial vector dataset (polygons of nominal water extent)
import geopandas as gpd

# import matplotlib
import matplotlib.pyplot as plt

# The golden standard: numpy and matplotlib
import numpy as np

# Loading polygon of nominal water extent
import shapely.wkt

# Image manipulations
# Our water detector is going to be based on a simple threshold
# of Normalised Difference Water Index (NDWI) grayscale image
from skimage.filters import threshold_otsu

# sentinelhub-py package
from sentinelhub import CRS, BBox, DataCollection

# eo-learn core building blocks
from eolearn.core import EOTask, EOWorkflow, FeatureType, OutputTask, linearly_connect_tasks

# filtering of scenes
from eolearn.features import NormalizedDifferenceIndexTask, SimpleFilterTask

# burning the vectorised polygon to raster
from eolearn.geometry import VectorToRasterTask
from import SentinelHubInputTask

Water level extraction EOWorkflow

Our basic logic of the example workflow is:

  1. Download all available Sentinel-2 sattelite imagery of Theewaterskloof Dam from beginning of 2016 and today

    • We want to calculate NDWI and also have a true color visualization of the area. We need the following bands:

      • B02, B03, B04 for TRUE_COLOR for visualisations

      • B03, B08 for NDWI calculation

      • CLM (provided by Sentinel Hub) for cloud masking

  2. Clouds are very often obscuring the view of the ground. In order to correctly determine the water level of the dam all images with clouds need to be filtered out. We will use the cloud masks provided by Sentinel Hub to filter clouds without the need for time consuming processing locally.

  3. Apply adaptive thresholding to NDWI grayscale images

  4. Extract water level from a comparison of measured water extent with the nominal one

Each step in the above overview of the workflow is accomplished by adding an EOTask to the EOWorkflow

Load the Polygon of nominal water extent and define a BBox

The BBox defines an area of interest and will be used to create an EOPatch.

# The polygon of the dam is written in wkt format and WGS84 coordinate reference system
with open("theewaterskloof_dam_nominal.wkt", "r") as f:
    dam_wkt =

dam_nominal = shapely.wkt.loads(dam_wkt)

# inflate the BBOX
inflate_bbox = 0.1
minx, miny, maxx, maxy = dam_nominal.bounds

delx = maxx - minx
dely = maxy - miny
minx = minx - delx * inflate_bbox
maxx = maxx + delx * inflate_bbox
miny = miny - dely * inflate_bbox
maxy = maxy + dely * inflate_bbox

dam_bbox = BBox([minx, miny, maxx, maxy], crs=CRS.WGS84)

dam_bbox.geometry - dam_nominal

Step 1: Intialize (and implement workflow specific) EOTasks

Create an EOPatch and add all EO features (satellite imagery data)

download_task = SentinelHubInputTask(
    bands_feature=(FeatureType.DATA, "BANDS"),
    bands=["B02", "B03", "B04", "B08"],
    additional_data=[(FeatureType.MASK, "dataMask", "IS_DATA"), (FeatureType.MASK, "CLM")],

calculate_ndwi = NormalizedDifferenceIndexTask((FeatureType.DATA, "BANDS"), (FeatureType.DATA, "NDWI"), (1, 3))

Burn in the nominal water extent

The VectorToRaster task expects the vectorised dataset in geopandas dataframe.

dam_gdf = gpd.GeoDataFrame(crs=CRS.WGS84.pyproj_crs(), geometry=[dam_nominal])
add_nominal_water = VectorToRasterTask(
    raster_shape=(FeatureType.MASK, "IS_DATA"),

The cloud mask is already provided by Sentinel Hub, we use it to calulcate the valid data mask.

Define a VALID_DATA layer: pixel has to contain data and should be classified as clear sky by the cloud detector (CLM equals 0)

class AddValidDataMaskTask(EOTask):
    def execute(self, eopatch):
        is_data_mask = eopatch[FeatureType.MASK, "IS_DATA"].astype(bool)
        cloud_mask = ~eopatch[FeatureType.MASK, "CLM"].astype(bool)
        eopatch[FeatureType.MASK, "VALID_DATA"] = np.logical_and(is_data_mask, cloud_mask)
        return eopatch

add_valid_mask = AddValidDataMaskTask()

Calculate fraction of valid pixels per frame and store it as SCALAR feature

def calculate_coverage(array):
    return 1.0 - np.count_nonzero(array) / np.size(array)

class AddValidDataCoverageTask(EOTask):
    def execute(self, eopatch):
        valid_data = eopatch[FeatureType.MASK, "VALID_DATA"]
        time, height, width, channels = valid_data.shape

        coverage = np.apply_along_axis(calculate_coverage, 1, valid_data.reshape((time, height * width * channels)))

        eopatch[FeatureType.SCALAR, "COVERAGE"] = coverage[:, np.newaxis]
        return eopatch

add_coverage = AddValidDataCoverageTask()

Filter out too cloudy scenes. We filter out all observation which have cloud coverage of more than 5%.

cloud_coverage_threshold = 0.05

class ValidDataCoveragePredicate:
    def __init__(self, threshold):
        self.threshold = threshold

    def __call__(self, array):
        return calculate_coverage(array) < self.threshold

remove_cloudy_scenes = SimpleFilterTask(
    (FeatureType.MASK, "VALID_DATA"), ValidDataCoveragePredicate(cloud_coverage_threshold)

Apply Water Detection

class WaterDetectionTask(EOTask):
    def detect_water(ndwi):
        """Very simple water detector based on Otsu thresholding method of NDWI."""
        otsu_thr = 1.0
        if len(np.unique(ndwi)) > 1:
            ndwi[np.isnan(ndwi)] = -1
            otsu_thr = threshold_otsu(ndwi)

        return ndwi > otsu_thr

    def execute(self, eopatch):
        water_masks = np.asarray([self.detect_water(ndwi[..., 0]) for ndwi in["NDWI"]])

        # we're only interested in the water within the dam borders
        water_masks = water_masks[..., np.newaxis] * eopatch.mask_timeless["NOMINAL_WATER"]

        water_levels = np.asarray(
            [np.count_nonzero(mask) / np.count_nonzero(eopatch.mask_timeless["NOMINAL_WATER"]) for mask in water_masks]

        eopatch[FeatureType.MASK, "WATER_MASK"] = water_masks
        eopatch[FeatureType.SCALAR, "WATER_LEVEL"] = water_levels[..., np.newaxis]

        return eopatch

water_detection = WaterDetectionTask()

Step 2: Define the EOWorkflow

# To access the final data we use the OutputTask (alternatively we could save the eopatch)
output_task = OutputTask("final_eopatch")

workflow_nodes = linearly_connect_tasks(
workflow = EOWorkflow(workflow_nodes)

Step 3: Run the workflow

Process all Sentinel-2 acquisitions from beginning of 2016 and until beginning of June 2020.

time_interval = ["2017-01-01", "2020-06-01"]
# The download task requires additional arguments at execution. These are linked to the node the task is in.
download_node = workflow_nodes[0]

result = workflow.execute(
        download_node: {"bbox": dam_bbox, "time_interval": time_interval},

Print content of eopatch at the end of the workflow execution

patch = result.outputs["final_eopatch"]
    BANDS: numpy.ndarray(shape=(98, 632, 919, 4), dtype=float32)
    NDWI: numpy.ndarray(shape=(98, 632, 919, 1), dtype=float32)
    CLM: numpy.ndarray(shape=(98, 632, 919, 1), dtype=uint8)
    IS_DATA: numpy.ndarray(shape=(98, 632, 919, 1), dtype=bool)
    VALID_DATA: numpy.ndarray(shape=(98, 632, 919, 1), dtype=bool)
    WATER_MASK: numpy.ndarray(shape=(98, 632, 919, 1), dtype=uint8)
    COVERAGE: numpy.ndarray(shape=(98, 1), dtype=float64)
    WATER_LEVEL: numpy.ndarray(shape=(98, 1), dtype=float64)
    NOMINAL_WATER: numpy.ndarray(shape=(632, 919, 1), dtype=uint8)
    maxcc: 0.5
    size_x: 919
    size_y: 632
    time_difference: 1.0
    time_interval: ('2017-01-01T00:00:00', '2020-06-01T23:59:59')
  bbox=BBox(((19.10818927, -34.08851246), (19.30962163, -33.977424140000004)), crs=CRS('4326'))
  timestamps=[datetime.datetime(2017, 1, 1, 8, 45, 43), ..., datetime.datetime(2020, 5, 30, 8, 49, 51)], length=98

Plot results

from skimage.filters import sobel
from skimage.morphology import closing, disk
def plot_rgb_w_water(eopatch, idx):
    ratio = np.abs(eopatch.bbox.max_x - eopatch.bbox.min_x) / np.abs(eopatch.bbox.max_y - eopatch.bbox.min_y)
    fig, ax = plt.subplots(figsize=(ratio * 10, 10))

    ax.imshow(np.clip(2.5 *["BANDS"][..., [2, 1, 0]][idx], 0, 1))

    observed = closing(eopatch.mask["WATER_MASK"][idx, ..., 0], disk(1))
    nominal = sobel(eopatch.mask_timeless["NOMINAL_WATER"][..., 0])
    observed = sobel(observed)
    nominal =, nominal)
    observed =, observed)

plot_rgb_w_water(patch, 0)
plot_rgb_w_water(patch, -1)
def plot_water_levels(eopatch, max_coverage=1.0):
    fig, ax = plt.subplots(figsize=(20, 7))

    dates = np.asarray(eopatch.timestamps)
        dates[eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
        eopatch.scalar["WATER_LEVEL"][eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
        dates[eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
        eopatch.scalar["COVERAGE"][eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
    ax.set_ylim(0.0, 1.1)
    ax.set_ylabel("Water level")
    ax.set_title("Theewaterskloof Dam Water Levels")
    return ax
ax = plot_water_levels(patch, 1.0)