Measuring the water level of a Theewaterskloof Dam in South Africa
Natebook showcases an example of Earth observation processing chain that determines water levels of any water body (dam, reservoir, lake, …) from satellite imagery. The entire processing chain is performed using the eo-learn
package. The user simply needs to provide a polygon with water body’s nominal water extent.
[1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
Imports
[3]:
# For manipulating geo-spatial vector dataset (polygons of nominal water extent)
import geopandas as gpd
# import matplotlib
import matplotlib.pyplot as plt
# The golden standard: numpy and matplotlib
import numpy as np
# Loading polygon of nominal water extent
import shapely.wkt
# Image manipulations
# Our water detector is going to be based on a simple threshold
# of Normalised Difference Water Index (NDWI) grayscale image
from skimage.filters import threshold_otsu
# sentinelhub-py package
from sentinelhub import CRS, BBox, DataCollection
# eo-learn core building blocks
from eolearn.core import EOTask, EOWorkflow, FeatureType, OutputTask, linearly_connect_tasks
# filtering of scenes
from eolearn.features import NormalizedDifferenceIndexTask, SimpleFilterTask
# burning the vectorised polygon to raster
from eolearn.geometry import VectorToRasterTask
from eolearn.io import SentinelHubInputTask
Water level extraction EOWorkflow
Our basic logic of the example workflow is:
Download all available Sentinel-2 sattelite imagery of Theewaterskloof Dam from beginning of 2016 and today
We want to calculate NDWI and also have a true color visualization of the area. We need the following bands:
B02, B03, B04 for
TRUE_COLOR
for visualisationsB03, B08 for NDWI calculation
CLM (provided by Sentinel Hub) for cloud masking
Clouds are very often obscuring the view of the ground. In order to correctly determine the water level of the dam all images with clouds need to be filtered out. We will use the cloud masks provided by Sentinel Hub to filter clouds without the need for time consuming processing locally.
Apply adaptive thresholding to
NDWI
grayscale imagesExtract water level from a comparison of measured water extent with the nominal one
Each step in the above overview of the workflow is accomplished by adding an EOTask
to the EOWorkflow
Load the Polygon of nominal water extent and define a BBox
The BBox
defines an area of interest and will be used to create an EOPatch.
[4]:
# The polygon of the dam is written in wkt format and WGS84 coordinate reference system
with open("theewaterskloof_dam_nominal.wkt", "r") as f:
dam_wkt = f.read()
dam_nominal = shapely.wkt.loads(dam_wkt)
# inflate the BBOX
inflate_bbox = 0.1
minx, miny, maxx, maxy = dam_nominal.bounds
delx = maxx - minx
dely = maxy - miny
minx = minx - delx * inflate_bbox
maxx = maxx + delx * inflate_bbox
miny = miny - dely * inflate_bbox
maxy = maxy + dely * inflate_bbox
dam_bbox = BBox([minx, miny, maxx, maxy], crs=CRS.WGS84)
dam_bbox.geometry - dam_nominal
[4]:
Step 1: Intialize (and implement workflow specific) EOTasks
Create an EOPatch and add all EO features (satellite imagery data)
[5]:
download_task = SentinelHubInputTask(
data_collection=DataCollection.SENTINEL2_L1C,
bands_feature=(FeatureType.DATA, "BANDS"),
resolution=20,
maxcc=0.5,
bands=["B02", "B03", "B04", "B08"],
additional_data=[(FeatureType.MASK, "dataMask", "IS_DATA"), (FeatureType.MASK, "CLM")],
cache_folder="cached_data",
)
calculate_ndwi = NormalizedDifferenceIndexTask((FeatureType.DATA, "BANDS"), (FeatureType.DATA, "NDWI"), (1, 3))
Burn in the nominal water extent
The VectorToRaster
task expects the vectorised dataset in geopandas dataframe.
[6]:
dam_gdf = gpd.GeoDataFrame(crs=CRS.WGS84.pyproj_crs(), geometry=[dam_nominal])
[7]:
dam_gdf.plot();

[8]:
add_nominal_water = VectorToRasterTask(
dam_gdf,
(FeatureType.MASK_TIMELESS, "NOMINAL_WATER"),
values=1,
raster_shape=(FeatureType.MASK, "IS_DATA"),
raster_dtype=np.uint8,
)
The cloud mask is already provided by Sentinel Hub, we use it to calulcate the valid data mask.
Define a VALID_DATA
layer: pixel has to contain data and should be classified as clear sky by the cloud detector (CLM
equals 0)
[9]:
class AddValidDataMaskTask(EOTask):
def execute(self, eopatch):
is_data_mask = eopatch[FeatureType.MASK, "IS_DATA"].astype(bool)
cloud_mask = ~eopatch[FeatureType.MASK, "CLM"].astype(bool)
eopatch[FeatureType.MASK, "VALID_DATA"] = np.logical_and(is_data_mask, cloud_mask)
return eopatch
add_valid_mask = AddValidDataMaskTask()
Calculate fraction of valid pixels per frame and store it as SCALAR
feature
[10]:
def calculate_coverage(array):
return 1.0 - np.count_nonzero(array) / np.size(array)
class AddValidDataCoverageTask(EOTask):
def execute(self, eopatch):
valid_data = eopatch[FeatureType.MASK, "VALID_DATA"]
time, height, width, channels = valid_data.shape
coverage = np.apply_along_axis(calculate_coverage, 1, valid_data.reshape((time, height * width * channels)))
eopatch[FeatureType.SCALAR, "COVERAGE"] = coverage[:, np.newaxis]
return eopatch
add_coverage = AddValidDataCoverageTask()
Filter out too cloudy scenes. We filter out all observation which have cloud coverage of more than 5%.
[11]:
cloud_coverage_threshold = 0.05
class ValidDataCoveragePredicate:
def __init__(self, threshold):
self.threshold = threshold
def __call__(self, array):
return calculate_coverage(array) < self.threshold
remove_cloudy_scenes = SimpleFilterTask(
(FeatureType.MASK, "VALID_DATA"), ValidDataCoveragePredicate(cloud_coverage_threshold)
)
Apply Water Detection
[12]:
class WaterDetectionTask(EOTask):
@staticmethod
def detect_water(ndwi):
"""Very simple water detector based on Otsu thresholding method of NDWI."""
otsu_thr = 1.0
if len(np.unique(ndwi)) > 1:
ndwi[np.isnan(ndwi)] = -1
otsu_thr = threshold_otsu(ndwi)
return ndwi > otsu_thr
def execute(self, eopatch):
water_masks = np.asarray([self.detect_water(ndwi[..., 0]) for ndwi in eopatch.data["NDWI"]])
# we're only interested in the water within the dam borders
water_masks = water_masks[..., np.newaxis] * eopatch.mask_timeless["NOMINAL_WATER"]
water_levels = np.asarray(
[np.count_nonzero(mask) / np.count_nonzero(eopatch.mask_timeless["NOMINAL_WATER"]) for mask in water_masks]
)
eopatch[FeatureType.MASK, "WATER_MASK"] = water_masks
eopatch[FeatureType.SCALAR, "WATER_LEVEL"] = water_levels[..., np.newaxis]
return eopatch
water_detection = WaterDetectionTask()
Step 2: Define the EOWorkflow
[13]:
# To access the final data we use the OutputTask (alternatively we could save the eopatch)
output_task = OutputTask("final_eopatch")
workflow_nodes = linearly_connect_tasks(
download_task,
calculate_ndwi,
add_nominal_water,
add_valid_mask,
add_coverage,
remove_cloudy_scenes,
water_detection,
output_task,
)
workflow = EOWorkflow(workflow_nodes)
Step 3: Run the workflow
Process all Sentinel-2 acquisitions from beginning of 2016 and until beginning of June 2020.
[14]:
time_interval = ["2017-01-01", "2020-06-01"]
[15]:
# The download task requires additional arguments at execution. These are linked to the node the task is in.
download_node = workflow_nodes[0]
result = workflow.execute(
{
download_node: {"bbox": dam_bbox, "time_interval": time_interval},
}
)
Print content of eopatch at the end of the workflow execution
[16]:
patch = result.outputs["final_eopatch"]
patch
[16]:
EOPatch(
data={
BANDS: numpy.ndarray(shape=(98, 632, 919, 4), dtype=float32)
NDWI: numpy.ndarray(shape=(98, 632, 919, 1), dtype=float32)
}
mask={
CLM: numpy.ndarray(shape=(98, 632, 919, 1), dtype=uint8)
IS_DATA: numpy.ndarray(shape=(98, 632, 919, 1), dtype=bool)
VALID_DATA: numpy.ndarray(shape=(98, 632, 919, 1), dtype=bool)
WATER_MASK: numpy.ndarray(shape=(98, 632, 919, 1), dtype=uint8)
}
scalar={
COVERAGE: numpy.ndarray(shape=(98, 1), dtype=float64)
WATER_LEVEL: numpy.ndarray(shape=(98, 1), dtype=float64)
}
mask_timeless={
NOMINAL_WATER: numpy.ndarray(shape=(632, 919, 1), dtype=uint8)
}
meta_info={
maxcc: 0.5
size_x: 919
size_y: 632
time_difference: 1.0
time_interval: ('2017-01-01T00:00:00', '2020-06-01T23:59:59')
}
bbox=BBox(((19.10818927, -34.08851246), (19.30962163, -33.977424140000004)), crs=CRS('4326'))
timestamps=[datetime.datetime(2017, 1, 1, 8, 45, 43), ..., datetime.datetime(2020, 5, 30, 8, 49, 51)], length=98
)
Plot results
[17]:
from skimage.filters import sobel
from skimage.morphology import closing, disk
[18]:
def plot_rgb_w_water(eopatch, idx):
ratio = np.abs(eopatch.bbox.max_x - eopatch.bbox.min_x) / np.abs(eopatch.bbox.max_y - eopatch.bbox.min_y)
fig, ax = plt.subplots(figsize=(ratio * 10, 10))
ax.imshow(np.clip(2.5 * eopatch.data["BANDS"][..., [2, 1, 0]][idx], 0, 1))
observed = closing(eopatch.mask["WATER_MASK"][idx, ..., 0], disk(1))
nominal = sobel(eopatch.mask_timeless["NOMINAL_WATER"][..., 0])
observed = sobel(observed)
nominal = np.ma.masked_where(~nominal.astype(bool), nominal)
observed = np.ma.masked_where(~observed.astype(bool), observed)
ax.imshow(nominal, cmap=plt.cm.Reds)
ax.imshow(observed, cmap=plt.cm.Blues)
ax.axis("off")
[19]:
plot_rgb_w_water(patch, 0)

[20]:
plot_rgb_w_water(patch, -1)

[21]:
def plot_water_levels(eopatch, max_coverage=1.0):
fig, ax = plt.subplots(figsize=(20, 7))
dates = np.asarray(eopatch.timestamps)
ax.plot(
dates[eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
eopatch.scalar["WATER_LEVEL"][eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
"bo-",
alpha=0.7,
)
ax.plot(
dates[eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
eopatch.scalar["COVERAGE"][eopatch.scalar["COVERAGE"][..., 0] < max_coverage],
"--",
color="gray",
alpha=0.7,
)
ax.set_ylim(0.0, 1.1)
ax.set_xlabel("Date")
ax.set_ylabel("Water level")
ax.set_title("Theewaterskloof Dam Water Levels")
ax.grid(axis="y")
return ax
[22]:
ax = plot_water_levels(patch, 1.0)
