I have a geopandas.GeoDataFrame
containing point observations that I would like to rasterize into a grid with a certain resolution, such that, for cells that contain multiple observations, the resulting grid cell value is the average value of the contained points.
In an ideal world I'd use geocube.api.core.make_geocube
due to its ease of use. My current code looks something like this.
import json
import geopandas as gpd
import numpy as np
from geocube.api.core import make_geocube
from shapely.geometry import box, mapping, Point
# global bounding box
bbox = (-180, -90, 180, 90)
# some random points
points = [Point(np.random.uniform(-180, 180), np.random.uniform(-90, 90)) for _ in range(1000)]
# some random data values
data = np.random.rand(1000)
# create GeoDataFrame
gdf = gpd.GeoDataFrame({'data': data}, geometry=points, crs='EPSG:4326')
grid = make_geocube(
vector_data=gdf,
measurements=[column],
resolution=(-0.5, 0.5),
geom=json.dumps(mapping(box(*bbox))),
)
The problem here, if I understand correctly, is that geocube
's default rasterize_function
(geocube.rasterize.rasterize_image
) utilizes rasterio.enums.MergeAlg
as the only options for merging values that coincide in a single cell, i.e. GDAL's REPLACE or ADD methods. I'll add that in my case I don't want to do any interpolation of missing values.
Is there a way to easily implement a custom AVERAGE merge algorithm in this case, or will I need to do it all manually, e.g. with rioxarray
or other tools?
1 Answer 1
I ended up implementing this semi-manually without geocube
:
import os
from typing import Union
import numpy as np
import rasterio as rio
from rasterio.transform import from_origin
def rasterize_points(
points: np.ndarray, res: Union[int, float], bbox: tuple
) -> np.ndarray:
"""Rasterize points into a grid with a given resolution.
Args:
points (np.ndarray): Points to rasterize, with columns (x, y, value) (for
geographic coordinates, use (lon, lat, value))
res (Union[int, float]): Resolution of the grid
bbox (tuple): Bounding box of the grid
Returns:
np.ndarray: Rasterized grid
"""
width = int((bbox[2] - bbox[0]) / res)
height = int((bbox[3] - bbox[1]) / res)
rast = np.zeros((height, width), dtype=np.float32)
count_array = np.zeros_like(rast)
for x, y, value in points:
col = int((x - bbox[0]) / res)
row = int((bbox[3] - y) / res)
rast[row, col] += value
count_array[row, col] += 1
# Avoid division by zero
count_array[count_array == 0] = 1
# Calculate the average
rast = rast / count_array
return rast
def write_raster(
raster: np.ndarray, res: Union[int, float], bbox: tuple, filename: os.PathLike
) -> None:
"""Write a raster to a GeoTIFF file.
Args:
raster (np.ndarray): Raster matrix to write
res (Union[int, float]): Resolution of the raster
bbox (tuple): Bounding box of the raster
filename (os.PathLike): Path to the output file
"""
width = int((bbox[2] - bbox[0]) / res)
height = int((bbox[3] - bbox[1]) / res)
with rio.open(
filename,
"w",
driver="GTiff",
height=height,
width=width,
nodata=0,
count=1,
dtype=rio.float32,
crs="EPSG:4326",
transform=from_origin(bbox[0], bbox[3], res, res),
compress="zstd",
) as dst:
dst.write(raster, 1)
# Set bounding box and output resolution
bbox = (-180, -90, 180, 90)
res = 0.01
# Some random points in (lon, lat, value) format
points = [(np.random.uniform(-180, 180), np.random.uniform(-90, 90), np.random.rand()) for _ in range(1000)]
# Rasterize to a grid
grid = rasterize_points(points, res, bbox)
# Write to file
write_raster(grid, res, bbox, "path/to/raster.tif")