Fix pandas 3.0 compatibility #561


Open · ecomodeller wants to merge 7 commits into main from fix/pandas3-compatibility
.github/workflows/full_test.yml (29 changes: 24 additions & 5 deletions)

@@ -7,20 +7,28 @@ on:
     branches: [main]

 jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/ruff-action@v2
+        with:
+          version: 0.6.2
+          src: src
+
   build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ["3.10", "3.14"]
+        pandas-version: ["pandas2", "pandas3"]
+        exclude:
+          - python-version: "3.10"
+            pandas-version: "pandas3"

     steps:
       - uses: actions/checkout@v4

-      - uses: astral-sh/ruff-action@v2
-        with:
-          version: 0.6.2
-          src: src
-
       - name: Set up uv
         uses: astral-sh/setup-uv@v6
         with:

@@ -30,6 +38,17 @@ jobs:
       - name: Install dependencies
         run: uv sync --group test --no-dev

+      - name: Install pandas 2.x
+        if: matrix.pandas-version == 'pandas2'
+        run: uv run pip install "pandas>=2.0,<3.0"
+
+      - name: Install pandas 3.x
+        if: matrix.pandas-version == 'pandas3'
+        run: uv run pip install "pandas>=3.0,<4.0"
+
+      - name: Show pandas version
+        run: uv run python -c "import pandas; print(f'pandas {pandas.__version__}')"
+
       - name: Type check
         run: make typecheck
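
The matrix above runs the suite once per pandas major. If test code itself ever needs to branch on the installed version, a runtime check is enough; a minimal sketch (this helper is illustrative only, not part of the PR):

# Hypothetical helper, not part of this PR: detect the installed pandas
# major version so tests can skip or branch on 2.x-vs-3.x behavior.
import pandas as pd

PANDAS_MAJOR = int(pd.__version__.split(".")[0])
PANDAS3 = PANDAS_MAJOR >= 3
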
src/modelskill/comparison/_comparison.py (16 changes: 14 additions & 2 deletions)

@@ -55,6 +55,18 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset:
     # matched_data = self._matched_data_to_xarray(matched_data)
     assert "Observation" in data.data_vars

+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources may have different precisions (datetime64[s], datetime64[us], etc.)
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    # Note: The dtype.kind == "M" check is required because some datasets use
+    # non-datetime indexes (e.g., RangeIndex in tests). Only DatetimeIndex has
+    # the .as_unit() method, so we must skip normalization for other index types.
+    if data.time.dtype.kind == "M":  # M = datetime64
+        time_pd = data.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            data = data.assign_coords(time=time_index)
+
     # no missing values allowed in Observation
     if data["Observation"].isnull().any():
         raise ValueError("Observation data must not contain missing values.")
@@ -331,12 +343,12 @@ def _matched_data_to_xarray(
     )

     # check that items.obs and items.model are numeric
-    if not np.issubdtype(df[items.obs].dtype, np.number):
+    if not pd.api.types.is_numeric_dtype(df[items.obs].dtype):
         raise ValueError(
             f"Observation data is of type {df[items.obs].dtype}, it must be numeric"
         )
     for m in items.model:
-        if not np.issubdtype(df[m].dtype, np.number):
+        if not pd.api.types.is_numeric_dtype(df[m].dtype):
             raise ValueError(
                 f"Model data: {m} is of type {df[m].dtype}, it must be numeric"
             )
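
The pandas helper also accepts extension dtypes that np.issubdtype cannot interpret as numpy dtypes; a small sketch of the difference:

import pandas as pd

s = pd.Series([1, 2, None], dtype="Int64")     # nullable extension dtype
print(pd.api.types.is_numeric_dtype(s.dtype))  # True
# np.issubdtype(s.dtype, np.number) expects a plain numpy dtype and does
# not understand pandas extension dtypes, so the pandas helper is safer.
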
src/modelskill/comparison/_utils.py (4 changes: 2 additions & 2 deletions)

@@ -46,9 +46,9 @@ def _add_spatial_grid_to_df(
     bins_y = np.arange(y_start, y_end + binsize / 2, binsize)
     # cut and get bin centre
     df["xBin"] = pd.cut(df.x, bins=bins_x)
-    df["xBin"] = df["xBin"].apply(lambda x: x.mid)
+    df["xBin"] = df["xBin"].apply(lambda x: x.mid if pd.notna(x) else x)
     df["yBin"] = pd.cut(df.y, bins=bins_y)
-    df["yBin"] = df["yBin"].apply(lambda x: x.mid)
+    df["yBin"] = df["yBin"].apply(lambda x: x.mid if pd.notna(x) else x)

     return df
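
The pd.notna guard matters when a coordinate falls outside the bin edges: pd.cut yields a missing value there, and a missing value has no .mid attribute. A minimal sketch:

import pandas as pd

x = pd.Series([0.5, 1.5, 99.0])     # 99.0 falls outside the bins
xbin = pd.cut(x, bins=[0, 1, 2])    # -> (0, 1], (1, 2], NaN
mids = xbin.apply(lambda b: b.mid if pd.notna(b) else b)
print(mids.tolist())                # [0.5, 1.5, nan]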

src/modelskill/metrics.py (8 changes: 5 additions & 3 deletions)

@@ -588,9 +588,11 @@ def peak_ratio(
     time = obs.index

     # Calculate number of years
-    dt_int = (time[1:].values - time[0:-1].values).view("int64")
-    dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9  # in seconds
-    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
+    # Use total_seconds() to handle any datetime precision (ns, us, ms, s)
+    dt = time[1:] - time[:-1]
+    dt_seconds = dt.total_seconds().values
+    dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
+    N_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
     peak_index, AAP_ = _partial_duration_series(
         time,
         obs,
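
A quick sketch of why total_seconds() is unit-agnostic where the old int64 view was not, assuming an hourly DatetimeIndex stored at second precision:

import pandas as pd

time = pd.date_range("2020年01月01日", periods=4, freq="h").as_unit("s")
dt = time[1:] - time[:-1]           # TimedeltaIndex
print(dt.total_seconds().tolist())  # [3600.0, 3600.0, 3600.0]
# A raw .view("int64") counts ticks in the index's own unit (seconds
# here, nanoseconds elsewhere), so a fixed /1e9 conversion breaks.
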
src/modelskill/model/dummy.py (2 changes: 1 addition & 1 deletion)

@@ -28,7 +28,7 @@ class DummyModelResult:
     --------
     >>> import pandas as pd
     >>> import modelskill as ms
-    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="H", periods=2))
+    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2))
     >>> obs = ms.PointObservation(df, name="foo")
     >>> mr = ms.DummyModelResult(strategy='mean')
     >>> pmr = mr.extract(obs)
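
The docstring change tracks the frequency-alias rename: uppercase "H" was deprecated in pandas 2.2 in favor of lowercase "h", which works on both majors:

import pandas as pd

idx = pd.date_range("2000", freq="h", periods=2)  # "h", not the legacy "H"
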
src/modelskill/timeseries/_point.py (11 changes: 11 additions & 0 deletions)

@@ -122,6 +122,17 @@ def _convert_to_dataset(
         data = data.rename({time_dim_name: "time"})
         ds = data

+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources (dfs0 files, DataFrames) may have different precisions
+    # (datetime64[s], datetime64[us], datetime64[ns], etc.), and xarray.interp()
+    # fails when interpolating between datasets with mismatched precisions.
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    if ds.time.dtype.kind == "M":  # M = datetime
+        time_pd = ds.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            ds = ds.assign_coords(time=time_index)
+
     name = _validate_data_var_name(varname)

     n_unique_times = len(ds.time.to_index().unique())
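
A small end-to-end sketch of the same normalization on a Dataset, assuming an xarray version that preserves non-nanosecond time coordinates (older xarray casts to ns on construction, making the guard a no-op):

import pandas as pd
import xarray as xr

time = pd.date_range("2020年01月01日", periods=3, freq="h").as_unit("s")
ds = xr.Dataset({"wl": ("time", [0.1, 0.2, 0.3])}, coords={"time": time})

idx = ds.time.to_index()  # pandas DatetimeIndex
if idx.dtype != "datetime64[ns]":
    ds = ds.assign_coords(time=idx.as_unit("ns"))
print(ds.time.dtype)      # datetime64[ns]
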
tests/test_comparercollection.py (31 changes: 25 additions & 6 deletions)

@@ -570,12 +570,31 @@ def test_plot_accepts_figsize(cc_plot_function):
     assert (a, b) == figsize


-def test_peak_ratio(cc):
-    """Non existent peak ratio"""
-    cc = cc.sel(model="m1")
-    sk = cc.skill(metrics=["peak_ratio"])
-
-    assert sk.loc["fake point obs", "peak_ratio"] == pytest.approx(1.119999999)
+def test_peak_ratio():
+    """Test peak_ratio with synthetic data containing clear, verifiable peaks"""
+    # Create data with 2 clear peaks:
+    # Peak 1: obs=5.0, model=5.5 → ratio=1.1
+    # Peak 2: obs=6.0, model=6.6 → ratio=1.1
+    # Expected peak_ratio = mean([1.1, 1.1]) = 1.1
+    times = pd.date_range("2020年01月01日", periods=100, freq="h")
+    obs_vals = np.zeros(100)
+    mod_vals = np.zeros(100)
+
+    # Create peak 1 around index 10
+    obs_vals[8:13] = [0, 1, 5, 1, 0]
+    mod_vals[8:13] = [0, 1.1, 5.5, 1.1, 0]
+
+    # Create peak 2 around index 50
+    obs_vals[48:53] = [0, 1, 6, 1, 0]
+    mod_vals[48:53] = [0, 1.1, 6.6, 1.1, 0]
+
+    df = pd.DataFrame({"Observation": obs_vals, "model": mod_vals}, index=times)
+
+    cmp = ms.from_matched(df, obs_item=0, name="synthetic_peaks")
+    sk = cmp.skill(metrics=["peak_ratio"])
+
+    # Model peaks are 1.1x observation peaks
+    assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.1, abs=0.01)


 def test_peak_ratio_2(cc_pr):
tests/test_simple_compare.py (4 changes: 2 additions & 2 deletions)

@@ -78,12 +78,12 @@ def test_matching_pointobservation_with_trackmodelresult_is_not_possible():
     # ignore the data
     tdf = pd.DataFrame(
         {"x": [1, 2], "y": [1, 2], "m1": [0, 0]},
-        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4s"),
     )
     mr = ms.TrackModelResult(tdf, item="m1", x_item="x", y_item="y")
     pdf = pd.DataFrame(
         data={"level": [0.0, 0.0]},
-        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4s"),
     )
     obs = ms.PointObservation(pdf, item="level")
     with pytest.raises(TypeError, match="TrackModelResult"):