Fix pandas 3.0 compatibility #561


Open · ecomodeller wants to merge 7 commits into main from fix/pandas3-compatibility
.github/workflows/full_test.yml (29 changes: 24 additions & 5 deletions)

@@ -7,20 +7,28 @@ on:
     branches: [main]

 jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/ruff-action@v2
+        with:
+          version: 0.6.2
+          src: src
+
   build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ["3.10", "3.14"]
+        pandas-version: ["pandas2", "pandas3"]
+        exclude:
+          - python-version: "3.10"
+            pandas-version: "pandas3"

     steps:
       - uses: actions/checkout@v4

-      - uses: astral-sh/ruff-action@v2
-        with:
-          version: 0.6.2
-          src: src
-
       - name: Set up uv
         uses: astral-sh/setup-uv@v6
         with:

@@ -30,6 +38,17 @@ jobs:
       - name: Install dependencies
         run: uv sync --group test --no-dev

+      - name: Install pandas 2.x
+        if: matrix.pandas-version == 'pandas2'
+        run: uv run pip install "pandas>=2.0,<3.0"
+
+      - name: Install pandas 3.x
+        if: matrix.pandas-version == 'pandas3'
+        run: uv run pip install "pandas>=3.0,<4.0"
+
+      - name: Show pandas version
+        run: uv run python -c "import pandas; print(f'pandas {pandas.__version__}')"
+
       - name: Type check
         run: make typecheck
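
The matrix above runs the suite once per pandas major. If test code itself ever needs to branch on the installed version, a runtime check is enough; a minimal sketch (this helper is illustrative only, not part of the PR):

# Hypothetical helper, not part of this PR: detect the installed pandas
# major version so tests can skip or branch on 2.x-vs-3.x behavior.
import pandas as pd

PANDAS_MAJOR = int(pd.__version__.split(".")[0])
PANDAS3 = PANDAS_MAJOR >= 3
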
src/modelskill/comparison/_comparison.py (16 changes: 14 additions & 2 deletions)

@@ -55,6 +55,18 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset:
     # matched_data = self._matched_data_to_xarray(matched_data)
     assert "Observation" in data.data_vars

+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources may have different precisions (datetime64[s], datetime64[us], etc.)
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    # Note: The dtype.kind == "M" check is required because some datasets use
+    # non-datetime indexes (e.g., RangeIndex in tests). Only DatetimeIndex has
+    # the .as_unit() method, so we must skip normalization for other index types.
+    if data.time.dtype.kind == "M":  # M = datetime64
+        time_pd = data.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            data = data.assign_coords(time=time_index)
+
     # no missing values allowed in Observation
     if data["Observation"].isnull().any():
         raise ValueError("Observation data must not contain missing values.")
@@ -331,12 +343,12 @@ def _matched_data_to_xarray(
     )

     # check that items.obs and items.model are numeric
-    if not np.issubdtype(df[items.obs].dtype, np.number):
+    if not pd.api.types.is_numeric_dtype(df[items.obs].dtype):
         raise ValueError(
             f"Observation data is of type {df[items.obs].dtype}, it must be numeric"
         )
     for m in items.model:
-        if not np.issubdtype(df[m].dtype, np.number):
+        if not pd.api.types.is_numeric_dtype(df[m].dtype):
             raise ValueError(
                 f"Model data: {m} is of type {df[m].dtype}, it must be numeric"
             )
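
The pandas helper also accepts extension dtypes that np.issubdtype cannot interpret as numpy dtypes; a small sketch of the difference:

import pandas as pd

s = pd.Series([1, 2, None], dtype="Int64")     # nullable extension dtype
print(pd.api.types.is_numeric_dtype(s.dtype))  # True
# np.issubdtype(s.dtype, np.number) expects a plain numpy dtype and does
# not understand pandas extension dtypes, so the pandas helper is safer.
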
src/modelskill/comparison/_utils.py (4 changes: 2 additions & 2 deletions)

@@ -46,9 +46,9 @@ def _add_spatial_grid_to_df(
     bins_y = np.arange(y_start, y_end + binsize / 2, binsize)
     # cut and get bin centre
     df["xBin"] = pd.cut(df.x, bins=bins_x)
-    df["xBin"] = df["xBin"].apply(lambda x: x.mid)
+    df["xBin"] = df["xBin"].apply(lambda x: x.mid if pd.notna(x) else x)
     df["yBin"] = pd.cut(df.y, bins=bins_y)
-    df["yBin"] = df["yBin"].apply(lambda x: x.mid)
+    df["yBin"] = df["yBin"].apply(lambda x: x.mid if pd.notna(x) else x)

     return df
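
The pd.notna guard matters when a coordinate falls outside the bin edges: pd.cut yields a missing value there, and a missing value has no .mid attribute. A minimal sketch:

import pandas as pd

x = pd.Series([0.5, 1.5, 99.0])     # 99.0 falls outside the bins
xbin = pd.cut(x, bins=[0, 1, 2])    # -> (0, 1], (1, 2], NaN
mids = xbin.apply(lambda b: b.mid if pd.notna(b) else b)
print(mids.tolist())                # [0.5, 1.5, nan]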

src/modelskill/metrics.py (8 changes: 5 additions & 3 deletions)

@@ -588,9 +588,11 @@ def peak_ratio(
     time = obs.index

     # Calculate number of years
-    dt_int = (time[1:].values - time[0:-1].values).view("int64")
-    dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9  # in seconds
-    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
+    # Use total_seconds() to handle any datetime precision (ns, us, ms, s)
+    dt = time[1:] - time[:-1]
+    dt_seconds = dt.total_seconds().values
+    dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
+    N_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
     peak_index, AAP_ = _partial_duration_series(
         time,
         obs,
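
A quick sketch of why total_seconds() is unit-agnostic where the old int64 view was not, assuming an hourly DatetimeIndex stored at second precision:

import pandas as pd

time = pd.date_range("2020年01月01日", periods=4, freq="h").as_unit("s")
dt = time[1:] - time[:-1]           # TimedeltaIndex
print(dt.total_seconds().tolist())  # [3600.0, 3600.0, 3600.0]
# A raw .view("int64") counts ticks in the index's own unit (seconds
# here, nanoseconds elsewhere), so a fixed /1e9 conversion breaks.
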
src/modelskill/model/dummy.py (2 changes: 1 addition & 1 deletion)

@@ -28,7 +28,7 @@ class DummyModelResult:
     --------
     >>> import pandas as pd
     >>> import modelskill as ms
-    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="H", periods=2))
+    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2))
     >>> obs = ms.PointObservation(df, name="foo")
     >>> mr = ms.DummyModelResult(strategy='mean')
     >>> pmr = mr.extract(obs)
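
The docstring change tracks the frequency-alias rename: uppercase "H" was deprecated in pandas 2.2 in favor of lowercase "h", which works on both majors:

import pandas as pd

idx = pd.date_range("2000", freq="h", periods=2)  # "h", not the legacy "H"
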
src/modelskill/timeseries/_point.py (11 changes: 11 additions & 0 deletions)

@@ -122,6 +122,17 @@ def _convert_to_dataset(
         data = data.rename({time_dim_name: "time"})
         ds = data

+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources (dfs0 files, DataFrames) may have different precisions
+    # (datetime64[s], datetime64[us], datetime64[ns], etc.), and xarray.interp()
+    # fails when interpolating between datasets with mismatched precisions.
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    if ds.time.dtype.kind == "M":  # M = datetime
+        time_pd = ds.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            ds = ds.assign_coords(time=time_index)
+
     name = _validate_data_var_name(varname)

     n_unique_times = len(ds.time.to_index().unique())
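
A small end-to-end sketch of the same normalization on a Dataset, assuming an xarray version that preserves non-nanosecond time coordinates (older xarray casts to ns on construction, making the guard a no-op):

import pandas as pd
import xarray as xr

time = pd.date_range("2020年01月01日", periods=3, freq="h").as_unit("s")
ds = xr.Dataset({"wl": ("time", [0.1, 0.2, 0.3])}, coords={"time": time})

idx = ds.time.to_index()  # pandas DatetimeIndex
if idx.dtype != "datetime64[ns]":
    ds = ds.assign_coords(time=idx.as_unit("ns"))
print(ds.time.dtype)      # datetime64[ns]
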
tests/test_comparercollection.py (31 changes: 25 additions & 6 deletions)

@@ -570,12 +570,31 @@ def test_plot_accepts_figsize(cc_plot_function):
     assert (a, b) == figsize


-def test_peak_ratio(cc):
-    """Non existent peak ratio"""
-    cc = cc.sel(model="m1")
-    sk = cc.skill(metrics=["peak_ratio"])
-
-    assert sk.loc["fake point obs", "peak_ratio"] == pytest.approx(1.119999999)
+def test_peak_ratio():
+    """Test peak_ratio with synthetic data containing clear, verifiable peaks"""
+    # Create data with 2 clear peaks:
+    # Peak 1: obs=5.0, model=5.5 → ratio=1.1
+    # Peak 2: obs=6.0, model=6.6 → ratio=1.1
+    # Expected peak_ratio = mean([1.1, 1.1]) = 1.1
+    times = pd.date_range("2020年01月01日", periods=100, freq="h")
+    obs_vals = np.zeros(100)
+    mod_vals = np.zeros(100)
+
+    # Create peak 1 around index 10
+    obs_vals[8:13] = [0, 1, 5, 1, 0]
+    mod_vals[8:13] = [0, 1.1, 5.5, 1.1, 0]
+
+    # Create peak 2 around index 50
+    obs_vals[48:53] = [0, 1, 6, 1, 0]
+    mod_vals[48:53] = [0, 1.1, 6.6, 1.1, 0]
+
+    df = pd.DataFrame({"Observation": obs_vals, "model": mod_vals}, index=times)
+
+    cmp = ms.from_matched(df, obs_item=0, name="synthetic_peaks")
+    sk = cmp.skill(metrics=["peak_ratio"])
+
+    # Model peaks are 1.1x observation peaks
+    assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.1, abs=0.01)


 def test_peak_ratio_2(cc_pr):
tests/test_simple_compare.py (4 changes: 2 additions & 2 deletions)

@@ -78,12 +78,12 @@ def test_matching_pointobservation_with_trackmodelresult_is_not_possible():
     # ignore the data
     tdf = pd.DataFrame(
         {"x": [1, 2], "y": [1, 2], "m1": [0, 0]},
-        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4s"),
     )
     mr = ms.TrackModelResult(tdf, item="m1", x_item="x", y_item="y")
     pdf = pd.DataFrame(
         data={"level": [0.0, 0.0]},
-        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017年10月27日 13:00:01", periods=2, freq="4s"),
     )
     obs = ms.PointObservation(pdf, item="level")
     with pytest.raises(TypeError, match="TrackModelResult"):