diff --git a/.github/workflows/full_test.yml b/.github/workflows/full_test.yml
index 4eb8aee7d..b78690b34 100644
--- a/.github/workflows/full_test.yml
+++ b/.github/workflows/full_test.yml
@@ -7,20 +7,28 @@ on:
     branches: [main]
 
 jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/ruff-action@v2
+        with:
+          version: 0.6.2
+          src: src
+
   build:
     runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: ["3.10", "3.14"]
+        pandas-version: ["pandas2", "pandas3"]
+        exclude:
+          - python-version: "3.10"
+            pandas-version: "pandas3"
 
     steps:
       - uses: actions/checkout@v4
-      - uses: astral-sh/ruff-action@v2
-        with:
-          version: 0.6.2
-          src: src
-
       - name: Set up uv
         uses: astral-sh/setup-uv@v6
         with:
@@ -30,6 +38,17 @@ jobs:
       - name: Install dependencies
         run: uv sync --group test --no-dev
 
+      - name: Install pandas 2.x
+        if: matrix.pandas-version == 'pandas2'
+        run: uv run pip install "pandas>=2.0,<3.0"
+
+      - name: Install pandas 3.x
+        if: matrix.pandas-version == 'pandas3'
+        run: uv run pip install "pandas>=3.0,<4.0"
+
+      - name: Show pandas version
+        run: uv run python -c "import pandas; print(f'pandas {pandas.__version__}')"
+
       - name: Type check
         run: make typecheck
 
diff --git a/src/modelskill/comparison/_comparison.py b/src/modelskill/comparison/_comparison.py
index f8fedef75..70addb1f4 100644
--- a/src/modelskill/comparison/_comparison.py
+++ b/src/modelskill/comparison/_comparison.py
@@ -55,6 +55,18 @@ def _parse_dataset(data: xr.Dataset) -> xr.Dataset:
     # matched_data = self._matched_data_to_xarray(matched_data)
     assert "Observation" in data.data_vars
 
+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources may have different precisions (datetime64[s], datetime64[us], etc.)
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    # Note: The dtype.kind == "M" check is required because some datasets use
+    # non-datetime indexes (e.g., RangeIndex in tests). Only DatetimeIndex has
+    # the .as_unit() method, so we must skip normalization for other index types.
+    if data.time.dtype.kind == "M":  # M = datetime64
+        time_pd = data.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            data = data.assign_coords(time=time_index)
+
     # no missing values allowed in Observation
     if data["Observation"].isnull().any():
         raise ValueError("Observation data must not contain missing values.")
@@ -331,12 +343,12 @@ def _matched_data_to_xarray(
     )
 
     # check that items.obs and items.model are numeric
-    if not np.issubdtype(df[items.obs].dtype, np.number):
+    if not pd.api.types.is_numeric_dtype(df[items.obs].dtype):
         raise ValueError(
             "Observation data is of type {df[items.obs].dtype}, it must be numeric"
         )
     for m in items.model:
-        if not np.issubdtype(df[m].dtype, np.number):
+        if not pd.api.types.is_numeric_dtype(df[m].dtype):
             raise ValueError(
                 f"Model data: {m} is of type {df[m].dtype}, it must be numeric"
             )
diff --git a/src/modelskill/comparison/_utils.py b/src/modelskill/comparison/_utils.py
index f8d399d1a..d2a78f83a 100644
--- a/src/modelskill/comparison/_utils.py
+++ b/src/modelskill/comparison/_utils.py
@@ -46,9 +46,9 @@ def _add_spatial_grid_to_df(
     bins_y = np.arange(y_start, y_end + binsize / 2, binsize)
     # cut and get bin centre
     df["xBin"] = pd.cut(df.x, bins=bins_x)
-    df["xBin"] = df["xBin"].apply(lambda x: x.mid)
+    df["xBin"] = df["xBin"].apply(lambda x: x.mid if pd.notna(x) else x)
     df["yBin"] = pd.cut(df.y, bins=bins_y)
-    df["yBin"] = df["yBin"].apply(lambda x: x.mid)
+    df["yBin"] = df["yBin"].apply(lambda x: x.mid if pd.notna(x) else x)
 
     return df
 
diff --git a/src/modelskill/metrics.py b/src/modelskill/metrics.py
index 30e4b02bc..22099df9b 100644
--- a/src/modelskill/metrics.py
+++ b/src/modelskill/metrics.py
@@ -588,9 +588,11 @@ def peak_ratio(
 
     time = obs.index
     # Calculate number of years
-    dt_int = (time[1:].values - time[0:-1].values).view("int64")
-    dt_int_mode = float(stats.mode(dt_int, keepdims=False)[0]) / 1e9  # in seconds
-    N_years = dt_int_mode / 24 / 3600 / 365.25 * len(time)
+    # Use total_seconds() to handle any datetime precision (ns, us, ms, s)
+    dt = time[1:] - time[:-1]
+    dt_seconds = dt.total_seconds().values
+    dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
+    N_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
     peak_index, AAP_ = _partial_duration_series(
         time,
         obs,
diff --git a/src/modelskill/model/dummy.py b/src/modelskill/model/dummy.py
index 6a1efea0d..251598123 100644
--- a/src/modelskill/model/dummy.py
+++ b/src/modelskill/model/dummy.py
@@ -28,7 +28,7 @@ class DummyModelResult:
     --------
     >>> import pandas as pd
     >>> import modelskill as ms
-    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="H", periods=2))
+    >>> df = pd.DataFrame([0.0, 1.0], index=pd.date_range("2000", freq="h", periods=2))
     >>> obs = ms.PointObservation(df, name="foo")
     >>> mr = ms.DummyModelResult(strategy='mean')
     >>> pmr = mr.extract(obs)
diff --git a/src/modelskill/timeseries/_point.py b/src/modelskill/timeseries/_point.py
index 5e2a0c86a..43675a4e3 100644
--- a/src/modelskill/timeseries/_point.py
+++ b/src/modelskill/timeseries/_point.py
@@ -122,6 +122,17 @@ def _convert_to_dataset(
         data = data.rename({time_dim_name: "time"})
         ds = data
 
+    # Normalize datetime precision to avoid xarray interp issues with pandas 3.0
+    # Different data sources (dfs0 files, DataFrames) may have different precisions
+    # (datetime64[s], datetime64[us], datetime64[ns], etc.), and xarray.interp()
+    # fails when interpolating between datasets with mismatched precisions.
+    # Use nanoseconds (ns) for backward compatibility with pandas 2.x
+    if ds.time.dtype.kind == "M":  # M = datetime
+        time_pd = ds.time.to_index()  # Preserves freq attribute
+        if time_pd.dtype != "datetime64[ns]":
+            time_index = time_pd.as_unit("ns")
+            ds = ds.assign_coords(time=time_index)
+
     name = _validate_data_var_name(varname)
 
     n_unique_times = len(ds.time.to_index().unique())
diff --git a/tests/test_comparercollection.py b/tests/test_comparercollection.py
index a291cd0f5..f884ff7f4 100644
--- a/tests/test_comparercollection.py
+++ b/tests/test_comparercollection.py
@@ -570,12 +570,31 @@ def test_plot_accepts_figsize(cc_plot_function):
     assert a, b == figsize
 
 
-def test_peak_ratio(cc):
-    """Non existent peak ratio"""
-    cc = cc.sel(model="m1")
-    sk = cc.skill(metrics=["peak_ratio"])
-
-    assert sk.loc["fake point obs", "peak_ratio"] == pytest.approx(1.119999999)
+def test_peak_ratio():
+    """Test peak_ratio with synthetic data containing clear, verifiable peaks"""
+    # Create data with 2 clear peaks:
+    # Peak 1: obs=5.0, model=5.5 → ratio=1.1
+    # Peak 2: obs=6.0, model=6.6 → ratio=1.1
+    # Expected peak_ratio = mean([1.1, 1.1]) = 1.1
+    times = pd.date_range("2020-01-01", periods=100, freq="h")
+    obs_vals = np.zeros(100)
+    mod_vals = np.zeros(100)
+
+    # Create peak 1 around index 10
+    obs_vals[8:13] = [0, 1, 5, 1, 0]
+    mod_vals[8:13] = [0, 1.1, 5.5, 1.1, 0]
+
+    # Create peak 2 around index 50
+    obs_vals[48:53] = [0, 1, 6, 1, 0]
+    mod_vals[48:53] = [0, 1.1, 6.6, 1.1, 0]
+
+    df = pd.DataFrame({"Observation": obs_vals, "model": mod_vals}, index=times)
+
+    cmp = ms.from_matched(df, obs_item=0, name="synthetic_peaks")
+    sk = cmp.skill(metrics=["peak_ratio"])
+
+    # Model peaks are 1.1x observation peaks
+    assert sk.to_dataframe()["peak_ratio"].values == pytest.approx(1.1, abs=0.01)
 
 
 def test_peak_ratio_2(cc_pr):
diff --git a/tests/test_simple_compare.py b/tests/test_simple_compare.py
index e4081bf94..f2e04ee93 100644
--- a/tests/test_simple_compare.py
+++ b/tests/test_simple_compare.py
@@ -78,12 +78,12 @@ def test_matching_pointobservation_with_trackmodelresult_is_not_possible():
     # ignore the data
     tdf = pd.DataFrame(
         {"x": [1, 2], "y": [1, 2], "m1": [0, 0]},
-        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
    )
     mr = ms.TrackModelResult(tdf, item="m1", x_item="x", y_item="y")
     pdf = pd.DataFrame(
         data={"level": [0.0, 0.0]},
-        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4S"),
+        index=pd.date_range("2017-10-27 13:00:01", periods=2, freq="4s"),
     )
     obs = ms.PointObservation(pdf, item="level")
     with pytest.raises(TypeError, match="TrackModelResult"):
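A note on the datetime-precision normalization in _comparison.py and _point.py: pandas 3.0 keeps whatever unit an index was created with (datetime64[s], datetime64[us], ...), while other code paths still produce datetime64[ns], and xarray interpolation misbehaves when the two sides disagree. The sketch below mirrors that normalization outside modelskill; the helper name normalize_time_to_ns is illustrative only and not part of the library.

    import numpy as np
    import pandas as pd
    import xarray as xr

    def normalize_time_to_ns(ds: xr.Dataset) -> xr.Dataset:
        # Illustrative helper mirroring the patch: coerce a datetime "time"
        # coordinate to datetime64[ns]; skip non-datetime indexes entirely.
        if ds.time.dtype.kind != "M":
            return ds
        time_pd = ds.time.to_index()
        if time_pd.dtype != "datetime64[ns]":
            ds = ds.assign_coords(time=time_pd.as_unit("ns"))
        return ds

    # A second-precision index, as pandas 3.0 may produce
    idx = pd.date_range("2020-01-01", periods=3, freq="h").as_unit("s")
    ds = xr.Dataset({"Observation": ("time", np.array([1.0, 2.0, 3.0]))}, coords={"time": idx})
    ds = normalize_time_to_ns(ds)
    assert ds.time.dtype == np.dtype("datetime64[ns]")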
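A note on the peak_ratio timestep change in metrics.py: viewing the time deltas as int64 and dividing by 1e9 is only valid when the datetimes are nanosecond-backed, whereas TimedeltaIndex.total_seconds() returns seconds regardless of the underlying unit. A small sketch of the new computation on a toy hourly index; the variable names here are illustrative.

    import pandas as pd
    from scipy import stats

    time = pd.date_range("2020-01-01", periods=100, freq="h")

    dt = time[1:] - time[:-1]               # TimedeltaIndex
    dt_seconds = dt.total_seconds().values  # float seconds, independent of ns/us/ms/s backing
    dt_mode_seconds = float(stats.mode(dt_seconds, keepdims=False)[0])
    n_years = dt_mode_seconds / 24 / 3600 / 365.25 * len(time)
    print(dt_mode_seconds, round(n_years, 4))  # 3600.0 0.0114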
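A note on the pd.notna guard in _add_spatial_grid_to_df: pd.cut assigns NaN to values that fall outside the bin edges, and NaN has no .mid attribute, so the guard lets such points pass through as NaN instead of raising an AttributeError. A toy illustration with made-up values:

    import pandas as pd

    x = pd.Series([0.5, 1.5, 99.0])   # 99.0 lies outside the bins
    xbin = pd.cut(x, bins=[0, 1, 2])  # out-of-range values become NaN
    mid = xbin.apply(lambda b: b.mid if pd.notna(b) else b)
    print(mid.tolist())               # [0.5, 1.5, nan]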