0

I use Python 3.9.18 to read wrfout files (named like: wrfout_d02_2020-01-01_00:00:00), extract the T2, Q2, PSFC, U10, V10 and ACSWDNB variables, and combine all days of a month into one output netCDF file (named like: WRF_PGW_2C_re-202001-d02-hourly.nc).

Here is the code:

import xarray as xr
import glob
from metpy.units import units
import numpy as np
import pandas as pd
import calendar
from tqdm import tqdm
def get_file_list(input_dir: str, year: int, month: int) -> list:
    """
    Get list of WRF output files for a given year and month.

    Files are matched by the name pattern
    ``wrfout_d02_<year>-<MM>-*_00:00:00`` directly inside ``input_dir``.

    Args:
        input_dir (str): Directory containing WRF output files. A trailing
            path separator is optional.
        year (int): Year of the data.
        month (int): Month of the data (zero-padded to two digits in the
            file name pattern).

    Returns:
        list: Sorted list of matching file paths (empty if nothing matches).
    """
    # Function-scope import so this block does not depend on the file header.
    import os

    # os.path.join inserts the separator only when it is missing, so the
    # pattern is correct whether or not input_dir ends with '/'.
    file_pattern = os.path.join(
        input_dir, f'wrfout_d02_{year}-{month:02}-*_00:00:00'
    )
    return sorted(glob.glob(file_pattern))
def process_single_file(file: str) -> xr.Dataset:
    """
    Process a single WRF output file and extract specified variables.

    The variables Times, T2, Q2, PSFC, U10, V10 and ACSWDNB are read from the
    file. The numeric ones are multiplied by their metpy unit, and the
    original variable attributes are copied onto the result.

    Args:
        file (str): Path to the WRF output file.

    Returns:
        xr.Dataset: Dataset with extracted variables and units.

    Raises:
        KeyError: If one of the expected variables is missing from the file
            (raised by the variable lookup on the opened dataset).
    """
    # The context manager guarantees the file handle is released even when a
    # variable lookup or the unit arithmetic raises.
    with xr.open_dataset(file, engine='netcdf4') as ds:
        # Extract variables with units
        data_vars = {
            # 'Times' is passed through without arithmetic, so load it
            # explicitly while the file is still open.
            'Times': ds['Times'].load(),
            'T2': ds['T2'] * units.kelvin,
            'Q2': ds['Q2'] * units('kg/kg'),
            'PSFC': ds['PSFC'] * units.pascal,
            'U10': ds['U10'] * units('m/s'),
            'V10': ds['V10'] * units('m/s'),
            'ACSWDNB': ds['ACSWDNB'] * units('J/m^2'),
        }

        # Create new dataset
        single_ds = xr.Dataset(data_vars)

        # Copy the source attributes back onto each extracted variable.
        for var in data_vars:
            single_ds[var].attrs = ds[var].attrs

    return single_ds
def merge_datasets(datasets: list, year: int, month: int) -> xr.Dataset:
    """
    Concatenate per-file datasets along the 'ymd' dimension.

    After concatenation the dimensions are reordered to
    ('ymd', 'Time', 'south_north', 'west_east').

    Args:
        datasets (list): List of xarray Datasets, one per input file.
        year (int): Year of the data (not used by the current implementation).
        month (int): Month of the data (not used by the current implementation).

    Returns:
        xr.Dataset: Merged dataset.
    """
    dim_order = ('ymd', 'Time', 'south_north', 'west_east')
    stacked = xr.concat(datasets, dim='ymd')
    return stacked.transpose(*dim_order)
def save_to_netcdf(merged_ds: xr.Dataset, output_path: str, year: int, month: int) -> None:
    """
    Save merged dataset to a NetCDF file.

    The file is named ``WRF_PGW_2C_re-<year><MM>-d02-hourly.nc`` and written
    inside ``output_path`` in NETCDF4 format. 'Times' is encoded as 'S1' and
    the six data variables as float32.

    Args:
        merged_ds (xr.Dataset): Merged dataset to save.
        output_path (str): Directory to save the NetCDF file. A trailing path
            separator is optional.
        year (int): Year of the data.
        month (int): Month of the data.
    """
    # Function-scope import so this block does not depend on the file header.
    import os

    output_file = f'WRF_PGW_2C_re-{year}{month:02}-d02-hourly.nc'

    # All numeric fields share the same on-disk dtype; build their encoding
    # entries from one list instead of repeating the literal.
    float_vars = ('T2', 'Q2', 'PSFC', 'U10', 'V10', 'ACSWDNB')
    encoding = {'Times': {'dtype': 'S1'}}
    encoding.update({name: {'dtype': 'float32'} for name in float_vars})

    merged_ds.to_netcdf(
        # os.path.join adds the separator only when output_path lacks one.
        os.path.join(output_path, output_file),
        format='NETCDF4',
        engine='netcdf4',
        encoding=encoding,
    )
def main():
    """
    Main function to process WRF output files, merge daily data into monthly files,
    and save as NetCDF.

    For every configured year and month, the matching wrfout files are
    listed, processed one by one, merged, and written to ``output_path``.
    Months for which no input file is found are reported and skipped.
    """

    base_input_path = '/home/user/work/share/WRF3type/WRF_PGW/2C_re/'  # for PGW_2C run
    output_path = '/home/user/work/share/WRF3type/hrdata-TCCIP-WRF3type-nc/PGW_2C/'
    years = range(2020, 2021)

    for year in tqdm(years, desc="Processing years"):
        year_folder = f'2C_re-{year}-PGW_wrfout_ERA5_domainv2_L38/'
        input_dir = f'{base_input_path}{year_folder}test3/'

        for month in tqdm(range(1, 2), desc=f"Processing months for {year}", leave=False):
            # Get file list
            file_list = get_file_list(input_dir, year, month)

            # Merging an empty list fails inside xr.concat with an unhelpful
            # message; report the missing month and move on instead.
            if not file_list:
                # tqdm.write prints without corrupting the active progress bars.
                tqdm.write(
                    f"No wrfout files found for {year}-{month:02} in {input_dir}; skipping."
                )
                continue

            # Process each file with progress bar
            datasets = [
                process_single_file(file)
                for file in tqdm(
                    file_list,
                    desc=f"Processing files for {year}-{month:02}",
                    leave=False,
                )
            ]

            # Merge datasets
            merged_ds = merge_datasets(datasets, year, month)

            # Save to NetCDF
            save_to_netcdf(merged_ds, output_path, year, month)
# Run the processing pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
 main()

Then ncdump -c the netcdf file:

netcdf WRF_PGW_2C_re-202001-d02-hourly {
dimensions:
 ymd = 31 ;
 Time = 24 ;
 string19 = 19 ;
 south_north = 230 ;
 west_east = 150 ;
variables:
 char Times(ymd, Time, string19) ;
 Times:coordinates = "XTIME" ;
 float T2(ymd, Time, south_north, west_east) ;
 T2:_FillValue = NaNf ;
 T2:FieldType = 104 ;
 T2:MemoryOrder = "XY " ;
 T2:description = "TEMP at 2 M" ;
 T2:units = "K" ;
 T2:stagger = "" ;
 T2:coordinates = "XLAT XLONG XTIME" ;
 float Q2(ymd, Time, south_north, west_east) ;
 Q2:_FillValue = NaNf ;
 Q2:FieldType = 104 ;
 Q2:MemoryOrder = "XY " ;
 Q2:description = "QV at 2 M" ;
 Q2:units = "kg kg-1" ;
 Q2:stagger = "" ;
 Q2:coordinates = "XLAT XLONG XTIME" ;
 float PSFC(ymd, Time, south_north, west_east) ;
 PSFC:_FillValue = NaNf ;
 PSFC:FieldType = 104 ;
 PSFC:MemoryOrder = "XY " ;
 PSFC:description = "SFC PRESSURE" ;
 PSFC:units = "Pa" ;
 PSFC:stagger = "" ;
 PSFC:coordinates = "XLAT XLONG XTIME" ;
 float U10(ymd, Time, south_north, west_east) ;
 U10:_FillValue = NaNf ;
 U10:FieldType = 104 ;
 U10:MemoryOrder = "XY " ;
 U10:description = "U at 10 M" ;
 U10:units = "m s-1" ;
 U10:stagger = "" ;
 U10:coordinates = "XLAT XLONG XTIME" ;
 float V10(ymd, Time, south_north, west_east) ;
 V10:_FillValue = NaNf ;
 V10:FieldType = 104 ;
 V10:MemoryOrder = "XY " ;
 V10:description = "V at 10 M" ;
 V10:units = "m s-1" ;
 V10:stagger = "" ;
 V10:coordinates = "XLAT XLONG XTIME" ;
 float ACSWDNB(ymd, Time, south_north, west_east) ;
 ACSWDNB:_FillValue = NaNf ;
 ACSWDNB:FieldType = 104 ;
 ACSWDNB:MemoryOrder = "XY " ;
 ACSWDNB:description = "ACCUMULATED DOWNWELLING SHORTWAVE FLUX AT BOTTOM" ;
 ACSWDNB:units = "J m-2" ;
 ACSWDNB:stagger = "" ;
 ACSWDNB:coordinates = "XLAT XLONG XTIME" ;
 float XTIME(ymd, Time) ;
 XTIME:_FillValue = NaNf ;
 XTIME:FieldType = 104 ;
 XTIME:MemoryOrder = "0 " ;
 XTIME:description = "minutes since 2020-01-01 00:00:00" ;
 XTIME:stagger = "" ;
 XTIME:units = "minutes since 2020-01-01" ;
 XTIME:calendar = "proleptic_gregorian" ;
 float XLAT(Time, south_north, west_east) ;
 XLAT:_FillValue = NaNf ;
 XLAT:FieldType = 104 ;
 XLAT:MemoryOrder = "XY " ;
 XLAT:description = "LATITUDE, SOUTH IS NEGATIVE" ;
 XLAT:units = "degree_north" ;
 XLAT:stagger = "" ;
 XLAT:coordinates = "XLONG XLAT" ;
 float XLONG(Time, south_north, west_east) ;
 XLONG:_FillValue = NaNf ;
 XLONG:FieldType = 104 ;
 XLONG:MemoryOrder = "XY " ;
 XLONG:description = "LONGITUDE, WEST IS NEGATIVE" ;
 XLONG:units = "degree_east" ;
 XLONG:stagger = "" ;
 XLONG:coordinates = "XLONG XLAT" ;
data:
}

I have two questions in mind:

  1. In the Python code I never select or modify XTIME, yet it appears in the output netCDF file, and I don't understand why. I only extract the 'Times' variable to keep the datetime as a string. Is there a way to drop XTIME when merging the datasets?

  2. XLAT and XLONG hold the latitude and longitude information. I would like them to be 2D (south_north, west_east), without the "Time" or "ymd" dimension, and to appear as the first two variables in the file.

marc_s
760k186 gold badges1.4k silver badges1.5k bronze badges
asked Aug 21, 2025 at 4:13
1
  • XTIME, XLAT and XLONG are coordinates in your dataset, so they are kept with the according data variables. You'd need to drop the unwanted coordinates from the dataset. See docs.xarray.dev/en/stable/generated/… Commented Aug 22, 2025 at 8:23

0

Know someone who can answer? Share a link to this question via email, Twitter, or Facebook.

Your Answer

Draft saved
Draft discarded

Sign up or log in

Sign up using Google
Sign up using Email and Password

Post as a guest

Required, but never shown

Post as a guest

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.