I'm currently working on a thesis project using Jupyter Notebook on a remote cluster. After running my code I'm not able to save anything anymore and I keep getting the following error: `[Errno 28] No space left on device`.
If I check the space usage with df -h on the cluster frontend, everything is well below 100% and I don't get where the issue is coming from.
I start getting the error when calling a function in a loop. I tried clearing all variables after the loop with %reset -f but that doesn't seem to do anything. The function causing the issue seems to be the resample_on_wavelength called in the fit_single_detector method of the Profile_Fitter class.
This is the full class:
# Define the Profile_Fitter class to find the best SEMIMAJOR_AXIS vs FWHM relation for a whole frame (16 detectors)
import numpy as np
import gelsa
from gelsa.sgs import datastore
from tqdm import tqdm
class Profile_Fitter:
    """Find the best SEMIMAJOR_AXIS vs FWHM relation for a whole frame (16 detectors).

    The catalog ``cat`` is expected to be a table-like object exposing the
    columns RIGHT_ASCENSION, DECLINATION and SEMIMAJOR_AXIS
    (assumption inferred from usage below — confirm against the caller).
    """

    def __init__(self, pt_id_s, cat):
        self.pt_id_s = pt_id_s  # pointing id(s) of interest
        self.cat = cat          # source catalog (column access by name)

    def load_data_to_frame(self, pt_id):
        """Load the SIR pack for one pointing and return the Gelsa spec frame.

        NOTE(review): the DataStore cachedir lives on /scratch; repeated calls
        download/cache data there, which can exhaust quota ([Errno 28]) even
        when `df -h` on the frontend looks fine — check `df -i` / quota too.
        """
        DS = datastore.DataStore(username='', password='',
                                 cachedir='/scratch/astro/benjamin.granett/datastore')
        # Load the data related to the pointing_id
        file_list = DS.load_sir_pack("DR1_R1", pointing_id_list=[pt_id])
        # Create a Gelsa object
        G = gelsa.Gelsa(config_file="../gelsa-spectra/calib/gelsa_config.json",
                        calibdir="../gelsa-spectra/calib/",
                        zero_order_catalog=None)
        # Define the frame associated to the pointing
        frame = G.load_spec_frame(**file_list[0])
        return frame

    def smjax_range(self, smjax_data, center, num=20):
        """Return the ``num`` objects of ``smjax_data`` closest to ``center``.

        Args:
            smjax_data: 1-D array-like of semi-major axis values.
            center (float): center of the interval.
            num (int): number of objects to keep (20 by default).

        Returns:
            tuple: (values, indices) — the closest values and their indices,
            with indices sorted ascending so catalog order is preserved.
        """
        smjax_data = np.asarray(smjax_data)
        # Distance of each object from the center
        dist = np.abs(smjax_data - center)
        # Keep the "num" closest objects, then restore catalog order
        idx = np.sort(np.argsort(dist)[:num])
        return smjax_data[idx], idx

    def fit_single_detector(self, det_n, smjax_centers, pt_id, num_per_range=20, batch_size=5):
        """
        Fit the SEMIMAJOR_AXIS vs FWHM relation for one detector, processing
        sources in memory-safe batches.

        Args:
            det_n (int): Detector number (0-15)
            smjax_centers (list/array): Centers of semi-major axis ranges
            pt_id (int/str): Pointing ID
            num_per_range (int): Number of sources per semi-major axis range
            batch_size (int): Number of sources to process at once (memory control)

        Returns:
            tuple: (slope, intercept) from linear regression, or (nan, nan)
            when no range yields a measurable peak; None for an invalid det_n.
        """
        import gc
        from sklearn.linear_model import LinearRegression
        from scipy.signal import peak_widths, find_peaks

        if not (0 <= det_n < 16):
            print("Detector number not valid\n")
            return
        frame = self.load_data_to_frame(pt_id)
        print(f"\nCurrently fitting detector {det_n} in pointing {pt_id}")

        # Keep only sources that fall on this detector at both band edges
        x1, y1, det1 = frame.radec_to_pixel(self.cat['RIGHT_ASCENSION'], self.cat['DECLINATION'], wavelength=12000)
        x2, y2, det2 = frame.radec_to_pixel(self.cat['RIGHT_ASCENSION'], self.cat['DECLINATION'], wavelength=19000)
        on_detector = (det1 == det_n) & (det2 == det_n)
        ra_on_detector = self.cat[on_detector]['RIGHT_ASCENSION']
        dec_on_detector = self.cat[on_detector]['DECLINATION']

        # Prepare semi-major axis ranges: for each center, the indices of the
        # num_per_range nearest sources on this detector
        smjax_range_values = []
        indices = []
        for c in smjax_centers:
            masked, index = self.smjax_range(self.cat[on_detector]["SEMIMAJOR_AXIS"], center=c, num=num_per_range)
            smjax_range_values.append(masked)
            indices.append(index)

        # Collapsed spectra: one 11-pixel vertical profile per source
        spectra = np.zeros((len(smjax_centers), num_per_range, 11))  # 11 pixels vertically

        # Loop over semi-major axis ranges
        for i, indix in enumerate(indices):
            temp = self.cat[on_detector][indix]  # sources in current range
            n_sources = len(temp)
            # Process sources in small batches to bound peak memory
            for start in range(0, n_sources, batch_size):
                end = min(start + batch_size, n_sources)
                batch_ra = temp["RIGHT_ASCENSION"][start:end]
                batch_dec = temp["DECLINATION"][start:end]
                for j, (ra, dec) in enumerate(zip(batch_ra, batch_dec)):
                    im, var, norm, pix_bins = frame.resample_on_wavelength(
                        ra, dec, wave_range=frame.params['wavelength_range'], super_sample=1)
                    spectra[i, start + j, :] = im.sum(axis=1)
                    # Drop the large intermediates right away
                    del im, var, norm, pix_bins
                # gc.collect() once per batch (per-source collection is very
                # slow and gains nothing — the dels above already free memory)
                gc.collect()

        # Average profile per semi-major axis range
        avg_spectra = spectra.mean(axis=1)

        # FWHM of the main (highest) peak of each averaged profile
        fwhm = np.zeros(len(avg_spectra))
        for i, av in enumerate(avg_spectra):
            peaks, _ = find_peaks(av)
            if len(peaks) == 0:
                fwhm[i] = np.nan
                continue
            results_half = peak_widths(av, peaks, rel_height=0.5)
            max_peak_fwhm = results_half[0][np.argmax(av[peaks])]
            fwhm[i] = max_peak_fwhm

        # Linear fit ignoring NaNs; bail out gracefully if nothing was valid
        valid = ~np.isnan(fwhm)
        if not valid.any():
            print(f"No measurable peaks on detector {det_n}; returning NaN fit\n")
            return np.nan, np.nan
        reg = LinearRegression().fit(np.array(smjax_centers)[valid].reshape(-1, 1), fwhm[valid])
        return reg.coef_[0], reg.intercept_

    def fit_full_frame(self, smjax_centers, pt_id, num_per_range):
        """Fit all 16 detectors of one pointing.

        Returns:
            np.ndarray: shape (16, 2) array of (slope, intercept) per detector.
        """
        fit_coeffs = np.zeros((16, 2))
        for j in range(16):
            # BUG FIX: the original called the bare name `fit_single_detector`,
            # which raises NameError — it is a method and must go through self.
            m_, q_ = self.fit_single_detector(j, smjax_centers, pt_id, num_per_range)
            fit_coeffs[j, :] = m_, q_
        return fit_coeffs
Thanks to anyone who can help me sort this out.
1 Answer 1
[Errno 28] means the system ran out of writable space somewhere. Most likely /tmp, /dev/shm, or your inode quota, not your main disk.
Check what’s full:
df -h /tmp /dev/shm
df -i
If /tmp is full, clean up only the files you own — on a shared cluster, never wipe all of /tmp, since other users' files live there too:
find /tmp -user "$USER" -delete
Or redirect temporary files to a location with space. Note that "$USER" is not expanded inside a plain Python string, and TMPDIR must be set before any temp files are created (ideally export it in your shell before launching Jupyter):
import os
os.environ["TMPDIR"] = os.path.expandvars("/scratch/$USER/tmp")
Your loop resample_on_wavelength probably creates temp files and fills /tmp, which stops Jupyter from saving.
Try `df -h`. The filesystem may also reserve some space for root/bad sectors, so a partition can fail writes before reaching 100%. Also, please put all the information in the question itself, not in a comment — it will be more visible to others.