diff --git a/doc/source/admin/hardware_managers.rst b/doc/source/admin/hardware_managers.rst index f9eb360af..ca9df7afc 100644 --- a/doc/source/admin/hardware_managers.rst +++ b/doc/source/admin/hardware_managers.rst @@ -176,6 +176,9 @@ containing hints to identify the drives. For example:: To prevent software RAID devices from being deleted, put their volume name (defined in the ``target_raid_config``) to the list. +If a volume name is present in the ``skip_block_devices`` property, all logical +disks in the ``target_raid_config`` are required to have volume names defined. + Note: one dictionary with one value for each of the logical disks. For example:: diff --git a/ironic_python_agent/hardware.py b/ironic_python_agent/hardware.py index 19039a0c0..bd276de20 100644 --- a/ironic_python_agent/hardware.py +++ b/ironic_python_agent/hardware.py @@ -498,7 +498,8 @@ def list_all_block_devices(block_type='disk', Broken out as its own function to facilitate custom hardware managers that don't need to subclass GenericHardwareManager. - :param block_type: Type of block device to find + :param block_type: Type(s) of block device to find. + Can be a string or a list of strings. :param ignore_raid: Ignore auto-identified raid devices, example: md0 Defaults to false as these are generally disk devices and should be treated as such if encountered. @@ -525,6 +526,14 @@ def list_all_block_devices(block_type='disk', return True return False + # Normalize block_type to a list + if isinstance(block_type, str): + block_types = [block_type] + elif isinstance(block_type, list): + block_types = block_type + else: + raise ValueError("block_type must be a string or a list of strings") + check_multipath = not ignore_multipath and get_multipath_status() disk_utils.udev_settle() @@ -601,22 +610,22 @@ def list_all_block_devices(block_type='disk', # Other possible type values, which we skip recording: # lvm, part, rom, loop - if devtype != block_type: + if devtype not in block_types: if devtype is None or ignore_raid: LOG.debug( - "TYPE did not match. Wanted: %(block_type)s but found: " + "TYPE did not match. Wanted: %(block_types)s but found: " "%(devtype)s (RAID devices are ignored)", - {'block_type': block_type, 'devtype': devtype}) + {'block_types': block_types, 'devtype': devtype}) continue elif ('raid' in devtype - and block_type in ['raid', 'disk', 'mpath']): + and any(bt in ['raid', 'disk', 'mpath'] + for bt in block_types)): LOG.debug( "TYPE detected to contain 'raid', signifying a " "RAID volume. Found: %(device_raw)s", {'device_raw': device_raw}) elif (devtype == 'md' - and (block_type == 'part' - or block_type == 'md')): + and any(bt in ['part', 'md'] for bt in block_types)): # NOTE(dszumski): Partitions on software RAID devices have type # 'md'. This may also contain RAID devices in a broken state in # rare occasions. See https://review.opendev.org/#/c/670807 for @@ -625,7 +634,8 @@ def list_all_block_devices(block_type='disk', "TYPE detected to contain 'md', signifying a " "RAID partition. Found: %(device_raw)s", {'device_raw': device_raw}) - elif devtype == 'mpath' and block_type == 'disk': + elif (devtype == 'mpath' and any(bt == 'disk' + for bt in block_types)): LOG.debug( "TYPE detected to contain 'mpath', " "signifing a device mapper multipath device. " @@ -633,9 +643,9 @@ def list_all_block_devices(block_type='disk', {'device_raw': device_raw}) else: LOG.debug( - "TYPE did not match. Wanted: %(block_type)s but found: " + "TYPE did not match. 
Wanted: %(block_types)s but found: " "%(device_raw)s (RAID devices are ignored)", - {'block_type': block_type, 'device_raw': device_raw}) + {'block_types': block_types, 'device_raw': device_raw}) continue # Ensure all required columns are at least present, even if blank @@ -965,14 +975,21 @@ class HardwareManager(object, metaclass=abc.ABCMeta): """ raise errors.IncompatibleHardwareMethodError - def get_skip_list_from_node(self, node, - block_devices=None, just_raids=False): + def get_skip_list_from_node_for_disks(self, node, + block_devices=None): + """Get the skip block devices list from the node for physical disks + + :param node: A node to be check for the 'skip_block_devices' property + :param block_devices: a list of BlockDevices + :returns: A set of names of devices on the skip list + """ + raise errors.IncompatibleHardwareMethodError + + def get_skip_list_from_node_for_raids(self, node): """Get the skip block devices list from the node - :param block_devices: a list of BlockDevices - :param just_raids: a boolean to signify that only RAID devices - are important - :returns: A set of names of devices on the skip list + :param node: A node to be check for the 'skip_block_devices' property + :returns: A set of volume names of RAID arrays on the skip list """ raise errors.IncompatibleHardwareMethodError @@ -1751,15 +1768,12 @@ class GenericHardwareManager(HardwareManager): ) return filter_devices(block_devices) - def get_skip_list_from_node(self, node, - block_devices=None, just_raids=False): + def get_skip_list_from_node_for_disks(self, node, + block_devices=None): properties = node.get('properties', {}) skip_list_hints = properties.get("skip_block_devices", []) if not skip_list_hints: return None - if just_raids: - return {d['volume_name'] for d in skip_list_hints - if 'volume_name' in d} if not block_devices: return None skip_list = set() @@ -1777,17 +1791,50 @@ class GenericHardwareManager(HardwareManager): {'hint': hint, 'devs': ','.join(skipped_devices)}) return skip_list + def get_skip_list_from_node_for_raids(self, node): + properties = node.get('properties', {}) + skip_list_hints = properties.get("skip_block_devices", []) + if not skip_list_hints: + return None + raid_skip_list = {d['volume_name'] for d in skip_list_hints + if 'volume_name' in d} + if len(raid_skip_list) == 0: + return None + else: + return raid_skip_list + def list_block_devices_check_skip_list(self, node, include_partitions=False, - all_serial_and_wwn=False): + all_serial_and_wwn=False, + include_wipe=False): block_devices = self.list_block_devices( include_partitions=include_partitions, all_serial_and_wwn=all_serial_and_wwn) - skip_list = self.get_skip_list_from_node( + skip_list = self.get_skip_list_from_node_for_disks( node, block_devices) if skip_list is not None: block_devices = [d for d in block_devices if d.name not in skip_list] + # Note(kubajj): match volume_names to raid_device names + raid_skip_list = self.get_skip_list_from_node_for_raids(node) + if raid_skip_list is not None: + # Find all raid devices and remove anyone with 'keep' + # (and 'wipe' if include_wipe is true) + raid_devices = list_all_block_devices(block_type=['raid', 'md'], + ignore_raid=False, + ignore_empty=False) + raid_skip_list_dict = self._handle_raid_skip_list(raid_devices, + raid_skip_list) + delete_raid_devices = raid_skip_list_dict['delete_raid_devices'] + block_devices = [ + rd for rd in block_devices + if delete_raid_devices.get(rd.name) != 'keep' + ] + if include_wipe: + block_devices = [ + rd for rd in block_devices + if 
delete_raid_devices.get(rd.name) != 'wipe' + ] return block_devices def get_os_install_device(self, permit_refresh=False): @@ -1972,7 +2019,7 @@ class GenericHardwareManager(HardwareManager): def _list_erasable_devices(self, node): block_devices = self.list_block_devices_check_skip_list( - node, include_partitions=True) + node, include_partitions=True, include_wipe=True) # NOTE(coreywright): Reverse sort by device name so a partition (eg # sda1) is processed before it disappears when its associated disk (eg # sda) has its partition table erased and the kernel notified. @@ -2928,6 +2975,128 @@ class GenericHardwareManager(HardwareManager): self.delete_configuration(node, ports) return self._do_create_configuration(node, ports, raid_config) + def _handle_raid_skip_list(self, raid_devices, skip_list): + '''Handle the RAID skip list + + This function analyzes the existing RAID devices and the provided + skip list to determine which RAID devices should be deleted, wiped, + or kept. + + :param raid_devices: A list of BlockDevice objects representing + existing RAID devices + :param skip_list: A list of volume names to skip + :returns: A dictionary with three keys: + - 'delete_raid_devices': A dictionary mapping RAID device + names to actions ('delete', 'wipe', 'keep') + - 'volume_name_of_raid_devices': A dictionary mapping + RAID device names to their volume names + - 'cause_of_not_deleting': A dictionary mapping RAID + device names to the volume names of RAID devices + that caused them to be kept + ''' + # NOTE(kubajj): + # Options in the dictionary delete_raid_devices: + # 1. Delete both superblock and data - delete + # 2. Keep superblock and wipe data - wipe + # 3. Do not touch RAID array - keep + delete_raid_devices = {} + + volume_name_of_raid_devices = {} + cause_of_not_deleting = {} + + raid_devices_on_holder_disks = {} + volume_name_on_skip_list = {} + esp_part = None + for raid_device in raid_devices: + delete_raid_devices[raid_device.name] = 'delete' + esp_part = self._analyze_raid_device(raid_device, skip_list, + raid_devices_on_holder_disks, + volume_name_on_skip_list, + volume_name_of_raid_devices) + for raid_device in raid_devices: + if volume_name_on_skip_list.get(raid_device.name): + self._handle_raids_with_volume_name_on_skip_list( + raid_device.name, delete_raid_devices, + cause_of_not_deleting, raid_devices_on_holder_disks, + volume_name_of_raid_devices) + # NOTE(kubajj): If ESP partition was supposed to be wiped, + # we keep it so that it can be found by raid_utils.find_esp_raid + if esp_part is not None and \ + delete_raid_devices.get(esp_part) == 'wipe': + delete_raid_devices[esp_part] = 'keep' + return {'delete_raid_devices': delete_raid_devices, + 'volume_name_of_raid_devices': volume_name_of_raid_devices, + 'cause_of_not_deleting': cause_of_not_deleting} + + def _analyze_raid_device(self, raid_device, skip_list, + raid_devices_on_holder_disks, + volume_name_on_skip_list, + volume_name_of_raid_devices): + '''Analyze a RAID device + + This function figures out which holder disks a RAID device is on, + checks if its volume name is on the skip list, and checks whether + it is an ESP partition - in which case it returns its name. + It also updates the provided dictionaries with information about + the RAID device. 
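        As an illustration (hypothetical device names, mirroring the unit
        tests): for a RAID device /dev/md0 spanning /dev/sda and /dev/sdb
        whose volume name 'data1' is on the skip list, the dictionaries end
        up as::

            raid_devices_on_holder_disks = {'/dev/sda': ['/dev/md0'],
                                            '/dev/sdb': ['/dev/md0']}
            volume_name_on_skip_list = {'/dev/md0': True}
            volume_name_of_raid_devices = {'/dev/md0': 'data1'}

        and None is returned, since the volume name is not 'esp'.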
+ + :param raid_device: A BlockDevice object representing a RAID device + :param skip_list: A list of volume names to skip + :param raid_devices_on_holder_disks: A dictionary mapping holder disks + to lists of RAID devices on them + :param volume_name_on_skip_list: A dictionary mapping RAID device names + to booleans indicating whether their volume name is on the skip + list + :param volume_name_of_raid_devices: A dictionary mapping RAID device + names to their volume names + :returns: The name of the ESP partition if the RAID device is an ESP + partition, None otherwise + ''' + esp_part = None + holder_disks = get_holder_disks(raid_device.name) + for holder_disk in holder_disks: + if raid_devices_on_holder_disks.get(holder_disk): + raid_devices_on_holder_disks.get(holder_disk).append( + raid_device.name) + else: + raid_devices_on_holder_disks[holder_disk] = [ + raid_device.name] + volume_name = raid_utils.get_volume_name_of_raid_device( + raid_device.name) + if volume_name == 'esp': + esp_part = raid_device.name + volume_name_of_raid_devices[raid_device.name] = volume_name + if volume_name: + LOG.info("Software RAID device %(dev)s has volume name " + "%(name)s", {'dev': raid_device.name, + 'name': volume_name}) + if volume_name in skip_list: + LOG.warning("RAID device %s will not be deleted", + raid_device.name) + volume_name_on_skip_list[raid_device.name] = True + else: + volume_name_on_skip_list[raid_device.name] = False + return esp_part + + def _handle_raids_with_volume_name_on_skip_list( + self, raid_device_name, delete_raid_devices, + cause_of_not_deleting, raid_devices_on_holder_disks, + volume_name_of_raid_devices): + # NOTE(kubajj): Keep this raid_device + # wipe all other RAID arrays on these holder disks + # unless they have 'keep' already + delete_raid_devices[raid_device_name] = 'keep' + holder_disks = get_holder_disks(raid_device_name) + for holder_disk in holder_disks: + for neighbour_raid_device in raid_devices_on_holder_disks[ + holder_disk]: + if not neighbour_raid_device == raid_device_name and \ + delete_raid_devices[neighbour_raid_device] \ + == 'delete': + delete_raid_devices[neighbour_raid_device] = 'wipe' + cause_of_not_deleting[neighbour_raid_device] = \ + volume_name_of_raid_devices[raid_device_name] + def create_configuration(self, node, ports): """Create a RAID configuration. @@ -2951,30 +3120,103 @@ class GenericHardwareManager(HardwareManager): return self._do_create_configuration(node, ports, raid_config) def _do_create_configuration(self, node, ports, raid_config): - def _get_volume_names_of_existing_raids(): - list_of_raids = [] - raid_devices = list_all_block_devices(block_type='raid', + def _create_raid_ignore_list(logical_disks, skip_list): + """Create list of RAID devices to ignore during creation. + + Creates a list of RAID devices to ignore when trying to create a + the target raid config. This list includes RAIDs that are left + over due to safeguards (skip_block_devices) as well as others + which are affected by it. + + :param logical_disks: A list of logical disks from the target RAID + config. + :returns: The list of RAID devices to ignore during creation of + the target RAID config. + :raises: SoftwareRAIDError if the desired configuration is not + valid or if there was an error when creating the RAID + devices. 
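            A sketch of the expected outcome (hypothetical names, matching
            the unit tests): assuming logical disks named 'root' and 'data',
            skip_list = {'data'} and an already existing RAID array whose
            volume name is 'data'::

                rm_from_list = _create_raid_ignore_list(logical_disks,
                                                        skip_list)
                # -> [<logical disk 'data'>]; only 'root' is created later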
+ """ + raid_devices = list_all_block_devices(block_type=['raid', 'md'], ignore_raid=False, ignore_empty=False) - raid_devices.extend( - list_all_block_devices(block_type='md', - ignore_raid=False, - ignore_empty=False) - ) - for raid_device in raid_devices: - device = raid_device.name - try: - utils.execute('mdadm', '--examine', - device, use_standard_locale=True) - except processutils.ProcessExecutionError as e: - if "No md superblock detected" in str(e): - continue - volume_name = raid_utils.get_volume_name_of_raid_device(device) - if volume_name: - list_of_raids.append(volume_name) - else: - list_of_raids.append("unnamed_raid") - return list_of_raids + + if not raid_devices: + # Escape the function if no RAID devices are present + return [] + + raid_skip_list_dict = self._handle_raid_skip_list( + raid_devices, skip_list) + delete_raid_devices = raid_skip_list_dict['delete_raid_devices'] + volume_name_of_raid_devices = raid_skip_list_dict[ + 'volume_name_of_raid_devices'] + cause_of_not_deleting = raid_skip_list_dict[ + 'cause_of_not_deleting'] + list_of_raids = [volume_name for volume_name in + volume_name_of_raid_devices.values()] + for volume_name in list_of_raids: + if volume_name is None or volume_name == '': + msg = ("A Software RAID device detected that does not " + "have a volume name. This is not allowed " + "because there is a volume_name hint in the " + "property 'skip_block_devices'.") + raise errors.SoftwareRAIDError(msg) + + rm_from_list = \ + _determine_which_logical_disks_should_not_be_created( + logical_disks, skip_list, delete_raid_devices, + volume_name_of_raid_devices, cause_of_not_deleting, + list_of_raids) + if 'esp' in list_of_raids: + # Note(kubajj): The EFI partition is present after deletion, + # therefore, it had to have 'keep' label in delete_raid_devices + # as it is on a holder disk with a safeguarded RAID array. 
+ rd_name = [rd_name for rd_name, v_name in + volume_name_of_raid_devices.items() + if 'esp' == v_name] + if delete_raid_devices[rd_name[0]] == 'keep': + list_of_raids.remove('esp') + LOG.debug("Keeping EFI partition as it is on a holder " + "disk with a safeguarded RAID array %s", + cause_of_not_deleting.get(rd_name[0])) + # NOTE(kubajj): Raise an error if there is an existing software + # RAID device that either does not have a volume name or does not + # match one on the skip list + if list_of_raids: + msg = ("Existing Software RAID device detected that should" + " not") + raise errors.SoftwareRAIDError(msg) + return rm_from_list + + def _determine_which_logical_disks_should_not_be_created( + logical_disks, skip_list, delete_raid_devices, + volume_name_of_raid_devices, cause_of_not_deleting, + list_of_raids): + rm_from_list = [] + for ld in logical_disks: + volume_name = ld.get('volume_name') + + # Volume name in the list_of_raids + if volume_name in list_of_raids: + # Remove LD that are on skip_list + if volume_name in skip_list: + LOG.debug("Software RAID device with volume name %s " + "exists and is, therefore, not going to be " + "created", volume_name) + # Remove LD that are on the same disk as skip_list + elif volume_name not in skip_list: + # Fetch raid device using volume name + rd_name = [rd_name for rd_name, v_name in + volume_name_of_raid_devices.items() + if volume_name == v_name] + if delete_raid_devices[rd_name[0]] != 'wipe' or \ + cause_of_not_deleting.get(rd_name[0]) is None: + msg = ("Existing Software RAID device detected " + "that should not") + raise errors.SoftwareRAIDError(msg) + + list_of_raids.remove(volume_name) + rm_from_list.append(ld) + return rm_from_list # No 'software' controller: do nothing. If 'controller' is # set to 'software' on only one of the drives, the validation @@ -2997,29 +3239,12 @@ class GenericHardwareManager(HardwareManager): # Remove any logical disk from being eligible for inclusion in the # RAID if it's on the skip list - skip_list = self.get_skip_list_from_node( - node, just_raids=True) + skip_list = self.get_skip_list_from_node_for_raids(node) rm_from_list = [] if skip_list: - present_raids = _get_volume_names_of_existing_raids() - if present_raids: - for ld in logical_disks: - volume_name = ld.get('volume_name', None) - if volume_name in skip_list \ - and volume_name in present_raids: - rm_from_list.append(ld) - LOG.debug("Software RAID device with volume name %s " - "exists and is, therefore, not going to be " - "created", volume_name) - present_raids.remove(volume_name) - # NOTE(kubajj): Raise an error if there is an existing software - # RAID device that either does not have a volume name or does not - # match one on the skip list - if present_raids: - msg = ("Existing Software RAID device detected that should" - " not") - raise errors.SoftwareRAIDError(msg) - logical_disks = [d for d in logical_disks if d not in rm_from_list] + rm_from_list = _create_raid_ignore_list(logical_disks, skip_list) + + logical_disks = [d for d in logical_disks if d not in rm_from_list] # Log the validated target_raid_configuration. 
LOG.debug("Target Software RAID configuration: %s", raid_config) @@ -3044,72 +3269,77 @@ class GenericHardwareManager(HardwareManager): partition_table_type = utils.get_partition_table_type_from_specs(node) target_boot_mode = utils.get_node_boot_mode(node) - parted_start_dict = raid_utils.create_raid_partition_tables( - block_devices, partition_table_type, target_boot_mode) + # Only create partitions if some are missing + # (this is no longer guaranteed) + if block_devices and logical_disks: + parted_start_dict = raid_utils.create_raid_partition_tables( + block_devices, partition_table_type, target_boot_mode) - LOG.debug("First available sectors per devices %s", parted_start_dict) + LOG.debug("First available sectors per devices %s", + parted_start_dict) - # Reorder logical disks so that MAX comes last if any: - reordered_logical_disks = [] - max_disk = None - for logical_disk in logical_disks: - psize = logical_disk['size_gb'] - if psize == 'MAX': - max_disk = logical_disk - else: - reordered_logical_disks.append(logical_disk) - if max_disk: - reordered_logical_disks.append(max_disk) - logical_disks = reordered_logical_disks + # Reorder logical disks so that MAX comes last if any: + reordered_logical_disks = [] + max_disk = None + for logical_disk in logical_disks: + psize = logical_disk['size_gb'] + if psize == 'MAX': + max_disk = logical_disk + else: + reordered_logical_disks.append(logical_disk) + if max_disk: + reordered_logical_disks.append(max_disk) + logical_disks = reordered_logical_disks - # With the partitioning below, the first partition is not - # exactly the size_gb provided, but rather the size minus a small - # amount (often 2048*512B=1MiB, depending on the disk geometry). - # Easier to ignore. Another way could be to use sgdisk, which is really - # user-friendly to compute part boundaries automatically, instead of - # parted, then convert back to mbr table if needed and possible. + # With the partitioning below, the first partition is not + # exactly the size_gb provided, but rather the size minus a small + # amount (often 2048*512B=1MiB, depending on the disk geometry). + # Easier to ignore. Another way could be to use sgdisk, which is + # really user-friendly to compute part boundaries automatically, + # instead of parted, then convert back to mbr table if needed + # and possible. 
- for logical_disk in logical_disks: - # Note: from the doc, - # https://docs.openstack.org/ironic/latest/admin/raid.html#target-raid-configuration - # size_gb unit is GiB + for logical_disk in logical_disks: + # Note: from the doc, + # https://docs.openstack.org/ironic/latest/admin/raid.html#target-raid-configuration + # size_gb unit is GiB - psize = logical_disk['size_gb'] - if psize == 'MAX': - psize = -1 - else: - psize = int(psize) + psize = logical_disk['size_gb'] + if psize == 'MAX': + psize = -1 + else: + psize = int(psize) - # NOTE(dtantsur): populated in get_block_devices_for_raid - disk_names = logical_disk['block_devices'] - for device in disk_names: - start = parted_start_dict[device] - start_str, end_str, end = ( - raid_utils.calc_raid_partition_sectors(psize, start) - ) - try: - LOG.debug("Creating partition on %(dev)s: %(str)s %(end)s", - {'dev': device, 'str': start_str, - 'end': end_str}) + # NOTE(dtantsur): populated in get_block_devices_for_raid + disk_names = logical_disk['block_devices'] + for device in disk_names: + start = parted_start_dict[device] + start_str, end_str, end = ( + raid_utils.calc_raid_partition_sectors(psize, start) + ) + try: + LOG.debug("Creating partition on %(dev)s: %(str)s " + "%(end)s", {'dev': device, 'str': start_str, + 'end': end_str}) - utils.execute('parted', device, '-s', '-a', - 'optimal', '--', 'mkpart', 'primary', - start_str, end_str) + utils.execute('parted', device, '-s', '-a', + 'optimal', '--', 'mkpart', 'primary', + start_str, end_str) - except processutils.ProcessExecutionError as e: - msg = "Failed to create partitions on {}: {}".format( - device, e) - raise errors.SoftwareRAIDError(msg) + except processutils.ProcessExecutionError as e: + msg = "Failed to create partitions on {}: {}".format( + device, e) + raise errors.SoftwareRAIDError(msg) - utils.rescan_device(device) + utils.rescan_device(device) - parted_start_dict[device] = end + parted_start_dict[device] = end - # Create the RAID devices. The indices mapping tracks the last used - # partition index for each physical device. - indices = {} - for index, logical_disk in enumerate(logical_disks): - raid_utils.create_raid_device(index, logical_disk, indices) + # Create the RAID devices. The indices mapping tracks the last used + # partition index for each physical device. + indices = {} + for index, logical_disk in enumerate(logical_disks): + raid_utils.create_raid_device(index, logical_disk, indices) LOG.info("Successfully created Software RAID") @@ -3133,29 +3363,28 @@ class GenericHardwareManager(HardwareManager): def _scan_raids(): utils.execute('mdadm', '--assemble', '--scan', check_exit_code=False) - raid_devices = list_all_block_devices(block_type='raid', + # # NOTE(dszumski): Fetch all devices of type 'md'. This + # # will generally contain partitions on a software RAID + # # device, but crucially may also contain devices in a + # # broken state. See https://review.opendev.org/#/c/670807/ + # # for more detail. + raid_devices = list_all_block_devices(block_type=['raid', 'md'], ignore_raid=False, ignore_empty=False) - # NOTE(dszumski): Fetch all devices of type 'md'. This - # will generally contain partitions on a software RAID - # device, but crucially may also contain devices in a - # broken state. See https://review.opendev.org/#/c/670807/ - # for more detail. 
- raid_devices.extend( - list_all_block_devices(block_type='md', - ignore_raid=False, - ignore_empty=False) - ) return raid_devices raid_devices = _scan_raids() - skip_list = self.get_skip_list_from_node( - node, just_raids=True) + skip_list = self.get_skip_list_from_node_for_raids( + node) attempts = 0 while attempts < 2: attempts += 1 - self._delete_config_pass(raid_devices, skip_list) + delete_raid_devices = self._delete_config_pass(raid_devices, + skip_list) raid_devices = _scan_raids() + if skip_list: + raid_devices = [rd for rd in raid_devices if + delete_raid_devices[rd.name] == 'delete'] if not raid_devices: break else: @@ -3168,18 +3397,18 @@ class GenericHardwareManager(HardwareManager): all_holder_disks = [] do_not_delete_devices = set() delete_partitions = {} + delete_raid_devices = {dev.name: 'delete' for dev in raid_devices} + volume_name_of_raid_devices = {} + cause_of_not_deleting = {} + if skip_list: + raid_skip_list_dict = self._handle_raid_skip_list(raid_devices, + skip_list) + delete_raid_devices = raid_skip_list_dict['delete_raid_devices'] + volume_name_of_raid_devices = raid_skip_list_dict[ + 'volume_name_of_raid_devices'] + cause_of_not_deleting = raid_skip_list_dict[ + 'cause_of_not_deleting'] for raid_device in raid_devices: - do_not_delete = False - volume_name = raid_utils.get_volume_name_of_raid_device( - raid_device.name) - if volume_name: - LOG.info("Software RAID device %(dev)s has volume name" - "%(name)s", {'dev': raid_device.name, - 'name': volume_name}) - if skip_list and volume_name in skip_list: - LOG.warning("RAID device %s will not be deleted", - raid_device.name) - do_not_delete = True component_devices = get_component_devices(raid_device.name) if not component_devices: # A "Software RAID device" without components is usually @@ -3190,74 +3419,101 @@ class GenericHardwareManager(HardwareManager): "partition %s", raid_device.name) continue holder_disks = get_holder_disks(raid_device.name) - - if do_not_delete: - LOG.warning("Software RAID device %(dev)s is not going to be " - "deleted as its volume name - %(vn)s - is on the " - "skip list", {'dev': raid_device.name, - 'vn': volume_name}) - else: - LOG.info("Deleting Software RAID device %s", raid_device.name) - LOG.debug('Found component devices %s', component_devices) - LOG.debug('Found holder disks %s', holder_disks) - - if not do_not_delete: - # Remove md devices. - try: - utils.execute('wipefs', '-af', raid_device.name) - except processutils.ProcessExecutionError as e: - LOG.warning('Failed to wipefs %(device)s: %(err)s', - {'device': raid_device.name, 'err': e}) - try: - utils.execute('mdadm', '--stop', raid_device.name) - except processutils.ProcessExecutionError as e: - LOG.warning('Failed to stop %(device)s: %(err)s', - {'device': raid_device.name, 'err': e}) - - # Remove md metadata from component devices. 
- for component_device in component_devices: - try: - utils.execute('mdadm', '--examine', - component_device, - use_standard_locale=True) - except processutils.ProcessExecutionError as e: - if "No md superblock detected" in str(e): - # actually not a component device - continue - else: - msg = "Failed to examine device {}: {}".format( - component_device, e) - raise errors.SoftwareRAIDError(msg) - - LOG.debug('Deleting md superblock on %s', component_device) - try: - utils.execute('mdadm', '--zero-superblock', - component_device) - except processutils.ProcessExecutionError as e: - LOG.warning('Failed to remove superblock from' - '%(device)s: %(err)s', - {'device': raid_device.name, 'err': e}) - if skip_list: - dev, part = utils.split_device_and_partition_number( - component_device) - if dev in delete_partitions: - delete_partitions[dev].append(part) - else: - delete_partitions[dev] = [part] - else: - for component_device in component_devices: - do_not_delete_devices.add(component_device) - # NOTE(arne_wiebalck): We cannot delete the partitions right # away since there may be other partitions on the same disks # which are members of other RAID devices. So we remember them # for later. all_holder_disks.extend(holder_disks) - if do_not_delete: + delete_raid_device = delete_raid_devices.get(raid_device.name) + if not delete_raid_device == 'delete': + for component_device in component_devices: + do_not_delete_devices.add(component_device) + if delete_raid_device == 'keep': + volume_name = volume_name_of_raid_devices[raid_device.name] + if volume_name == 'esp': + cause_volume_name = cause_of_not_deleting.get( + raid_device.name) + LOG.warning("EFI RAID device %(dev)s is not going " + "to be deleted because device %(cvn)s is " + "on the skip list and is present on the " + "same holder disk.", + {'dev': raid_device.name, + 'cvn': cause_volume_name}) + else: + LOG.warning("Software RAID device %(dev)s is not " + "going to be deleted as its volume name " + "- %(vn)s - is on the skip list", + {'dev': raid_device.name, + 'vn': volume_name}) + elif delete_raid_device == 'wipe': + cause_volume_name = cause_of_not_deleting.get( + raid_device.name) + LOG.warning("Software RAID device %(dev)s is not going " + "to be deleted because device %(cvn)s is on " + "the skip list and is present on the same " + "holder disk. The device will be wiped.", + {'dev': raid_device.name, + 'cvn': cause_volume_name}) + try: + utils.execute('wipefs', '-af', raid_device.name) + except processutils.ProcessExecutionError as e: + LOG.warning('Failed to wipefs %(device)s: %(err)s', + {'device': raid_device.name, 'err': e}) + else: + LOG.warning("Software raid device %(device)s has unknown " + "action %(action)s. Skipping.", + {'device': raid_device.name, + 'action': delete_raid_device}) + LOG.warning("Software RAID device %s was not deleted", raid_device.name) - else: - LOG.info('Deleted Software RAID device %s', raid_device.name) + continue + + LOG.info("Deleting Software RAID device %s", raid_device.name) + + # Remove md devices. + try: + utils.execute('wipefs', '-af', raid_device.name) + except processutils.ProcessExecutionError as e: + LOG.warning('Failed to wipefs %(device)s: %(err)s', + {'device': raid_device.name, 'err': e}) + try: + utils.execute('mdadm', '--stop', raid_device.name) + except processutils.ProcessExecutionError as e: + LOG.warning('Failed to stop %(device)s: %(err)s', + {'device': raid_device.name, 'err': e}) + + # Remove md metadata from component devices. 
+ for component_device in component_devices: + try: + utils.execute('mdadm', '--examine', + component_device, + use_standard_locale=True) + except processutils.ProcessExecutionError as e: + if "No md superblock detected" in str(e): + # actually not a component device + continue + else: + msg = "Failed to examine device {}: {}".format( + component_device, e) + raise errors.SoftwareRAIDError(msg) + + LOG.debug('Deleting md superblock on %s', component_device) + try: + utils.execute('mdadm', '--zero-superblock', + component_device) + except processutils.ProcessExecutionError as e: + LOG.warning('Failed to remove superblock from' + '%(device)s: %(err)s', + {'device': raid_device.name, 'err': e}) + if skip_list: + dev, part = utils.split_device_and_partition_number( + component_device) + if dev in delete_partitions: + delete_partitions[dev].append(part) + else: + delete_partitions[dev] = [part] + LOG.info('Deleted Software RAID device %s', raid_device.name) # Remove all remaining raid traces from any drives, in case some # drives or partitions have been member of some raid once @@ -3313,7 +3569,7 @@ class GenericHardwareManager(HardwareManager): for holder_disk in all_holder_disks_uniq: if holder_disk in do_not_delete_disks: # Remove just partitions not listed in keep_partitions - del_list = delete_partitions[holder_disk] + del_list = delete_partitions.get(holder_disk) if del_list: LOG.warning('Holder disk %(dev)s contains logical disk ' 'on the skip list. Deleting just partitions: ' @@ -3322,7 +3578,7 @@ class GenericHardwareManager(HardwareManager): for part in del_list: utils.execute('parted', holder_disk, 'rm', part) else: - LOG.warning('Holder disk %(dev)s contains only logical ' + LOG.warning('Holder disk %s contains only logical ' 'disk(s) on the skip list', holder_disk) continue LOG.info('Removing partitions on holder disk %s', holder_disk) @@ -3334,6 +3590,9 @@ class GenericHardwareManager(HardwareManager): LOG.debug("Finished deleting Software RAID(s)") + # Return dictionary with deletion decisions for RAID devices + return delete_raid_devices + def validate_configuration(self, raid_config, node): """Validate a (software) RAID configuration @@ -3362,6 +3621,7 @@ class GenericHardwareManager(HardwareManager): "two logical disks") raid_errors.append(msg) + raid_skip_list = self.get_skip_list_from_node_for_raids(node) volume_names = [] # All disks need to be flagged for Software RAID for logical_disk in logical_disks: @@ -3371,7 +3631,13 @@ class GenericHardwareManager(HardwareManager): raid_errors.append(msg) volume_name = logical_disk.get('volume_name') - if volume_name is not None: + if volume_name is None: + if raid_skip_list: + msg = ("All logical disks are required to have a volume " + "name specified as a volume name is mentioned in " + "the property skip block devices.") + raid_errors.append(msg) + else: if volume_name in volume_names: msg = ("Duplicate software RAID device name %s " "detected" % volume_name) diff --git a/ironic_python_agent/raid_utils.py b/ironic_python_agent/raid_utils.py index 48867407a..31f340ff3 100644 --- a/ironic_python_agent/raid_utils.py +++ b/ironic_python_agent/raid_utils.py @@ -278,17 +278,22 @@ def get_next_free_raid_device(): raise errors.SoftwareRAIDError("No free md (RAID) devices are left") -def get_volume_name_of_raid_device(raid_device): +def get_volume_name_of_raid_device(raid_device, examine=False): """Get the volume name of a RAID device :param raid_device: A Software RAID block device name. 
+ :param examine: Use --examine instead of --detail :returns: volume name of the device, or None """ if not raid_device: return None try: - out, _ = utils.execute('mdadm', '--detail', raid_device, - use_standard_locale=True) + if examine: + out, _ = utils.execute('mdadm', '--examine', raid_device, + use_standard_locale=True) + else: + out, _ = utils.execute('mdadm', '--detail', raid_device, + use_standard_locale=True) except processutils.ProcessExecutionError as e: LOG.warning('Could not retrieve the volume name of %(dev)s: %(err)s', {'dev': raid_device, 'err': e}) diff --git a/ironic_python_agent/tests/unit/samples/hardware_samples.py b/ironic_python_agent/tests/unit/samples/hardware_samples.py index 5a865871d..6205fdadd 100644 --- a/ironic_python_agent/tests/unit/samples/hardware_samples.py +++ b/ironic_python_agent/tests/unit/samples/hardware_samples.py @@ -1402,7 +1402,7 @@ MDADM_EXAMINE_OUTPUT_MEMBER = ("""/dev/sda1: Version : 1.2 Feature Map : 0x0 Array UUID : 83143055:2781ddf5:2c8f44c7:9b45d92e - Name : horse.cern.ch:1 (local to host abc.xyz.com) + Name : horse.cern.ch:this_name (local to host abc.xyz.com) Creation Time : Tue Jun 11 12:43:37 2019 Raid Level : raid1 Raid Devices : 2 diff --git a/ironic_python_agent/tests/unit/test_hardware.py b/ironic_python_agent/tests/unit/test_hardware.py index 3c3d8be97..7e656f107 100644 --- a/ironic_python_agent/tests/unit/test_hardware.py +++ b/ironic_python_agent/tests/unit/test_hardware.py @@ -1291,7 +1291,8 @@ class TestGenericHardwareManager(base.IronicAgentTest): list_mock.assert_called_once_with(all_serial_and_wwn=False) - def test_get_skip_list_from_node_block_devices_with_skip_list(self): + def test_get_skip_list_from_node_for_disks_block_devices_with_skip_list( + self): block_devices = [ hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), @@ -1305,12 +1306,12 @@ class TestGenericHardwareManager(base.IronicAgentTest): }] } - skip_list = self.hardware.get_skip_list_from_node(node, - block_devices) + skip_list = self.hardware.get_skip_list_from_node_for_disks( + node, block_devices) self.assertEqual(expected_skip_list, skip_list) - def test_get_skip_list_from_node_block_devices_just_raids(self): + def test_get_skip_list_from_node_for_raids_block_devices(self): expected_skip_list = {'large'} node = self.node @@ -1322,20 +1323,20 @@ class TestGenericHardwareManager(base.IronicAgentTest): }] } - skip_list = self.hardware.get_skip_list_from_node(node, - just_raids=True) + skip_list = self.hardware.get_skip_list_from_node_for_raids(node) self.assertEqual(expected_skip_list, skip_list) - def test_get_skip_list_from_node_block_devices_no_skip_list(self): + def test_get_skip_list_from_node_for_disks_block_devices_no_skip_list( + self): block_devices = [ hardware.BlockDevice('/dev/sdj', 'big', 1073741824, True), hardware.BlockDevice('/dev/hdaa', 'small', 65535, False), ] node = self.node - skip_list = self.hardware.get_skip_list_from_node(node, - block_devices) + skip_list = self.hardware.get_skip_list_from_node_for_disks( + node, block_devices) self.assertIsNone(skip_list) @@ -3625,6 +3626,180 @@ class TestGenericHardwareManager(base.IronicAgentTest): mocked_create.assert_called_once_with(self.hardware, self.node, [], raid_config) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + def test__handle_raid_skip_list_partial_skip_list( + self, mocked_get_holder_disks, 
mocked_get_volume_name): + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-0', + 2147483648, True) + raid_devices = [raid_device1, raid_device2] + skip_list = ['data'] + mocked_get_holder_disks.side_effect = [ + ["/dev/sda", "/dev/sdb"], + ["/dev/sda", "/dev/sdb"], + ["/dev/sda", "/dev/sdb"] + ] + mocked_get_volume_name.side_effect = [ + "root", "data" + ] + raid_skip_list_dict = self.hardware._handle_raid_skip_list( + raid_devices, skip_list) + delete_raid_devices = raid_skip_list_dict['delete_raid_devices'] + volume_name_of_raid_devices = raid_skip_list_dict[ + 'volume_name_of_raid_devices'] + cause_of_not_deleting = raid_skip_list_dict['cause_of_not_deleting'] + self.assertEqual(delete_raid_devices, + {'/dev/md0': 'wipe', '/dev/md1': 'keep'}) + self.assertEqual(volume_name_of_raid_devices, + {'/dev/md0': 'root', '/dev/md1': 'data'}) + self.assertEqual(cause_of_not_deleting, + {'/dev/md0': 'data'}) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + def test__handle_raid_skip_list_complete_skip_list( + self, mocked_get_holder_disks, mocked_get_volume_name): + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-0', + 2147483648, True) + raid_devices = [raid_device1, raid_device2] + skip_list = ['data1', 'data2'] + mocked_get_holder_disks.side_effect = [ + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb'] + ] + mocked_get_volume_name.side_effect = [ + 'data1', 'data2' + ] + raid_skip_list_dict = self.hardware._handle_raid_skip_list( + raid_devices, skip_list) + delete_raid_devices = raid_skip_list_dict['delete_raid_devices'] + volume_name_of_raid_devices = raid_skip_list_dict[ + 'volume_name_of_raid_devices'] + cause_of_not_deleting = raid_skip_list_dict['cause_of_not_deleting'] + self.assertEqual(delete_raid_devices, + {'/dev/md0': 'keep', '/dev/md1': 'keep'}) + self.assertEqual(volume_name_of_raid_devices, + {'/dev/md0': 'data1', '/dev/md1': 'data2'}) + # When evaluating /dev/md0, it is marked as the cause + self.assertEqual(cause_of_not_deleting, {'/dev/md1': 'data1'}) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + def test__analyze_raid_device_device_on_skip_list( + self, mocked_get_holder_disks, mocked_get_volume_name): + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + skip_list = ['data1', 'data2'] + mocked_get_holder_disks.side_effect = [ + ['/dev/sda', '/dev/sdb'] + ] + mocked_get_volume_name.return_value = 'data1' + volume_name_of_raid_devices = {} + raid_devices_on_holder_disks = {} + volume_name_on_skip_list = {} + esp_part = self.hardware._analyze_raid_device( + raid_device1, skip_list, + raid_devices_on_holder_disks, + volume_name_on_skip_list, + volume_name_of_raid_devices) + self.assertIsNone(esp_part) + self.assertEqual(volume_name_of_raid_devices, + {'/dev/md0': 'data1'}) + self.assertEqual(raid_devices_on_holder_disks, + {'/dev/sda': ['/dev/md0'], '/dev/sdb': ['/dev/md0']}) + self.assertEqual(volume_name_on_skip_list, + {'/dev/md0': True}) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + def 
test__analyze_raid_device_efi_device( + self, mocked_get_holder_disks, mocked_get_volume_name): + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + skip_list = ['data1', 'data2'] + mocked_get_holder_disks.side_effect = [ + ['/dev/sda', '/dev/sdb'] + ] + mocked_get_volume_name.return_value = 'esp' + volume_name_of_raid_devices = {} + raid_devices_on_holder_disks = {} + volume_name_on_skip_list = {} + esp_part = self.hardware._analyze_raid_device( + raid_device1, skip_list, + raid_devices_on_holder_disks, + volume_name_on_skip_list, + volume_name_of_raid_devices) + self.assertEqual(esp_part, raid_device1.name) + self.assertEqual(volume_name_of_raid_devices, + {'/dev/md0': 'esp'}) + self.assertEqual(raid_devices_on_holder_disks, + {'/dev/sda': ['/dev/md0'], '/dev/sdb': ['/dev/md0']}) + self.assertEqual(volume_name_on_skip_list, + {'/dev/md0': False}) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + def test__analyze_raid_device_device_not_on_skip_list( + self, mocked_get_holder_disks, mocked_get_volume_name): + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + skip_list = ['data1', 'data2'] + mocked_get_holder_disks.side_effect = [ + ['/dev/sda', '/dev/sdb'] + ] + mocked_get_volume_name.return_value = 'root' + volume_name_of_raid_devices = {} + raid_devices_on_holder_disks = {} + volume_name_on_skip_list = {} + esp_part = self.hardware._analyze_raid_device( + raid_device1, skip_list, + raid_devices_on_holder_disks, + volume_name_on_skip_list, + volume_name_of_raid_devices) + self.assertIsNone(esp_part) + self.assertEqual(volume_name_of_raid_devices, + {'/dev/md0': 'root'}) + self.assertEqual(raid_devices_on_holder_disks, + {'/dev/sda': ['/dev/md0'], '/dev/sdb': ['/dev/md0']}) + self.assertEqual(volume_name_on_skip_list, + {'/dev/md0': False}) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + def test__handle_raids_with_volume_name_on_skip_list( + self, mocked_get_holder_disks, mocked_get_volume_name): + hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-0', + 2147483648, True) + mocked_get_holder_disks.side_effect = [ + ['/dev/sda', '/dev/sdb'] + ] + cause_of_not_deleting = {} + delete_raid_devices = {'/dev/md0': 'delete', '/dev/md1': 'delete'} + raid_devices_on_holder_disks = {'/dev/sda': ['/dev/md0', '/dev/md1'], + '/dev/sdb': ['/dev/md0', '/dev/md1']} + volume_name_of_raid_devices = \ + {'/dev/md0': 'root', '/dev/md1': 'data'} + mocked_get_volume_name.return_value = 'data' + self.hardware._handle_raids_with_volume_name_on_skip_list( + raid_device2.name, delete_raid_devices, + cause_of_not_deleting, raid_devices_on_holder_disks, + volume_name_of_raid_devices) + self.assertEqual(cause_of_not_deleting, {'/dev/md0': 'data'}) + @mock.patch.object(raid_utils, '_get_actual_component_devices', autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True) @@ -4476,6 +4651,138 @@ class TestGenericHardwareManager(base.IronicAgentTest): '/dev/sdc', '/dev/sdd'] ]) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(raid_utils, '_get_actual_component_devices', + autospec=True) + @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') + @mock.patch.object(hardware, 'get_holder_disks', 
autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(disk_utils, 'list_partitions', autospec=True, + return_value=[]) + @mock.patch.object(utils, 'execute', autospec=True) + def test_create_configuration_with_different_disks_skip_list( + self, mocked_execute, mock_list_parts, mocked_get_components, + mocked_list_all_devices, mocked_get_holder_disks, + mocked_actual_comp, mocked_get_volume_name): + # The array on skip list is already present + # We expect to create just the one which is not on skip list, i.e. root + node = self.node + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software", + "volume_name": "root", + "physical_disks": [{'name': '/dev/sda'}, + {'name': '/dev/sdb'}], + }, + { + "size_gb": "MAX", + "raid_level": "1", + "controller": "software", + "volume_name": "data", + "physical_disks": [{'name': '/dev/sdc'}, + {'name': '/dev/sdd'}], + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'data'}]} + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + device3 = hardware.BlockDevice('/dev/sdc', 'sdc', 107374182400, True) + device4 = hardware.BlockDevice('/dev/sdd', 'sdd', 107374182400, True) + raid_device1 = hardware.BlockDevice('/dev/md1', 'RAID-1', + 107374182400, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [ + device1, + device2, + device3, + device4, + ] + hardware.list_all_block_devices.side_effect = [ + # Calls from _create_raid_ignore_list + [raid_device1], # block_type raid + [] # block type md + ] + mocked_get_volume_name.return_value = 'data' + mocked_get_holder_disks.return_value = ['/dev/sdc', '/dev/sdd'] + mocked_get_components.return_value = ['/dev/sdc1', '/dev/sdd1'] + + mocked_execute.side_effect = [ + None, # mklabel sda + ('42', None), # sgdisk -F sda + None, # mklabel sda + ('42', None), # sgdisk -F sdb + None, None, None, # parted + partx + udevadm_settle sda + None, None, None, # parted + partx + udevadm_settle sdb + None # mdadm create md0 + ] + + mocked_actual_comp.side_effect = [ + ('/dev/sda1', '/dev/sdb1'), + ('/dev/sdc1', '/dev/sdd1'), + ] + + result = self.hardware.create_configuration(node, []) + + mocked_execute.assert_has_calls([ + mock.call('parted', '/dev/sda', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sda'), + mock.call('parted', '/dev/sdb', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdb'), + mock.call('parted', '/dev/sda', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sda', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdb', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '10GiB'), + mock.call('partx', '-av', '/dev/sdb', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('mdadm', '--create', '/dev/md0', '--force', '--run', + '--metadata=1', '--level', '1', '--name', 'root', + '--raid-devices', 2, '/dev/sda1', '/dev/sdb1')]) + self.assertEqual(raid_config, result) + + # disk_utils.list_partitions should not be called as the rm_from_list + # is not None + self.assertEqual(0, mock_list_parts.call_count) + + def 
test_create_configuration_with_skip_list_and_unnamed_raid(self): + node = self.node + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software" + }, + { + "size_gb": "MAX", + "raid_level": "1", + "controller": "software", + "volume_name": "data" + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'data'}]} + + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [device1, device2] + + self.assertRaises(errors.SoftwareRAIDError, + self.hardware.create_configuration, + self.node, []) + @mock.patch.object(utils, 'execute', autospec=True) @mock.patch.object(os.path, 'isdir', autospec=True, return_value=False) def test_create_configuration_invalid_raid_config(self, @@ -4893,12 +5200,15 @@ class TestGenericHardwareManager(base.IronicAgentTest): autospec=True) @mock.patch.object(raid_utils, '_get_actual_component_devices', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(disk_utils, 'list_partitions', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) def test_create_configuration_with_skip_list( self, mocked_execute, mock_list_parts, mocked_list_all_devices, - mocked_actual_comp, mocked_get_volume_name): + mocked_get_components, mocked_get_holder_disks, mocked_actual_comp, + mocked_get_volume_name): node = self.node raid_config = { @@ -4931,8 +5241,10 @@ class TestGenericHardwareManager(base.IronicAgentTest): ] mocked_get_volume_name.return_value = "large" + mocked_get_holder_disks.return_value = ['/dev/sda', '/dev/sdb'] + mocked_get_components.return_value = ['/dev/sda1', '/dev/sdb1'] + mocked_execute.side_effect = [ - None, # examine md0 None, # mklabel sda ('42', None), # sgdisk -F sda None, # mklabel sda @@ -4951,8 +5263,6 @@ class TestGenericHardwareManager(base.IronicAgentTest): result = self.hardware.create_configuration(node, []) mocked_execute.assert_has_calls([ - mock.call('mdadm', '--examine', '/dev/md0', - use_standard_locale=True), mock.call('parted', '/dev/sda', '-s', '--', 'mklabel', 'msdos'), mock.call('sgdisk', '-F', '/dev/sda'), mock.call('parted', '/dev/sdb', '-s', '--', 'mklabel', 'msdos'), @@ -4974,12 +5284,277 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.assertEqual(0, mock_list_parts.call_count) + @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(disk_utils, 'list_partitions', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_create_configuration_with_skip_list_root_on_more_disks_than_data( + self, mocked_execute, mock_list_parts, mocked_list_all_devices, + mocked_get_components, mocked_get_holder_disks, + mocked_get_volume_name): + # Please note that this test case is solely for testing purposes + # and does not make sense in real deployments. 
+ # There are two RAID arrays: root and data + # - root spans 100GB on all 4 disks present on the node + # - data spans the rest of 2 of those disks and is in skip list + # In this case, root should be only wiped, not deleted, so no array + # should be created + node = self.node + + raid_config = { + "logical_disks": [ + { + "size_gb": "100", + "raid_level": "1", + "controller": "software", + "volume_name": "root", + "physical_disks": [{'name': '/dev/sda'}, + {'name': '/dev/sdb'}, + {'name': '/dev/sdc'}, + {'name': '/dev/sdd'}], + }, + { + "size_gb": "MAX", + "raid_level": "1", + "controller": "software", + "volume_name": "data", + "physical_disks": [{'name': '/dev/sda'}, + {'name': '/dev/sdb'}], + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'data'}]} + device1 = hardware.BlockDevice('/dev/sda', 'sda', 1073741824000, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 1073741824000, True) + hardware.BlockDevice('/dev/sda', 'sda', 1073741824000, True) + hardware.BlockDevice('/dev/sdb', 'sdb', 1073741824000, True) + raid_device_root = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + raid_device_data = hardware.BlockDevice('/dev/md1', 'RAID-1', + 966367641600, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [device1, device2] + hardware.list_all_block_devices.side_effect = [ + [raid_device_root, raid_device_data], # block_type raid + [] # block type md + ] + mocked_get_volume_name.side_effect = [ + 'root', 'data', # handle raid skip list + 'root', 'data' # create ignore list + ] + + mocked_get_holder_disks.side_effect = [ + ['/dev/sda', '/dev/sdb', '/dev/sdc', '/dev/sdd'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb', '/dev/sdc', '/dev/sdd'], + ['/dev/sda', '/dev/sdb'] + ] + mocked_get_components.side_effect = [ + ['/dev/sda1', '/dev/sdb1', '/dev/sdc1', '/dev/sdd1'], + ['/dev/sda2', '/dev/sdb2'] + ] + + result = self.hardware.create_configuration(node, []) + self.assertEqual(raid_config, result) + + self.assertEqual(0, mocked_execute.call_count) + self.assertEqual(0, mock_list_parts.call_count) + + @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_create_configuration_with_skip_list_multiple_raids_on_disk( + self, mocked_execute, mocked_get_volume_name, + mocked_get_component_devices, mocked_list_all_block_devices): + # We expect that both root and data to already be present, + # so we do not create any + node = self.node + raid_config = { + "logical_disks": [ + { + "size_gb": "10", + "raid_level": "1", + "controller": "software", + "volume_name": "root" + }, + { + "size_gb": "MAX", + "raid_level": "1", + "controller": "software", + "volume_name": "data" + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = {'skip_block_devices': [{'volume_name': 'data'}]} + + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + sda1 = hardware.BlockDevice('/dev/sda1', 'model12', 10737418240, True) + sdb1 = hardware.BlockDevice('/dev/sdb1', 'model12', 10737418240, True) + sda2 = hardware.BlockDevice('/dev/sda2', 'model12', 
96636764160, True) + sdb2 = hardware.BlockDevice('/dev/sdb2', 'model12', 96636764160, True) + raid_device1 = hardware.BlockDevice('/dev/md0', 'RAID-1', + 10737418240, True) + raid_device2 = hardware.BlockDevice('/dev/md1', 'RAID-1', + 96636764160, True) + + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [device1, device2] + mocked_get_component_devices.side_effect = [ + [sda1, sdb1], + [sda2, sdb2] + ] + mocked_list_all_block_devices.side_effect = [ + [raid_device1, raid_device2], # block type raid + [], # block type md + ] + mocked_get_volume_name.side_effect = [ + 'root', 'data' # create ignore list + ] + + self.hardware._handle_raid_skip_list = mock.Mock() + self.hardware._handle_raid_skip_list.return_value = { + 'delete_raid_devices': {raid_device1.name: 'wipe', + raid_device2.name: 'keep'}, + 'volume_name_of_raid_devices': {raid_device1.name: 'root', + raid_device2.name: 'data'}, + 'cause_of_not_deleting': {raid_device1.name: 'data'} + } + mocked_execute.side_effect = [ + None, # Examine /dev/sda1 + None, # Examine /dev/sda2 + ] + + result = self.hardware.create_configuration(node, []) + + self.assertEqual(0, mocked_execute.call_count) + self.assertEqual(raid_config, result) + + @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(raid_utils, '_get_actual_component_devices', + autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_create_configuration_with_skip_list_on_separate_disks( + self, mocked_execute, mocked_get_actual_component_devices, + mocked_get_volume_name, mocked_get_component_devices, + mocked_list_all_block_devices): + # The array new_volume is on two disks + # The other array saved_data is on other disks and is already present + # We only expect to create new_volume + node = self.node + raid_config = { + "logical_disks": [ + { + "size_gb": "100", + "raid_level": "1", + "controller": "software", + "volume_name": "new_volume", + "physical_disks": [{'name': '/dev/sdc'}, + {'name': '/dev/sdd'}], + }, + { + "size_gb": "MAX", + "raid_level": "1", + "controller": "software", + "volume_name": "saved_data", + "physical_disks": [{'name': '/dev/sda'}, + {'name': '/dev/sdb'}], + }, + ] + } + node['target_raid_config'] = raid_config + node['properties'] = { + 'skip_block_devices': [{'volume_name': 'saved_data'}] + } + + device1 = hardware.BlockDevice('/dev/sda', 'sda', 107374182400, True) + device2 = hardware.BlockDevice('/dev/sdb', 'sdb', 107374182400, True) + device3 = hardware.BlockDevice('/dev/sdc', 'sdc', 107374182400, True) + device4 = hardware.BlockDevice('/dev/sdd', 'sdd', 107374182400, True) + sda1 = hardware.BlockDevice('/dev/sda1', 'model12', 107374182400, True) + sdb1 = hardware.BlockDevice('/dev/sdb1', 'model12', 107374182400, True) + raid_device1 = hardware.BlockDevice('/dev/md127', 'RAID-1', + 107374182400, True) + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [ + device1, device2, device3, device4 + ] + mocked_list_all_block_devices.side_effect = [ + # create ignore list + [raid_device1], # block type raid + [], # block type md + ] + mocked_get_component_devices.side_effect = [ + [sda1, sdb1] + ] + mocked_get_volume_name.side_effect = [ + 'saved_data' # create ignore list + ] + + 
self.hardware._handle_raid_skip_list = mock.Mock() + self.hardware._handle_raid_skip_list.return_value = { + 'delete_raid_devices': {raid_device1.name: 'keep'}, + 'volume_name_of_raid_devices': {raid_device1.name: 'saved_data'}, + 'cause_of_not_deleting': {raid_device1.name: 'saved_data'} + } + mocked_execute.side_effect = [ + None, # mklabel sdc + ('42', None), # sgdisk -F sdc + None, # mklabel sdd + ('42', None), # sgdisk -F sdd + None, None, None, # parted + partx + udevadm_settle sdc + None, None, None, # parted + partx + udevadm_settle sdd + None # mdadm --create /dev/md0 + ] + mocked_get_actual_component_devices.side_effect = [ + ('/dev/sdc1', '/dev/sdd1') + ] + + result = self.hardware.create_configuration(node, []) + + mocked_execute.assert_has_calls([ + mock.call('parted', '/dev/sdc', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdc'), + mock.call('parted', '/dev/sdd', '-s', '--', 'mklabel', 'msdos'), + mock.call('sgdisk', '-F', '/dev/sdd'), + mock.call('parted', '/dev/sdc', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '100GiB'), + mock.call('partx', '-av', '/dev/sdc', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('parted', '/dev/sdd', '-s', '-a', 'optimal', '--', + 'mkpart', 'primary', '42s', '100GiB'), + mock.call('partx', '-av', '/dev/sdd', attempts=3, + delay_on_retry=True), + mock.call('udevadm', 'settle'), + mock.call('mdadm', '--create', '/dev/md0', '--force', '--run', + '--metadata=1', '--level', '1', '--name', 'new_volume', + '--raid-devices', 2, '/dev/sdc1', '/dev/sdd1') + ]) + self.assertEqual(raid_config, result) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) def test_create_configuration_skip_list_existing_device_does_not_match( self, mocked_execute, mocked_list_all_devices, + mocked_get_components, mocked_get_holder_disks, mocked_get_volume_name): node = self.node @@ -5013,6 +5588,9 @@ class TestGenericHardwareManager(base.IronicAgentTest): ] mocked_get_volume_name.return_value = "small" + mocked_get_holder_disks.return_value = ['/dev/sda', '/dev/sdb'] + mocked_get_components.return_value = ['/dev/sda1', '/dev/sdb1'] + error_regex = "Existing Software RAID device detected that should not" mocked_execute.side_effect = [ processutils.ProcessExecutionError] @@ -5020,8 +5598,7 @@ class TestGenericHardwareManager(base.IronicAgentTest): self.hardware.create_configuration, self.node, []) - mocked_execute.assert_called_once_with( - 'mdadm', '--examine', '/dev/md0', use_standard_locale=True) + self.assertEqual(0, mocked_execute.call_count) @mock.patch.object(utils, 'get_node_boot_mode', lambda node: 'bios') @mock.patch.object(raid_utils, '_get_actual_component_devices', @@ -5126,11 +5703,14 @@ class TestGenericHardwareManager(base.IronicAgentTest): autospec=True) @mock.patch.object(raid_utils, '_get_actual_component_devices', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @mock.patch.object(utils, 'execute', autospec=True) def test_create_configuration_with_complete_skip_list( - self, mocked_execute, mocked_ls_all_devs, - mocked_actual_comp, 
mocked_get_volume_name): + self, mocked_execute, mocked_ls_all_devs, mocked_get_components, + mocked_get_holder_disks, mocked_actual_comp, + mocked_get_volume_name): node = self.node raid_config = { @@ -5167,16 +5747,21 @@ class TestGenericHardwareManager(base.IronicAgentTest): mocked_get_volume_name.side_effect = [ "small", "large", + "small", + "large" ] + mocked_get_holder_disks.side_effect = [ + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb']] + mocked_get_components.side_effect = [ + ['/dev/sda1', '/dev/sdb1'], + ['/dev/sda2', '/dev/sdb2']] self.hardware.create_configuration(node, []) - mocked_execute.assert_has_calls([ - mock.call('mdadm', '--examine', '/dev/md0', - use_standard_locale=True), - mock.call('mdadm', '--examine', '/dev/md1', - use_standard_locale=True), - ]) - self.assertEqual(2, mocked_execute.call_count) + self.assertEqual(0, mocked_execute.call_count) + self.assertEqual(0, mocked_get_components.call_count) @mock.patch.object(utils, 'execute', autospec=True) def test__get_md_uuid(self, mocked_execute): @@ -5380,12 +5965,10 @@ class TestGenericHardwareManager(base.IronicAgentTest): raid_device1_part1 = hardware.BlockDevice('/dev/md0p1', 'RAID-1', 1073741824, True) hardware.list_all_block_devices.side_effect = [ - [], # list_all_block_devices raid - [raid_device1_part1], # list_all_block_devices raid (md) + [raid_device1_part1], # list_all_block_devices raid + md [], # list_all_block_devices disks [], # list_all_block_devices parts - [], # list_all_block_devices raid - [], # list_all_block_devices raid (md) + [], # list_all_block_devices raid + md ] mocked_get_volume_name.return_value = None mocked_get_component.return_value = [] @@ -5411,16 +5994,13 @@ class TestGenericHardwareManager(base.IronicAgentTest): 107374182400, True) hardware.list_all_block_devices.side_effect = [ - [raid_device1], # list_all_block_devices raid - [], # list_all_block_devices raid (type md) + [raid_device1], # list_all_block_devices raid + md [], # list_all_block_devices disks [], # list_all_block_devices parts - [raid_device1], # list_all_block_devices raid - [], # list_all_block_devices raid (type md) + [raid_device1], # list_all_block_devices raid + md [], # list_all_block_devices disks [], # list_all_block_devices parts - [raid_device1], # list_all_block_devices raid - [], # list_all_block_devices raid (type md) + [raid_device1], # list_all_block_devices raid + md ] mocked_get_component.return_value = [] mocked_get_volume_name.return_value = "/dev/md0" @@ -5440,7 +6020,7 @@ class TestGenericHardwareManager(base.IronicAgentTest): @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', autospec=True) @mock.patch.object(hardware.GenericHardwareManager, - 'get_skip_list_from_node', autospec=True) + 'get_skip_list_from_node_for_raids', autospec=True) @mock.patch.object(hardware, 'get_holder_disks', autospec=True) @mock.patch.object(hardware, 'get_component_devices', autospec=True) @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) @@ -5466,17 +6046,18 @@ class TestGenericHardwareManager(base.IronicAgentTest): ] hardware.list_all_block_devices.side_effect = [ - [raid_device1, raid_device2], # list_all_block_devices raid - [], # list_all_block_devices raid (md) + [raid_device1, raid_device2], # list_all_block_devices raid + md [sda, sdb, sdc], # list_all_block_devices disks partitions, # list_all_block_devices parts - [], # list_all_block_devices raid - [], # list_all_block_devices raid (md) + [], # 
list_all_block_devices raid + md ] mocked_get_component.side_effect = [ ["/dev/sda1", "/dev/sdb1"], ["/dev/sda2", "/dev/sdb2"]] mocked_get_holder.side_effect = [ + ["/dev/sda", "/dev/sdb"], + ["/dev/sda", "/dev/sdb"], + ["/dev/sda", "/dev/sdb"], ["/dev/sda", "/dev/sdb"], ["/dev/sda", "/dev/sdb"]] mocked_get_volume_name.side_effect = [ @@ -5489,27 +6070,383 @@ class TestGenericHardwareManager(base.IronicAgentTest): mocked_execute.assert_has_calls([ mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), mock.call('wipefs', '-af', '/dev/md0'), - mock.call('mdadm', '--stop', '/dev/md0'), - mock.call('mdadm', '--examine', '/dev/sda1', - use_standard_locale=True), - mock.call('mdadm', '--zero-superblock', '/dev/sda1'), - mock.call('mdadm', '--examine', '/dev/sdb1', - use_standard_locale=True), - mock.call('mdadm', '--zero-superblock', '/dev/sdb1'), - mock.call('mdadm', '--examine', '/dev/sda1', - use_standard_locale=True), - mock.call('mdadm', '--zero-superblock', '/dev/sda1'), - mock.call('mdadm', '--examine', '/dev/sdb1', - use_standard_locale=True), - mock.call('mdadm', '--zero-superblock', '/dev/sdb1'), mock.call('mdadm', '--examine', '/dev/sdc', use_standard_locale=True), mock.call('mdadm', '--zero-superblock', '/dev/sdc'), - mock.call('parted', '/dev/sda', 'rm', '1'), - mock.call('parted', '/dev/sdb', 'rm', '1'), mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), ]) + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + 'get_skip_list_from_node_for_raids', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_delete_configuration_skip_list_root_on_more_disks_than_data( + self, mocked_execute, mocked_list, mocked_get_components, + mocked_get_holder, mocked_get_skip_list, mocked_get_volume_name): + # Please note that this test case is solely for testing purposes + # and does not make sense in real deployments. 
+ # There are two RAID arrays: root and data + # - root spans 100GB on all 4 disks present on the node + # - data spans the rest of 2 of those disks and is in skip list + # The data should be untouched, the root should be wiped, + # but not deleted + sda = hardware.BlockDevice('/dev/sda', 'sda', 1073741824000, True) + sdb = hardware.BlockDevice('/dev/sdb', 'sdb', 1073741824000, True) + sdc = hardware.BlockDevice('/dev/sdc', 'sdc', 1073741824000, True) + sdd = hardware.BlockDevice('/dev/sdd', 'sdd', 1073741824000, True) + rd_root = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + rd_data = hardware.BlockDevice('/dev/md1', 'RAID-1', + 966367641600, True) + partitions = [ + hardware.BlockDevice('/dev/sda1', 'raid-member', 107374182400, + False), + hardware.BlockDevice('/dev/sdb1', 'raid-member', 107374182400, + False), + hardware.BlockDevice('/dev/sdc1', 'raid_member', 107374182400, + False), + hardware.BlockDevice('/dev/sdd1', 'raid-member', 107374182400, + False), + hardware.BlockDevice('/dev/sda2', 'raid-member', 966367641600, + False), + hardware.BlockDevice('/dev/sdb2', 'raid-member', 966367641600, + False), + ] + + mocked_list.side_effect = [ + [rd_root, rd_data], # list_all_block_devices raid + md + [sda, sdb, sdc, sdd], # list_all_block_devices disks + partitions, # list_all_block_devices parts + [rd_root, rd_data], # list_all_block_devices raid + md + ] + mocked_get_volume_name.side_effect = [ + 'root', 'data' + ] + + mocked_get_holder.side_effect = [ + ['/dev/sda', '/dev/sdb', '/dev/sdc', '/dev/sdd'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb', '/dev/sdc', '/dev/sdd'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sda', '/dev/sdb', '/dev/sdc', '/dev/sdd'], + ['/dev/sda', '/dev/sdb'] + ] + mocked_get_components.side_effect = [ + ['/dev/sda1', '/dev/sdb1', '/dev/sdc1', '/dev/sdd1'], + ['/dev/sda2', '/dev/sdb2'], + ['/dev/sda1', '/dev/sdb1', '/dev/sdc1', '/dev/sdd1'], + ['/dev/sda2', '/dev/sdb2'], + ] + mocked_get_skip_list.return_value = set({'data'}) + + self.hardware.delete_configuration(self.node, []) + mocked_list.assert_has_calls([ + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + mock.call(all_serial_and_wwn=False), + mock.call(block_type='part', ignore_raid=True), + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + ]) + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + mock.call('wipefs', '-af', '/dev/md0'), # wipe root + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False)]) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + 'get_skip_list_from_node_for_raids', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_delete_configuration_skip_list_uefi( + self, mocked_execute, mocked_list, mocked_get_components, + mocked_get_holder, mocked_get_skip_list, mocked_get_volume_name): + # Raid arrays: root, data, and esp (EFI partition) span two disks + # Data is on the skip list, so none of the partitions gets deleted + # Root gets wiped, esp stays to keep the filesystem + sda = hardware.BlockDevice('/dev/sda', 'sda', 1073741824000, True) + sdb = hardware.BlockDevice('/dev/sdb', 'sdb', 1073741824000, True) + rd_root 
= hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + rd_data = hardware.BlockDevice('/dev/md1', 'RAID-1', + 966367641088, True) + rd_esp = hardware.BlockDevice('/dev/md2', 'RAID-1', + 512, True) + partitions = [ + hardware.BlockDevice('/dev/sda1', 'raid-member', 107374182400, + False), + hardware.BlockDevice('/dev/sdb1', 'raid-member', 107374182400, + False), + hardware.BlockDevice('/dev/sda2', 'raid-member', 9663676416088, + False), + hardware.BlockDevice('/dev/sdb2', 'raid-member', 966367641088, + False), + hardware.BlockDevice('/dev/sda3', 'raid-member', 512, False), + hardware.BlockDevice('/dev/sdb3', 'raid-member', 512, False), + ] + + mocked_list.side_effect = [ + [rd_root, rd_data, rd_esp], # list_all_block_devices raid + md + [sda, sdb], # list_all_block_devices disks + partitions, # list_all_block_devices parts + [rd_root, rd_data, rd_esp], # list_all_block_devices raid + md + ] + mocked_get_volume_name.side_effect = [ + 'root', 'data', 'esp' + ] + + mocked_get_holder.side_effect = [ + ['/dev/sda', '/dev/sdb'], # md0 handle raid skip list + ['/dev/sda', '/dev/sdb'], # md1 handle raid skip list + ['/dev/sda', '/dev/sdb'], # md2 handle raid skip list + ['/dev/sda', '/dev/sdb'], # data handle raid skip list + ['/dev/sda', '/dev/sdb'], # md0 delete config pass + ['/dev/sda', '/dev/sdb'], # md1 delete config pass + ['/dev/sda', '/dev/sdb'], # md2 delete config pass + ] + mocked_get_components.side_effect = [ + ['/dev/sda1', '/dev/sdb1'], + ['/dev/sda2', '/dev/sdb2'], + ['/dev/sda3', '/dev/sdb3'], + ['/dev/sda1', '/dev/sdb1'], + ['/dev/sda2', '/dev/sdb2'], + ['/dev/sda3', '/dev/sdb3'], + ] + mocked_get_skip_list.return_value = set({'data'}) + + self.hardware.delete_configuration(self.node, []) + mocked_list.assert_has_calls([ + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + mock.call(all_serial_and_wwn=False), + mock.call(block_type='part', ignore_raid=True), + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + ]) + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + mock.call('wipefs', '-af', '/dev/md0'), # wipe root + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False)]) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + 'get_skip_list_from_node_for_raids', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_delete_configuration_skip_list_root_weirdly_placed_with_data( + self, mocked_execute, mocked_list, mocked_get_components, + mocked_get_holder, mocked_get_skip_list, mocked_get_volume_name): + # Please note that this test case is solely for testing purposes + # and does not make sense in real deployments. 
+ # There are two RAID arrays: root and data + # - root spans 100GB on 2 disks present on the node + # - data spans the rest of 1 of those disks and most of a third disk + # and it is in skip list + # The data should be untouched, the root should be wiped, + # but not deleted + sda = hardware.BlockDevice('/dev/sda', 'sda', 1073741824000, True) + sdb = hardware.BlockDevice('/dev/sdb', 'sdb', 1073741824000, True) + sdc = hardware.BlockDevice('/dev/sdc', 'sdc', 1073741824000, True) + rd_root = hardware.BlockDevice('/dev/md0', 'RAID-1', + 107374182400, True) + rd_data = hardware.BlockDevice('/dev/md1', 'RAID-1', + 966367641600, True) + partitions = [ + hardware.BlockDevice('/dev/sda1', 'raid-member', 107374182400, + False), + hardware.BlockDevice('/dev/sdb1', 'raid-member', 107374182400, + False), + hardware.BlockDevice('/dev/sdb2', 'raid-member', 966367641600, + False), + hardware.BlockDevice('/dev/sdc1', 'raid_member', 966367641600, + False), + ] + + mocked_list.side_effect = [ + [rd_root, rd_data], # list_all_block_devices raid + md + [sda, sdb, sdc], # list_all_block_devices disks + partitions, # list_all_block_devices parts + [rd_root, rd_data], # list_all_block_devices raid + md + ] + mocked_get_volume_name.side_effect = [ + 'root', 'data' + ] + + mocked_get_holder.side_effect = [ + ['/dev/sda', '/dev/sdb'], + ['/dev/sdb', '/dev/sdc'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sdb', '/dev/sdc'], + ['/dev/sda', '/dev/sdb'], + ['/dev/sdb', '/dev/sdc'], + ] + mocked_get_components.side_effect = [ + ['/dev/sda1', '/dev/sdb1'], + ['/dev/sdb2', '/dev/sdc1'], + ['/dev/sda1', '/dev/sdb1'], + ['/dev/sdb2', '/dev/sdc1'], + ] + mocked_get_skip_list.return_value = set({'data'}) + + self.hardware.delete_configuration(self.node, []) + mocked_list.assert_has_calls([ + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + mock.call(all_serial_and_wwn=False), + mock.call(block_type='part', ignore_raid=True), + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + ]) + mocked_execute.assert_has_calls([ + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + mock.call('wipefs', '-af', '/dev/md0'), # wipe root + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False)]) + + @mock.patch.object(raid_utils, 'get_volume_name_of_raid_device', + autospec=True) + @mock.patch.object(hardware.GenericHardwareManager, + 'get_skip_list_from_node_for_raids', autospec=True) + @mock.patch.object(hardware, 'get_holder_disks', autospec=True) + @mock.patch.object(hardware, 'get_component_devices', autospec=True) + @mock.patch.object(hardware, 'list_all_block_devices', autospec=True) + @mock.patch.object(utils, 'execute', autospec=True) + def test_delete_configuration_skip_list_separated_raid_volumes_with_uefi( + self, mocked_execute, mocked_list_all_block_devices, + mocked_get_components, mocked_get_holder, mocked_get_skip_list, + mocked_get_volume_name): + # Three raid volumes: 'root', 'data' and 'esp' (EFI System Partition). 
+ # 'data' is a raid1 on sda and sdb (it should not be deleted) + # 'root' and 'esp' are both raid1 and they share sdc and sdd + # both 'root' and 'esp' should be deleted + sda = hardware.BlockDevice('/dev/sda', 'sda', 1073741824000, True) + sdb = hardware.BlockDevice('/dev/sdb', 'sdb', 1073741824000, True) + sdc = hardware.BlockDevice('/dev/sdc', 'sdc', 1073741824000, True) + sdd = hardware.BlockDevice('/dev/sdd', 'sdd', 1073741824000, True) + rd_data = hardware.BlockDevice('/dev/md0', 'RAID-1', + 1073741824000, True) + rd_root = hardware.BlockDevice('/dev/md1', 'RAID-1', + 1073741823488, True) + rd_esp = hardware.BlockDevice('/dev/md2', 'RAID-1', + 512, True) + partitions = [ + hardware.BlockDevice('/dev/sda1', 'raid-member', 1073741824000, + False), + hardware.BlockDevice('/dev/sdb1', 'raid-member', 1073741824000, + False), + hardware.BlockDevice('/dev/sdc1', 'raid-member', 1073741823488, + False), + hardware.BlockDevice('/dev/sdd1', 'raid-member', 1073741823488, + False), + hardware.BlockDevice('/dev/sdc2', 'raid-member', 512, + False), + hardware.BlockDevice('/dev/sdd2', 'raid-member', 512, + False) + ] + self.hardware.list_block_devices = mock.Mock() + self.hardware.list_block_devices.return_value = [ + sda, sdb, sdc, sdd, partitions[0], partitions[1], # disk + part + partitions[2], partitions[3], partitions[4], partitions[5] + ] + + mocked_list_all_block_devices.side_effect = [ + # scan_raids - before deletion + [rd_data, rd_root, rd_esp], # block type raid + md + # scan_raids - after deletion + [rd_data], # block type raid + md + ] + + mocked_get_volume_name.side_effect = [ + 'data', 'root', 'esp' + ] + + mocked_get_holder.side_effect = [ + # _handle_skip_raid_devices + ['/dev/sda', '/dev/sdb'], + ['/dev/sdc', '/dev/sdd'], + ['/dev/sdc', '/dev/sdd'], + ['/dev/sda', '/dev/sdb'], + # _delete_config_pass + ['/dev/sda', '/dev/sdb'], + ['/dev/sdc', '/dev/sdd'], + ['/dev/sdc', '/dev/sdd'], + + ] + mocked_get_components.side_effect = [ + # _delete_config_pass + ['/dev/sda1', '/dev/sdb1'], + ['/dev/sdc1', '/dev/sdd1'], + ['/dev/sdc2', '/dev/sdd2'] + + ] + mocked_get_skip_list.return_value = set({'data'}) + + self.hardware.delete_configuration(self.node, []) + + assert mocked_list_all_block_devices.call_count == 2 + mocked_list_all_block_devices.assert_has_calls([ + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + mock.call(block_type=['raid', 'md'], ignore_raid=False, + ignore_empty=False), + ]) + mocked_execute.assert_has_calls([ + # Scan raids + mock.call('mdadm', '--assemble', '--scan', check_exit_code=False), + # Delete root + mock.call('wipefs', '-af', '/dev/md1'), + mock.call('mdadm', '--stop', '/dev/md1'), + mock.call('mdadm', '--examine', '/dev/sdc1', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdc1'), + mock.call('mdadm', '--examine', '/dev/sdd1', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdd1'), + # Delete esp + mock.call('wipefs', '-af', '/dev/md2'), + mock.call('mdadm', '--stop', '/dev/md2'), + mock.call('mdadm', '--examine', '/dev/sdc2', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdc2'), + mock.call('mdadm', '--examine', '/dev/sdd2', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdd2'), + # Remove remaining raid traces from disks + mock.call('mdadm', '--examine', '/dev/sdd2', + use_standard_locale=True), + mock.call('mdadm', '--zero-superblock', '/dev/sdd2'), + mock.call('mdadm', '--examine', '/dev/sdc2', + 
                      use_standard_locale=True),
+            mock.call('mdadm', '--zero-superblock', '/dev/sdc2'),
+            mock.call('mdadm', '--examine', '/dev/sdd1',
+                      use_standard_locale=True),
+            mock.call('mdadm', '--zero-superblock', '/dev/sdd1'),
+            mock.call('mdadm', '--examine', '/dev/sdc1',
+                      use_standard_locale=True),
+            mock.call('mdadm', '--zero-superblock', '/dev/sdc1'),
+            mock.call('mdadm', '--examine', '/dev/sdd',
+                      use_standard_locale=True),
+            mock.call('mdadm', '--zero-superblock', '/dev/sdd'),
+            mock.call('mdadm', '--examine', '/dev/sdc',
+                      use_standard_locale=True),
+            mock.call('mdadm', '--zero-superblock', '/dev/sdc'),
+            # Wipe fs on non-saved disks
+            mock.call('wipefs', '-af', '/dev/sdc'),
+            mock.call('wipefs', '-af', '/dev/sdd'),
+            # Scan raids
+            mock.call('mdadm', '--assemble', '--scan',
+                      check_exit_code=False)
+        ])
+
     @mock.patch.object(utils, 'execute', autospec=True)
     def test_validate_configuration_valid_raid1(self, mocked_execute):
         raid_config = {
diff --git a/ironic_python_agent/tests/unit/test_raid_utils.py b/ironic_python_agent/tests/unit/test_raid_utils.py
index c1341960f..3825783e8 100644
--- a/ironic_python_agent/tests/unit/test_raid_utils.py
+++ b/ironic_python_agent/tests/unit/test_raid_utils.py
@@ -175,6 +175,13 @@ class TestRaidUtils(base.IronicAgentTest):
         volume_name = raid_utils.get_volume_name_of_raid_device('/dev/md0')
         self.assertIsNone(volume_name)
 
+    @mock.patch.object(utils, 'execute', autospec=True)
+    def test_get_volume_name_of_raid_device_examine(self, mock_execute):
+        mock_execute.side_effect = [(hws.MDADM_EXAMINE_OUTPUT_MEMBER, '')]
+        volume_name = raid_utils.get_volume_name_of_raid_device(
+            '/dev/sda1', examine=True)
+        self.assertEqual("this_name", volume_name)
+
     @mock.patch.object(raid_utils, 'find_esp_raid', autospec=True)
     @mock.patch.object(disk_utils, 'trigger_device_rescan', autospec=True)
     @mock.patch.object(raid_utils, 'get_next_free_raid_device', autospec=True,
diff --git a/releasenotes/notes/safeguards_raid_fix-6521157d95aed18f.yaml b/releasenotes/notes/safeguards_raid_fix-6521157d95aed18f.yaml
new file mode 100644
index 000000000..f4c75cdda
--- /dev/null
+++ b/releasenotes/notes/safeguards_raid_fix-6521157d95aed18f.yaml
@@ -0,0 +1,12 @@
+---
+deprecations:
+  - |
+    If a volume_name is present in the skip_block_devices property,
+    the function validate_configuration will now fail unless
+    all logical disks in the target_raid_config have a volume_name
+    specified.
+fixes:
+  - |
+    The skip_block_devices property now supports software RAID devices,
+    including scenarios with multiple arrays on the same physical disk.
+    This work also addressed six smaller related bugs.
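
A minimal sketch (not part of this patch) of node fields that satisfy the new validation rule described in the release note above: because one skip_block_devices entry names a volume, every logical disk in target_raid_config defines a volume_name. The dict below only mirrors the fixtures used in the tests in this change and is illustrative, not an API reference.

    # Hypothetical node dict: once any 'skip_block_devices' entry uses
    # 'volume_name', every logical disk in 'target_raid_config' must also
    # define 'volume_name', otherwise validate_configuration fails.
    node = {
        'properties': {
            'skip_block_devices': [{'volume_name': 'data'}],  # keep 'data'
        },
        'target_raid_config': {
            'logical_disks': [
                {
                    'size_gb': '100',
                    'raid_level': '1',
                    'controller': 'software',
                    'volume_name': 'root',  # required by the new check
                },
                {
                    'size_gb': 'MAX',
                    'raid_level': '1',
                    'controller': 'software',
                    'volume_name': 'data',  # matches the skip-list entry
                },
            ],
        },
    }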

AltStyle によって変換されたページ (->オリジナル) /