From fac1a4d9de977cbc5579c51ff8c21f587ced8e22 Mon Sep 17 00:00:00 2001 From: Sean Mooney Date: 2024年5月17日 18:54:55 +0100 Subject: [PATCH] add functional reproducer for bug 2048837 Change-Id: I8ce3044cff198209416d2a458317f01d1177e9da Signed-off-by: Sean Mooney --- nova/tests/functional/integrated_helpers.py | 11 ++ .../regressions/test_bug_2048837.py | 126 ++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 nova/tests/functional/regressions/test_bug_2048837.py diff --git a/nova/tests/functional/integrated_helpers.py b/nova/tests/functional/integrated_helpers.py index fcaab93b730b..50899e45d543 100644 --- a/nova/tests/functional/integrated_helpers.py +++ b/nova/tests/functional/integrated_helpers.py @@ -581,6 +581,17 @@ class InstanceHelperMixin: self.notifier.wait_for_versioned_notifications( 'instance.share_detach.error') + def _attach_volume(self, server, volume_id): + """Attach a cinder volume to a server.""" + attachment = self.api.post_server_volume( + server['id'], + {'volumeAttachment': {'volumeId': volume_id}} ) + self._wait_for_volume_attach(server['id'], volume_id) + self.notifier.wait_for_versioned_notifications( + 'instance.volume_attach.end') + return attachment + def _rebuild_server(self, server, image_uuid, expected_state='ACTIVE'): """Rebuild a server.""" self.api.post_server_action( diff --git a/nova/tests/functional/regressions/test_bug_2048837.py b/nova/tests/functional/regressions/test_bug_2048837.py new file mode 100644 index 000000000000..09c75ae39846 --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_2048837.py @@ -0,0 +1,126 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import fixtures +import threading +import time + +from unittest import mock + +from nova import context as nova_context +from nova import objects +from nova.tests.functional.libvirt import base +from nova.virt import libvirt + + +class TestConcurrentMultiAttachCleanup(base.ServersTestBase): + """Regression test for bug 2048837. + + This regression test aims to assert that if a multi attached volume is + attached to two vms on the same host and both are deleted, the volume is + correctly cleaned up. + + Nova historically did not guard the critical section where concurrent + deletes determined if they were the last user of the host-mounted + multi attach volume. As a result it's possible for both deletes to believe + the other would call disconnect and the volume can be leaked. + + See https://bugs.launchpad.net/nova/+bug/2048837 for details. + + """ + + microversion = 'latest' + CAST_AS_CALL = False + REQUIRES_LOCKING = True + + def setUp(self): + super().setUp() + self._orgi_should_disconnect = ( + libvirt.LibvirtDriver._should_disconnect_target) + self.should_disconnect_mock = self.useFixture(fixtures.MockPatch( + 'nova.virt.libvirt.LibvirtDriver._should_disconnect_target', + mock.Mock(side_effect=self._should_disconnect))).mock + self.disconnect_volume_mock = self.useFixture(fixtures.MockPatch( + 'nova.virt.libvirt.volume.volume.' 
+ 'LibvirtFakeVolumeDriver.disconnect_volume', + mock.Mock())).mock + self.hostname = self.start_compute() + self.compute_manager = self.computes[self.hostname] + + self.volume_id = self.cinder.MULTIATTACH_VOL + # Launch a server and attach a volume + self.server_a = self._create_server(networks='none') + self.notifier.wait_for_versioned_notifications('instance.create.end') + self._attach_volume(self.server_a, self.volume_id) + + # Launch a second server and attempt to attach the same volume again + self.server_b = self._create_server(networks='none') + self.notifier.wait_for_versioned_notifications('instance.create.end') + self._attach_volume(self.server_b, self.volume_id) + self.lock = threading.Lock() + # run periodics to allow async tasks to complete + self._run_periodics() + + def _should_disconnect(self, *args, **kwargs): + with self.lock: + result = self._orgi_should_disconnect( + self.compute_manager.driver, *args, **kwargs) + return result + + def test_serial_server_delete(self): + # Now that we have 2 vms both using the same multi attach volume + # we can delete the servers serially and confirm that we are cleaning up + self.should_disconnect_mock.assert_not_called() + self.disconnect_volume_mock.assert_not_called() + + self._delete_server(self.server_a) + self.should_disconnect_mock.assert_called() + self.disconnect_volume_mock.assert_not_called() + self._delete_server(self.server_b) + self.disconnect_volume_mock.assert_called() + + def test_concurrent_server_delete(self): + # Now that we have 2 vms both using the same multi attach volume + # we can delete the servers concurrently and confirm that we are + # cleaning up + self.should_disconnect_mock.assert_not_called() + self.disconnect_volume_mock.assert_not_called() + # emulate concurrent delete + context = nova_context.get_admin_context() + servers_this_host = objects.InstanceList.get_uuids_by_host( + context, self.hostname) + with mock.patch('nova.objects.InstanceList.get_uuids_by_host', + 
return_value=servers_this_host): + self.lock.acquire() + self.api.delete_server(self.server_a['id']) + self.api.delete_server(self.server_b['id']) + self.disconnect_volume_mock.assert_not_called() + # this mostly stabilizes the test but it may not be 100% reliable. + # locally with time.sleep(1) it passed 42 back to back executions + # I'm not sure why this is required given the lock but it is likely + # due to the lock being released before a background task is + # completed. i.e. the conductor or resource tracker updating state. + time.sleep(1) + self.lock.release() + self._wait_until_deleted(self.server_a) + self._wait_until_deleted(self.server_b) + self.should_disconnect_mock.assert_called() + # FIXME(sean-k-mooney): this is bug 2048837 + try: + self.disconnect_volume_mock.assert_not_called() + except AssertionError: + # NOTE(sean-k-mooney): this reproducer is not 100% + # reliable so we convert a failure to a skip to avoid + # gating issues. the bug is addressed in the follow-up patch + # and that test is stable so it is not worth fixing this test + # beyond the time.sleep(1) above. + self.skipTest("Bug 2048837: volume disconnect not called")
