add functional repoducer for bug 2048837
Change-Id: I8ce3044cff198209416d2a458317f01d1177e9da Signed-off-by: Sean Mooney <work@seanmooney.info>
This commit is contained in:
2 changed files with 137 additions and 0 deletions
@@ -581,6 +581,17 @@ class InstanceHelperMixin:
self.notifier.wait_for_versioned_notifications(
'instance.share_detach.error')
def _attach_volume(self, server, volume_id):
"""attach a cinder volume to a server."""
attachment = self.api.post_server_volume(
server['id'],
{'volumeAttachment': {'volumeId': volume_id}}
)
self._wait_for_volume_attach(server['id'], volume_id)
self.notifier.wait_for_versioned_notifications(
'instance.volume_attach.end')
return attachment
def _rebuild_server(self, server, image_uuid, expected_state='ACTIVE'):
"""Rebuild a server."""
self.api.post_server_action(
126
nova/tests/functional/regressions/test_bug_2048837.py
Normal file
126
nova/tests/functional/regressions/test_bug_2048837.py
Normal file
@@ -0,0 +1,126 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import fixtures
import threading
import time
from unittest import mock
from nova import context as nova_context
from nova import objects
from nova.tests.functional.libvirt import base
from nova.virt import libvirt
class TestConcurrentMultiAttachCleanup(base.ServersTestBase):
"""Regression test for bug 2048837.
This regression test aims to assert that if a multi attached volume is
attached to two vms on the same host and both are deleted the volume is
correctly cleaned up.
Nova historically did not guard the critical section where concurrent
delete determined if they were the last user of a the host mounted
multi attach volume. As a result its possible for both delete to believe
the other would call disconnect and the volume can be leaked.
see https://bugs.launchpad.net/nova/+bug/2048837 for details
"""
microversion = 'latest'
CAST_AS_CALL = False
REQUIRES_LOCKING = True
def setUp(self):
super().setUp()
self._orgi_should_disconnect = (
libvirt.LibvirtDriver._should_disconnect_target)
self.should_disconnect_mock = self.useFixture(fixtures.MockPatch(
'nova.virt.libvirt.LibvirtDriver._should_disconnect_target',
mock.Mock(side_effect=self._should_disconnect))).mock
self.disconnect_volume_mock = self.useFixture(fixtures.MockPatch(
'nova.virt.libvirt.volume.volume.'
'LibvirtFakeVolumeDriver.disconnect_volume',
mock.Mock())).mock
self.hostname = self.start_compute()
self.compute_manager = self.computes[self.hostname]
self.volume_id = self.cinder.MULTIATTACH_VOL
# Launch a server and attach a volume
self.server_a = self._create_server(networks='none')
self.notifier.wait_for_versioned_notifications('instance.create.end')
self._attach_volume(self.server_a, self.volume_id)
# Launch a second server and attempt to attach the same volume again
self.server_b = self._create_server(networks='none')
self.notifier.wait_for_versioned_notifications('instance.create.end')
self._attach_volume(self.server_b, self.volume_id)
self.lock = threading.Lock()
# run periodics to allow async tasks to complete
self._run_periodics()
def _should_disconnect(self, *args, **kwargs):
with self.lock:
result = self._orgi_should_disconnect(
self.compute_manager.driver, *args, **kwargs)
return result
def test_serial_server_delete(self):
# Now that we have 2 vms both using the same multi attach volume
# we can delete the volumes serial and confirm that we are cleaning up
self.should_disconnect_mock.assert_not_called()
self.disconnect_volume_mock.assert_not_called()
self._delete_server(self.server_a)
self.should_disconnect_mock.assert_called()
self.disconnect_volume_mock.assert_not_called()
self._delete_server(self.server_b)
self.disconnect_volume_mock.assert_called()
def test_concurrent_server_delete(self):
# Now that we have 2 vms both using the same multi attach volume
# we can delete the volumes concurrently and confirm that we are
# cleaning up
self.should_disconnect_mock.assert_not_called()
self.disconnect_volume_mock.assert_not_called()
# emulate concurrent delete
context = nova_context.get_admin_context()
servers_this_host = objects.InstanceList.get_uuids_by_host(
context, self.hostname)
with mock.patch('nova.objects.InstanceList.get_uuids_by_host',
return_value=servers_this_host):
self.lock.acquire()
self.api.delete_server(self.server_a['id'])
self.api.delete_server(self.server_b['id'])
self.disconnect_volume_mock.assert_not_called()
# this mostly stabilizes the test but it may not be 100% reliable.
# locally with time.sleep(1) it passed 42 back to back executions
# im not sure why this is required given the lock but it is likely
# due to a the lock being released before a background task is
# completed. i.e. the conductor or resource trakcer updating state.
time.sleep(1)
self.lock.release()
self._wait_until_deleted(self.server_a)
self._wait_until_deleted(self.server_b)
self.should_disconnect_mock.assert_called()
# Fixme(sean-k-mooney): this is bug 2048837
try:
self.disconnect_volume_mock.assert_not_called()
except AssertionError:
# NOTE(sean-k-mooney): this reproducer is not 100%
# reliable so we convert a failure to a skip to avoid
# gating issues. the bug is addressed in the follow up patch
# and that test is stable so it is not worth fixing this test
# beyond the time.sleep(1) above.
self.skipTest("Bug 2048837: volume disconnect not called")
Reference in New Issue
Block a user
Blocking a user prevents them from interacting with repositories, such as opening or commenting on pull requests or issues. Learn more about blocking a user.