Invalidate PCI-in-placement cached RPs during claim

This makes us invalidate our cache of the PCI-in-placement resource
providers when we go to do instance_claim(). This is not technically
required right now, but is set up for the next patch, where we will
update that inventory during claim and we need to make sure we are
working with the latest version. Without this, we may consider a
cached version of the inventory to be the same as the proposed one,
and thus not actually update placement when we need to. Since PCI-in-
placement was designed to tolerate external changes to the inventory
(especially/explicitly changing the reserved count), we need to be
careful not to allow our cache to prevent us from taking the action
we intend.
Related to blueprint one-time-use-devices
Change-Id: I89039328af7a2d2e6a4128dd08dbe8e97ecb16cd
This commit is contained in:
Dan Smith
2025年03月12日 11:43:24 -07:00
parent ba00d60b95
commit c5efabbd07

View File

@@ -138,6 +138,26 @@ class ResourceTracker(object):
'not match host')
self.service_ref = service_ref
def _invalidate_pci_in_placement_cached_rps(self, allocs):
    """Invalidate cache for PCI-in-placement providers.

    This invalidates the local cached copy of any provider for which an
    allocation of a PCI-in-placement device exists. We do this in case the
    reserved count has been modified externally to make sure we see it.

    :param allocs: Allocations keyed by resource provider UUID (the
        placement allocation structure); may be None or empty, in which
        case this is a no-op.
    """
    if not allocs:
        return
    # Only the provider UUIDs matter here; the per-provider allocation
    # details are not needed to decide whether to invalidate, so iterate
    # keys directly rather than items().
    for rp in allocs:
        try:
            p_data = self.provider_tree.data(rp)
        except ValueError:
            # Not all allocations for an instance are necessarily against
            # a provider in our tree
            continue
        if os_traits.COMPUTE_MANAGED_PCI_DEVICE in p_data.traits:
            self.reportclient.invalidate_resource_provider(
                rp, cacheonly=True)
@utils.synchronized(COMPUTE_RESOURCE_SEMAPHORE, fair=True)
def instance_claim(self, context, instance, nodename, allocations,
limits=None):
@@ -207,6 +227,11 @@ class ResourceTracker(object):
claimed_resources = self._claim_resources(allocations)
instance.resources = claimed_resources
# In case we have any allocations for PCI-in-placement devices, be
# sure to invalidate our cache of those providers before we run
# _update() below (which does the PCI-in-placement sync).
self._invalidate_pci_in_placement_cached_rps(allocations)
# Mark resources in-use and update stats
self._update_usage_from_instance(context, instance, nodename)
@@ -370,6 +395,11 @@ class ResourceTracker(object):
instance.migration_context = mig_context
instance.save()
# In case we have any allocations for PCI-in-placement devices, be
# sure to invalidate our cache of those providers before we run
# _update() below (which does the PCI-in-placement sync).
self._invalidate_pci_in_placement_cached_rps(allocations)
# Mark the resources in-use for the resize landing on this
# compute host:
self._update_usage_from_migration(context, instance, migration,

View File

@@ -3424,9 +3424,12 @@ class TestLiveMigration(BaseTestCase):
mock.patch.object(objects.Instance, 'save'),
mock.patch.object(self.rt, '_update'),
mock.patch.object(self.rt.pci_tracker, 'claim_instance'),
mock.patch.object(self.rt, '_update_usage_from_migration')
mock.patch.object(self.rt, '_update_usage_from_migration'),
mock.patch.object(self.rt,
'_invalidate_pci_in_placement_cached_rps')
) as (mock_from_instance, mock_migration_save, mock_instance_save,
mock_update, mock_pci_claim_instance, mock_update_usage):
mock_update, mock_pci_claim_instance, mock_update_usage,
mock_invalidate):
claim = self.rt.live_migration_claim(ctxt, instance, _NODENAME,
migration, limits=None,
allocs=None)
@@ -3441,6 +3444,7 @@ class TestLiveMigration(BaseTestCase):
mock_pci_claim_instance.assert_not_called()
mock_update_usage.assert_called_with(ctxt, instance, migration,
_NODENAME)
mock_invalidate.assert_called()
class TestUpdateUsageFromMigration(test.NoDBTestCase):
@@ -4350,6 +4354,45 @@ class ResourceTrackerTestCase(test.NoDBTestCase):
self.assertRaises(AssertionError, _test_explict_unfair)
self.assertRaises(AssertionError, _test_implicit_unfair)
def test_invalidate_pci_in_placement(self):
    report_client = mock.MagicMock()
    tracker = resource_tracker.ResourceTracker(
        _HOSTNAME, mock.sentinel.driver, report_client)
    # NOTE(danms): Cover allocations against a compute node, a
    # PCI-in-placement child provider, and a provider outside that
    # tree. The PCI allocation deliberately carries two resource
    # classes (slightly unrealistic) to prove that we invalidate once
    # per *provider* and not once per resource class.
    allocations = {
        str(uuids.compute): {
            'resources': {
                'VCPU': 1,
                'MEMORY_MB': 512,
            },
        },
        str(uuids.othercompute): {
            'resources': {
                'VCPU': 1,
            }
        },
        str(uuids.pci): {
            'resources': {
                'CUSTOM_PCI_DEAD_BEEF': 1,
                'SOME_OTHER_TRAIT': 1,
            }
        }
    }
    # One data() result per provider, in dict insertion order: a plain
    # compute node (no PCI trait), a provider not present in our tree,
    # then the PCI-in-placement provider.
    tree_results = [
        mock.MagicMock(traits=[]),
        ValueError,
        mock.MagicMock(traits=['COMPUTE_MANAGED_PCI_DEVICE']),
    ]
    with mock.patch.object(tracker, 'provider_tree') as fake_tree:
        fake_tree.data.side_effect = tree_results
        tracker._invalidate_pci_in_placement_cached_rps(allocations)
    report_client.invalidate_resource_provider.assert_called_once_with(
        uuids.pci, cacheonly=True)
def test_set_service_ref(self):
rt = resource_tracker.ResourceTracker(
_HOSTNAME, mock.sentinel.driver, mock.sentinel.reportclient)
Reference in New Issue
openstack/nova
Block a user
Blocking a user prevents them from interacting with repositories, such as opening or commenting on pull requests or issues. Learn more about blocking a user.

The note is not visible to the blocked user.