From c0bc2d09f3b61f0fb2a4ff901f072a960a0dbcbd Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 11 Mar 2025 18:34:32 +0100 Subject: [PATCH 1/7] Add functional reproducer for bug 2102038 Related-Bug: #2102038 Change-Id: Ia950fc16ac5bb890baaa826580e7cb05d64831df (cherry picked from commit ebf5aca10109398a9006e54eca8d38b600d47b78) --- .../regressions/test_bug_2102038.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 nova/tests/functional/regressions/test_bug_2102038.py diff --git a/nova/tests/functional/regressions/test_bug_2102038.py b/nova/tests/functional/regressions/test_bug_2102038.py new file mode 100644 index 00000000000..e867e27c6f7 --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_2102038.py @@ -0,0 +1,55 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from nova.tests.fixtures import libvirt as fakelibvirt +from nova.tests.functional.api import client +from nova.tests.functional.libvirt import test_pci_in_placement as base + + +class MultipleSpecPerAliasWithPCIInPlacementTest( + base.PlacementPCIReportingTests +): + + def test_alias_with_multiple_specs_not_supported(self): + self.flags(group='filter_scheduler', pci_in_placement=True) + + pci_alias = [ + { + "device_type": "type-VF", + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": "f000", + "name": "a-vf", + }, + { + "device_type": "type-VF", + "vendor_id": fakelibvirt.PCI_VEND_ID, + "product_id": "f001", + "name": "a-vf", + } + ] + self.flags( + group="pci", + alias=self._to_list_of_json_str(pci_alias), + ) + extra_spec = {"pci_passthrough:alias": "a-vf:1"} + flavor_id = self._create_flavor(extra_spec=extra_spec) + + exc = self.assertRaises( + client.OpenStackApiException, + self._create_server, + flavor_id=flavor_id, + networks=[], + ) + # This is bug 2102038 as nova does not handle the internal ValueError + # and therefore returns HTTP 500 instead of returning 400 Bad Request + # with a message pointing to the unsupported alias config. + self.assertEqual(500, exc.response.status_code) From f0a1ee5a8835bad56d9736131406eb20a64fd8c2 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 27 May 2025 16:48:22 +0200 Subject: [PATCH 2/7] Return HTTP400 for multi spec pci alias if PCI in Placement PCI in Placement never supported PCI aliases with multiple specs, i.e. when an alias name is used in multiple alias definitions. The code raised ValueError late and without a proper error message. Now PciInvalidAlias with a descriptive message is raised instead. Closes-Bug: #2102038 Change-Id: Id1407a37dc5ddad22d8dbf7d589ed998ffc2804e (cherry picked from commit 0bfac5c7fedece9fe28e0eebf9c7fb535a5ee431) --- nova/pci/request.py | 18 +++++++++++++++ .../regressions/test_bug_2102038.py | 12 ++++++---- nova/tests/unit/pci/test_request.py | 22 +++++++++++++++++++ 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/nova/pci/request.py b/nova/pci/request.py index fe12bb50fd7..6ef87df551a 100644 --- a/nova/pci/request.py +++ b/nova/pci/request.py @@ -121,6 +121,23 @@ } +def _validate_aliases(aliases): + """Checks the parsed aliases for common mistakes and raise easy to parse + error messages + """ + if CONF.filter_scheduler.pci_in_placement: + alias_with_multiple_specs = [ + name for name, spec in aliases.items() if len(spec[1]) > 1] + if alias_with_multiple_specs: + raise exception.PciInvalidAlias( + "The PCI alias(es) %s have multiple specs but " + "[filter_scheduler]pci_in_placement is True. The PCI in " + "Placement feature only supports one spec per alias. You can " + "assign the same resource_class to multiple [pci]device_spec " + "matchers to allow using different devices for the same alias." + % ",".join(alias_with_multiple_specs)) + + def _get_alias_from_config() -> Alias: """Parse and validate PCI aliases from the nova config. @@ -177,6 +194,7 @@ def _get_alias_from_config() -> Alias: except Exception as exc: raise exception.PciInvalidAlias(reason=str(exc)) + _validate_aliases(aliases) return aliases diff --git a/nova/tests/functional/regressions/test_bug_2102038.py b/nova/tests/functional/regressions/test_bug_2102038.py index e867e27c6f7..f1f1d13770f 100644 --- a/nova/tests/functional/regressions/test_bug_2102038.py +++ b/nova/tests/functional/regressions/test_bug_2102038.py @@ -49,7 +49,11 @@ def test_alias_with_multiple_specs_not_supported(self): flavor_id=flavor_id, networks=[], ) - # This is bug 2102038 as nova does not handle the internal ValueError - # and therefore returns HTTP 500 instead of returning 400 Bad Request - # with a message pointing to the unsupported alias config. - self.assertEqual(500, exc.response.status_code) + self.assertEqual(400, exc.response.status_code) + self.assertIn( + "The PCI alias(es) a-vf have multiple specs but " + "[filter_scheduler]pci_in_placement is True. The PCI in Placement " + "feature only supports one spec per alias. You can assign the " + "same resource_class to multiple [pci]device_spec matchers to " + "allow using different devices for the same alias.", + exc.response.text) diff --git a/nova/tests/unit/pci/test_request.py b/nova/tests/unit/pci/test_request.py index be9c6be877e..fcd14d117c1 100644 --- a/nova/tests/unit/pci/test_request.py +++ b/nova/tests/unit/pci/test_request.py @@ -120,6 +120,28 @@ def test_get_alias_from_config_valid_multispec(self): }]) self.assertEqual(expected_result, result['QuickAssist']) + def test_get_alias_from_config_multispec_rejected_pci_in_placement(self): + _fake_alias = jsonutils.dumps({ + "name": "QuickAssist", + "capability_type": "pci", + "product_id": "4444", + "vendor_id": "8086", + "device_type": "type-PCI", + }) + + self.flags(pci_in_placement=True, group='filter_scheduler') + self.flags(alias=[_fake_alias1, _fake_alias], group='pci') + + ex = self.assertRaises( + exception.PciInvalidAlias, request._get_alias_from_config) + self.assertEqual( + "The PCI alias(es) QuickAssist have multiple specs but " + "[filter_scheduler]pci_in_placement is True. The PCI in Placement " + "feature only supports one spec per alias. You can assign the " + "same resource_class to multiple [pci]device_spec matchers to " + "allow using different devices for the same alias.", + str(ex)) + def _test_get_alias_from_config_invalid(self, alias): self.flags(alias=[alias], group='pci') self.assertRaises( From 265f4926c8fca7d7d2da7349e5bf52630498b4fd Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Tue, 11 Mar 2025 17:14:27 +0100 Subject: [PATCH 3/7] Multiple spec per PCI alias limitation Document and the limitation of the PCI in Placement feature that it does not support [pci]alias configuration where the name of the alias is repeated. E.g. [pci] alias = { "name": "vf1", "product_id":"10ca", "vendor_id":"8086", "device_type":"type-VF"} alias = { "name": "vf1", "product_id":"f000", "vendor_id":"8086", "device_type":"type-VF"} This would mean the alias vf1 can be fulfilled from devices with product id 10ca OR f000. However this OR relationship cannot be encoded to a single Placement allocation query as Placement does not support requesting alternative resource classes for a request[2]. This limitation was encoded in the original PCI in Placement implementation[1] but we missed to mention it in the doc. This is now fixed. [1]https://github.com/openstack/nova/blob/0d484ce37d86e989c8abdf57aec5e334f68206ef/nova/objects/request_spec.py#L504-L528 [2]https://docs.openstack.org/api-ref/placement/#list-allocation-candidates Related-Bug: #2102038 Change-Id: I9dd78b1498f870a4e4c3f26c23d42d105aec0350 (cherry picked from commit c3f392dd8e895c457abd55d82d58f1bb400ccff6) --- doc/source/admin/pci-passthrough.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index 0f82a227a8a..39b15bb2e5b 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -431,6 +431,12 @@ be added to the resource provider representing the matching PCI devices. (Zed) the nova-compute service will refuse to start with such configuration. It is suggested to use the PCI address of the device instead. +.. important:: + While nova supported configuring :oslo.config:option:`pci.alias` where an + alias name is repeated and therefore associated to multiple alias + specifications, such configuration is not supported when PCI tracking in + Placement is enabled. + The nova-compute service makes sure that existing instances with PCI allocations in the nova DB will have a corresponding PCI allocation in placement. This allocation healing also acts on any new instances regardless of From 2ca6c939d388606bdc98d82ed1987010ef7e3eef Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Wed, 28 May 2025 14:32:54 +0200 Subject: [PATCH 4/7] Validated that PCI alias has proper ids Either the vendor_id and product_id needs to be set or the resource_class needs to be set in each alias. This is now validated when the alias is parsed to avoid late failure during placement allocation_candidates query. Conflicts: nova/conf/pci.py due to I62ec475988eab8de948498f50d8d4c0d47321102 not on stable/2025.1 Closes-Bug: #2111440 Change-Id: I7fd43b3d6faac8c4098b0983e8adc596414823a1 (cherry picked from commit acc6221660d5113314c66361cace9e3982fa76bf) --- doc/source/admin/pci-passthrough.rst | 4 +- nova/conf/pci.py | 5 +- nova/pci/request.py | 43 +++++++++++-- .../regressions/test_bug_2111440.py | 48 ++++++++++++++ nova/tests/unit/pci/test_request.py | 64 +++++++++++++++++++ 5 files changed, 158 insertions(+), 6 deletions(-) create mode 100644 nova/tests/functional/regressions/test_bug_2111440.py diff --git a/doc/source/admin/pci-passthrough.rst b/doc/source/admin/pci-passthrough.rst index 39b15bb2e5b..eef63b894d0 100644 --- a/doc/source/admin/pci-passthrough.rst +++ b/doc/source/admin/pci-passthrough.rst @@ -494,7 +494,9 @@ configuration option supports requesting devices by Placement resource class name via the ``resource_class`` field and also support requesting traits to be present on the selected devices via the ``traits`` field in the alias. If the ``resource_class`` field is not specified in the alias then it is defaulted -by nova to ``CUSTOM_PCI__``. +by nova to ``CUSTOM_PCI__``. Either the ``product_id`` +and ``vendor_id`` or the ``resource_class`` field must be provided in each +alias. For deeper technical details please read the `nova specification. `_ diff --git a/nova/conf/pci.py b/nova/conf/pci.py index ed9e7d4eb00..fd6e4ce012e 100644 --- a/nova/conf/pci.py +++ b/nova/conf/pci.py @@ -70,7 +70,8 @@ alias = { "name": "A16_16A", "device_type": "type-VF", - resource_class: "CUSTOM_A16_16A", + "resource_class": "GPU_VF", + "traits": "blue, big" } Valid key values are : @@ -108,6 +109,8 @@ in the alias is matched against the ``resource_class`` defined in the ``[pci]device_spec``. This field can only be used only if ``[filter_scheduler]pci_in_placement`` is enabled. + Either the product_id and vendor_id or the resource_class field must be + provided in each alias. ``traits`` An optional comma separated list of Placement trait names requested to be diff --git a/nova/pci/request.py b/nova/pci/request.py index 6ef87df551a..d7c9e79243d 100644 --- a/nova/pci/request.py +++ b/nova/pci/request.py @@ -121,10 +121,7 @@ } -def _validate_aliases(aliases): - """Checks the parsed aliases for common mistakes and raise easy to parse - error messages - """ +def _validate_multispec(aliases): if CONF.filter_scheduler.pci_in_placement: alias_with_multiple_specs = [ name for name, spec in aliases.items() if len(spec[1]) > 1] @@ -138,6 +135,44 @@ def _validate_aliases(aliases): % ",".join(alias_with_multiple_specs)) +def _validate_required_ids(aliases): + if CONF.filter_scheduler.pci_in_placement: + alias_without_ids_or_rc = set() + for name, alias in aliases.items(): + for spec in alias[1]: + ids = "vendor_id" in spec and "product_id" in spec + rc = "resource_class" in spec + if not ids and not rc: + alias_without_ids_or_rc.add(name) + + if alias_without_ids_or_rc: + raise exception.PciInvalidAlias( + "The PCI alias(es) %s does not have vendor_id and product_id " + "fields set or resource_class field set." + % ",".join(sorted(alias_without_ids_or_rc))) + else: + alias_without_ids = set() + for name, alias in aliases.items(): + for spec in alias[1]: + ids = "vendor_id" in spec and "product_id" in spec + if not ids: + alias_without_ids.add(name) + + if alias_without_ids: + raise exception.PciInvalidAlias( + "The PCI alias(es) %s does not have vendor_id and product_id " + "fields set." + % ",".join(sorted(alias_without_ids))) + + +def _validate_aliases(aliases): + """Checks the parsed aliases for common mistakes and raise easy to parse + error messages + """ + _validate_multispec(aliases) + _validate_required_ids(aliases) + + def _get_alias_from_config() -> Alias: """Parse and validate PCI aliases from the nova config. diff --git a/nova/tests/functional/regressions/test_bug_2111440.py b/nova/tests/functional/regressions/test_bug_2111440.py new file mode 100644 index 00000000000..44b858724e7 --- /dev/null +++ b/nova/tests/functional/regressions/test_bug_2111440.py @@ -0,0 +1,48 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from nova.tests.functional.api import client +from nova.tests.functional.libvirt import test_pci_in_placement as base + + +class MissingRCAndIdAliasWithPCIInPlacementTest( + base.PlacementPCIReportingTests +): + + def test_alias_without_rc_or_vendor_product_id_is_not_supported(self): + self.flags(group='filter_scheduler', pci_in_placement=True) + + pci_alias = [ + { + "device_type": "type-VF", + "name": "a-vf", + "traits": "foo" + }, + ] + self.flags( + group="pci", + alias=self._to_list_of_json_str(pci_alias), + ) + extra_spec = {"pci_passthrough:alias": "a-vf:1"} + flavor_id = self._create_flavor(extra_spec=extra_spec) + + exc = self.assertRaises( + client.OpenStackApiException, + self._create_server, + flavor_id=flavor_id, + networks=[], + ) + self.assertEqual(400, exc.response.status_code) + self.assertIn( + "The PCI alias(es) a-vf does not have vendor_id and product_id " + "fields set or resource_class field set.", + exc.response.text) diff --git a/nova/tests/unit/pci/test_request.py b/nova/tests/unit/pci/test_request.py index fcd14d117c1..d6b759f5914 100644 --- a/nova/tests/unit/pci/test_request.py +++ b/nova/tests/unit/pci/test_request.py @@ -226,6 +226,7 @@ def test_get_alias_from_config_valid_rc_and_traits(self): "resource_class": "foo", "traits": "bar,baz", }) + self.flags(pci_in_placement=True, group='filter_scheduler') self.flags(alias=[fake_alias], group='pci') aliases = request._get_alias_from_config() self.assertIsNotNone(aliases) @@ -278,6 +279,69 @@ def test_get_alias_from_config_conflicting_numa_policy(self): exception.PciInvalidAlias, request._get_alias_from_config) + def test_get_alias_from_config_missing_ids(self): + a1 = jsonutils.dumps({ + "name": "a1", + "product_id": "4444", + }) + a2 = jsonutils.dumps({ + "name": "a2", + "vendor_id": "4444", + }) + a3 = jsonutils.dumps({ + "name": "a3", + }) + a4 = jsonutils.dumps({ + "name": "a4", + # ignored as PCI in Placement is not enabled + "resource_class": "foo", + }) + a5 = jsonutils.dumps({ + "name": "a5", + "vendor_id": "4444", + "product_id": "4444", + }) + self.flags(alias=[a1, a2, a3, a4, a5], group='pci') + + ex = self.assertRaises( + exception.PciInvalidAlias, request._get_alias_from_config) + self.assertEqual( + "The PCI alias(es) a1,a2,a3,a4 does not have vendor_id and " + "product_id fields set.", + str(ex)) + + def test_get_alias_from_config_missing_ids_or_rc_pci_in_placement(self): + a1 = jsonutils.dumps({ + "name": "a1", + "product_id": "4444", + }) + a2 = jsonutils.dumps({ + "name": "a2", + "vendor_id": "4444", + }) + a3 = jsonutils.dumps({ + "name": "a3", + }) + a4 = jsonutils.dumps({ + "name": "a4", + "resource_class": "foo", + }) + a5 = jsonutils.dumps({ + "name": "a5", + "vendor_id": "4444", + "product_id": "4444", + }) + + self.flags(pci_in_placement=True, group='filter_scheduler') + self.flags(alias=[a1, a2, a3, a4, a5], group='pci') + + ex = self.assertRaises( + exception.PciInvalidAlias, request._get_alias_from_config) + self.assertEqual( + "The PCI alias(es) a1,a2,a3 does not have vendor_id and " + "product_id fields set or resource_class field set.", + str(ex)) + def _verify_result(self, expected, real): exp_real = zip(expected, real) for exp, real in exp_real: From b5dffede7d0aa472941d1433ee97569b70be3c48 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Wed, 28 May 2025 15:20:44 +0200 Subject: [PATCH 5/7] Validate [pci]alias at service startup Both nova-api and nova-compute depends on the [pci]alias configuration. These services loaded and validated the config lazily when it was needed. This can late and hard to troubleshoot failures during instance lifecycle operations due to simple config errors. So this patch adds an early load of this config to nova-api and nova-compute. Related-Bug: #2102038 Related-Bug: #2111440 Change-Id: I5d5dc912ca24979067984c7cb53ceaded7daf236 (cherry picked from commit ae064caf169b6d5888a5aec692b911c57c439ca4) --- nova/api/openstack/wsgi_app.py | 6 ++++++ nova/compute/manager.py | 4 ++++ nova/pci/request.py | 4 ++-- .../tests/unit/api/openstack/test_wsgi_app.py | 9 +++++++++ nova/tests/unit/compute/test_compute_mgr.py | 12 +++++++++++ nova/tests/unit/pci/test_request.py | 20 +++++++++---------- 6 files changed, 43 insertions(+), 12 deletions(-) diff --git a/nova/api/openstack/wsgi_app.py b/nova/api/openstack/wsgi_app.py index d6f1030f835..648d529e5f1 100644 --- a/nova/api/openstack/wsgi_app.py +++ b/nova/api/openstack/wsgi_app.py @@ -26,6 +26,7 @@ from nova import context from nova import exception from nova import objects +from nova.pci import request from nova import service from nova import utils from nova import version @@ -51,6 +52,11 @@ def _get_config_files(env=None): def _setup_service(host, name): + + # NOTE(gibi): validate the [pci]alias config early to avoid late failures + # at instance creation due to config errors. + request.get_alias_from_config() + try: utils.raise_if_old_compute() except exception.TooOldComputeService as e: diff --git a/nova/compute/manager.py b/nova/compute/manager.py index 426ac66199a..98ca91be0cb 100644 --- a/nova/compute/manager.py +++ b/nova/compute/manager.py @@ -1621,6 +1621,10 @@ def init_host(self, service_ref): # if the configuration is wrong. whitelist.Whitelist(CONF.pci.device_spec) + # NOTE(gibi): validate the [pci]alias config early to avoid late + # failures at instance lifecycle operations due to config errors. + pci_req_module.get_alias_from_config() + nova.conf.neutron.register_dynamic_opts(CONF) # Even if only libvirt uses them, make it available for all drivers nova.conf.devices.register_dynamic_opts(CONF) diff --git a/nova/pci/request.py b/nova/pci/request.py index d7c9e79243d..7c7c418f019 100644 --- a/nova/pci/request.py +++ b/nova/pci/request.py @@ -173,7 +173,7 @@ def _validate_aliases(aliases): _validate_required_ids(aliases) -def _get_alias_from_config() -> Alias: +def get_alias_from_config() -> Alias: """Parse and validate PCI aliases from the nova config. :returns: A dictionary where the keys are alias names and the values are @@ -237,7 +237,7 @@ def _translate_alias_to_requests( alias_spec: str, affinity_policy: ty.Optional[str] = None, ) -> ty.List['objects.InstancePCIRequest']: """Generate complete pci requests from pci aliases in extra_spec.""" - pci_aliases = _get_alias_from_config() + pci_aliases = get_alias_from_config() pci_requests: ty.List[objects.InstancePCIRequest] = [] for name, count in [spec.split(':') for spec in alias_spec.split(',')]: diff --git a/nova/tests/unit/api/openstack/test_wsgi_app.py b/nova/tests/unit/api/openstack/test_wsgi_app.py index 7fa1eacc623..b744e4e2694 100644 --- a/nova/tests/unit/api/openstack/test_wsgi_app.py +++ b/nova/tests/unit/api/openstack/test_wsgi_app.py @@ -15,6 +15,7 @@ import fixtures from oslo_config import fixture as config_fixture +from oslo_serialization import jsonutils from oslotest import base from nova.api.openstack import wsgi_app @@ -127,6 +128,14 @@ def test_setup_service_version_workaround(self, mock_check_old, mock_get): group='workarounds') wsgi_app._setup_service('myhost', 'api') + def test_setup_service_pci_alias_validation(self): + wsgi_app.CONF.set_override( + 'alias', jsonutils.dumps({'name': 'foo'}), + group='pci') + self.assertRaises( + exception.PciInvalidAlias, + wsgi_app._setup_service, 'myhost', 'api') + def test__get_config_files_empty_env(self): env = {} result = wsgi_app._get_config_files(env) diff --git a/nova/tests/unit/compute/test_compute_mgr.py b/nova/tests/unit/compute/test_compute_mgr.py index 43b359e2f54..1169b85cbf1 100644 --- a/nova/tests/unit/compute/test_compute_mgr.py +++ b/nova/tests/unit/compute/test_compute_mgr.py @@ -6954,6 +6954,18 @@ def test_init_host_pci_device_spec_validation_failure(self): self.assertRaises(exception.PciDeviceInvalidDeviceName, self.compute.init_host, None) + def test_init_host_pci_alias_validation_failure(self): + # Tests that we fail init_host if the pci.alias is + # configured incorrectly. + self.flags( + alias=[ + jsonutils.dumps({'name': 'foo'}) + ], + group='pci' + ) + self.assertRaises( + exception.PciInvalidAlias, self.compute.init_host, None) + @mock.patch('nova.compute.manager.ComputeManager._instance_update') def test_error_out_instance_on_exception_not_implemented_err(self, inst_update_mock): diff --git a/nova/tests/unit/pci/test_request.py b/nova/tests/unit/pci/test_request.py index d6b759f5914..4a736193e8c 100644 --- a/nova/tests/unit/pci/test_request.py +++ b/nova/tests/unit/pci/test_request.py @@ -81,7 +81,7 @@ def setUp(self): def test_get_alias_from_config_valid(self): self.flags(alias=[_fake_alias1], group='pci') - result = request._get_alias_from_config() + result = request.get_alias_from_config() expected_result = ( 'legacy', [{ @@ -103,7 +103,7 @@ def test_get_alias_from_config_valid_multispec(self): }) self.flags(alias=[_fake_alias1, _fake_alias], group='pci') - result = request._get_alias_from_config() + result = request.get_alias_from_config() expected_result = ( 'legacy', [{ @@ -133,7 +133,7 @@ def test_get_alias_from_config_multispec_rejected_pci_in_placement(self): self.flags(alias=[_fake_alias1, _fake_alias], group='pci') ex = self.assertRaises( - exception.PciInvalidAlias, request._get_alias_from_config) + exception.PciInvalidAlias, request.get_alias_from_config) self.assertEqual( "The PCI alias(es) QuickAssist have multiple specs but " "[filter_scheduler]pci_in_placement is True. The PCI in Placement " @@ -146,7 +146,7 @@ def _test_get_alias_from_config_invalid(self, alias): self.flags(alias=[alias], group='pci') self.assertRaises( exception.PciInvalidAlias, - request._get_alias_from_config) + request.get_alias_from_config) def test_get_alias_from_config_invalid_device_type(self): fake_alias = jsonutils.dumps({ @@ -215,7 +215,7 @@ def test_get_alias_from_config_valid_numa_policy(self): "numa_policy": policy, }) self.flags(alias=[fake_alias], group='pci') - aliases = request._get_alias_from_config() + aliases = request.get_alias_from_config() self.assertIsNotNone(aliases) self.assertIn("xxx", aliases) self.assertEqual(policy, aliases["xxx"][0]) @@ -228,7 +228,7 @@ def test_get_alias_from_config_valid_rc_and_traits(self): }) self.flags(pci_in_placement=True, group='filter_scheduler') self.flags(alias=[fake_alias], group='pci') - aliases = request._get_alias_from_config() + aliases = request.get_alias_from_config() self.assertIsNotNone(aliases) self.assertIn("xxx", aliases) self.assertEqual( @@ -256,7 +256,7 @@ def test_get_alias_from_config_conflicting_device_type(self): self.flags(alias=[fake_alias_a, fake_alias_b], group='pci') self.assertRaises( exception.PciInvalidAlias, - request._get_alias_from_config) + request.get_alias_from_config) def test_get_alias_from_config_conflicting_numa_policy(self): """Check behavior when numa_policy conflicts occur.""" @@ -277,7 +277,7 @@ def test_get_alias_from_config_conflicting_numa_policy(self): self.flags(alias=[fake_alias_a, fake_alias_b], group='pci') self.assertRaises( exception.PciInvalidAlias, - request._get_alias_from_config) + request.get_alias_from_config) def test_get_alias_from_config_missing_ids(self): a1 = jsonutils.dumps({ @@ -304,7 +304,7 @@ def test_get_alias_from_config_missing_ids(self): self.flags(alias=[a1, a2, a3, a4, a5], group='pci') ex = self.assertRaises( - exception.PciInvalidAlias, request._get_alias_from_config) + exception.PciInvalidAlias, request.get_alias_from_config) self.assertEqual( "The PCI alias(es) a1,a2,a3,a4 does not have vendor_id and " "product_id fields set.", @@ -336,7 +336,7 @@ def test_get_alias_from_config_missing_ids_or_rc_pci_in_placement(self): self.flags(alias=[a1, a2, a3, a4, a5], group='pci') ex = self.assertRaises( - exception.PciInvalidAlias, request._get_alias_from_config) + exception.PciInvalidAlias, request.get_alias_from_config) self.assertEqual( "The PCI alias(es) a1,a2,a3 does not have vendor_id and " "product_id fields set or resource_class field set.", From 456a57c0206d1b7f7f130c8de600d51abf4ad2e9 Mon Sep 17 00:00:00 2001 From: Balazs Gibizer Date: Wed, 28 May 2025 16:47:29 +0200 Subject: [PATCH 6/7] Cache [pci]alias parsing For each lifecycle operation nova re-load, parses, and validates the [pci]alias config option. This is wasteful. So this patch adds functools.cache decorator on the function used to do this work. Change-Id: If2ffb25430749a22c923c0938221833e7b883873 (cherry picked from commit 0065bb6cd4b5a72dec8f6a2b96b3d279352f2b0c) --- nova/pci/request.py | 3 ++- nova/test.py | 9 +++++++++ nova/tests/unit/pci/test_request.py | 17 +++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/nova/pci/request.py b/nova/pci/request.py index 7c7c418f019..0e61be9a1ea 100644 --- a/nova/pci/request.py +++ b/nova/pci/request.py @@ -37,7 +37,7 @@ These two aliases define a device request meaning: vendor_id is "8086" and product_id is "0442" or "0443". """ - +import functools import typing as ty import jsonschema @@ -173,6 +173,7 @@ def _validate_aliases(aliases): _validate_required_ids(aliases) +@functools.cache def get_alias_from_config() -> Alias: """Parse and validate PCI aliases from the nova config. diff --git a/nova/test.py b/nova/test.py index e084f5c14f7..5be9162280c 100644 --- a/nova/test.py +++ b/nova/test.py @@ -61,6 +61,7 @@ from nova import exception from nova import objects from nova.objects import base as objects_base +from nova.pci import request from nova import quota from nova.scheduler.client import report from nova.scheduler import utils as scheduler_utils @@ -189,6 +190,10 @@ def setUp(self): self.useFixture( nova_fixtures.PropagateTestCaseIdToChildEventlets(self.id())) + # Ensure that the pci alias is reset between test cases running in + # the same process + request.get_alias_from_config.cache_clear() + # How many of which service we've started. {$service-name: $count} self._service_fixture_count = collections.defaultdict(int) @@ -425,6 +430,10 @@ def flags(self, **kw): group = kw.pop('group', None) for k, v in kw.items(): CONF.set_override(k, v, group) + # loading and validating alias is cached so if it is reconfigured + # we need to reset the cache + if k == 'alias' and group == 'pci': + request.get_alias_from_config.cache_clear() def reset_flags(self, *k, **kw): """Reset flag variables for a test.""" diff --git a/nova/tests/unit/pci/test_request.py b/nova/tests/unit/pci/test_request.py index 4a736193e8c..f1cfc477f68 100644 --- a/nova/tests/unit/pci/test_request.py +++ b/nova/tests/unit/pci/test_request.py @@ -342,6 +342,23 @@ def test_get_alias_from_config_missing_ids_or_rc_pci_in_placement(self): "product_id fields set or resource_class field set.", str(ex)) + def test_get_alias_from_config_cached(self): + alias = jsonutils.dumps({ + "name": "a5", + "vendor_id": "4444", + "product_id": "4444", + }) + self.flags(alias=[alias], group='pci') + + origi_loads = jsonutils.loads + + with mock.patch('oslo_serialization.jsonutils.loads') as mock_loads: + mock_loads.side_effect = origi_loads + request.get_alias_from_config() + request.get_alias_from_config() + + mock_loads.assert_called_once() + def _verify_result(self, expected, real): exp_real = zip(expected, real) for exp, real in exp_real: From f2385906860b6dff63a8ab8a6146cf8928be8927 Mon Sep 17 00:00:00 2001 From: Max Lamprecht Date: Wed, 4 Feb 2026 16:43:24 +0100 Subject: [PATCH 7/7] fix: device_by_alias should respect config type When retrieving devices by alias we should respect the from_persistent_config config option. Otherwise the persistent disk detach always throws the disk detach failed warning because of checking the live config. [1] https://github.com/openstack/nova/blob/ba24639b8dd34a19885298cf728e58dd7db9e703/nova/virt/libvirt/driver.py#L2653 Fixes LP Bug: #2140347 Change-Id: I454403256c5a98fd3502a25c9ad8291d6492ae08 Signed-off-by: Max Lamprecht (cherry picked from commit 17b6b12365ecfba9b85449bcd2cd5d35a27e3480) (cherry picked from commit 5a8462745b5077e713f1d87086b835ffe1b24cbb) (cherry picked from commit b6ca54481ca36efc0f26437933a9adf588d3fa9d) --- nova/tests/unit/virt/libvirt/test_guest.py | 14 ++++++++++++++ nova/virt/libvirt/guest.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/nova/tests/unit/virt/libvirt/test_guest.py b/nova/tests/unit/virt/libvirt/test_guest.py index 3034ff0a9d2..5de5021a65c 100644 --- a/nova/tests/unit/virt/libvirt/test_guest.py +++ b/nova/tests/unit/virt/libvirt/test_guest.py @@ -351,6 +351,20 @@ def test_get_device_by_alias(self): self.assertIsNone(self.guest.get_device_by_alias('nope')) + def test_get_device_by_alias_from_persistent_config(self): + with mock.patch.object(self.guest, 'get_all_devices') as mock_get_all: + mock_get_all.return_value = [] + + self.assertIsNone(self.guest.get_device_by_alias( + 'qemu-disk1', + devtype=vconfig.LibvirtConfigGuestDisk, + from_persistent_config=True, + )) + + mock_get_all.assert_called_once_with( + # check if we're querying the persistent config + vconfig.LibvirtConfigGuestDisk, True) + def test_get_devices(self): xml = """ diff --git a/nova/virt/libvirt/guest.py b/nova/virt/libvirt/guest.py index 065181d89e6..309b266b47d 100644 --- a/nova/virt/libvirt/guest.py +++ b/nova/virt/libvirt/guest.py @@ -434,7 +434,7 @@ def get_all_disks(self): def get_device_by_alias(self, devalias, devtype=None, from_persistent_config=False): - for dev in self.get_all_devices(devtype): + for dev in self.get_all_devices(devtype, from_persistent_config): if hasattr(dev, 'alias') and dev.alias == devalias: return dev