|
34 | 34 | from cuda.bindings._test_helpers.mempool import xfail_if_mempool_oom |
35 | 35 | except ModuleNotFoundError: |
36 | 36 | # Older cuda.bindings artifacts (for example 12.9.x backports) do not ship |
37 | | - # this helper yet. In that case, keep the primary failure visible instead of |
38 | | - # xfail-ing the known Windows MCDM mempool setup issue. |
| 37 | + # this helper yet. Keep the fallback local so tests against published |
| 38 | + # bindings still xfail the known Windows MCDM mempool setup issue. |
| 39 | + # |
| 40 | + # Keep in sync with cuda_bindings/cuda/bindings/_test_helpers/mempool.py. |
| 41 | + # This copy is intentionally simpler because it only handles cuda_core |
| 42 | + # CUDAError exceptions when the shared helper is absent. |
| 43 | + def _is_windows_mcdm_device(device=0): |
| 44 | + if sys.platform != "win32": |
| 45 | + return False |
| 46 | + import cuda.bindings.nvml as nvml |
| 47 | + |
| 48 | + device_id = int(getattr(device, "device_id", device)) |
| 49 | + (err,) = driver.cuInit(0) |
| 50 | + if err != driver.CUresult.CUDA_SUCCESS: |
| 51 | + return False |
| 52 | + err, pci_bus_id = driver.cuDeviceGetPCIBusId(13, device_id) |
| 53 | + if err != driver.CUresult.CUDA_SUCCESS: |
| 54 | + return False |
| 55 | + pci_bus_id = pci_bus_id.split(b"\x00", 1)[0].decode("ascii") |
| 56 | + nvml.init_v2() |
| 57 | + try: |
| 58 | + handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id) |
| 59 | + current, _ = nvml.device_get_driver_model_v2(handle) |
| 60 | + return current == nvml.DriverModel.DRIVER_MCDM |
| 61 | + finally: |
| 62 | + nvml.shutdown() |
| 63 | + |
def xfail_if_mempool_oom(err_or_exc, api_name=None, device=0):
    """Mark the test xfail for an out-of-memory error on a Windows MCDM device.

    Args:
        err_or_exc: Error value or exception; only its ``str()`` form is
            inspected, for the substring ``CUDA_ERROR_OUT_OF_MEMORY``.
        api_name: Optional API name prepended to the xfail message. A
            non-string, non-``None`` value passed here is treated as the
            device instead, and ``api_name`` is reset to ``None``.
        device: Device forwarded to ``_is_windows_mcdm_device``.

    Returns:
        ``None`` when the error is not an out-of-memory error, when MCDM
        detection raises, or when the device is not detected as Windows
        MCDM. Otherwise ``pytest.xfail`` is called.
    """
    # Accept the two-argument form where the second argument is the device.
    if not (api_name is None or isinstance(api_name, str)):
        device, api_name = api_name, None

    if "CUDA_ERROR_OUT_OF_MEMORY" in str(err_or_exc):
        try:
            on_mcdm = _is_windows_mcdm_device(device)
        except Exception:
            # If MCDM detection fails, leave the primary test failure visible.
            on_mcdm = False

        if on_mcdm:
            prefix = f"{api_name} " if api_name else ""
            pytest.xfail(f"{prefix}could not reserve VA for mempool operations on Windows MCDM")
41 | 81 |
|
42 | 82 |
|
43 | 83 | # Import shared test helpers for tests across subprojects. |
|
0 commit comments