Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions cuda_core/tests/graph/test_graph_memory_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,36 @@ def test_gmr_check_capture_state(mempool_device, mode):
gb.end_building().complete()


def test_graph_memory_resource_attributes_direct_init_raises():
    """Calling the GraphMemoryResourceAttributes constructor directly must raise RuntimeError."""
    from cuda.core._memory._graph_memory_resource import GraphMemoryResourceAttributes

    attributes_cls = GraphMemoryResourceAttributes
    rejects_direct_init = pytest.raises(RuntimeError, match="cannot be instantiated directly")
    with rejects_direct_init:
        attributes_cls()


def test_graph_memory_resource_accessibility_flags(init_cuda):
    """Check the accessibility flags and device_id reported by a GraphMemoryResource."""
    current_device = Device()
    resource = GraphMemoryResource(current_device)

    # Identity comparisons on purpose: a merely truthy/falsy value is not enough.
    assert resource.is_device_accessible is True
    assert resource.is_host_accessible is False

    # The resource must report the ordinal of the device it was built from.
    assert resource.device_id == int(current_device)


def test_graph_memory_resource_attributes_repr(mempool_device):
    """The repr of the attributes object is wrapped in the class name and lists four memory counters."""
    text = repr(GraphMemoryResource(mempool_device).attributes)

    # Outer shape: "GraphMemoryResourceAttributes(" ... ")".
    assert text.startswith("GraphMemoryResourceAttributes(")
    assert text.endswith(")")

    # Each counter must show up as a "name=" fragment, checked in a fixed order.
    expected_fragments = (
        "reserved_mem_current=",
        "reserved_mem_high=",
        "used_mem_current=",
        "used_mem_high=",
    )
    for fragment in expected_fragments:
        assert fragment in text


@pytest.mark.parametrize("mode", ["global", "thread_local", "relaxed"])
def test_dmr_check_capture_state(mempool_device, mode):
"""
Expand Down
135 changes: 83 additions & 52 deletions cuda_core/tests/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,64 @@ def test_launch_config_native_conversion(init_cuda):
pytest.skip("Driver or GPU not new enough for thread block clusters")


def test_to_native_launch_config_no_cluster():
    """Convert a LaunchConfig without a cluster setting and verify every field of the native struct."""
    from cuda.core._launch_config import _to_native_launch_config

    grid_dims = (4, 5, 6)
    block_dims = (7, 8, 9)
    native = _to_native_launch_config(LaunchConfig(grid=grid_dims, block=block_dims, shmem_size=128))

    # Grid extents are copied through component by component.
    assert native.gridDimX == 4, f"Expected gridDimX=4, got {native.gridDimX}"
    assert native.gridDimY == 5, f"Expected gridDimY=5, got {native.gridDimY}"
    assert native.gridDimZ == 6, f"Expected gridDimZ=6, got {native.gridDimZ}"

    # Same for the block extents.
    assert native.blockDimX == 7, f"Expected blockDimX=7, got {native.blockDimX}"
    assert native.blockDimY == 8, f"Expected blockDimY=8, got {native.blockDimY}"
    assert native.blockDimZ == 9, f"Expected blockDimZ=9, got {native.blockDimZ}"

    assert native.sharedMemBytes == 128, f"Expected sharedMemBytes=128, got {native.sharedMemBytes}"

    # Nothing optional was requested, so no launch attributes are expected.
    assert native.numAttrs == 0, f"Expected numAttrs=0, got {native.numAttrs}"
    assert list(native.attrs) == [], f"Expected empty attrs, got {list(native.attrs)}"


def test_launch_config_cooperative_unsupported(monkeypatch):
    """Requesting a cooperative launch fails with CUDAError when the device reports no support."""
    from cuda.core import _launch_config as launch_config_module

    class _NoCoopProps:
        cooperative_launch = False

    class _NoCoopDevice:
        properties = _NoCoopProps()

    def _make_device():
        # Zero-argument factory standing in for Device() inside the module under test.
        return _NoCoopDevice()

    monkeypatch.setattr(launch_config_module, "Device", _make_device)

    expected_failure = pytest.raises(CUDAError, match="cooperative kernels are not supported")
    with expected_failure:
        LaunchConfig(grid=1, block=1, is_cooperative=True)


def test_to_native_launch_config_cooperative(monkeypatch):
    """A cooperative LaunchConfig converts to a native config carrying one cooperative attribute.

    ``Device`` in the launch-config module is replaced by a stub whose
    properties report ``cooperative_launch = True``.
    """
    from cuda.bindings import driver
    from cuda.core import _launch_config as launch_config_module
    from cuda.core._launch_config import _to_native_launch_config

    class _CoopProps:
        cooperative_launch = True

    class _CoopDevice:
        properties = _CoopProps()

    def _make_device():
        # Zero-argument factory standing in for Device() inside the module under test.
        return _CoopDevice()

    monkeypatch.setattr(launch_config_module, "Device", _make_device)

    native = _to_native_launch_config(LaunchConfig(grid=2, block=4, is_cooperative=True))

    assert native.gridDimX == 2
    assert native.blockDimX == 4
    assert native.numAttrs == 1

    # The single attribute must be the cooperative one, with its flag set.
    attr = native.attrs[0]
    assert attr.id == driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_COOPERATIVE, (
        f"Expected CU_LAUNCH_ATTRIBUTE_COOPERATIVE, got {attr.id}"
    )
    assert attr.value.cooperative == 1, f"Expected cooperative=1, got {attr.value.cooperative}"


def test_launch_invalid_values(init_cuda):
code = 'extern "C" __global__ void my_kernel() {}'
program = Program(code, SourceCodeType.CXX)
Expand Down Expand Up @@ -403,28 +461,42 @@ class MyFloat(ctypes.c_float):
class MyBool(ctypes.c_bool):
pass

# These should NOT raise they should be handled via isinstance fallback
# These should NOT raise; they should be handled via isinstance fallback
holder = ParamHolder([MyInt32(42), MyFloat(3.14), MyBool(True)])
assert holder.ptr != 0


@requires_module(np, "2.1")
def test_launch_scalar_argument_ctypes_subclass_fallback():
"""Subclassed ctypes scalars survive the launch path and reach the kernel correctly."""
@pytest.mark.parametrize(
("scalar_kind", "np_dtype", "cpp_type", "raw_value"),
[
("ctypes", np.int32, "signed int", -123456),
("numpy", np.float32, "float", 3.14),
],
ids=["ctypes_subclass", "numpy_subclass"],
)
def test_launch_scalar_argument_subclass_fallback(scalar_kind, np_dtype, cpp_type, raw_value):
"""Subclassed scalar arguments survive fallback handling and reach the kernel."""
if scalar_kind == "ctypes":

class MyInt32(ctypes.c_int32):
pass
class Subclassed(ctypes.c_int32):
pass
else:

class Subclassed(np.float32):
pass

scalar = Subclassed(raw_value)
expected = np_dtype(raw_value)

dev = Device()
dev.set_current()

mr = LegacyPinnedMemoryResource()
b = mr.allocate(np.dtype(np.int32).itemsize)
arr = np.from_dlpack(b).view(np.int32)
b = mr.allocate(np.dtype(np_dtype).itemsize)
arr = np.from_dlpack(b).view(np_dtype)
arr[:] = 0

scalar = MyInt32(-123456)

code = r"""
template <typename T>
__global__ void write_scalar(T* arr, T val) {
Expand All @@ -435,17 +507,16 @@ class MyInt32(ctypes.c_int32):
arch = "".join(f"{i}" for i in dev.compute_capability)
pro_opts = ProgramOptions(std="c++17", arch=f"sm_{arch}")
prog = Program(code, code_type="c++", options=pro_opts)
ker_name = "write_scalar<signed int>"
ker_name = f"write_scalar<{cpp_type}>"
mod = prog.compile("cubin", name_expressions=(ker_name,))
ker = mod.get_kernel(ker_name)

# This exercises the prepare_ctypes_arg isinstance fallback through a real launch.
stream = dev.default_stream
config = LaunchConfig(grid=1, block=1)
launch(stream, config, ker, arr.ctypes.data, scalar)
stream.sync()

assert arr[0] == scalar.value
assert arr[0] == expected


def test_kernel_arg_numpy_subclass_isinstance_fallback():
Expand All @@ -462,46 +533,6 @@ class MyFloat32(np.float32):
assert holder.ptr != 0


@requires_module(np, "2.1")
def test_launch_scalar_argument_numpy_subclass_fallback():
    """Launch a kernel with an np.float32 subclass instance and check the value it writes back."""

    class MyFloat32(np.float32):
        pass

    scalar = MyFloat32(3.14)

    dev = Device()
    dev.set_current()

    # One float32 slot from the pinned resource, viewed through NumPy.
    pinned = LegacyPinnedMemoryResource()
    slot = pinned.allocate(np.dtype(np.float32).itemsize)
    arr = np.from_dlpack(slot).view(np.float32)
    arr[:] = 0.0

    code = r"""
template <typename T>
__global__ void write_scalar(T* arr, T val) {
arr[0] = val;
}
"""

    # Compile the float instantiation of the template for the current device.
    arch = "".join(f"{i}" for i in dev.compute_capability)
    ker_name = "write_scalar<float>"
    prog = Program(code, code_type="c++", options=ProgramOptions(std="c++17", arch=f"sm_{arch}"))
    ker = prog.compile("cubin", name_expressions=(ker_name,)).get_kernel(ker_name)

    # This exercises the prepare_numpy_arg isinstance fallback through a real launch.
    stream = dev.default_stream
    launch(stream, LaunchConfig(grid=1, block=1), ker, arr.ctypes.data, scalar)
    stream.sync()

    assert arr[0] == scalar


def test_kernel_arg_python_isinstance_fallbacks():
"""Subclassed Python builtins hit the isinstance fallback in ParamHolder."""
from cuda.core._kernel_arg_handler import ParamHolder
Expand Down
90 changes: 90 additions & 0 deletions cuda_core/tests/test_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,93 @@ def test_which_backend_is_not_property(self):
"""
attr = inspect.getattr_static(Linker, "which_backend")
assert not isinstance(attr, property)


@pytest.fixture
def driver_binding(monkeypatch):
    """Patch ``_linker._driver`` with ``cuda.bindings.driver`` for one test and return that module."""
    from cuda.bindings import driver as real_driver

    # monkeypatch undoes the substitution when the test finishes.
    monkeypatch.setattr(_linker, "_driver", real_driver)
    return real_driver


def test_prepare_driver_options_all_supported(driver_binding):
    """Set all eight driver-supported options and verify the option/value pairs produced."""
    driver = driver_binding
    jit = driver.CUjit_option

    formatted, keys = LinkerOptions(
        arch="sm_80",
        max_register_count=32,
        verbose=True,
        link_time_optimization=True,
        optimization_level=2,
        debug=True,
        lineinfo=True,
        no_cache=True,
    )._prepare_driver_options()

    assert len(formatted) == len(keys)
    assert len(keys) == 4 + 8  # 4 fixed log-buffer entries + 8 options set above

    # Drop the log-buffer prefix; what remains must map each key to exactly one value.
    payload_keys = keys[4:]
    assert len(set(payload_keys)) == len(payload_keys), f"duplicate option keys: {payload_keys}"
    option_to_value = dict(zip(payload_keys, formatted[4:]))

    # Ordered (key, expected value) table; a swapped or wrong value fails at its own row.
    expected_pairs = [
        (jit.CU_JIT_TARGET, driver.CUjit_target.CU_TARGET_COMPUTE_80),
        (jit.CU_JIT_MAX_REGISTERS, 32),
        (jit.CU_JIT_LOG_VERBOSE, 1),
        (jit.CU_JIT_LTO, 1),
        (jit.CU_JIT_OPTIMIZATION_LEVEL, 2),
        (jit.CU_JIT_GENERATE_DEBUG_INFO, 1),
        (jit.CU_JIT_GENERATE_LINE_INFO, 1),
        (jit.CU_JIT_CACHE_MODE, driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE),
    ]
    for option_key, expected_value in expected_pairs:
        assert option_to_value[option_key] == expected_value


@pytest.mark.parametrize(
    "kwargs,match",
    [
        ({"ftz": True}, "ftz option is deprecated"),
        ({"prec_div": True}, "prec_div option is deprecated"),
        ({"prec_sqrt": True}, "prec_sqrt option is deprecated"),
        ({"fma": True}, "fma options is deprecated"),
        ({"kernels_used": "my_kernel"}, "kernels_used is deprecated"),
        ({"variables_used": "my_var"}, "variables_used is deprecated"),
        ({"optimize_unused_variables": True}, "optimize_unused_variables is deprecated"),
    ],
)
def test_prepare_driver_options_deprecated_warnings(driver_binding, kwargs, match):
    """Preparing driver options with a deprecated field set emits the matching DeprecationWarning."""
    # Construct outside the warns-context so only the prepare call is under observation.
    linker_options = LinkerOptions(**kwargs)

    expects_deprecation = pytest.warns(DeprecationWarning, match=match)
    with expects_deprecation:
        linker_options._prepare_driver_options()


@pytest.mark.parametrize(
    "kwargs,match",
    [
        ({"time": True}, "time option is not supported by the driver API"),
        ({"ptx": True}, "ptx option is not supported by the driver API"),
        ({"ptxas_options": ["-v"]}, "ptxas_options option is not supported by the driver API"),
        ({"split_compile": 0}, "split_compile option is not supported by the driver API"),
        ({"split_compile_extended": 1}, "split_compile_extended option is not supported by the driver API"),
    ],
)
def test_prepare_driver_options_unsupported_raises(driver_binding, kwargs, match):
    """Preparing driver options with a field the driver API lacks raises the matching ValueError."""
    # Construct outside the raises-context so only the prepare call is under observation.
    linker_options = LinkerOptions(**kwargs)

    expects_rejection = pytest.raises(ValueError, match=match)
    with expects_rejection:
        linker_options._prepare_driver_options()


def test_linker_empty_object_codes_raises():
    """Constructing a Linker with no arguments is rejected with ValueError."""
    expects_rejection = pytest.raises(ValueError, match="At least one ObjectCode object must be provided")
    with expects_rejection:
        Linker()


def test_as_bytes_nvjitlink_unavailable(monkeypatch):
    """With the nvJitLink backend flag forced off, as_bytes('nvjitlink') raises RuntimeError."""
    # Flip the module flag first so the options object is created with the backend marked unavailable.
    monkeypatch.setattr(_linker, "_use_nvjitlink_backend", False)
    linker_options = LinkerOptions(arch="sm_80")

    expects_unavailable = pytest.raises(RuntimeError, match="nvJitLink backend is not available")
    with expects_unavailable:
        linker_options.as_bytes("nvjitlink")
Loading
Loading