From 863e9b71e0687f17da58ad0f21e74e358be4f6f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Mon, 8 Jun 2026 08:56:43 +0200 Subject: [PATCH 1/2] Fix redundant code in XBuffer --- xobjects/context_cpu.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xobjects/context_cpu.py b/xobjects/context_cpu.py index 90952bf..4991110 100644 --- a/xobjects/context_cpu.py +++ b/xobjects/context_cpu.py @@ -768,10 +768,8 @@ def update_from_nplike(self, offset, dest_dtype, value): value = nplike_to_numpy(value) if dest_dtype != value.dtype: value = value.astype(dtype=dest_dtype) # make a copy - src = value.view("int8") - self.buffer[offset : offset + src.nbytes] = value.flatten().view( - "int8" - ) + src = value.flatten().view("int8") + self.buffer[offset : offset + src.nbytes] = src def to_bytearray(self, offset, nbytes): """copy in byte array: used in update_from_xbuffer""" From 7e6e8bb6407d6317995898f0af4f5170cb07d642 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Thu, 11 Jun 2026 15:45:05 +0300 Subject: [PATCH 2/2] Prebuild kernels on OpenMP --- tests/test_kernel.py | 2 ++ xobjects/context_cpu.py | 21 +++++++++++++++------ xobjects/struct.py | 2 +- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/test_kernel.py b/tests/test_kernel.py index e381c11..5f6cebd 100644 --- a/tests/test_kernel.py +++ b/tests/test_kernel.py @@ -4,7 +4,9 @@ # ########################################### # import cffi +import pytest import sysconfig +from pathlib import Path import numpy as np diff --git a/xobjects/context_cpu.py b/xobjects/context_cpu.py index 4991110..4bf53a2 100644 --- a/xobjects/context_cpu.py +++ b/xobjects/context_cpu.py @@ -150,8 +150,7 @@ def __init__(self, omp_num_threads=0): """ super().__init__() self.omp_num_threads = omp_num_threads - if omp_num_threads == 0: - self.allow_prebuilt_kernels = True + self.allow_prebuilt_kernels = True def __str__(self): if not self.openmp_enabled: @@ -415,8 +414,18 @@ def compile_kernel( log.debug(f"cffi def {pyname} {signature}") if self.openmp_enabled: - ffi_interface.cdef("void omp_set_num_threads(int);") - ffi_interface.cdef("int omp_get_max_threads();") + # The wrapper is needed to ensure that the omp functions are linked + ffi_interface.cdef("void xo_omp_set_num_threads(int);") + ffi_interface.cdef("int xo_omp_get_max_threads();") + specialized_source += """ + void xo_omp_set_num_threads(int num_threads) { + omp_set_num_threads(num_threads); + } + + int xo_omp_get_max_threads(void) { + return omp_get_max_threads(); + } + """ # Compile xtr_compile_args = ["-std=c99", "-DXO_CONTEXT_CPU"] @@ -528,8 +537,8 @@ def _load_kernel_module( spec.loader.exec_module(module) if self.openmp_enabled: - self.omp_set_num_threads = module.lib.omp_set_num_threads - self.omp_get_max_threads = module.lib.omp_get_max_threads + self.omp_set_num_threads = module.lib.xo_omp_set_num_threads + self.omp_get_max_threads = module.lib.xo_omp_get_max_threads return module diff --git a/xobjects/struct.py b/xobjects/struct.py index f11c547..67fb062 100644 --- a/xobjects/struct.py +++ b/xobjects/struct.py @@ -518,7 +518,6 @@ def compile_class_kernels( extra_classes=(), extra_compile_args=(), ): - if context.allow_prebuilt_kernels: _print_state = Print.suppress Print.suppress = True @@ -532,6 +531,7 @@ def compile_class_kernels( config={}, tracker_element_classes=[], classes=list(extra_classes) + [cls], + context=context, ) except ImportError: kernel_info = None