This commit is contained in:
2026-04-10 15:06:59 +02:00
parent 3031b7153b
commit e5a4711004
7806 changed files with 1918528 additions and 335 deletions

View File

@@ -0,0 +1,8 @@
from numba.cuda.testing import ensure_supported_ccs_initialized
from numba.testing import load_testsuite
import os
def load_tests(loader, tests, pattern):
    """unittest protocol hook: collect this package's test suite.

    Initializes the supported compute-capability state before discovery
    (see numba.cuda.testing), then loads all tests in this directory.
    """
    ensure_supported_ccs_initialized()
    return load_testsuite(loader, os.path.dirname(__file__))

View File

@@ -0,0 +1,234 @@
from numba import cuda
from numba.cuda.testing import CUDATestCase
import numpy as np
import sys
class UseCase:
    """
    Provide a way to call a kernel as if it were a function.

    This allows the CUDA cache tests to closely match the CPU cache tests,
    and also to support calling cache use cases as njitted functions. The
    class wraps a function that takes an array for the return value and
    arguments, and provides an interface that accepts arguments, launches
    the kernel appropriately, and returns the stored return value.

    The return type is inferred from the type of the first argument, unless
    it is explicitly overridden by the ``retty`` kwarg.

    Subclasses must provide ``_call(self, ret, *args)`` to actually invoke
    the wrapped function.
    """

    def __init__(self, func, retty=None):
        self._func = func
        self._retty = retty

    def __call__(self, *args):
        # Box every argument so the callee always sees arrays.
        boxed = [np.asarray(a) for a in args]
        # 0-d return slot: explicit dtype when given, else mirror arg 0.
        out = (np.ndarray((), dtype=self._retty) if self._retty
               else np.zeros_like(boxed[0]))
        self._call(out, *boxed)
        return out[()]

    @property
    def func(self):
        """The wrapped function, unmodified."""
        return self._func
class CUDAUseCase(UseCase):
    # Launches the wrapped kernel with a [1, 1] launch configuration
    # (one block, one thread) — use cases operate on 0-d arrays.
    def _call(self, ret, *args):
        self._func[1, 1](ret, *args)
# Cached / uncached variants of the same addition. Z is a module-level
# constant defined *below*; this is fine because the kernels are only
# compiled lazily at first launch, after the module has fully executed.
@cuda.jit(cache=True)
def add_usecase_kernel(r, x, y):
    r[()] = x[()] + y[()] + Z


@cuda.jit(cache=False)
def add_nocache_usecase_kernel(r, x, y):
    r[()] = x[()] + y[()] + Z


add_usecase = CUDAUseCase(add_usecase_kernel)
add_nocache_usecase = CUDAUseCase(add_nocache_usecase_kernel)

Z = 1
# Inner / outer cached / uncached cases
# A cached device function called from both a cached and an uncached kernel.
@cuda.jit(cache=True)
def inner(x, y):
    return x + y + Z


@cuda.jit(cache=True)
def outer_kernel(r, x, y):
    r[()] = inner(-y[()], x[()])


@cuda.jit(cache=False)
def outer_uncached_kernel(r, x, y):
    r[()] = inner(-y[()], x[()])


outer = CUDAUseCase(outer_kernel)
outer_uncached = CUDAUseCase(outer_uncached_kernel)
# Exercise returning a record instance. This used to hardcode the dtype
# pointer's value in the bitcode.
packed_record_type = np.dtype([('a', np.int8), ('b', np.float64)])
aligned_record_type = np.dtype([('a', np.int8), ('b', np.float64)], align=True)

# Two-element record arrays: packed_arr[i] == (i + 1, i + 42.5).
packed_arr = np.empty(2, dtype=packed_record_type)
for i in range(packed_arr.size):
    packed_arr[i]['a'] = i + 1
    packed_arr[i]['b'] = i + 42.5

# Same data, converted to the aligned layout.
aligned_arr = np.array(packed_arr, dtype=aligned_record_type)


@cuda.jit(cache=True)
def record_return(r, ary, i):
    # Copies one record out of the array into the 0-d return slot.
    r[()] = ary[i]


# Same kernel, wrapped with each record layout as the return type.
record_return_packed = CUDAUseCase(record_return, retty=packed_record_type)
record_return_aligned = CUDAUseCase(record_return, retty=aligned_record_type)
# Closure test cases: each use case captures a different value of x, so
# the cache must disambiguate otherwise-identical function code.
def make_closure(x):
    @cuda.jit(cache=True)
    def closure(r, y):
        r[()] = x + y[()]

    return CUDAUseCase(closure)


closure1 = make_closure(3)
closure2 = make_closure(5)
closure3 = make_closure(7)
closure4 = make_closure(9)
# Ambiguous / renamed functions
@cuda.jit(cache=True)
def ambiguous_function(r, x):
r[()] = x[()] + 2
renamed_function1 = CUDAUseCase(ambiguous_function)
@cuda.jit(cache=True)
def ambiguous_function(r, x):
r[()] = x[()] + 6
renamed_function2 = CUDAUseCase(ambiguous_function)
# Deliberately declares a large number of local arrays so the generated IR
# is very large — do not "simplify" this kernel; its verbosity is the point.
@cuda.jit(cache=True)
def many_locals():
    aa = cuda.local.array((1, 1), np.float64)
    ab = cuda.local.array((1, 1), np.float64)
    ac = cuda.local.array((1, 1), np.float64)
    ad = cuda.local.array((1, 1), np.float64)
    ae = cuda.local.array((1, 1), np.float64)
    af = cuda.local.array((1, 1), np.float64)
    ag = cuda.local.array((1, 1), np.float64)
    ah = cuda.local.array((1, 1), np.float64)
    ai = cuda.local.array((1, 1), np.float64)
    aj = cuda.local.array((1, 1), np.float64)
    ak = cuda.local.array((1, 1), np.float64)
    al = cuda.local.array((1, 1), np.float64)
    am = cuda.local.array((1, 1), np.float64)
    an = cuda.local.array((1, 1), np.float64)
    ao = cuda.local.array((1, 1), np.float64)
    ap = cuda.local.array((1, 1), np.float64)
    ar = cuda.local.array((1, 1), np.float64)
    at = cuda.local.array((1, 1), np.float64)
    au = cuda.local.array((1, 1), np.float64)
    av = cuda.local.array((1, 1), np.float64)
    aw = cuda.local.array((1, 1), np.float64)
    ax = cuda.local.array((1, 1), np.float64)
    ay = cuda.local.array((1, 1), np.float64)
    az = cuda.local.array((1, 1), np.float64)
    # Zero every array so none of the locals can be optimized away.
    aa[:] = 0
    ab[:] = 0
    ac[:] = 0
    ad[:] = 0
    ae[:] = 0
    af[:] = 0
    ag[:] = 0
    ah[:] = 0
    ai[:] = 0
    aj[:] = 0
    ak[:] = 0
    al[:] = 0
    am[:] = 0
    an[:] = 0
    ao[:] = 0
    ap[:] = 0
    ar[:] = 0
    at[:] = 0
    au[:] = 0
    av[:] = 0
    aw[:] = 0
    ax[:] = 0
    ay[:] = 0
    az[:] = 0
# Simple use case for multiprocessing test
@cuda.jit(cache=True)
def simple_usecase_kernel(r, x):
    r[()] = x[()]


simple_usecase_caller = CUDAUseCase(simple_usecase_kernel)


# Usecase with cooperative groups
@cuda.jit(cache=True)
def cg_usecase_kernel(r, x):
    # Only the grid sync matters here; r and x are intentionally unused.
    grid = cuda.cg.this_grid()
    grid.sync()


cg_usecase = CUDAUseCase(cg_usecase_kernel)
class _TestModule(CUDATestCase):
    """
    Tests for functionality of this module's functions.
    Note this does not define any "test_*" method, instead check_module()
    should be called by hand.
    """

    def check_module(self, mod):
        self.assertPreciseEqual(mod.add_usecase(2, 3), 6)  # 2 + 3 + Z(=1)
        self.assertPreciseEqual(mod.outer_uncached(3, 2), 2)  # inner(-2, 3)
        self.assertPreciseEqual(mod.outer(3, 2), 2)

        packed_rec = mod.record_return_packed(mod.packed_arr, 1)
        self.assertPreciseEqual(tuple(packed_rec), (2, 43.5))
        aligned_rec = mod.record_return_aligned(mod.aligned_arr, 1)
        self.assertPreciseEqual(tuple(aligned_rec), (2, 43.5))

        mod.simple_usecase_caller(2)


def self_test():
    # Run the checks against this very module (used from a subprocess).
    mod = sys.modules[__name__]
    _TestModule().check_module(mod)

View File

@@ -0,0 +1,41 @@
import sys
from numba import cuda, njit
from numba.cuda.testing import CUDATestCase
from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase, UseCase
class CPUUseCase(UseCase):
    # CPU variant: call the njitted function directly, no launch config.
    def _call(self, ret, *args):
        self._func(ret, *args)


# Using the same function as a cached CPU and CUDA-jitted function
def target_shared_assign(r, x):
    r[()] = x[()]


# The same Python function compiled for both targets, each with caching.
assign_cuda_kernel = cuda.jit(cache=True)(target_shared_assign)
assign_cuda = CUDAUseCase(assign_cuda_kernel)
assign_cpu_jitted = njit(cache=True)(target_shared_assign)
assign_cpu = CPUUseCase(assign_cpu_jitted)
class _TestModule(CUDATestCase):
    """
    Tests for functionality of this module's functions.
    Note this does not define any "test_*" method, instead check_module()
    should be called by hand.
    """

    def check_module(self, mod):
        # Both targets round-trip int and float scalars unchanged.
        self.assertPreciseEqual(mod.assign_cpu(5), 5)
        self.assertPreciseEqual(mod.assign_cpu(5.5), 5.5)
        self.assertPreciseEqual(mod.assign_cuda(5), 5)
        self.assertPreciseEqual(mod.assign_cuda(5.5), 5.5)


def self_test():
    # Run the checks against this very module (used from a subprocess).
    mod = sys.modules[__name__]
    _TestModule().check_module(mod)

View File

@@ -0,0 +1,58 @@
from numba import types
from numba.core import config
class TestStruct:
    """Plain two-field record used to exercise CUDA extension typing."""

    def __init__(self, x, y):
        self.x, self.y = x, y
class TestStructModelType(types.Type):
    # Numba type corresponding to TestStruct instances.
    def __init__(self):
        super().__init__(name="TestStructModelType")
# Singleton instance used by the typing and lowering registrations below.
test_struct_model_type = TestStructModelType()

# The extension registrations below use real compiler machinery, which is
# unavailable (and unneeded) under the CUDA simulator.
if not config.ENABLE_CUDASIM:
    from numba import int32
    from numba.core.extending import (
        models,
        register_model,
        make_attribute_wrapper,
        typeof_impl,
        type_callable
    )
    from numba.cuda.cudaimpl import lower
    from numba.core import cgutils

    # typeof(TestStruct instance) -> TestStructModelType
    @typeof_impl.register(TestStruct)
    def typeof_teststruct(val, c):
        return test_struct_model_type

    # Data model: two int32 members mirroring TestStruct's fields.
    @register_model(TestStructModelType)
    class TestStructModel(models.StructModel):
        def __init__(self, dmm, fe_type):
            members = [("x", int32), ("y", int32)]
            super().__init__(dmm, fe_type, members)

    # Expose the members as attributes in jitted code.
    make_attribute_wrapper(TestStructModelType, 'x', 'x')
    make_attribute_wrapper(TestStructModelType, 'y', 'y')

    # Typing for the TestStruct(x, y) constructor call; only integer
    # arguments are accepted (typer returns None otherwise).
    @type_callable(TestStruct)
    def type_test_struct(context):
        def typer(x, y):
            if isinstance(x, types.Integer) and isinstance(y, types.Integer):
                return test_struct_model_type
        return typer

    # Lowering: build the struct value from the two integer arguments.
    @lower(TestStruct, types.Integer, types.Integer)
    def lower_test_type_ctor(context, builder, sig, args):
        obj = cgutils.create_struct_proxy(
            test_struct_model_type
        )(context, builder)
        obj.x = args[0]
        obj.y = args[1]
        return obj._getvalue()

View File

@@ -0,0 +1,100 @@
"""
Usecases of recursive functions in the CUDA target, many derived from
numba/tests/recursion_usecases.py.
Some functions are compiled at import time, hence a separate module.
"""
from numba import cuda
@cuda.jit("i8(i8)", device=True)
def fib1(n):
if n < 2:
return n
# Note the second call does not use a named argument, unlike the CPU target
# usecase
return fib1(n - 1) + fib1(n - 2)
def make_fib2():
    # Same as fib1, but defined inside a factory so the recursive
    # reference is a closure variable rather than a global.
    @cuda.jit("i8(i8)", device=True)
    def fib2(n):
        if n < 2:
            return n
        return fib2(n - 1) + fib2(n - 2)

    return fib2


fib2 = make_fib2()
# NOTE(review): this is a kernel (no device=True) that returns a value,
# and its recursion can change the inferred type — presumably this
# exercises an error/typing path rather than a runnable kernel; confirm.
@cuda.jit
def type_change_self(x, y):
    if x > 1 and y > 0:
        return x + type_change_self(x - y, y)
    else:
        return y
# Implicit signature
@cuda.jit(device=True)
def fib3(n):
    if n < 2:
        return n
    return fib3(n - 1) + fib3(n - 2)


# Run-away self recursion
@cuda.jit(device=True)
def runaway_self(x):
    # No base case: the recursion never terminates.
    return runaway_self(x)
@cuda.jit(device=True)
def raise_self(x):
    # Positive inputs count down recursively and raise on reaching 1;
    # non-positive inputs return 1 immediately.
    if x == 1:
        raise ValueError("raise_self")
    elif x > 0:
        return raise_self(x - 1)
    else:
        return 1


# debug=True, opt=False so the exception propagates with full information.
@cuda.jit(debug=True, opt=False)
def raise_self_kernel(x):
    raise_self(x)
def make_optional_return_case(jit=lambda x: x):
    """Build a recursive use case whose helper may return None.

    ``jit`` defaults to the identity so the pure-Python behaviour can be
    checked directly; pass a jit decorator to compile the pair.
    """
    @jit
    def foo(x):
        # None for small inputs, x - 1 otherwise.
        if x > 5:
            return x - 1
        return None

    @jit
    def bar(x):
        partial = foo(x)
        # Pass through both the None case and small results unchanged.
        if partial is None or partial < 8:
            return partial
        return x * bar(partial)

    return bar
def make_growing_tuple_case(jit=lambda x: x):
    """Build the issue #4387 use case: recursion yielding nested tuples.

    Each level of recursion grows the result tuple by one nesting level,
    e.g. ``make_list(2) == (2, (1, None))``.
    """
    # From issue #4387
    @jit
    def make_list(n):
        return None if n <= 0 else (n, make_list(n - 1))

    return make_list

View File

@@ -0,0 +1,42 @@
import numpy as np
from numba import from_dtype, cuda
from numba.cuda.testing import skip_on_cudasim, CUDATestCase
import unittest
class TestAlignment(CUDATestCase):
    def test_record_alignment(self):
        """An aligned record array can be read and written by a kernel."""
        rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')], align=True)
        rec = from_dtype(rec_dtype)

        @cuda.jit((rec[:],))
        def foo(a):
            i = cuda.grid(1)
            a[i].a = a[i].b

        # Three records with a == 0 and distinct b values.
        a_recarray = np.recarray(3, dtype=rec_dtype)
        for i in range(a_recarray.size):
            a_rec = a_recarray[i]
            a_rec.a = 0
            a_rec.b = (i + 1) * 123

        foo[1, 3](a_recarray)
        # The kernel copied b into a for every record.
        self.assertTrue(np.all(a_recarray.a == a_recarray.b))

    @skip_on_cudasim('Simulator does not check alignment')
    def test_record_alignment_error(self):
        """Compiling with an unaligned record dtype raises an error."""
        rec_dtype = np.dtype([('a', 'int32'), ('b', 'float64')])
        rec = from_dtype(rec_dtype)
        with self.assertRaises(Exception) as raises:
            @cuda.jit((rec[:],))
            def foo(a):
                i = cuda.grid(1)
                a[i].a = a[i].b
        self.assertTrue('type float64 is not aligned' in str(raises.exception))


if __name__ == '__main__':
    unittest.main()

View File

@@ -0,0 +1,260 @@
import numpy as np
from numba.cuda.testing import unittest, CUDATestCase
from numba.cuda.testing import skip_on_cudasim, skip_unless_cudasim
from numba import config, cuda
# mapped_array_like is omitted when running under the simulator.
if config.ENABLE_CUDASIM:
    ARRAY_LIKE_FUNCTIONS = (cuda.device_array_like, cuda.pinned_array_like)
else:
    ARRAY_LIKE_FUNCTIONS = (cuda.device_array_like, cuda.mapped_array_like,
                            cuda.pinned_array_like)
class TestCudaArray(CUDATestCase):
    """Tests of device array allocation and the *_array_like functions."""

    def test_gpu_array_zero_length(self):
        """Zero-length arrays round-trip host -> device -> host."""
        x = np.arange(0)
        dx = cuda.to_device(x)
        hx = dx.copy_to_host()
        self.assertEqual(x.shape, dx.shape)
        self.assertEqual(x.size, dx.size)
        self.assertEqual(x.shape, hx.shape)
        self.assertEqual(x.size, hx.size)

    def test_null_shape(self):
        """0-d (shape ()) allocations keep their null shape."""
        null_shape = ()
        shape1 = cuda.device_array(()).shape
        shape2 = cuda.device_array_like(np.ndarray(())).shape
        self.assertEqual(shape1, null_shape)
        self.assertEqual(shape2, null_shape)

    def test_gpu_array_strided(self):
        """A misaligned strided view can still be passed to a kernel."""
        @cuda.jit('void(double[:])')
        def kernel(x):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i

        x = np.arange(10, dtype=np.double)
        # Reinterpret as bytes, then carve a double array offset by 4
        # bytes — deliberately not 8-byte aligned.
        y = np.ndarray(shape=10 * 8, buffer=x, dtype=np.byte)
        z = np.ndarray(9, buffer=y[4:-4], dtype=np.double)
        kernel[10, 10](z)
        self.assertTrue(np.allclose(z, list(range(9))))

    def test_gpu_array_interleaved(self):
        """Non-contiguous (interleaved) arrays are rejected by auto_device."""
        @cuda.jit('void(double[:], double[:])')
        def copykernel(x, y):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i
                y[i] = i

        x = np.arange(10, dtype=np.double)
        y = x[:-1:2]
        # z = x[1::2]
        # n = y.size
        try:
            cuda.devicearray.auto_device(y)
        except ValueError:
            pass
        else:
            raise AssertionError("Should raise exception complaining the "
                                 "contiguous-ness of the array.")
        # Should we handle this use case?
        # assert z.size == y.size
        # copykernel[1, n](y, x)
        # print(y, z)
        # assert np.all(y == z)
        # assert np.all(y == list(range(n)))

    def test_auto_device_const(self):
        """Scalar constants are promoted to 0-d device arrays."""
        d, _ = cuda.devicearray.auto_device(2)
        self.assertTrue(np.all(d.copy_to_host() == np.array(2)))

    def _test_array_like_same(self, like_func, array):
        """
        Tests of *_array_like where shape, strides, dtype, and flags should
        all be equal.
        """
        array_like = like_func(array)
        self.assertEqual(array.shape, array_like.shape)
        self.assertEqual(array.strides, array_like.strides)
        self.assertEqual(array.dtype, array_like.dtype)
        self.assertEqual(array.flags['C_CONTIGUOUS'],
                         array_like.flags['C_CONTIGUOUS'])
        self.assertEqual(array.flags['F_CONTIGUOUS'],
                         array_like.flags['F_CONTIGUOUS'])

    def test_array_like_1d(self):
        d_a = cuda.device_array(10, order='C')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_2d(self):
        d_a = cuda.device_array((10, 12), order='C')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_2d_transpose(self):
        d_a = cuda.device_array((10, 12), order='C')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_3d(self):
        d_a = cuda.device_array((10, 12, 14), order='C')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_1d_f(self):
        d_a = cuda.device_array(10, order='F')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_2d_f(self):
        d_a = cuda.device_array((10, 12), order='F')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_2d_f_transpose(self):
        d_a = cuda.device_array((10, 12), order='F')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def test_array_like_3d_f(self):
        d_a = cuda.device_array((10, 12, 14), order='F')
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, d_a)

    def _test_array_like_view(self, like_func, view, d_view):
        """
        Tests of device_array_like where the original array is a view - the
        strides should not be equal because a contiguous array is expected.
        """
        nb_like = like_func(d_view)
        self.assertEqual(d_view.shape, nb_like.shape)
        self.assertEqual(d_view.dtype, nb_like.dtype)

        # Use NumPy as a reference for the expected strides
        np_like = np.zeros_like(view)
        self.assertEqual(nb_like.strides, np_like.strides)
        self.assertEqual(nb_like.flags['C_CONTIGUOUS'],
                         np_like.flags['C_CONTIGUOUS'])
        self.assertEqual(nb_like.flags['F_CONTIGUOUS'],
                         np_like.flags['F_CONTIGUOUS'])

    def test_array_like_1d_view(self):
        shape = 10
        view = np.zeros(shape)[::2]
        d_view = cuda.device_array(shape)[::2]
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    def test_array_like_1d_view_f(self):
        shape = 10
        view = np.zeros(shape, order='F')[::2]
        d_view = cuda.device_array(shape, order='F')[::2]
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    def test_array_like_2d_view(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2]
        d_view = cuda.device_array(shape)[::2, ::2]
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    def test_array_like_2d_view_f(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2]
        d_view = cuda.device_array(shape, order='F')[::2, ::2]
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    @skip_on_cudasim('Numba and NumPy stride semantics differ for transpose')
    def test_array_like_2d_view_transpose_device(self):
        shape = (10, 12)
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # This is a special case (see issue #4974) because creating the
                # transpose creates a new contiguous allocation with different
                # strides. In this case, rather than comparing against NumPy,
                # we can only compare against expected values.
                like = like_func(d_view)
                self.assertEqual(d_view.shape, like.shape)
                self.assertEqual(d_view.dtype, like.dtype)
                self.assertEqual((40, 8), like.strides)
                self.assertTrue(like.flags['C_CONTIGUOUS'])
                self.assertFalse(like.flags['F_CONTIGUOUS'])

    @skip_unless_cudasim('Numba and NumPy stride semantics differ for '
                         'transpose')
    def test_array_like_2d_view_transpose_simulator(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2].T
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # On the simulator, the transpose has different strides to on a
                # CUDA device (See issue #4974). Here we can compare strides
                # against NumPy as a reference.
                np_like = np.zeros_like(view)
                nb_like = like_func(d_view)
                self.assertEqual(d_view.shape, nb_like.shape)
                self.assertEqual(d_view.dtype, nb_like.dtype)
                self.assertEqual(np_like.strides, nb_like.strides)
                self.assertEqual(np_like.flags['C_CONTIGUOUS'],
                                 nb_like.flags['C_CONTIGUOUS'])
                self.assertEqual(np_like.flags['F_CONTIGUOUS'],
                                 nb_like.flags['F_CONTIGUOUS'])

    def test_array_like_2d_view_f_transpose(self):
        shape = (10, 12)
        view = np.zeros(shape, order='F')[::2, ::2].T
        d_view = cuda.device_array(shape, order='F')[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    @skip_on_cudasim('Kernel overloads not created in the simulator')
    def test_issue_4628(self):
        # CUDA Device arrays were reported as always being typed with 'A' order
        # so launching the kernel with a host array and then a device array
        # resulted in two overloads being compiled - one for 'C' order from
        # the host array, and one for 'A' order from the device array. With the
        # resolution of this issue, the order of the device array is also 'C',
        # so after the kernel launches there should only be one overload of
        # the function.
        @cuda.jit
        def func(A, out):
            i = cuda.grid(1)
            out[i] = A[i] * 2

        n = 128
        a = np.ones((n,))
        d_a = cuda.to_device(a)
        result = np.zeros((n,))

        func[1, 128](a, result)
        func[1, 128](d_a, result)
        self.assertEqual(1, len(func.overloads))


if __name__ == '__main__':
    unittest.main()

View File

@@ -0,0 +1,224 @@
import numpy as np
from collections import namedtuple
from numba import cuda
from numba.core.errors import TypingError
from numba.cuda.testing import skip_on_cudasim, unittest, CUDATestCase
class TestCudaArrayArg(CUDATestCase):
    """Tests of passing arrays, tuples and namedtuples as kernel arguments."""

    def test_array_ary(self):
        """Arrays can be passed through to an inlined device function."""
        @cuda.jit('double(double[:],int64)', device=True, inline=True)
        def device_function(a, c):
            return a[c]

        @cuda.jit('void(double[:],double[:])')
        def kernel(x, y):
            i = cuda.grid(1)
            y[i] = device_function(x, i)

        x = np.arange(10, dtype=np.double)
        y = np.zeros_like(x)
        kernel[10, 1](x, y)
        self.assertTrue(np.all(x == y))

    def test_unituple(self):
        """A homogeneous tuple argument is unpacked element-wise."""
        @cuda.jit
        def f(r, x):
            r[0] = x[0]
            r[1] = x[1]
            r[2] = x[2]

        x = (1, 2, 3)
        r = np.zeros(len(x), dtype=np.int64)
        f[1, 1](r, x)

        for i in range(len(x)):
            self.assertEqual(r[i], x[i])

    def test_tuple(self):
        """A heterogeneous (int/float) tuple argument works."""
        @cuda.jit
        def f(r1, r2, x):
            r1[0] = x[0]
            r1[1] = x[1]
            r1[2] = x[2]
            r2[0] = x[3]
            r2[1] = x[4]
            r2[2] = x[5]

        x = (1, 2, 3, 4.5, 5.5, 6.5)
        r1 = np.zeros(len(x) // 2, dtype=np.int64)
        r2 = np.zeros(len(x) // 2, dtype=np.float64)
        f[1, 1](r1, r2, x)

        for i in range(len(r1)):
            self.assertEqual(r1[i], x[i])
        for i in range(len(r2)):
            self.assertEqual(r2[i], x[i + len(r1)])

    def test_namedunituple(self):
        """Homogeneous namedtuple fields are accessible by name."""
        @cuda.jit
        def f(r, x):
            r[0] = x.x
            r[1] = x.y

        Point = namedtuple('Point', ('x', 'y'))
        x = Point(1, 2)
        r = np.zeros(len(x), dtype=np.int64)
        f[1, 1](r, x)

        self.assertEqual(r[0], x.x)
        self.assertEqual(r[1], x.y)

    def test_namedtuple(self):
        """Heterogeneous namedtuple fields are accessible by name."""
        @cuda.jit
        def f(r1, r2, x):
            r1[0] = x.x
            r1[1] = x.y
            r2[0] = x.r

        Point = namedtuple('Point', ('x', 'y', 'r'))
        x = Point(1, 2, 2.236)
        r1 = np.zeros(2, dtype=np.int64)
        r2 = np.zeros(1, dtype=np.float64)
        f[1, 1](r1, r2, x)

        self.assertEqual(r1[0], x.x)
        self.assertEqual(r1[1], x.y)
        self.assertEqual(r2[0], x.r)

    def test_empty_tuple(self):
        @cuda.jit
        def f(r, x):
            r[0] = len(x)

        x = tuple()
        r = np.ones(1, dtype=np.int64)
        f[1, 1](r, x)

        self.assertEqual(r[0], 0)

    def test_tuple_of_empty_tuples(self):
        @cuda.jit
        def f(r, x):
            r[0] = len(x)
            r[1] = len(x[0])

        x = ((), (), ())
        r = np.ones(2, dtype=np.int64)
        f[1, 1](r, x)

        self.assertEqual(r[0], 3)
        self.assertEqual(r[1], 0)

    def test_tuple_of_tuples(self):
        @cuda.jit
        def f(r, x):
            r[0] = len(x)
            r[1] = len(x[0])
            r[2] = len(x[1])
            r[3] = len(x[2])
            r[4] = x[1][0]
            r[5] = x[1][1]
            r[6] = x[2][0]
            r[7] = x[2][1]
            r[8] = x[2][2]

        x = ((), (5, 6), (8, 9, 10))
        r = np.ones(9, dtype=np.int64)
        f[1, 1](r, x)

        self.assertEqual(r[0], 3)
        self.assertEqual(r[1], 0)
        self.assertEqual(r[2], 2)
        self.assertEqual(r[3], 3)
        self.assertEqual(r[4], 5)
        self.assertEqual(r[5], 6)
        self.assertEqual(r[6], 8)
        self.assertEqual(r[7], 9)
        self.assertEqual(r[8], 10)

    def test_tuple_of_tuples_and_scalars(self):
        @cuda.jit
        def f(r, x):
            r[0] = len(x)
            r[1] = len(x[0])
            r[2] = x[0][0]
            r[3] = x[0][1]
            r[4] = x[0][2]
            r[5] = x[1]

        x = ((6, 5, 4), 7)
        # NOTE(review): only r[0:6] are written/checked; size 9 appears to
        # be a copy-paste from test_tuple_of_tuples.
        r = np.ones(9, dtype=np.int64)
        f[1, 1](r, x)

        self.assertEqual(r[0], 2)
        self.assertEqual(r[1], 3)
        self.assertEqual(r[2], 6)
        self.assertEqual(r[3], 5)
        self.assertEqual(r[4], 4)
        self.assertEqual(r[5], 7)

    def test_tuple_of_arrays(self):
        """Arrays inside a tuple argument remain writable."""
        @cuda.jit
        def f(x):
            i = cuda.grid(1)
            if i < len(x[0]):
                x[0][i] = x[1][i] + x[2][i]

        N = 10
        x0 = np.zeros(N)
        x1 = np.ones_like(x0)
        x2 = x1 * 3
        x = (x0, x1, x2)
        f[1, N](x)

        np.testing.assert_equal(x0, x1 + x2)

    def test_tuple_of_array_scalar_tuple(self):
        """A mixed (array, scalar, tuple) argument is unpacked correctly."""
        @cuda.jit
        def f(r, x):
            r[0] = x[0][0]
            r[1] = x[0][1]
            r[2] = x[1]
            r[3] = x[2][0]
            r[4] = x[2][1]

        z = np.arange(2, dtype=np.int64)
        x = (2 * z, 10, (4, 3))
        r = np.zeros(5, dtype=np.int64)
        f[1, 1](r, x)

        self.assertEqual(r[0], 0)
        self.assertEqual(r[1], 2)
        self.assertEqual(r[2], 10)
        self.assertEqual(r[3], 4)
        self.assertEqual(r[4], 3)
class TestDatetimeIssues(CUDATestCase):
    # See also numba.tests.test_npdatetime.TestDatetimeIssues.

    @skip_on_cudasim("Typing not used on cudasim")
    def test_10y_issue_9585(self):
        """A datetime64 dtype with a multiplied unit is rejected in typing."""
        @cuda.jit
        def f(x):
            return x + 1

        arr = np.array('2010', dtype='datetime64[10Y]')
        with self.assertRaises(TypingError) as e:
            f[1, 1](arr)

        # Note that the CUDA target doesn't report which argument caused the
        # exception, so we can't check for it here as we do with the CPU
        # target.
        message = e.exception.args[0]
        unsupported_type = "Unsupported array dtype: datetime64[10Y]"
        self.assertIn(unsupported_type, message)


if __name__ == '__main__':
    unittest.main()

View File

@@ -0,0 +1,35 @@
import numpy as np
from numba import cuda
from numba.cuda.testing import CUDATestCase
import unittest
def reinterpret_array_type(byte_arr, start, stop, output):
    """Reinterpret ``byte_arr[start:stop]`` as int32, writing to output[0].

    Tested with just one thread.
    """
    window = byte_arr[start:stop]
    output[0] = window.view(np.int32)[0]
class TestCudaArrayMethods(CUDATestCase):
    def test_reinterpret_array_type(self):
        """
        Reinterpret byte array as int32 in the GPU.
        """
        pyfunc = reinterpret_array_type
        kernel = cuda.jit(pyfunc)

        byte_arr = np.arange(256, dtype=np.uint8)
        itemsize = np.dtype(np.int32).itemsize
        for start in range(0, 256, itemsize):
            stop = start + itemsize
            # NumPy on the host is the reference for the device result.
            expect = byte_arr[start:stop].view(np.int32)[0]

            output = np.zeros(1, dtype=np.int32)
            kernel[1, 1](byte_arr, start, stop, output)

            got = output[0]
            self.assertEqual(expect, got)


if __name__ == '__main__':
    unittest.main()

View File

@@ -0,0 +1,120 @@
import numpy as np
import math
from numba import cuda, double, void
from numba.cuda.testing import unittest, CUDATestCase
RISKFREE = 0.02
VOLATILITY = 0.30

# Coefficients of the polynomial part of the cumulative-normal
# approximation used by cnd() below.
A1 = 0.31938153
A2 = -0.356563782
A3 = 1.781477937
A4 = -1.821255978
A5 = 1.330274429
RSQRT2PI = 0.39894228040143267793994605993438


def cnd(d):
    """Vectorised cumulative normal distribution approximation of *d*."""
    k = 1.0 / (1.0 + 0.2316419 * np.abs(d))
    pdf = RSQRT2PI * np.exp(-0.5 * d * d)
    poly = k * (A1 + k * (A2 + k * (A3 + k * (A4 + k * A5))))
    tail = pdf * poly
    # The polynomial approximates the lower tail; mirror for positive d.
    return np.where(d > 0, 1.0 - tail, tail)


def black_scholes(callResult, putResult, stockPrice, optionStrike, optionYears,
                  Riskfree, Volatility):
    """Price European call/put options, writing into the result arrays."""
    root_t = np.sqrt(optionYears)
    d1 = ((np.log(stockPrice / optionStrike)
           + (Riskfree + 0.5 * Volatility * Volatility) * optionYears)
          / (Volatility * root_t))
    d2 = d1 - Volatility * root_t
    n_d1 = cnd(d1)
    n_d2 = cnd(d2)
    discount = np.exp(- Riskfree * optionYears)
    callResult[:] = (stockPrice * n_d1 - optionStrike * discount * n_d2)
    putResult[:] = (optionStrike * discount * (1.0 - n_d2)
                    - stockPrice * (1.0 - n_d1))


def randfloat(rand_var, low, high):
    """Affinely map uniform samples in [0, 1] onto [low, high]."""
    return low * (1.0 - rand_var) + high * rand_var
class TestBlackScholes(CUDATestCase):
    def test_blackscholes(self):
        """CUDA Black-Scholes agrees with the NumPy reference pricing."""
        OPT_N = 400
        iterations = 2

        # Random but realistic option parameters.
        stockPrice = randfloat(np.random.random(OPT_N), 5.0, 30.0)
        optionStrike = randfloat(np.random.random(OPT_N), 1.0, 100.0)
        optionYears = randfloat(np.random.random(OPT_N), 0.25, 10.0)

        callResultNumpy = np.zeros(OPT_N)
        putResultNumpy = -np.ones(OPT_N)
        callResultNumba = np.zeros(OPT_N)
        putResultNumba = -np.ones(OPT_N)

        # numpy
        for i in range(iterations):
            black_scholes(callResultNumpy, putResultNumpy, stockPrice,
                          optionStrike, optionYears, RISKFREE, VOLATILITY)

        # Device-side equivalent of cnd(), scalar form.
        @cuda.jit(double(double), device=True, inline=True)
        def cnd_cuda(d):
            K = 1.0 / (1.0 + 0.2316419 * math.fabs(d))
            ret_val = (RSQRT2PI * math.exp(-0.5 * d * d) *
                       (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))))
            if d > 0:
                ret_val = 1.0 - ret_val
            return ret_val

        @cuda.jit(void(double[:], double[:], double[:], double[:], double[:],
                       double, double))
        def black_scholes_cuda(callResult, putResult, S, X, T, R, V):
            i = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
            if i >= S.shape[0]:
                return
            sqrtT = math.sqrt(T[i])
            d1 = ((math.log(S[i] / X[i]) + (R + 0.5 * V * V) * T[i])
                  / (V * sqrtT))
            d2 = d1 - V * sqrtT
            cndd1 = cnd_cuda(d1)
            cndd2 = cnd_cuda(d2)
            expRT = math.exp((-1. * R) * T[i])
            callResult[i] = (S[i] * cndd1 - X[i] * expRT * cndd2)
            putResult[i] = (X[i] * expRT * (1.0 - cndd2) - S[i] * (1.0 - cndd1))

        # numba
        blockdim = 512, 1
        griddim = int(math.ceil(float(OPT_N) / blockdim[0])), 1
        stream = cuda.stream()
        d_callResult = cuda.to_device(callResultNumba, stream)
        d_putResult = cuda.to_device(putResultNumba, stream)
        d_stockPrice = cuda.to_device(stockPrice, stream)
        d_optionStrike = cuda.to_device(optionStrike, stream)
        d_optionYears = cuda.to_device(optionYears, stream)
        for i in range(iterations):
            black_scholes_cuda[griddim, blockdim, stream](
                d_callResult, d_putResult, d_stockPrice, d_optionStrike,
                d_optionYears, RISKFREE, VOLATILITY)
            d_callResult.copy_to_host(callResultNumba, stream)
            d_putResult.copy_to_host(putResultNumba, stream)
            stream.synchronize()

        # Relative L1 norm and max absolute error against the reference.
        delta = np.abs(callResultNumpy - callResultNumba)
        L1norm = delta.sum() / np.abs(callResultNumpy).sum()
        max_abs_err = delta.max()
        self.assertTrue(L1norm < 1e-13)
        self.assertTrue(max_abs_err < 1e-13)


if __name__ == '__main__':
    unittest.main()

View File

@@ -0,0 +1,24 @@
import numpy as np
from numba.cuda.testing import unittest, CUDATestCase
from numba import cuda
def boolean_func(A, vertical):
    """Write 123 to ``A[0]`` if *vertical* is truthy, else 321.

    Fixes the misspelt parameter name ``vertial`` -> ``vertical``. The
    function is only ever invoked positionally (as a compiled kernel),
    so the rename is backward-compatible for callers.
    """
    if vertical:
        A[0] = 123
    else:
        A[0] = 321
class TestCudaBoolean(CUDATestCase):
    def test_boolean(self):
        """A bool_ kernel argument selects the branch taken on device."""
        func = cuda.jit('void(float64[:], bool_)')(boolean_func)
        A = np.array([0], dtype='float64')
        func[1, 1](A, True)
        self.assertTrue(A[0] == 123)
        func[1, 1](A, False)
        self.assertTrue(A[0] == 321)


if __name__ == '__main__':
    unittest.main()

View File

@@ -0,0 +1,545 @@
import multiprocessing
import os
import shutil
import subprocess
import sys
import unittest
import warnings
from numba import cuda
from numba.core.errors import NumbaWarning
from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
skip_unless_cc_60, skip_if_cudadevrt_missing,
skip_if_mvc_enabled, test_data_dir)
from numba.tests.support import SerialMixin
from numba.tests.test_caching import (DispatcherCacheUsecasesTest,
skip_bad_access)
@skip_on_cudasim('Simulator does not implement caching')
class CUDACachingTest(SerialMixin, DispatcherCacheUsecasesTest):
here = os.path.dirname(__file__)
usecases_file = os.path.join(here, "cache_usecases.py")
modname = "cuda_caching_test_fodder"
def setUp(self):
DispatcherCacheUsecasesTest.setUp(self)
CUDATestCase.setUp(self)
def tearDown(self):
CUDATestCase.tearDown(self)
DispatcherCacheUsecasesTest.tearDown(self)
def test_caching(self):
self.check_pycache(0)
mod = self.import_module()
self.check_pycache(0)
f = mod.add_usecase
self.assertPreciseEqual(f(2, 3), 6)
self.check_pycache(2) # 1 index, 1 data
self.assertPreciseEqual(f(2.5, 3), 6.5)
self.check_pycache(3) # 1 index, 2 data
self.check_hits(f.func, 0, 2)
f = mod.record_return_aligned
rec = f(mod.aligned_arr, 1)
self.assertPreciseEqual(tuple(rec), (2, 43.5))
f = mod.record_return_packed
rec = f(mod.packed_arr, 1)
self.assertPreciseEqual(tuple(rec), (2, 43.5))
self.check_pycache(6) # 2 index, 4 data
self.check_hits(f.func, 0, 2)
# Check the code runs ok from another process
self.run_in_separate_process()
def test_no_caching(self):
mod = self.import_module()
f = mod.add_nocache_usecase
self.assertPreciseEqual(f(2, 3), 6)
self.check_pycache(0)
def test_many_locals(self):
# Declaring many local arrays creates a very large LLVM IR, which
# cannot be pickled due to the level of recursion it requires to
# pickle. This test ensures that kernels with many locals (and
# therefore large IR) can be cached. See Issue #8373:
# https://github.com/numba/numba/issues/8373
self.check_pycache(0)
mod = self.import_module()
f = mod.many_locals
f[1, 1]()
self.check_pycache(2) # 1 index, 1 data
def test_closure(self):
mod = self.import_module()
with warnings.catch_warnings():
warnings.simplefilter('error', NumbaWarning)
f = mod.closure1
self.assertPreciseEqual(f(3), 6) # 3 + 3 = 6
f = mod.closure2
self.assertPreciseEqual(f(3), 8) # 3 + 5 = 8
f = mod.closure3
self.assertPreciseEqual(f(3), 10) # 3 + 7 = 10
f = mod.closure4
self.assertPreciseEqual(f(3), 12) # 3 + 9 = 12
self.check_pycache(5) # 1 nbi, 4 nbc
def test_cache_reuse(self):
mod = self.import_module()
mod.add_usecase(2, 3)
mod.add_usecase(2.5, 3.5)
mod.outer_uncached(2, 3)
mod.outer(2, 3)
mod.record_return_packed(mod.packed_arr, 0)
mod.record_return_aligned(mod.aligned_arr, 1)
mod.simple_usecase_caller(2)
mtimes = self.get_cache_mtimes()
# Two signatures compiled
self.check_hits(mod.add_usecase.func, 0, 2)
mod2 = self.import_module()
self.assertIsNot(mod, mod2)
f = mod2.add_usecase
f(2, 3)
self.check_hits(f.func, 1, 0)
f(2.5, 3.5)
self.check_hits(f.func, 2, 0)
# The files haven't changed
self.assertEqual(self.get_cache_mtimes(), mtimes)
self.run_in_separate_process()
self.assertEqual(self.get_cache_mtimes(), mtimes)
def test_cache_invalidate(self):
mod = self.import_module()
f = mod.add_usecase
self.assertPreciseEqual(f(2, 3), 6)
# This should change the functions' results
with open(self.modfile, "a") as f:
f.write("\nZ = 10\n")
mod = self.import_module()
f = mod.add_usecase
self.assertPreciseEqual(f(2, 3), 15)
def test_recompile(self):
# Explicit call to recompile() should overwrite the cache
mod = self.import_module()
f = mod.add_usecase
self.assertPreciseEqual(f(2, 3), 6)
mod = self.import_module()
f = mod.add_usecase
mod.Z = 10
self.assertPreciseEqual(f(2, 3), 6)
f.func.recompile()
self.assertPreciseEqual(f(2, 3), 15)
# Freshly recompiled version is re-used from other imports
mod = self.import_module()
f = mod.add_usecase
self.assertPreciseEqual(f(2, 3), 15)
def test_same_names(self):
# Function with the same names should still disambiguate
mod = self.import_module()
f = mod.renamed_function1
self.assertPreciseEqual(f(2), 4)
f = mod.renamed_function2
self.assertPreciseEqual(f(2), 8)
@skip_unless_cc_60
@skip_if_cudadevrt_missing
@skip_if_mvc_enabled('CG not supported with MVC')
def test_cache_cg(self):
# Functions using cooperative groups should be cacheable. See Issue
# #8888: https://github.com/numba/numba/issues/8888
self.check_pycache(0)
mod = self.import_module()
self.check_pycache(0)
mod.cg_usecase(0)
self.check_pycache(2) # 1 index, 1 data
# Check the code runs ok from another process
self.run_in_separate_process()
@skip_unless_cc_60
@skip_if_cudadevrt_missing
@skip_if_mvc_enabled('CG not supported with MVC')
def test_cache_cg_clean_run(self):
# See Issue #9432: https://github.com/numba/numba/issues/9432
# If a cached function using CG sync was the first thing to compile,
# the compile would fail.
self.check_pycache(0)
# This logic is modelled on run_in_separate_process(), but executes the
# CG usecase directly in the subprocess.
code = """if 1:
import sys
sys.path.insert(0, %(tempdir)r)
mod = __import__(%(modname)r)
mod.cg_usecase(0)
""" % dict(tempdir=self.tempdir, modname=self.modname)
popen = subprocess.Popen([sys.executable, "-c", code],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = popen.communicate(timeout=60)
if popen.returncode != 0:
raise AssertionError(
"process failed with code %s: \n"
"stdout follows\n%s\n"
"stderr follows\n%s\n"
% (popen.returncode, out.decode(), err.decode()),
)
    def _test_pycache_fallback(self):
        """
        With a disabled __pycache__, test there is a working fallback
        (e.g. on the user-wide cache dir)
        """
        mod = self.import_module()
        f = mod.add_usecase
        # Remove this function's cache files at the end, to avoid accumulation
        # across test calls.
        self.addCleanup(shutil.rmtree, f.func.stats.cache_path,
                        ignore_errors=True)

        self.assertPreciseEqual(f(2, 3), 6)
        # It's a cache miss since the file was copied to a new temp location
        self.check_hits(f.func, 0, 1)

        # Test re-use: a second import must load from the fallback location.
        mod2 = self.import_module()
        f = mod2.add_usecase
        self.assertPreciseEqual(f(2, 3), 6)
        self.check_hits(f.func, 1, 0)

        # The __pycache__ is empty (otherwise the test's preconditions
        # wouldn't be met)
        self.check_pycache(0)
@skip_bad_access
@unittest.skipIf(os.name == "nt",
"cannot easily make a directory read-only on Windows")
def test_non_creatable_pycache(self):
# Make it impossible to create the __pycache__ directory
old_perms = os.stat(self.tempdir).st_mode
os.chmod(self.tempdir, 0o500)
self.addCleanup(os.chmod, self.tempdir, old_perms)
self._test_pycache_fallback()
@skip_bad_access
@unittest.skipIf(os.name == "nt",
"cannot easily make a directory read-only on Windows")
def test_non_writable_pycache(self):
# Make it impossible to write to the __pycache__ directory
pycache = os.path.join(self.tempdir, '__pycache__')
os.mkdir(pycache)
old_perms = os.stat(pycache).st_mode
os.chmod(pycache, 0o500)
self.addCleanup(os.chmod, pycache, old_perms)
self._test_pycache_fallback()
def test_cannot_cache_linking_libraries(self):
link = str(test_data_dir / 'jitlink.ptx')
msg = 'Cannot pickle CUDACodeLibrary with linking files'
with self.assertRaisesRegex(RuntimeError, msg):
@cuda.jit('void()', cache=True, link=[link])
def f():
pass
@skip_on_cudasim('Simulator does not implement caching')
class CUDAAndCPUCachingTest(SerialMixin, DispatcherCacheUsecasesTest):
    """Tests that a function jitted for both the CPU and CUDA targets keeps
    a separate cache entry per target, and that both caches are reused."""
    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_with_cpu_usecases.py")
    modname = "cuda_and_cpu_caching_test_fodder"

    def setUp(self):
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_cpu_and_cuda_targets(self):
        # The same function jitted for CPU and CUDA targets should maintain
        # separate caches for each target.
        self.check_pycache(0)
        mod = self.import_module()
        self.check_pycache(0)

        f_cpu = mod.assign_cpu
        f_cuda = mod.assign_cuda
        # Each call with a new (target, signature) pair adds one data file.
        self.assertPreciseEqual(f_cpu(5), 5)
        self.check_pycache(2)  # 1 index, 1 data
        self.assertPreciseEqual(f_cuda(5), 5)
        self.check_pycache(3)  # 1 index, 2 data

        self.check_hits(f_cpu.func, 0, 1)
        self.check_hits(f_cuda.func, 0, 1)

        self.assertPreciseEqual(f_cpu(5.5), 5.5)
        self.check_pycache(4)  # 1 index, 3 data
        self.assertPreciseEqual(f_cuda(5.5), 5.5)
        self.check_pycache(5)  # 1 index, 4 data

        self.check_hits(f_cpu.func, 0, 2)
        self.check_hits(f_cuda.func, 0, 2)

    def test_cpu_and_cuda_reuse(self):
        # Existing cache files for the CPU and CUDA targets are reused.
        mod = self.import_module()
        mod.assign_cpu(5)
        mod.assign_cpu(5.5)
        mod.assign_cuda(5)
        mod.assign_cuda(5.5)

        # Snapshot modification times to prove cache files are not rewritten.
        mtimes = self.get_cache_mtimes()

        # Two signatures compiled
        self.check_hits(mod.assign_cpu.func, 0, 2)
        self.check_hits(mod.assign_cuda.func, 0, 2)

        # A fresh import must hit the caches for both targets.
        mod2 = self.import_module()
        self.assertIsNot(mod, mod2)
        f_cpu = mod2.assign_cpu
        f_cuda = mod2.assign_cuda

        f_cpu(2)
        self.check_hits(f_cpu.func, 1, 0)
        f_cpu(2.5)
        self.check_hits(f_cpu.func, 2, 0)

        f_cuda(2)
        self.check_hits(f_cuda.func, 1, 0)
        f_cuda(2.5)
        self.check_hits(f_cuda.func, 2, 0)

        # The files haven't changed
        self.assertEqual(self.get_cache_mtimes(), mtimes)

        self.run_in_separate_process()
        self.assertEqual(self.get_cache_mtimes(), mtimes)
def get_different_cc_gpus():
    """Return a tuple of two GPUs with distinct Compute Capabilities, or
    None if no such pair exists on this system."""
    reference_gpu = cuda.gpus[0]
    with reference_gpu:
        reference_cc = cuda.current_context().device.compute_capability

    # Scan the remaining GPUs for the first one whose CC differs.
    for candidate in cuda.gpus[1:]:
        with candidate:
            candidate_cc = cuda.current_context().device.compute_capability
        if candidate_cc != reference_cc:
            return (reference_gpu, candidate)

    return None
@skip_on_cudasim('Simulator does not implement caching')
class TestMultiCCCaching(SerialMixin, DispatcherCacheUsecasesTest):
    """Tests of caching when more than one GPU Compute Capability is
    available; requires two GPUs with different CCs to run."""
    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_multi_cc_caching_test_fodder"

    def setUp(self):
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_cache(self):
        # Check cache behaviour across GPUs with different CCs; skips unless
        # two distinct CCs are present.
        gpus = get_different_cc_gpus()
        if not gpus:
            self.skipTest('Need two different CCs for multi-CC cache test')

        self.check_pycache(0)
        mod = self.import_module()
        self.check_pycache(0)

        # Step 1. Populate the cache with the first GPU
        with gpus[0]:
            f = mod.add_usecase
            self.assertPreciseEqual(f(2, 3), 6)
            self.check_pycache(2)  # 1 index, 1 data
            self.assertPreciseEqual(f(2.5, 3), 6.5)
            self.check_pycache(3)  # 1 index, 2 data
            self.check_hits(f.func, 0, 2)

            f = mod.record_return_aligned
            rec = f(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))

            f = mod.record_return_packed
            rec = f(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))
            self.check_pycache(6)  # 2 index, 4 data
            self.check_hits(f.func, 0, 2)

        # Step 2. Run with the second GPU - under present behaviour this
        # doesn't further populate the cache.
        with gpus[1]:
            f = mod.add_usecase
            self.assertPreciseEqual(f(2, 3), 6)
            self.check_pycache(6)  # cache unchanged
            self.assertPreciseEqual(f(2.5, 3), 6.5)
            self.check_pycache(6)  # cache unchanged
            self.check_hits(f.func, 0, 2)

            f = mod.record_return_aligned
            rec = f(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))

            f = mod.record_return_packed
            rec = f(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))
            self.check_pycache(6)  # cache unchanged
            self.check_hits(f.func, 0, 2)

        # Step 3. Run in a separate module with the second GPU - this populates
        # the cache for the second CC.
        mod2 = self.import_module()
        self.assertIsNot(mod, mod2)

        with gpus[1]:
            f = mod2.add_usecase
            self.assertPreciseEqual(f(2, 3), 6)
            self.check_pycache(7)  # 2 index, 5 data
            self.assertPreciseEqual(f(2.5, 3), 6.5)
            self.check_pycache(8)  # 2 index, 6 data
            self.check_hits(f.func, 0, 2)

            f = mod2.record_return_aligned
            rec = f(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))

            f = mod2.record_return_packed
            rec = f(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))
            self.check_pycache(10)  # 2 index, 8 data
            self.check_hits(f.func, 0, 2)

        # The following steps check that we can use the NVVM IR loaded from the
        # cache to generate PTX for a different compute capability to the
        # cached cubin's CC. To check this, we create another module that loads
        # the cached version containing a cubin for GPU 1. There will be no
        # cubin for GPU 0, so when we try to use it the PTX must be generated.
        mod3 = self.import_module()
        self.assertIsNot(mod, mod3)

        # Step 4. Run with GPU 1 and get a cache hit, loading the cache created
        # during Step 3.
        with gpus[1]:
            f = mod3.add_usecase
            self.assertPreciseEqual(f(2, 3), 6)
            self.assertPreciseEqual(f(2.5, 3), 6.5)

            f = mod3.record_return_aligned
            rec = f(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))

            f = mod3.record_return_packed
            rec = f(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))

        # Step 5. Run with GPU 0 using the module from Step 4, to force PTX
        # generation from cached NVVM IR.
        with gpus[0]:
            f = mod3.add_usecase
            self.assertPreciseEqual(f(2, 3), 6)
            self.assertPreciseEqual(f(2.5, 3), 6.5)

            f = mod3.record_return_aligned
            rec = f(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))

            f = mod3.record_return_packed
            rec = f(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(rec), (2, 43.5))
def child_initializer():
    """Initializer run in each worker of a multiprocessing pool.

    Silences occupancy and implicit-copy warnings so child-process output
    stays clean.
    """
    from numba.core import config
    config.CUDA_WARN_ON_IMPLICIT_COPY = 0
    config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
@skip_on_cudasim('Simulator does not implement caching')
class TestMultiprocessCache(SerialMixin, DispatcherCacheUsecasesTest):
    """Tests that the cache is safe to use from several processes at once."""

    # Nested multiprocessing.Pool raises AssertionError:
    # "daemonic processes are not allowed to have children"
    _numba_parallel_test_ = False

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_mp_caching_test_fodder"

    def setUp(self):
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_multiprocessing(self):
        # Check caching works from multiple processes at once (#2028)
        mod = self.import_module()
        # Calling a pure Python caller of the JIT-compiled function is
        # necessary to reproduce the issue.
        f = mod.simple_usecase_caller
        n = 3
        # multiprocessing.get_context() exists on all supported Python
        # versions (3.4+), so the historical AttributeError fallback to the
        # bare module is no longer needed.
        ctx = multiprocessing.get_context('spawn')
        pool = ctx.Pool(n, child_initializer)
        try:
            res = sum(pool.imap(f, range(n)))
        finally:
            pool.close()
            # Wait for workers to exit so no child processes are leaked.
            pool.join()
        self.assertEqual(res, n * (n - 1) // 2)
@skip_on_cudasim('Simulator does not implement the CUDACodeLibrary')
class TestCUDACodeLibrary(CUDATestCase):
    # For tests of miscellaneous CUDACodeLibrary behaviour that we wish to
    # explicitly check

    def test_cannot_serialize_unfinalized(self):
        """An unfinalized CUDACodeLibrary must refuse to be pickled."""
        # The CUDA codegen fails to import under the simulator, so we cannot
        # import it at the top level
        from numba.cuda.codegen import CUDACodeLibrary
        # Usually a CodeLibrary requires a real CodeGen, but since we don't
        # interact with it, anything will do
        codegen = object()
        name = 'library'
        cl = CUDACodeLibrary(codegen, name)
        with self.assertRaisesRegex(RuntimeError, 'Cannot pickle unfinalized'):
            cl._reduce_states()

View File

@@ -0,0 +1,257 @@
import numpy as np
from numba.cuda import compile_ptx
from numba.core.types import f2, i1, i2, i4, i8, u1, u2, u4, u8
from numba import cuda
from numba.core import types
from numba.cuda.testing import (CUDATestCase, skip_on_cudasim,
skip_unless_cc_53)
from numba.types import float16, float32
import itertools
import unittest
def native_cast(x):
    """Convert *x* with the builtin float() cast."""
    converted = float(x)
    return converted
def to_int8(x):
    """Cast *x* to a signed 8-bit integer."""
    result = np.int8(x)
    return result
def to_int16(x):
    """Cast *x* to a signed 16-bit integer."""
    result = np.int16(x)
    return result
def to_int32(x):
    """Cast *x* to a signed 32-bit integer."""
    result = np.int32(x)
    return result
def to_int64(x):
    """Cast *x* to a signed 64-bit integer."""
    result = np.int64(x)
    return result
def to_uint8(x):
    """Cast *x* to an unsigned 8-bit integer."""
    result = np.uint8(x)
    return result
def to_uint16(x):
    """Cast *x* to an unsigned 16-bit integer."""
    result = np.uint16(x)
    return result
def to_uint32(x):
    # Uses the Numba type as a cast, unlike the NumPy constructors in
    # to_uint8/to_uint16 above. NOTE(review): this form is presumably only
    # usable in compiled code (it is exercised solely through compile_ptx in
    # test_float16_to_uint_ptx, never called on the host) -- confirm before
    # unifying with np.uint32.
    return types.uint32(x)
def to_uint64(x):
    # Uses the Numba type as a cast; see the note on to_uint32 -- presumably
    # only usable in compiled code (exercised via compile_ptx only).
    return types.uint64(x)
def to_float16(x):
    # When division and operators on float16 types are supported, this should
    # be changed to match the implementation in to_float32.
    # Multiplying by 0.5 stands in for the division by 2 used in the other
    # to_float* helpers.
    return (np.float16(x) * np.float16(0.5))
def to_float32(x):
    """Cast *x* to float32 and halve it (exercises float32 arithmetic)."""
    halved = np.float32(x) / np.float32(2)
    return halved
def to_float64(x):
    """Cast *x* to float64 and halve it (exercises float64 arithmetic)."""
    halved = np.float64(x) / np.float64(2)
    return halved
def to_complex64(x):
    """Cast *x* to a 64-bit (single-precision) complex number."""
    result = np.complex64(x)
    return result
def to_complex128(x):
    """Cast *x* to a 128-bit (double-precision) complex number."""
    result = np.complex128(x)
    return result
# Since multiplication of float16 is not supported via the operator * on
# float16s yet, and the host does not implement cuda.fp16.*, we need two
# versions of the following functions:
#
# - The device version uses cuda.fp16.hmul
# - The host version uses the * operator
def cuda_int_literal_to_float16(x):
    """Device version: multiply a float16 by an int literal via hmul."""
    # Note that we need to use `2` and not `np.float16(2)` to ensure that this
    # types as a literal int and not a const float16.
    return cuda.fp16.hmul(np.float16(x), 2)
def reference_int_literal_to_float16(x):
    """Host-side reference: float16 multiplication using the * operator."""
    doubled = np.float16(x) * np.float16(2)
    return doubled
def cuda_float_literal_to_float16(x):
    """Device version: multiply a float16 by a float literal via hmul."""
    # Note that `2.5` types as a const float64 and not a literal float, but
    # this case is provided in case that changes in future.
    return cuda.fp16.hmul(np.float16(x), 2.5)
def reference_float_literal_to_float16(x):
    """Host-side reference: float16 multiplication using the * operator."""
    scaled = np.float16(x) * np.float16(2.5)
    return scaled
class TestCasting(CUDATestCase):
    """Tests of scalar casts on the CUDA target, comparing device results
    against host references and checking the PTX cast instructions emitted."""

    def _create_wrapped(self, pyfunc, intype, outtype):
        """Wrap *pyfunc* as a device function callable from the host.

        The returned callable takes a scalar, stores it in a 1-element array
        of dtype *intype*, launches a kernel that applies *pyfunc*, and
        returns the scalar result read from an array of dtype *outtype*.
        """
        wrapped_func = cuda.jit(device=True)(pyfunc)

        @cuda.jit
        def cuda_wrapper_fn(arg, res):
            res[0] = wrapped_func(arg[0])

        def wrapper_fn(arg):
            argarray = np.zeros(1, dtype=intype)
            argarray[0] = arg
            resarray = np.zeros(1, dtype=outtype)
            cuda_wrapper_fn[1, 1](argarray, resarray)
            return resarray[0]

        return wrapper_fn

    @skip_unless_cc_53
    def test_float_to_int(self):
        """Device float -> signed int casts match the host conversions."""
        pyfuncs = (to_int8, to_int16, to_int32, to_int64)
        totys = (np.int8, np.int16, np.int32, np.int64)
        fromtys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            for fromty in fromtys:
                with self.subTest(fromty=fromty, toty=toty):
                    cfunc = self._create_wrapped(pyfunc, fromty, toty)
                    self.assertEqual(cfunc(12.3), pyfunc(12.3))
                    self.assertEqual(cfunc(12.3), int(12.3))
                    self.assertEqual(cfunc(-12.3), pyfunc(-12.3))
                    self.assertEqual(cfunc(-12.3), int(-12.3))

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_float16_to_int_ptx(self):
        """float16 -> signed int casts emit cvt.rni.sN.f16 in PTX."""
        pyfuncs = (to_int8, to_int16, to_int32, to_int64)
        sizes = (8, 16, 32, 64)

        for pyfunc, size in zip(pyfuncs, sizes):
            ptx, _ = compile_ptx(pyfunc, (f2,), device=True)
            self.assertIn(f"cvt.rni.s{size}.f16", ptx)

    @skip_unless_cc_53
    def test_float_to_uint(self):
        """Device float -> unsigned int casts produce the expected values.

        NOTE(review): the signed converters (to_int*) are used here with
        unsigned output dtypes -- presumably because the to_uint32/to_uint64
        variants use Numba type casts and are not host-callable as the
        ``pyfunc(12.3)`` reference requires; confirm before "fixing" this.
        """
        pyfuncs = (to_int8, to_int16, to_int32, to_int64)
        totys = (np.uint8, np.uint16, np.uint32, np.uint64)
        fromtys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            for fromty in fromtys:
                with self.subTest(fromty=fromty, toty=toty):
                    cfunc = self._create_wrapped(pyfunc, fromty, toty)
                    self.assertEqual(cfunc(12.3), pyfunc(12.3))
                    self.assertEqual(cfunc(12.3), int(12.3))

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_float16_to_uint_ptx(self):
        """float16 -> unsigned int casts emit cvt.rni.uN.f16 in PTX."""
        pyfuncs = (to_uint8, to_uint16, to_uint32, to_uint64)
        sizes = (8, 16, 32, 64)

        for pyfunc, size in zip(pyfuncs, sizes):
            ptx, _ = compile_ptx(pyfunc, (f2,), device=True)
            self.assertIn(f"cvt.rni.u{size}.f16", ptx)

    @skip_unless_cc_53
    def test_int_to_float(self):
        """Device int -> float casts match the host conversions."""
        pyfuncs = (to_float16, to_float32, to_float64)
        totys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            with self.subTest(toty=toty):
                cfunc = self._create_wrapped(pyfunc, np.int64, toty)
                self.assertEqual(cfunc(321), pyfunc(321))

    @skip_unless_cc_53
    def test_literal_to_float16(self):
        """Literal operands multiplied with float16 match host references."""
        cudafuncs = (cuda_int_literal_to_float16,
                     cuda_float_literal_to_float16)
        hostfuncs = (reference_int_literal_to_float16,
                     reference_float_literal_to_float16)

        for cudafunc, hostfunc in zip(cudafuncs, hostfuncs):
            with self.subTest(func=cudafunc):
                cfunc = self._create_wrapped(cudafunc, np.float16, np.float16)
                self.assertEqual(cfunc(321), hostfunc(321))

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_int_to_float16_ptx(self):
        """Signed int -> float16 casts emit cvt.rn.f16.sN in PTX."""
        fromtys = (i1, i2, i4, i8)
        sizes = (8, 16, 32, 64)

        for ty, size in zip(fromtys, sizes):
            ptx, _ = compile_ptx(to_float16, (ty,), device=True)
            self.assertIn(f"cvt.rn.f16.s{size}", ptx)

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_uint_to_float16_ptx(self):
        """Unsigned int -> float16 casts emit cvt.rn.f16.uN in PTX."""
        fromtys = (u1, u2, u4, u8)
        sizes = (8, 16, 32, 64)

        for ty, size in zip(fromtys, sizes):
            ptx, _ = compile_ptx(to_float16, (ty,), device=True)
            self.assertIn(f"cvt.rn.f16.u{size}", ptx)

    @skip_unless_cc_53
    def test_float_to_float(self):
        """Device float -> float casts across all width combinations."""
        pyfuncs = (to_float16, to_float32, to_float64)
        tys = (np.float16, np.float32, np.float64)

        for (pyfunc, fromty), toty in itertools.product(zip(pyfuncs, tys),
                                                        tys):
            with self.subTest(fromty=fromty, toty=toty):
                cfunc = self._create_wrapped(pyfunc, fromty, toty)
                # For this test we cannot use the pyfunc for comparison because
                # the CUDA target doesn't yet implement division (or operators)
                # for float16 values, so we test by comparing with the computed
                # expression instead.
                np.testing.assert_allclose(cfunc(12.3),
                                           toty(12.3) / toty(2), rtol=0.0003)
                np.testing.assert_allclose(cfunc(-12.3),
                                           toty(-12.3) / toty(2), rtol=0.0003)

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_float16_to_float_ptx(self):
        """float16 widening casts emit cvt.f32.f16 / cvt.f64.f16 in PTX."""
        pyfuncs = (to_float32, to_float64)
        postfixes = ("f32", "f64")

        for pyfunc, postfix in zip(pyfuncs, postfixes):
            ptx, _ = compile_ptx(pyfunc, (f2,), device=True)
            self.assertIn(f"cvt.{postfix}.f16", ptx)

    @skip_unless_cc_53
    def test_float_to_complex(self):
        """Device float -> complex casts match the host conversions."""
        pyfuncs = (to_complex64, to_complex128)
        totys = (np.complex64, np.complex128)
        fromtys = (np.float16, np.float32, np.float64)

        for pyfunc, toty in zip(pyfuncs, totys):
            for fromty in fromtys:
                with self.subTest(fromty=fromty, toty=toty):
                    cfunc = self._create_wrapped(pyfunc, fromty, toty)
                    # Here we need to explicitly cast the input to the pyfunc
                    # to match the casting that is automatically applied when
                    # passing the input to the cfunc as part of wrapping it in
                    # an array of type fromtype.
                    np.testing.assert_allclose(cfunc(3.21),
                                               pyfunc(fromty(3.21)))
                    np.testing.assert_allclose(cfunc(-3.21),
                                               pyfunc(fromty(-3.21)) + 0j)

    @skip_on_cudasim('Compilation unsupported in the simulator')
    def test_native_cast(self):
        """float() on a device value stores with the argument's width."""
        float32_ptx, _ = cuda.compile_ptx(native_cast, (float32,),
                                          device=True)
        self.assertIn("st.f32", float32_ptx)

        float16_ptx, _ = cuda.compile_ptx(native_cast, (float16,),
                                          device=True)
        self.assertIn("st.u16", float16_ptx)
if __name__ == '__main__':
    # Allow running this test module standalone.
    unittest.main()

Some files were not shown because too many files have changed in this diff Show More