Videre
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,836 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# SPDX-License-Identifier: BSD-2-Clause
|
||||
#
|
||||
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
from llvmlite import ir as lir
|
||||
|
||||
from numba.core import types, typing, utils, ir, config, ir_utils, registry
|
||||
from numba.core.typing.templates import (CallableTemplate, signature,
|
||||
infer_global, AbstractTemplate)
|
||||
from numba.core.imputils import lower_builtin
|
||||
from numba.core.extending import register_jitable
|
||||
from numba.core.errors import NumbaValueError
|
||||
from numba.misc.special import literal_unroll
|
||||
import numba
|
||||
|
||||
import operator
|
||||
from numba.np import numpy_support
|
||||
|
||||
class StencilFuncLowerer(object):
|
||||
'''Callable class responsible for lowering calls to a specific StencilFunc.
|
||||
'''
|
||||
def __init__(self, sf):
|
||||
self.stencilFunc = sf
|
||||
|
||||
def __call__(self, context, builder, sig, args):
|
||||
cres = self.stencilFunc.compile_for_argtys(sig.args, {},
|
||||
sig.return_type, None)
|
||||
res = context.call_internal(builder, cres.fndesc, sig, args)
|
||||
context.add_linking_libs([cres.library])
|
||||
return res
|
||||
|
||||
@register_jitable
|
||||
def raise_if_incompatible_array_sizes(a, *args):
|
||||
ashape = a.shape
|
||||
|
||||
# We need literal_unroll here because the stencil might take
|
||||
# multiple input arrays with different types that are not compatible
|
||||
# (e.g. values as float[:] and flags as bool[:])
|
||||
# When more than three total arrays are given, the second and third
|
||||
# are iterated over in the loop below. Without literal_unroll, their
|
||||
# types have to match.
|
||||
# An example failing signature without literal_unroll might be
|
||||
# (float[:], float[:], bool[:]) (Just (float[:], bool[:]) wouldn't fail)
|
||||
for arg in literal_unroll(args):
|
||||
if a.ndim != arg.ndim:
|
||||
raise ValueError("Secondary stencil array does not have same number "
|
||||
" of dimensions as the first stencil input.")
|
||||
argshape = arg.shape
|
||||
for i in range(len(ashape)):
|
||||
if ashape[i] > argshape[i]:
|
||||
raise ValueError("Secondary stencil array has some dimension "
|
||||
"smaller the same dimension in the first "
|
||||
"stencil input.")
|
||||
|
||||
def slice_addition(the_slice, addend):
|
||||
""" Called by stencil in Python mode to add the loop index to a
|
||||
user-specified slice.
|
||||
"""
|
||||
return slice(the_slice.start + addend, the_slice.stop + addend)
|
||||
|
||||
class StencilFunc(object):
|
||||
"""
|
||||
A special type to hold stencil information for the IR.
|
||||
"""
|
||||
|
||||
id_counter = 0
|
||||
|
||||
def __init__(self, kernel_ir, mode, options):
|
||||
self.id = type(self).id_counter
|
||||
type(self).id_counter += 1
|
||||
self.kernel_ir = kernel_ir
|
||||
self.mode = mode
|
||||
self.options = options
|
||||
self.kws = [] # remember original kws arguments
|
||||
|
||||
# stencils only supported for CPU context currently
|
||||
self._typingctx = registry.cpu_target.typing_context
|
||||
self._targetctx = registry.cpu_target.target_context
|
||||
self._install_type(self._typingctx)
|
||||
self.neighborhood = self.options.get("neighborhood")
|
||||
self._type_cache = {}
|
||||
self._lower_me = StencilFuncLowerer(self)
|
||||
|
||||
def replace_return_with_setitem(self, blocks, index_vars, out_name):
|
||||
"""
|
||||
Find return statements in the IR and replace them with a SetItem
|
||||
call of the value "returned" by the kernel into the result array.
|
||||
Returns the block labels that contained return statements.
|
||||
"""
|
||||
ret_blocks = []
|
||||
|
||||
for label, block in blocks.items():
|
||||
scope = block.scope
|
||||
loc = block.loc
|
||||
new_body = []
|
||||
for stmt in block.body:
|
||||
if isinstance(stmt, ir.Return):
|
||||
ret_blocks.append(label)
|
||||
# If 1D array then avoid the tuple construction.
|
||||
if len(index_vars) == 1:
|
||||
rvar = ir.Var(scope, out_name, loc)
|
||||
ivar = ir.Var(scope, index_vars[0], loc)
|
||||
new_body.append(ir.SetItem(rvar, ivar, stmt.value, loc))
|
||||
else:
|
||||
# Convert the string names of the index variables into
|
||||
# ir.Var's.
|
||||
var_index_vars = []
|
||||
for one_var in index_vars:
|
||||
index_var = ir.Var(scope, one_var, loc)
|
||||
var_index_vars += [index_var]
|
||||
|
||||
s_index_var = scope.redefine("stencil_index", loc)
|
||||
# Build a tuple from the index ir.Var's.
|
||||
tuple_call = ir.Expr.build_tuple(var_index_vars, loc)
|
||||
new_body.append(ir.Assign(tuple_call, s_index_var, loc))
|
||||
rvar = ir.Var(scope, out_name, loc)
|
||||
# Write the return statements original value into
|
||||
# the array using the tuple index.
|
||||
si = ir.SetItem(rvar, s_index_var, stmt.value, loc)
|
||||
new_body.append(si)
|
||||
else:
|
||||
new_body.append(stmt)
|
||||
block.body = new_body
|
||||
return ret_blocks
|
||||
|
||||
def add_indices_to_kernel(self, kernel, index_names, ndim,
|
||||
neighborhood, standard_indexed, typemap, calltypes):
|
||||
"""
|
||||
Transforms the stencil kernel as specified by the user into one
|
||||
that includes each dimension's index variable as part of the getitem
|
||||
calls. So, in effect array[-1] becomes array[index0-1].
|
||||
"""
|
||||
const_dict = {}
|
||||
kernel_consts = []
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("add_indices_to_kernel", ndim, neighborhood)
|
||||
ir_utils.dump_blocks(kernel.blocks)
|
||||
|
||||
if neighborhood is None:
|
||||
need_to_calc_kernel = True
|
||||
else:
|
||||
need_to_calc_kernel = False
|
||||
if len(neighborhood) != ndim:
|
||||
raise NumbaValueError("%d dimensional neighborhood specified "
|
||||
"for %d dimensional input array" %
|
||||
(len(neighborhood), ndim))
|
||||
|
||||
tuple_table = ir_utils.get_tuple_table(kernel.blocks)
|
||||
|
||||
relatively_indexed = set()
|
||||
|
||||
for block in kernel.blocks.values():
|
||||
scope = block.scope
|
||||
loc = block.loc
|
||||
new_body = []
|
||||
for stmt in block.body:
|
||||
if (isinstance(stmt, ir.Assign) and
|
||||
isinstance(stmt.value, ir.Const)):
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("remembering in const_dict", stmt.target.name,
|
||||
stmt.value.value)
|
||||
# Remember consts for use later.
|
||||
const_dict[stmt.target.name] = stmt.value.value
|
||||
if ((isinstance(stmt, ir.Assign)
|
||||
and isinstance(stmt.value, ir.Expr)
|
||||
and stmt.value.op in ['setitem', 'static_setitem']
|
||||
and stmt.value.value.name in kernel.arg_names) or
|
||||
(isinstance(stmt, ir.SetItem)
|
||||
and stmt.target.name in kernel.arg_names)):
|
||||
raise NumbaValueError("Assignments to arrays passed to " \
|
||||
"stencil kernels is not allowed.")
|
||||
if (isinstance(stmt, ir.Assign)
|
||||
and isinstance(stmt.value, ir.Expr)
|
||||
and stmt.value.op in ['getitem', 'static_getitem']
|
||||
and stmt.value.value.name in kernel.arg_names
|
||||
and stmt.value.value.name not in standard_indexed):
|
||||
# We found a getitem from the input array.
|
||||
if stmt.value.op == 'getitem':
|
||||
stmt_index_var = stmt.value.index
|
||||
else:
|
||||
stmt_index_var = stmt.value.index_var
|
||||
# allow static_getitem since rewrite passes are applied
|
||||
#raise ValueError("Unexpected static_getitem in add_indices_to_kernel.")
|
||||
|
||||
relatively_indexed.add(stmt.value.value.name)
|
||||
|
||||
# Store the index used after looking up the variable in
|
||||
# the const dictionary.
|
||||
if need_to_calc_kernel:
|
||||
assert hasattr(stmt_index_var, 'name')
|
||||
|
||||
if stmt_index_var.name in tuple_table:
|
||||
kernel_consts += [tuple_table[stmt_index_var.name]]
|
||||
elif stmt_index_var.name in const_dict:
|
||||
kernel_consts += [const_dict[stmt_index_var.name]]
|
||||
else:
|
||||
raise NumbaValueError("stencil kernel index is not "
|
||||
"constant, 'neighborhood' option required")
|
||||
|
||||
if ndim == 1:
|
||||
# Single dimension always has index variable 'index0'.
|
||||
# tmpvar will hold the real index and is computed by
|
||||
# adding the relative offset in stmt.value.index to
|
||||
# the current absolute location in index0.
|
||||
index_var = ir.Var(scope, index_names[0], loc)
|
||||
tmpvar = scope.redefine("stencil_index", loc)
|
||||
stmt_index_var_typ = typemap[stmt_index_var.name]
|
||||
# If the array is indexed with a slice then we
|
||||
# have to add the index value with a call to
|
||||
# slice_addition.
|
||||
if isinstance(stmt_index_var_typ, types.misc.SliceType):
|
||||
sa_var = scope.redefine("slice_addition", loc)
|
||||
sa_func = numba.njit(slice_addition)
|
||||
sa_func_typ = types.functions.Dispatcher(sa_func)
|
||||
typemap[sa_var.name] = sa_func_typ
|
||||
g_sa = ir.Global("slice_addition", sa_func, loc)
|
||||
new_body.append(ir.Assign(g_sa, sa_var, loc))
|
||||
slice_addition_call = ir.Expr.call(sa_var, [stmt_index_var, index_var], (), loc)
|
||||
calltypes[slice_addition_call] = sa_func_typ.get_call_type(self._typingctx, [stmt_index_var_typ, types.intp], {})
|
||||
new_body.append(ir.Assign(slice_addition_call, tmpvar, loc))
|
||||
new_body.append(ir.Assign(
|
||||
ir.Expr.getitem(stmt.value.value, tmpvar, loc),
|
||||
stmt.target, loc))
|
||||
else:
|
||||
acc_call = ir.Expr.binop(operator.add, stmt_index_var,
|
||||
index_var, loc)
|
||||
new_body.append(ir.Assign(acc_call, tmpvar, loc))
|
||||
new_body.append(ir.Assign(
|
||||
ir.Expr.getitem(stmt.value.value, tmpvar, loc),
|
||||
stmt.target, loc))
|
||||
else:
|
||||
index_vars = []
|
||||
sum_results = []
|
||||
s_index_var = scope.redefine("stencil_index", loc)
|
||||
const_index_vars = []
|
||||
ind_stencils = []
|
||||
|
||||
stmt_index_var_typ = typemap[stmt_index_var.name]
|
||||
# Same idea as above but you have to extract
|
||||
# individual elements out of the tuple indexing
|
||||
# expression and add the corresponding index variable
|
||||
# to them and then reconstitute as a tuple that can
|
||||
# index the array.
|
||||
for dim in range(ndim):
|
||||
tmpvar = scope.redefine("const_index", loc)
|
||||
new_body.append(ir.Assign(ir.Const(dim, loc),
|
||||
tmpvar, loc))
|
||||
const_index_vars += [tmpvar]
|
||||
index_var = ir.Var(scope, index_names[dim], loc)
|
||||
index_vars += [index_var]
|
||||
|
||||
tmpvar = scope.redefine("ind_stencil_index", loc)
|
||||
ind_stencils += [tmpvar]
|
||||
getitemvar = scope.redefine("getitem", loc)
|
||||
getitemcall = ir.Expr.getitem(stmt_index_var,
|
||||
const_index_vars[dim], loc)
|
||||
new_body.append(ir.Assign(getitemcall, getitemvar, loc))
|
||||
# Get the type of this particular part of the index tuple.
|
||||
if isinstance(stmt_index_var_typ, types.ConstSized):
|
||||
one_index_typ = stmt_index_var_typ[dim]
|
||||
else:
|
||||
one_index_typ = stmt_index_var_typ[:]
|
||||
# If the array is indexed with a slice then we
|
||||
# have to add the index value with a call to
|
||||
# slice_addition.
|
||||
if isinstance(one_index_typ, types.misc.SliceType):
|
||||
sa_var = scope.redefine("slice_addition", loc)
|
||||
sa_func = numba.njit(slice_addition)
|
||||
sa_func_typ = types.functions.Dispatcher(sa_func)
|
||||
typemap[sa_var.name] = sa_func_typ
|
||||
g_sa = ir.Global("slice_addition", sa_func, loc)
|
||||
new_body.append(ir.Assign(g_sa, sa_var, loc))
|
||||
slice_addition_call = ir.Expr.call(sa_var, [getitemvar, index_vars[dim]], (), loc)
|
||||
calltypes[slice_addition_call] = sa_func_typ.get_call_type(self._typingctx, [one_index_typ, types.intp], {})
|
||||
new_body.append(ir.Assign(slice_addition_call, tmpvar, loc))
|
||||
else:
|
||||
acc_call = ir.Expr.binop(operator.add, getitemvar,
|
||||
index_vars[dim], loc)
|
||||
new_body.append(ir.Assign(acc_call, tmpvar, loc))
|
||||
|
||||
tuple_call = ir.Expr.build_tuple(ind_stencils, loc)
|
||||
new_body.append(ir.Assign(tuple_call, s_index_var, loc))
|
||||
new_body.append(ir.Assign(
|
||||
ir.Expr.getitem(stmt.value.value,s_index_var,loc),
|
||||
stmt.target,loc))
|
||||
else:
|
||||
new_body.append(stmt)
|
||||
block.body = new_body
|
||||
|
||||
if need_to_calc_kernel:
|
||||
# Find the size of the kernel by finding the maximum absolute value
|
||||
# index used in the kernel specification.
|
||||
neighborhood = [[0,0] for _ in range(ndim)]
|
||||
if len(kernel_consts) == 0:
|
||||
raise NumbaValueError("Stencil kernel with no accesses to "
|
||||
"relatively indexed arrays.")
|
||||
|
||||
for index in kernel_consts:
|
||||
if isinstance(index, tuple) or isinstance(index, list):
|
||||
for i in range(len(index)):
|
||||
te = index[i]
|
||||
if isinstance(te, ir.Var) and te.name in const_dict:
|
||||
te = const_dict[te.name]
|
||||
if isinstance(te, int):
|
||||
neighborhood[i][0] = min(neighborhood[i][0], te)
|
||||
neighborhood[i][1] = max(neighborhood[i][1], te)
|
||||
else:
|
||||
raise NumbaValueError(
|
||||
"stencil kernel index is not constant,"
|
||||
"'neighborhood' option required")
|
||||
index_len = len(index)
|
||||
elif isinstance(index, int):
|
||||
neighborhood[0][0] = min(neighborhood[0][0], index)
|
||||
neighborhood[0][1] = max(neighborhood[0][1], index)
|
||||
index_len = 1
|
||||
else:
|
||||
raise NumbaValueError(
|
||||
"Non-tuple or non-integer used as stencil index.")
|
||||
if index_len != ndim:
|
||||
raise NumbaValueError(
|
||||
"Stencil index does not match array dimensionality.")
|
||||
|
||||
return (neighborhood, relatively_indexed)
|
||||
|
||||
|
||||
def get_return_type(self, argtys):
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("get_return_type", argtys)
|
||||
ir_utils.dump_blocks(self.kernel_ir.blocks)
|
||||
|
||||
if not isinstance(argtys[0], types.npytypes.Array):
|
||||
raise NumbaValueError("The first argument to a stencil kernel must "
|
||||
"be the primary input array.")
|
||||
|
||||
from numba.core import typed_passes
|
||||
typemap, return_type, calltypes, _ = typed_passes.type_inference_stage(
|
||||
self._typingctx,
|
||||
self._targetctx,
|
||||
self.kernel_ir,
|
||||
argtys,
|
||||
None,
|
||||
{})
|
||||
if isinstance(return_type, types.npytypes.Array):
|
||||
raise NumbaValueError(
|
||||
"Stencil kernel must return a scalar and not a numpy array.")
|
||||
|
||||
real_ret = types.npytypes.Array(return_type, argtys[0].ndim,
|
||||
argtys[0].layout)
|
||||
return (real_ret, typemap, calltypes)
|
||||
|
||||
def _install_type(self, typingctx):
|
||||
"""Constructs and installs a typing class for a StencilFunc object in
|
||||
the input typing context.
|
||||
"""
|
||||
_ty_cls = type('StencilFuncTyping_' +
|
||||
str(self.id),
|
||||
(AbstractTemplate,),
|
||||
dict(key=self, generic=self._type_me))
|
||||
typingctx.insert_user_function(self, _ty_cls)
|
||||
|
||||
def compile_for_argtys(self, argtys, kwtys, return_type, sigret):
|
||||
# look in the type cache to find if result array is passed
|
||||
(_, result, typemap, calltypes) = self._type_cache[argtys]
|
||||
new_func = self._stencil_wrapper(result, sigret, return_type,
|
||||
typemap, calltypes, *argtys)
|
||||
return new_func
|
||||
|
||||
def _type_me(self, argtys, kwtys):
|
||||
"""
|
||||
Implement AbstractTemplate.generic() for the typing class
|
||||
built by StencilFunc._install_type().
|
||||
Return the call-site signature.
|
||||
"""
|
||||
if (self.neighborhood is not None and
|
||||
len(self.neighborhood) != argtys[0].ndim):
|
||||
raise NumbaValueError("%d dimensional neighborhood specified "
|
||||
"for %d dimensional input array" %
|
||||
(len(self.neighborhood), argtys[0].ndim))
|
||||
|
||||
argtys_extra = argtys
|
||||
sig_extra = ""
|
||||
result = None
|
||||
if 'out' in kwtys:
|
||||
argtys_extra += (kwtys['out'],)
|
||||
sig_extra += ", out=None"
|
||||
result = kwtys['out']
|
||||
|
||||
if 'neighborhood' in kwtys:
|
||||
argtys_extra += (kwtys['neighborhood'],)
|
||||
sig_extra += ", neighborhood=None"
|
||||
|
||||
# look in the type cache first
|
||||
if argtys_extra in self._type_cache:
|
||||
(_sig, _, _, _) = self._type_cache[argtys_extra]
|
||||
return _sig
|
||||
|
||||
(real_ret, typemap, calltypes) = self.get_return_type(argtys)
|
||||
sig = signature(real_ret, *argtys_extra)
|
||||
dummy_text = ("def __numba_dummy_stencil({}{}):\n pass\n".format(
|
||||
",".join(self.kernel_ir.arg_names), sig_extra))
|
||||
dct = {}
|
||||
exec(dummy_text, dct)
|
||||
dummy_func = dct["__numba_dummy_stencil"]
|
||||
sig = sig.replace(pysig=utils.pysignature(dummy_func))
|
||||
self._targetctx.insert_func_defn([(self._lower_me, self, argtys_extra)])
|
||||
self._type_cache[argtys_extra] = (sig, result, typemap, calltypes)
|
||||
return sig
|
||||
|
||||
def copy_ir_with_calltypes(self, ir, calltypes):
|
||||
"""
|
||||
Create a copy of a given IR along with its calltype information.
|
||||
We need a copy of the calltypes because copy propagation applied
|
||||
to the copied IR will change the calltypes and make subsequent
|
||||
uses of the original IR invalid.
|
||||
"""
|
||||
copy_calltypes = {}
|
||||
kernel_copy = ir.copy()
|
||||
kernel_copy.blocks = {}
|
||||
# For each block...
|
||||
for (block_label, block) in ir.blocks.items():
|
||||
new_block = copy.deepcopy(ir.blocks[block_label])
|
||||
new_block.body = []
|
||||
# For each statement in each block...
|
||||
for stmt in ir.blocks[block_label].body:
|
||||
# Copy the statement to the new copy of the kernel
|
||||
# and if the original statement is in the original
|
||||
# calltypes then add the type associated with this
|
||||
# statement to the calltypes copy.
|
||||
scopy = copy.deepcopy(stmt)
|
||||
new_block.body.append(scopy)
|
||||
if stmt in calltypes:
|
||||
copy_calltypes[scopy] = calltypes[stmt]
|
||||
kernel_copy.blocks[block_label] = new_block
|
||||
return (kernel_copy, copy_calltypes)
|
||||
|
||||
def _stencil_wrapper(self, result, sigret, return_type, typemap, calltypes, *args):
|
||||
# Overall approach:
|
||||
# 1) Construct a string containing a function definition for the stencil function
|
||||
# that will execute the stencil kernel. This function definition includes a
|
||||
# unique stencil function name, the parameters to the stencil kernel, loop
|
||||
# nests across the dimensions of the input array. Those loop nests use the
|
||||
# computed stencil kernel size so as not to try to compute elements where
|
||||
# elements outside the bounds of the input array would be needed.
|
||||
# 2) The but of the loop nest in this new function is a special sentinel
|
||||
# assignment.
|
||||
# 3) Get the IR of this new function.
|
||||
# 4) Split the block containing the sentinel assignment and remove the sentinel
|
||||
# assignment. Insert the stencil kernel IR into the stencil function IR
|
||||
# after label and variable renaming of the stencil kernel IR to prevent
|
||||
# conflicts with the stencil function IR.
|
||||
# 5) Compile the combined stencil function IR + stencil kernel IR into existence.
|
||||
|
||||
# Copy the kernel so that our changes for this callsite
|
||||
# won't effect other callsites.
|
||||
(kernel_copy, copy_calltypes) = self.copy_ir_with_calltypes(
|
||||
self.kernel_ir, calltypes)
|
||||
# The stencil kernel body becomes the body of a loop, for which args aren't needed.
|
||||
ir_utils.remove_args(kernel_copy.blocks)
|
||||
first_arg = kernel_copy.arg_names[0]
|
||||
|
||||
in_cps, out_cps = ir_utils.copy_propagate(kernel_copy.blocks, typemap)
|
||||
name_var_table = ir_utils.get_name_var_table(kernel_copy.blocks)
|
||||
ir_utils.apply_copy_propagate(
|
||||
kernel_copy.blocks,
|
||||
in_cps,
|
||||
name_var_table,
|
||||
typemap,
|
||||
copy_calltypes)
|
||||
|
||||
if "out" in name_var_table:
|
||||
raise NumbaValueError("Cannot use the reserved word 'out' in stencil kernels.")
|
||||
|
||||
sentinel_name = ir_utils.get_unused_var_name("__sentinel__", name_var_table)
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("name_var_table", name_var_table, sentinel_name)
|
||||
|
||||
the_array = args[0]
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("_stencil_wrapper", return_type, return_type.dtype,
|
||||
type(return_type.dtype), args)
|
||||
ir_utils.dump_blocks(kernel_copy.blocks)
|
||||
|
||||
# We generate a Numba function to execute this stencil and here
|
||||
# create the unique name of this function.
|
||||
stencil_func_name = "__numba_stencil_%s_%s" % (
|
||||
hex(id(the_array)).replace("-", "_"),
|
||||
self.id)
|
||||
|
||||
# We will put a loop nest in the generated function for each
|
||||
# dimension in the input array. Here we create the name for
|
||||
# the index variable for each dimension. index0, index1, ...
|
||||
index_vars = []
|
||||
for i in range(the_array.ndim):
|
||||
index_var_name = ir_utils.get_unused_var_name("index" + str(i),
|
||||
name_var_table)
|
||||
index_vars += [index_var_name]
|
||||
|
||||
# Create extra signature for out and neighborhood.
|
||||
out_name = ir_utils.get_unused_var_name("out", name_var_table)
|
||||
neighborhood_name = ir_utils.get_unused_var_name("neighborhood",
|
||||
name_var_table)
|
||||
sig_extra = ""
|
||||
if result is not None:
|
||||
sig_extra += ", {}=None".format(out_name)
|
||||
if "neighborhood" in dict(self.kws):
|
||||
sig_extra += ", {}=None".format(neighborhood_name)
|
||||
|
||||
# Get a list of the standard indexed array names.
|
||||
standard_indexed = self.options.get("standard_indexing", [])
|
||||
|
||||
if first_arg in standard_indexed:
|
||||
raise NumbaValueError("The first argument to a stencil kernel must "
|
||||
"use relative indexing, not standard indexing.")
|
||||
|
||||
if len(set(standard_indexed) - set(kernel_copy.arg_names)) != 0:
|
||||
raise NumbaValueError("Standard indexing requested for an array name "
|
||||
"not present in the stencil kernel definition.")
|
||||
|
||||
# Add index variables to getitems in the IR to transition the accesses
|
||||
# in the kernel from relative to regular Python indexing. Returns the
|
||||
# computed size of the stencil kernel and a list of the relatively indexed
|
||||
# arrays.
|
||||
kernel_size, relatively_indexed = self.add_indices_to_kernel(
|
||||
kernel_copy, index_vars, the_array.ndim,
|
||||
self.neighborhood, standard_indexed, typemap, copy_calltypes)
|
||||
if self.neighborhood is None:
|
||||
self.neighborhood = kernel_size
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("After add_indices_to_kernel")
|
||||
ir_utils.dump_blocks(kernel_copy.blocks)
|
||||
|
||||
# The return in the stencil kernel becomes a setitem for that
|
||||
# particular point in the iteration space.
|
||||
ret_blocks = self.replace_return_with_setitem(kernel_copy.blocks,
|
||||
index_vars, out_name)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("After replace_return_with_setitem", ret_blocks)
|
||||
ir_utils.dump_blocks(kernel_copy.blocks)
|
||||
|
||||
# Start to form the new function to execute the stencil kernel.
|
||||
func_text = "def {}({}{}):\n".format(stencil_func_name,
|
||||
",".join(kernel_copy.arg_names), sig_extra)
|
||||
|
||||
# Get loop ranges for each dimension, which could be either int
|
||||
# or variable. In the latter case we'll use the extra neighborhood
|
||||
# argument to the function.
|
||||
ranges = []
|
||||
for i in range(the_array.ndim):
|
||||
if isinstance(kernel_size[i][0], int):
|
||||
lo = kernel_size[i][0]
|
||||
hi = kernel_size[i][1]
|
||||
else:
|
||||
lo = "{}[{}][0]".format(neighborhood_name, i)
|
||||
hi = "{}[{}][1]".format(neighborhood_name, i)
|
||||
ranges.append((lo, hi))
|
||||
|
||||
# If there are more than one relatively indexed arrays, add a call to
|
||||
# a function that will raise an error if any of the relatively indexed
|
||||
# arrays are of different size than the first input array.
|
||||
if len(relatively_indexed) > 1:
|
||||
func_text += " raise_if_incompatible_array_sizes(" + first_arg
|
||||
for other_array in relatively_indexed:
|
||||
if other_array != first_arg:
|
||||
func_text += "," + other_array
|
||||
func_text += ")\n"
|
||||
|
||||
# Get the shape of the first input array.
|
||||
shape_name = ir_utils.get_unused_var_name("full_shape", name_var_table)
|
||||
func_text += " {} = {}.shape\n".format(shape_name, first_arg)
|
||||
|
||||
# Converts cval to a string constant
|
||||
def cval_as_str(cval):
|
||||
if not np.isfinite(cval):
|
||||
# See if this is a string-repr numerical const, issue #7286
|
||||
if np.isnan(cval):
|
||||
return "np.nan"
|
||||
elif np.isinf(cval):
|
||||
if cval < 0:
|
||||
return "-np.inf"
|
||||
else:
|
||||
return "np.inf"
|
||||
else:
|
||||
return str(cval)
|
||||
|
||||
# If we have to allocate the output array (the out argument was not used)
|
||||
# then us numpy.full if the user specified a cval stencil decorator option
|
||||
# or np.zeros if they didn't to allocate the array.
|
||||
if result is None:
|
||||
return_type_name = numpy_support.as_dtype(
|
||||
return_type.dtype).type.__name__
|
||||
out_init ="{} = np.empty({}, dtype=np.{})\n".format(
|
||||
out_name, shape_name, return_type_name)
|
||||
|
||||
if "cval" in self.options:
|
||||
cval = self.options["cval"]
|
||||
cval_ty = typing.typeof.typeof(cval)
|
||||
if not self._typingctx.can_convert(cval_ty, return_type.dtype):
|
||||
msg = "cval type does not match stencil return type."
|
||||
raise NumbaValueError(msg)
|
||||
else:
|
||||
cval = 0
|
||||
func_text += " " + out_init
|
||||
for dim in range(the_array.ndim):
|
||||
start_items = [":"] * the_array.ndim
|
||||
end_items = [":"] * the_array.ndim
|
||||
start_items[dim] = ":-{}".format(self.neighborhood[dim][0])
|
||||
end_items[dim] = "-{}:".format(self.neighborhood[dim][1])
|
||||
func_text += " " + "{}[{}] = {}\n".format(out_name, ",".join(start_items), cval_as_str(cval))
|
||||
func_text += " " + "{}[{}] = {}\n".format(out_name, ",".join(end_items), cval_as_str(cval))
|
||||
else: # result is present, if cval is set then use it
|
||||
if "cval" in self.options:
|
||||
cval = self.options["cval"]
|
||||
cval_ty = typing.typeof.typeof(cval)
|
||||
if not self._typingctx.can_convert(cval_ty, return_type.dtype):
|
||||
msg = "cval type does not match stencil return type."
|
||||
raise NumbaValueError(msg)
|
||||
out_init = "{}[:] = {}\n".format(out_name, cval_as_str(cval))
|
||||
func_text += " " + out_init
|
||||
|
||||
offset = 1
|
||||
# Add the loop nests to the new function.
|
||||
for i in range(the_array.ndim):
|
||||
for j in range(offset):
|
||||
func_text += " "
|
||||
# ranges[i][0] is the minimum index used in the i'th dimension
|
||||
# but minimum's greater than 0 don't preclude any entry in the array.
|
||||
# So, take the minimum of 0 and the minimum index found in the kernel
|
||||
# and this will be a negative number (potentially -0). Then, we do
|
||||
# unary - on that to get the positive offset in this dimension whose
|
||||
# use is precluded.
|
||||
# ranges[i][1] is the maximum of 0 and the observed maximum index
|
||||
# in this dimension because negative maximums would not cause us to
|
||||
# preclude any entry in the array from being used.
|
||||
func_text += ("for {} in range(-min(0,{}),"
|
||||
"{}[{}]-max(0,{})):\n").format(
|
||||
index_vars[i],
|
||||
ranges[i][0],
|
||||
shape_name,
|
||||
i,
|
||||
ranges[i][1])
|
||||
offset += 1
|
||||
|
||||
for j in range(offset):
|
||||
func_text += " "
|
||||
# Put a sentinel in the code so we can locate it in the IR. We will
|
||||
# remove this sentinel assignment and replace it with the IR for the
|
||||
# stencil kernel body.
|
||||
func_text += "{} = 0\n".format(sentinel_name)
|
||||
func_text += " return {}\n".format(out_name)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("new stencil func text")
|
||||
print(func_text)
|
||||
|
||||
# Force the new stencil function into existence.
|
||||
dct = {}
|
||||
dct.update(globals())
|
||||
exec(func_text, dct)
|
||||
stencil_func = dct[stencil_func_name]
|
||||
if sigret is not None:
|
||||
pysig = utils.pysignature(stencil_func)
|
||||
sigret.pysig = pysig
|
||||
# Get the IR for the newly created stencil function.
|
||||
from numba.core import compiler
|
||||
stencil_ir = compiler.run_frontend(stencil_func)
|
||||
ir_utils.remove_dels(stencil_ir.blocks)
|
||||
|
||||
# rename all variables in stencil_ir afresh
|
||||
var_table = ir_utils.get_name_var_table(stencil_ir.blocks)
|
||||
new_var_dict = {}
|
||||
reserved_names = ([sentinel_name, out_name, neighborhood_name,
|
||||
shape_name] + kernel_copy.arg_names + index_vars)
|
||||
for name, var in var_table.items():
|
||||
if not name in reserved_names:
|
||||
assert isinstance(var, ir.Var)
|
||||
new_var = var.scope.redefine(var.name, var.loc)
|
||||
new_var_dict[name] = new_var.name
|
||||
ir_utils.replace_var_names(stencil_ir.blocks, new_var_dict)
|
||||
|
||||
stencil_stub_last_label = max(stencil_ir.blocks.keys()) + 1
|
||||
|
||||
# Shift labels in the kernel copy so they are guaranteed unique
|
||||
# and don't conflict with any labels in the stencil_ir.
|
||||
kernel_copy.blocks = ir_utils.add_offset_to_labels(
|
||||
kernel_copy.blocks, stencil_stub_last_label)
|
||||
new_label = max(kernel_copy.blocks.keys()) + 1
|
||||
# Adjust ret_blocks to account for addition of the offset.
|
||||
ret_blocks = [x + stencil_stub_last_label for x in ret_blocks]
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("ret_blocks w/ offsets", ret_blocks, stencil_stub_last_label)
|
||||
print("before replace sentinel stencil_ir")
|
||||
ir_utils.dump_blocks(stencil_ir.blocks)
|
||||
print("before replace sentinel kernel_copy")
|
||||
ir_utils.dump_blocks(kernel_copy.blocks)
|
||||
|
||||
# Search all the block in the stencil outline for the sentinel.
|
||||
for label, block in stencil_ir.blocks.items():
|
||||
for i, inst in enumerate(block.body):
|
||||
if (isinstance( inst, ir.Assign) and
|
||||
inst.target.name == sentinel_name):
|
||||
# We found the sentinel assignment.
|
||||
loc = inst.loc
|
||||
scope = block.scope
|
||||
# split block across __sentinel__
|
||||
# A new block is allocated for the statements prior to the
|
||||
# sentinel but the new block maintains the current block
|
||||
# label.
|
||||
prev_block = ir.Block(scope, loc)
|
||||
prev_block.body = block.body[:i]
|
||||
# The current block is used for statements after sentinel.
|
||||
block.body = block.body[i + 1:]
|
||||
# But the current block gets a new label.
|
||||
body_first_label = min(kernel_copy.blocks.keys())
|
||||
|
||||
# The previous block jumps to the minimum labelled block of
|
||||
# the parfor body.
|
||||
prev_block.append(ir.Jump(body_first_label, loc))
|
||||
# Add all the parfor loop body blocks to the gufunc
|
||||
# function's IR.
|
||||
for (l, b) in kernel_copy.blocks.items():
|
||||
stencil_ir.blocks[l] = b
|
||||
|
||||
stencil_ir.blocks[new_label] = block
|
||||
stencil_ir.blocks[label] = prev_block
|
||||
# Add a jump from all the blocks that previously contained
|
||||
# a return in the stencil kernel to the block
|
||||
# containing statements after the sentinel.
|
||||
for ret_block in ret_blocks:
|
||||
stencil_ir.blocks[ret_block].append(
|
||||
ir.Jump(new_label, loc))
|
||||
break
|
||||
else:
|
||||
continue
|
||||
break
|
||||
|
||||
stencil_ir.blocks = ir_utils.rename_labels(stencil_ir.blocks)
|
||||
ir_utils.remove_dels(stencil_ir.blocks)
|
||||
|
||||
assert(isinstance(the_array, types.Type))
|
||||
array_types = args
|
||||
|
||||
new_stencil_param_types = list(array_types)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("new_stencil_param_types", new_stencil_param_types)
|
||||
ir_utils.dump_blocks(stencil_ir.blocks)
|
||||
|
||||
# Compile the combined stencil function with the replaced loop
|
||||
# body in it.
|
||||
ir_utils.fixup_var_define_in_scope(stencil_ir.blocks)
|
||||
new_func = compiler.compile_ir(
|
||||
self._typingctx,
|
||||
self._targetctx,
|
||||
stencil_ir,
|
||||
new_stencil_param_types,
|
||||
None,
|
||||
compiler.DEFAULT_FLAGS,
|
||||
{})
|
||||
return new_func
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
self._typingctx.refresh()
|
||||
if (self.neighborhood is not None and
|
||||
len(self.neighborhood) != args[0].ndim):
|
||||
raise NumbaValueError("{} dimensional neighborhood specified for "
|
||||
"{} dimensional input array".format(
|
||||
len(self.neighborhood), args[0].ndim))
|
||||
|
||||
if 'out' in kwargs:
|
||||
result = kwargs['out']
|
||||
rdtype = result.dtype
|
||||
rttype = numpy_support.from_dtype(rdtype)
|
||||
result_type = types.npytypes.Array(rttype, result.ndim,
|
||||
numpy_support.map_layout(result))
|
||||
array_types = tuple([typing.typeof.typeof(x) for x in args])
|
||||
array_types_full = tuple([typing.typeof.typeof(x) for x in args] +
|
||||
[result_type])
|
||||
else:
|
||||
result = None
|
||||
array_types = tuple([typing.typeof.typeof(x) for x in args])
|
||||
array_types_full = array_types
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("__call__", array_types, args, kwargs)
|
||||
|
||||
(real_ret, typemap, calltypes) = self.get_return_type(array_types)
|
||||
new_func = self._stencil_wrapper(result, None, real_ret, typemap,
|
||||
calltypes, *array_types_full)
|
||||
|
||||
if result is None:
|
||||
return new_func.entry_point(*args)
|
||||
else:
|
||||
return new_func.entry_point(*(args+(result,)))
|
||||
|
||||
def stencil(func_or_mode='constant', **options):
|
||||
# called on function without specifying mode style
|
||||
if not isinstance(func_or_mode, str):
|
||||
mode = 'constant' # default style
|
||||
func = func_or_mode
|
||||
else:
|
||||
mode = func_or_mode
|
||||
func = None
|
||||
|
||||
for option in options:
|
||||
if option not in ["cval", "standard_indexing", "neighborhood"]:
|
||||
raise NumbaValueError("Unknown stencil option " + option)
|
||||
|
||||
wrapper = _stencil(mode, options)
|
||||
if func is not None:
|
||||
return wrapper(func)
|
||||
return wrapper
|
||||
|
||||
def _stencil(mode, options):
|
||||
if mode != 'constant':
|
||||
raise NumbaValueError("Unsupported mode style " + mode)
|
||||
|
||||
def decorated(func):
|
||||
from numba.core import compiler
|
||||
kernel_ir = compiler.run_frontend(func)
|
||||
return StencilFunc(kernel_ir, mode, options)
|
||||
|
||||
return decorated
|
||||
|
||||
@lower_builtin(stencil)
|
||||
def stencil_dummy_lower(context, builder, sig, args):
|
||||
"lowering for dummy stencil calls"
|
||||
return lir.Constant(lir.IntType(types.intp.bitwidth), 0)
|
||||
@@ -0,0 +1,957 @@
|
||||
#
|
||||
# Copyright (c) 2017 Intel Corporation
|
||||
# SPDX-License-Identifier: BSD-2-Clause
|
||||
#
|
||||
|
||||
import numbers
|
||||
import copy
|
||||
import types as pytypes
|
||||
from operator import add
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
|
||||
import numba.parfors.parfor
|
||||
from numba.core import types, ir, rewrites, config, ir_utils
|
||||
from numba.core.typing.templates import infer_global, AbstractTemplate
|
||||
from numba.core.typing import signature
|
||||
from numba.core import utils, typing
|
||||
from numba.core.ir_utils import (get_call_table, mk_unique_var,
|
||||
compile_to_numba_ir, replace_arg_nodes, guard,
|
||||
find_callname, require, find_const, GuardException)
|
||||
from numba.core.errors import NumbaValueError
|
||||
from numba.core.utils import OPERATORS_TO_BUILTINS
|
||||
from numba.np import numpy_support
|
||||
|
||||
|
||||
def _compute_last_ind(dim_size, index_const):
|
||||
if index_const > 0:
|
||||
return dim_size - index_const
|
||||
else:
|
||||
return dim_size
|
||||
|
||||
class StencilPass(object):
|
||||
def __init__(self, func_ir, typemap, calltypes, array_analysis, typingctx,
|
||||
targetctx, flags):
|
||||
self.func_ir = func_ir
|
||||
self.typemap = typemap
|
||||
self.calltypes = calltypes
|
||||
self.array_analysis = array_analysis
|
||||
self.typingctx = typingctx
|
||||
self.targetctx = targetctx
|
||||
self.flags = flags
|
||||
|
||||
def run(self):
|
||||
""" Finds all calls to StencilFuncs in the IR and converts them to parfor.
|
||||
"""
|
||||
from numba.stencils.stencil import StencilFunc
|
||||
|
||||
# Get all the calls in the function IR.
|
||||
call_table, _ = get_call_table(self.func_ir.blocks)
|
||||
stencil_calls = []
|
||||
stencil_dict = {}
|
||||
for call_varname, call_list in call_table.items():
|
||||
for one_call in call_list:
|
||||
if isinstance(one_call, StencilFunc):
|
||||
# Remember all calls to StencilFuncs.
|
||||
stencil_calls.append(call_varname)
|
||||
stencil_dict[call_varname] = one_call
|
||||
if not stencil_calls:
|
||||
return # return early if no stencil calls found
|
||||
|
||||
# find and transform stencil calls
|
||||
for label, block in self.func_ir.blocks.items():
|
||||
for i, stmt in reversed(list(enumerate(block.body))):
|
||||
# Found a call to a StencilFunc.
|
||||
if (isinstance(stmt, ir.Assign)
|
||||
and isinstance(stmt.value, ir.Expr)
|
||||
and stmt.value.op == 'call'
|
||||
and stmt.value.func.name in stencil_calls):
|
||||
kws = dict(stmt.value.kws)
|
||||
# Create dictionary of input argument number to
|
||||
# the argument itself.
|
||||
input_dict = {i: stmt.value.args[i] for i in
|
||||
range(len(stmt.value.args))}
|
||||
in_args = stmt.value.args
|
||||
arg_typemap = tuple(self.typemap[i.name] for i in in_args)
|
||||
for arg_type in arg_typemap:
|
||||
if isinstance(arg_type, types.BaseTuple):
|
||||
raise NumbaValueError("Tuple parameters not " \
|
||||
"supported for stencil " \
|
||||
"kernels in parallel=True " \
|
||||
"mode.")
|
||||
|
||||
out_arr = kws.get('out')
|
||||
|
||||
# Get the StencilFunc object corresponding to this call.
|
||||
sf = stencil_dict[stmt.value.func.name]
|
||||
stencil_ir, rt, arg_to_arr_dict = get_stencil_ir(sf,
|
||||
self.typingctx, arg_typemap,
|
||||
block.scope, block.loc, input_dict,
|
||||
self.typemap, self.calltypes)
|
||||
index_offsets = sf.options.get('index_offsets', None)
|
||||
gen_nodes = self._mk_stencil_parfor(label, in_args, out_arr,
|
||||
stencil_ir, index_offsets, stmt.target, rt, sf,
|
||||
arg_to_arr_dict)
|
||||
block.body = block.body[:i] + gen_nodes + block.body[i+1:]
|
||||
# Found a call to a stencil via numba.stencil().
|
||||
elif (isinstance(stmt, ir.Assign)
|
||||
and isinstance(stmt.value, ir.Expr)
|
||||
and stmt.value.op == 'call'
|
||||
and guard(find_callname, self.func_ir, stmt.value)
|
||||
== ('stencil', 'numba')):
|
||||
# remove dummy stencil() call
|
||||
stmt.value = ir.Const(0, stmt.loc)
|
||||
|
||||
def replace_return_with_setitem(self, blocks, exit_value_var,
|
||||
parfor_body_exit_label):
|
||||
"""
|
||||
Find return statements in the IR and replace them with a SetItem
|
||||
call of the value "returned" by the kernel into the result array.
|
||||
Returns the block labels that contained return statements.
|
||||
"""
|
||||
for label, block in blocks.items():
|
||||
scope = block.scope
|
||||
loc = block.loc
|
||||
new_body = []
|
||||
for stmt in block.body:
|
||||
if isinstance(stmt, ir.Return):
|
||||
# previous stmt should have been a cast
|
||||
prev_stmt = new_body.pop()
|
||||
assert (isinstance(prev_stmt, ir.Assign)
|
||||
and isinstance(prev_stmt.value, ir.Expr)
|
||||
and prev_stmt.value.op == 'cast')
|
||||
|
||||
new_body.append(ir.Assign(prev_stmt.value.value, exit_value_var, loc))
|
||||
new_body.append(ir.Jump(parfor_body_exit_label, loc))
|
||||
else:
|
||||
new_body.append(stmt)
|
||||
block.body = new_body
|
||||
|
||||
def _mk_stencil_parfor(self, label, in_args, out_arr, stencil_ir,
|
||||
index_offsets, target, return_type, stencil_func,
|
||||
arg_to_arr_dict):
|
||||
""" Converts a set of stencil kernel blocks to a parfor.
|
||||
"""
|
||||
gen_nodes = []
|
||||
stencil_blocks = stencil_ir.blocks
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("_mk_stencil_parfor", label, in_args, out_arr, index_offsets,
|
||||
return_type, stencil_func, stencil_blocks)
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
in_arr = in_args[0]
|
||||
# run copy propagate to replace in_args copies (e.g. a = A)
|
||||
in_arr_typ = self.typemap[in_arr.name]
|
||||
in_cps, out_cps = ir_utils.copy_propagate(stencil_blocks, self.typemap)
|
||||
name_var_table = ir_utils.get_name_var_table(stencil_blocks)
|
||||
|
||||
ir_utils.apply_copy_propagate(
|
||||
stencil_blocks,
|
||||
in_cps,
|
||||
name_var_table,
|
||||
self.typemap,
|
||||
self.calltypes)
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("stencil_blocks after copy_propagate")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
ir_utils.remove_dead(stencil_blocks, self.func_ir.arg_names, stencil_ir,
|
||||
self.typemap)
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("stencil_blocks after removing dead code")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
# create parfor vars
|
||||
ndims = self.typemap[in_arr.name].ndim
|
||||
scope = in_arr.scope
|
||||
loc = in_arr.loc
|
||||
parfor_vars = []
|
||||
for i in range(ndims):
|
||||
parfor_var = ir.Var(scope, mk_unique_var(
|
||||
"$parfor_index_var"), loc)
|
||||
self.typemap[parfor_var.name] = types.intp
|
||||
parfor_vars.append(parfor_var)
|
||||
|
||||
start_lengths, end_lengths = self._replace_stencil_accesses(
|
||||
stencil_ir, parfor_vars, in_args, index_offsets, stencil_func,
|
||||
arg_to_arr_dict)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("stencil_blocks after replace stencil accesses")
|
||||
print("start_lengths:", start_lengths)
|
||||
print("end_lengths:", end_lengths)
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
# create parfor loop nests
|
||||
loopnests = []
|
||||
equiv_set = self.array_analysis.get_equiv_set(label)
|
||||
in_arr_dim_sizes = equiv_set.get_shape(in_arr)
|
||||
|
||||
assert ndims == len(in_arr_dim_sizes)
|
||||
start_inds = []
|
||||
last_inds = []
|
||||
for i in range(ndims):
|
||||
last_ind = self._get_stencil_last_ind(in_arr_dim_sizes[i],
|
||||
end_lengths[i], gen_nodes, scope, loc)
|
||||
start_ind = self._get_stencil_start_ind(
|
||||
start_lengths[i], gen_nodes, scope, loc)
|
||||
start_inds.append(start_ind)
|
||||
last_inds.append(last_ind)
|
||||
# start from stencil size to avoid invalid array access
|
||||
loopnests.append(numba.parfors.parfor.LoopNest(parfor_vars[i],
|
||||
start_ind, last_ind, 1))
|
||||
|
||||
# We have to guarantee that the exit block has maximum label and that
|
||||
# there's only one exit block for the parfor body.
|
||||
# So, all return statements will change to jump to the parfor exit block.
|
||||
parfor_body_exit_label = max(stencil_blocks.keys()) + 1
|
||||
stencil_blocks[parfor_body_exit_label] = ir.Block(scope, loc)
|
||||
exit_value_var = ir.Var(scope, mk_unique_var("$parfor_exit_value"), loc)
|
||||
self.typemap[exit_value_var.name] = return_type.dtype
|
||||
|
||||
# create parfor index var
|
||||
for_replacing_ret = []
|
||||
if ndims == 1:
|
||||
parfor_ind_var = parfor_vars[0]
|
||||
else:
|
||||
parfor_ind_var = ir.Var(scope, mk_unique_var(
|
||||
"$parfor_index_tuple_var"), loc)
|
||||
self.typemap[parfor_ind_var.name] = types.containers.UniTuple(
|
||||
types.intp, ndims)
|
||||
tuple_call = ir.Expr.build_tuple(parfor_vars, loc)
|
||||
tuple_assign = ir.Assign(tuple_call, parfor_ind_var, loc)
|
||||
for_replacing_ret.append(tuple_assign)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("stencil_blocks after creating parfor index var")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
# empty init block
|
||||
init_block = ir.Block(scope, loc)
|
||||
if out_arr is None:
|
||||
in_arr_typ = self.typemap[in_arr.name]
|
||||
|
||||
shape_name = ir_utils.mk_unique_var("in_arr_shape")
|
||||
shape_var = ir.Var(scope, shape_name, loc)
|
||||
shape_getattr = ir.Expr.getattr(in_arr, "shape", loc)
|
||||
self.typemap[shape_name] = types.containers.UniTuple(types.intp,
|
||||
in_arr_typ.ndim)
|
||||
init_block.body.extend([ir.Assign(shape_getattr, shape_var, loc)])
|
||||
|
||||
zero_name = ir_utils.mk_unique_var("zero_val")
|
||||
zero_var = ir.Var(scope, zero_name, loc)
|
||||
if "cval" in stencil_func.options:
|
||||
cval = stencil_func.options["cval"]
|
||||
# TODO: Loosen this restriction to adhere to casting rules.
|
||||
cval_ty = typing.typeof.typeof(cval)
|
||||
if not self.typingctx.can_convert(cval_ty, return_type.dtype):
|
||||
raise NumbaValueError("cval type does not match stencil " \
|
||||
"return type.")
|
||||
|
||||
temp2 = return_type.dtype(cval)
|
||||
else:
|
||||
temp2 = return_type.dtype(0)
|
||||
full_const = ir.Const(temp2, loc)
|
||||
self.typemap[zero_name] = return_type.dtype
|
||||
init_block.body.extend([ir.Assign(full_const, zero_var, loc)])
|
||||
|
||||
so_name = ir_utils.mk_unique_var("stencil_output")
|
||||
out_arr = ir.Var(scope, so_name, loc)
|
||||
self.typemap[out_arr.name] = numba.core.types.npytypes.Array(
|
||||
return_type.dtype,
|
||||
in_arr_typ.ndim,
|
||||
in_arr_typ.layout)
|
||||
dtype_g_np_var = ir.Var(scope, mk_unique_var("$np_g_var"), loc)
|
||||
self.typemap[dtype_g_np_var.name] = types.misc.Module(np)
|
||||
dtype_g_np = ir.Global('np', np, loc)
|
||||
dtype_g_np_assign = ir.Assign(dtype_g_np, dtype_g_np_var, loc)
|
||||
init_block.body.append(dtype_g_np_assign)
|
||||
|
||||
return_type_name = numpy_support.as_dtype(
|
||||
return_type.dtype).type.__name__
|
||||
if return_type_name == 'bool':
|
||||
return_type_name = 'bool_'
|
||||
dtype_np_attr_call = ir.Expr.getattr(dtype_g_np_var, return_type_name, loc)
|
||||
dtype_attr_var = ir.Var(scope, mk_unique_var("$np_attr_attr"), loc)
|
||||
self.typemap[dtype_attr_var.name] = types.functions.NumberClass(return_type.dtype)
|
||||
dtype_attr_assign = ir.Assign(dtype_np_attr_call, dtype_attr_var, loc)
|
||||
init_block.body.append(dtype_attr_assign)
|
||||
|
||||
stmts = ir_utils.gen_np_call("empty",
|
||||
np.empty,
|
||||
out_arr,
|
||||
[shape_var, dtype_attr_var],
|
||||
self.typingctx,
|
||||
self.typemap,
|
||||
self.calltypes)
|
||||
# ------------------
|
||||
# Generate the code to fill just the border with zero_var.
|
||||
|
||||
# Generate a none var to use in slicing.
|
||||
none_var = ir.Var(scope, mk_unique_var("$none_var"), loc)
|
||||
none_assign = ir.Assign(ir.Const(None, loc), none_var, loc)
|
||||
stmts.append(none_assign)
|
||||
self.typemap[none_var.name] = types.none
|
||||
# Generate a zero var to use in slicing.
|
||||
zero_index_var = ir.Var(scope, mk_unique_var("$zero_index_var"), loc)
|
||||
zero_index_assign = ir.Assign(ir.Const(0, loc), zero_index_var, loc)
|
||||
stmts.append(zero_index_assign)
|
||||
self.typemap[zero_index_var.name] = types.intp
|
||||
# Generate generic ":" slice.
|
||||
# ---- Generate var to hold slice func var.
|
||||
slice_func_var = ir.Var(scope, mk_unique_var("$slice_func_var"), loc)
|
||||
slice_fn_ty = self.typingctx.resolve_value_type(slice)
|
||||
self.typemap[slice_func_var.name] = slice_fn_ty
|
||||
slice_g = ir.Global('slice', slice, loc)
|
||||
slice_assign = ir.Assign(slice_g, slice_func_var, loc)
|
||||
stmts.append(slice_assign)
|
||||
# ---- Generate call to slice func.
|
||||
sig = self.typingctx.resolve_function_type(slice_fn_ty,
|
||||
(types.none,) * 2,
|
||||
{})
|
||||
slice_callexpr = ir.Expr.call(func=slice_func_var,
|
||||
args=(none_var, none_var),
|
||||
kws=(),
|
||||
loc=loc)
|
||||
self.calltypes[slice_callexpr] = sig
|
||||
# ---- Generate slice var
|
||||
slice_var = ir.Var(scope, mk_unique_var("$slice"), loc)
|
||||
self.typemap[slice_var.name] = types.slice2_type
|
||||
slice_assign = ir.Assign(slice_callexpr, slice_var, loc)
|
||||
stmts.append(slice_assign)
|
||||
|
||||
def handle_border(slice_fn_ty,
|
||||
dim,
|
||||
scope,
|
||||
loc,
|
||||
slice_func_var,
|
||||
stmts,
|
||||
border_inds,
|
||||
border_tuple_items,
|
||||
other_arg,
|
||||
other_first):
|
||||
# Handle the border for start or end of the index range.
|
||||
# ---- Generate call to slice func.
|
||||
sig = self.typingctx.resolve_function_type(
|
||||
slice_fn_ty,
|
||||
(types.intp,) * 2,
|
||||
{})
|
||||
si = border_inds[dim]
|
||||
assert(isinstance(si, (int, ir.Var)))
|
||||
si_var = ir.Var(scope, mk_unique_var("$border_ind"), loc)
|
||||
self.typemap[si_var.name] = types.intp
|
||||
if isinstance(si, int):
|
||||
si_assign = ir.Assign(ir.Const(si, loc), si_var, loc)
|
||||
else:
|
||||
si_assign = ir.Assign(si, si_var, loc)
|
||||
stmts.append(si_assign)
|
||||
|
||||
slice_callexpr = ir.Expr.call(
|
||||
func=slice_func_var,
|
||||
args=(other_arg, si_var) if other_first else (si_var, other_arg),
|
||||
kws=(),
|
||||
loc=loc)
|
||||
self.calltypes[slice_callexpr] = sig
|
||||
# ---- Generate slice var
|
||||
border_slice_var = ir.Var(scope, mk_unique_var("$slice"), loc)
|
||||
self.typemap[border_slice_var.name] = types.slice2_type
|
||||
slice_assign = ir.Assign(slice_callexpr, border_slice_var, loc)
|
||||
stmts.append(slice_assign)
|
||||
|
||||
border_tuple_items[dim] = border_slice_var
|
||||
border_ind_var = ir.Var(scope, mk_unique_var(
|
||||
"$border_index_tuple_var"), loc)
|
||||
self.typemap[border_ind_var.name] = types.containers.UniTuple(
|
||||
types.slice2_type, ndims)
|
||||
tuple_call = ir.Expr.build_tuple(border_tuple_items, loc)
|
||||
tuple_assign = ir.Assign(tuple_call, border_ind_var, loc)
|
||||
stmts.append(tuple_assign)
|
||||
|
||||
setitem_call = ir.SetItem(out_arr, border_ind_var, zero_var, loc)
|
||||
self.calltypes[setitem_call] = signature(
|
||||
types.none, self.typemap[out_arr.name],
|
||||
self.typemap[border_ind_var.name],
|
||||
self.typemap[out_arr.name].dtype
|
||||
)
|
||||
stmts.append(setitem_call)
|
||||
|
||||
# For each dimension, add setitem to set border values.
|
||||
for dim in range(in_arr_typ.ndim):
|
||||
# First, fill all entries with ":".
|
||||
start_tuple_items = [slice_var] * in_arr_typ.ndim
|
||||
last_tuple_items = [slice_var] * in_arr_typ.ndim
|
||||
|
||||
handle_border(slice_fn_ty,
|
||||
dim,
|
||||
scope,
|
||||
loc,
|
||||
slice_func_var,
|
||||
stmts,
|
||||
start_inds,
|
||||
start_tuple_items,
|
||||
zero_index_var,
|
||||
True)
|
||||
handle_border(slice_fn_ty,
|
||||
dim,
|
||||
scope,
|
||||
loc,
|
||||
slice_func_var,
|
||||
stmts,
|
||||
last_inds,
|
||||
last_tuple_items,
|
||||
in_arr_dim_sizes[dim],
|
||||
False)
|
||||
|
||||
# ------------------
|
||||
|
||||
equiv_set.insert_equiv(out_arr, in_arr_dim_sizes)
|
||||
init_block.body.extend(stmts)
|
||||
else: # out is present
|
||||
if "cval" in stencil_func.options: # do out[:] = cval
|
||||
cval = stencil_func.options["cval"]
|
||||
# TODO: Loosen this restriction to adhere to casting rules.
|
||||
cval_ty = typing.typeof.typeof(cval)
|
||||
if not self.typingctx.can_convert(cval_ty, return_type.dtype):
|
||||
msg = "cval type does not match stencil return type."
|
||||
raise NumbaValueError(msg)
|
||||
|
||||
# get slice ref
|
||||
slice_var = ir.Var(scope, mk_unique_var("$py_g_var"), loc)
|
||||
slice_fn_ty = self.typingctx.resolve_value_type(slice)
|
||||
self.typemap[slice_var.name] = slice_fn_ty
|
||||
slice_g = ir.Global('slice', slice, loc)
|
||||
slice_assigned = ir.Assign(slice_g, slice_var, loc)
|
||||
init_block.body.append(slice_assigned)
|
||||
|
||||
sig = self.typingctx.resolve_function_type(slice_fn_ty,
|
||||
(types.none,) * 2,
|
||||
{})
|
||||
|
||||
callexpr = ir.Expr.call(func=slice_var, args=(), kws=(),
|
||||
loc=loc)
|
||||
|
||||
self.calltypes[callexpr] = sig
|
||||
slice_inst_var = ir.Var(scope, mk_unique_var("$slice_inst"),
|
||||
loc)
|
||||
self.typemap[slice_inst_var.name] = types.slice2_type
|
||||
slice_assign = ir.Assign(callexpr, slice_inst_var, loc)
|
||||
init_block.body.append(slice_assign)
|
||||
|
||||
# get const val for cval
|
||||
cval_const_val = ir.Const(return_type.dtype(cval), loc)
|
||||
cval_const_var = ir.Var(scope, mk_unique_var("$cval_const"),
|
||||
loc)
|
||||
self.typemap[cval_const_var.name] = return_type.dtype
|
||||
cval_const_assign = ir.Assign(cval_const_val,
|
||||
cval_const_var, loc)
|
||||
init_block.body.append(cval_const_assign)
|
||||
|
||||
# do setitem on `out` array
|
||||
setitemexpr = ir.StaticSetItem(out_arr, slice(None, None),
|
||||
slice_inst_var, cval_const_var,
|
||||
loc)
|
||||
init_block.body.append(setitemexpr)
|
||||
sig = signature(types.none, self.typemap[out_arr.name],
|
||||
self.typemap[slice_inst_var.name],
|
||||
self.typemap[out_arr.name].dtype)
|
||||
self.calltypes[setitemexpr] = sig
|
||||
|
||||
|
||||
self.replace_return_with_setitem(stencil_blocks, exit_value_var,
|
||||
parfor_body_exit_label)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("stencil_blocks after replacing return")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
setitem_call = ir.SetItem(out_arr, parfor_ind_var, exit_value_var, loc)
|
||||
self.calltypes[setitem_call] = signature(
|
||||
types.none, self.typemap[out_arr.name],
|
||||
self.typemap[parfor_ind_var.name],
|
||||
self.typemap[out_arr.name].dtype
|
||||
)
|
||||
stencil_blocks[parfor_body_exit_label].body.extend(for_replacing_ret)
|
||||
stencil_blocks[parfor_body_exit_label].body.append(setitem_call)
|
||||
|
||||
# simplify CFG of parfor body (exit block could be simplified often)
|
||||
# add dummy return to enable CFG
|
||||
dummy_loc = ir.Loc("stencilparfor_dummy", -1)
|
||||
ret_const_var = ir.Var(scope, mk_unique_var("$cval_const"), dummy_loc)
|
||||
cval_const_assign = ir.Assign(ir.Const(0, loc=dummy_loc), ret_const_var, dummy_loc)
|
||||
stencil_blocks[parfor_body_exit_label].body.append(cval_const_assign)
|
||||
|
||||
stencil_blocks[parfor_body_exit_label].body.append(
|
||||
ir.Return(ret_const_var, dummy_loc),
|
||||
)
|
||||
stencil_blocks = ir_utils.simplify_CFG(stencil_blocks)
|
||||
stencil_blocks[max(stencil_blocks.keys())].body.pop()
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("stencil_blocks after adding SetItem")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
pattern = ('stencil', [start_lengths, end_lengths])
|
||||
parfor = numba.parfors.parfor.Parfor(loopnests, init_block, stencil_blocks,
|
||||
loc, parfor_ind_var, equiv_set, pattern, self.flags)
|
||||
gen_nodes.append(parfor)
|
||||
gen_nodes.append(ir.Assign(out_arr, target, loc))
|
||||
return gen_nodes
|
||||
|
||||
def _get_stencil_last_ind(self, dim_size, end_length, gen_nodes, scope,
|
||||
loc):
|
||||
last_ind = dim_size
|
||||
if end_length != 0:
|
||||
# set last index to size minus stencil size to avoid invalid
|
||||
# memory access
|
||||
index_const = ir.Var(scope, mk_unique_var("stencil_const_var"),
|
||||
loc)
|
||||
self.typemap[index_const.name] = types.intp
|
||||
if isinstance(end_length, numbers.Number):
|
||||
const_assign = ir.Assign(ir.Const(end_length, loc),
|
||||
index_const, loc)
|
||||
else:
|
||||
const_assign = ir.Assign(end_length, index_const, loc)
|
||||
|
||||
gen_nodes.append(const_assign)
|
||||
last_ind = ir.Var(scope, mk_unique_var("last_ind"), loc)
|
||||
self.typemap[last_ind.name] = types.intp
|
||||
|
||||
g_var = ir.Var(scope, mk_unique_var("compute_last_ind_var"), loc)
|
||||
check_func = numba.njit(_compute_last_ind)
|
||||
func_typ = types.functions.Dispatcher(check_func)
|
||||
self.typemap[g_var.name] = func_typ
|
||||
g_obj = ir.Global("_compute_last_ind", check_func, loc)
|
||||
g_assign = ir.Assign(g_obj, g_var, loc)
|
||||
gen_nodes.append(g_assign)
|
||||
index_call = ir.Expr.call(g_var, [dim_size, index_const], (), loc)
|
||||
self.calltypes[index_call] = func_typ.get_call_type(
|
||||
self.typingctx, [types.intp, types.intp], {})
|
||||
index_assign = ir.Assign(index_call, last_ind, loc)
|
||||
gen_nodes.append(index_assign)
|
||||
|
||||
return last_ind
|
||||
|
||||
def _get_stencil_start_ind(self, start_length, gen_nodes, scope, loc):
|
||||
if isinstance(start_length, int):
|
||||
return abs(min(start_length, 0))
|
||||
def get_start_ind(s_length):
|
||||
return abs(min(s_length, 0))
|
||||
f_ir = compile_to_numba_ir(get_start_ind, {}, self.typingctx,
|
||||
self.targetctx, (types.intp,), self.typemap,
|
||||
self.calltypes)
|
||||
assert len(f_ir.blocks) == 1
|
||||
block = f_ir.blocks.popitem()[1]
|
||||
replace_arg_nodes(block, [start_length])
|
||||
gen_nodes += block.body[:-2]
|
||||
ret_var = block.body[-2].value.value
|
||||
return ret_var
|
||||
|
||||
def _replace_stencil_accesses(self, stencil_ir, parfor_vars, in_args,
|
||||
index_offsets, stencil_func, arg_to_arr_dict):
|
||||
""" Convert relative indexing in the stencil kernel to standard indexing
|
||||
by adding the loop index variables to the corresponding dimensions
|
||||
of the array index tuples.
|
||||
"""
|
||||
stencil_blocks = stencil_ir.blocks
|
||||
in_arr = in_args[0]
|
||||
in_arg_names = [x.name for x in in_args]
|
||||
|
||||
if "standard_indexing" in stencil_func.options:
|
||||
for x in stencil_func.options["standard_indexing"]:
|
||||
if x not in arg_to_arr_dict:
|
||||
raise NumbaValueError("Standard indexing requested for " \
|
||||
"an array name not present in the " \
|
||||
"stencil kernel definition.")
|
||||
standard_indexed = [arg_to_arr_dict[x] for x in
|
||||
stencil_func.options["standard_indexing"]]
|
||||
else:
|
||||
standard_indexed = []
|
||||
|
||||
if in_arr.name in standard_indexed:
|
||||
raise NumbaValueError("The first argument to a stencil kernel " \
|
||||
"must use relative indexing, not standard " \
|
||||
"indexing.")
|
||||
|
||||
ndims = self.typemap[in_arr.name].ndim
|
||||
scope = in_arr.scope
|
||||
loc = in_arr.loc
|
||||
# replace access indices, find access lengths in each dimension
|
||||
need_to_calc_kernel = stencil_func.neighborhood is None
|
||||
|
||||
# If we need to infer the kernel size then initialize the minimum and
|
||||
# maximum seen indices for each dimension to 0. If we already have
|
||||
# the neighborhood calculated then just convert from neighborhood format
|
||||
# to the separate start and end lengths format used here.
|
||||
if need_to_calc_kernel:
|
||||
start_lengths = ndims*[0]
|
||||
end_lengths = ndims*[0]
|
||||
else:
|
||||
start_lengths = [x[0] for x in stencil_func.neighborhood]
|
||||
end_lengths = [x[1] for x in stencil_func.neighborhood]
|
||||
|
||||
# Get all the tuples defined in the stencil blocks.
|
||||
tuple_table = ir_utils.get_tuple_table(stencil_blocks)
|
||||
|
||||
found_relative_index = False
|
||||
|
||||
# For all blocks in the stencil kernel...
|
||||
for label, block in stencil_blocks.items():
|
||||
new_body = []
|
||||
# For all statements in those blocks...
|
||||
for stmt in block.body:
|
||||
# Reject assignments to input arrays.
|
||||
if ((isinstance(stmt, ir.Assign)
|
||||
and isinstance(stmt.value, ir.Expr)
|
||||
and stmt.value.op in ['setitem', 'static_setitem']
|
||||
and stmt.value.value.name in in_arg_names) or
|
||||
((isinstance(stmt, ir.SetItem) or
|
||||
isinstance(stmt, ir.StaticSetItem))
|
||||
and stmt.target.name in in_arg_names)):
|
||||
raise NumbaValueError("Assignments to arrays passed to " \
|
||||
"stencil kernels is not allowed.")
|
||||
# We found a getitem for some array. If that array is an input
|
||||
# array and isn't in the list of standard indexed arrays then
|
||||
# update min and max seen indices if we are inferring the
|
||||
# kernel size and create a new tuple where the relative offsets
|
||||
# are added to loop index vars to get standard indexing.
|
||||
if (isinstance(stmt, ir.Assign)
|
||||
and isinstance(stmt.value, ir.Expr)
|
||||
and stmt.value.op in ['static_getitem', 'getitem']
|
||||
and stmt.value.value.name in in_arg_names
|
||||
and stmt.value.value.name not in standard_indexed):
|
||||
index_list = stmt.value.index
|
||||
# handle 1D case
|
||||
if ndims == 1:
|
||||
index_list = [index_list]
|
||||
else:
|
||||
if hasattr(index_list, 'name') and index_list.name in tuple_table:
|
||||
index_list = tuple_table[index_list.name]
|
||||
# indices can be inferred as constant in simple expressions
|
||||
# like -c where c is constant
|
||||
# handled here since this is a common stencil index pattern
|
||||
stencil_ir._definitions = ir_utils.build_definitions(stencil_blocks)
|
||||
index_list = [_get_const_index_expr(
|
||||
stencil_ir, self.func_ir, v) for v in index_list]
|
||||
if index_offsets:
|
||||
index_list = self._add_index_offsets(index_list,
|
||||
list(index_offsets), new_body, scope, loc)
|
||||
|
||||
# update min and max indices
|
||||
if need_to_calc_kernel:
|
||||
# all indices should be integer to be able to calculate
|
||||
# neighborhood automatically
|
||||
if (isinstance(index_list, ir.Var) or
|
||||
any([not isinstance(v, int) for v in index_list])):
|
||||
raise NumbaValueError("Variable stencil index " \
|
||||
"only possible with known " \
|
||||
"neighborhood")
|
||||
start_lengths = list(map(min, start_lengths,
|
||||
index_list))
|
||||
end_lengths = list(map(max, end_lengths, index_list))
|
||||
found_relative_index = True
|
||||
|
||||
# update access indices
|
||||
index_vars = self._add_index_offsets(parfor_vars,
|
||||
list(index_list), new_body, scope, loc)
|
||||
|
||||
# new access index tuple
|
||||
if ndims == 1:
|
||||
ind_var = index_vars[0]
|
||||
else:
|
||||
ind_var = ir.Var(scope, mk_unique_var(
|
||||
"$parfor_index_ind_var"), loc)
|
||||
self.typemap[ind_var.name] = types.containers.UniTuple(
|
||||
types.intp, ndims)
|
||||
tuple_call = ir.Expr.build_tuple(index_vars, loc)
|
||||
tuple_assign = ir.Assign(tuple_call, ind_var, loc)
|
||||
new_body.append(tuple_assign)
|
||||
|
||||
# getitem return type is scalar if all indices are integer
|
||||
if all([self.typemap[v.name] == types.intp
|
||||
for v in index_vars]):
|
||||
getitem_return_typ = self.typemap[
|
||||
stmt.value.value.name].dtype
|
||||
else:
|
||||
# getitem returns an array
|
||||
getitem_return_typ = self.typemap[stmt.value.value.name]
|
||||
# new getitem with the new index var
|
||||
getitem_call = ir.Expr.getitem(stmt.value.value, ind_var,
|
||||
loc)
|
||||
self.calltypes[getitem_call] = signature(
|
||||
getitem_return_typ,
|
||||
self.typemap[stmt.value.value.name],
|
||||
self.typemap[ind_var.name])
|
||||
stmt.value = getitem_call
|
||||
|
||||
new_body.append(stmt)
|
||||
block.body = new_body
|
||||
if need_to_calc_kernel and not found_relative_index:
|
||||
raise NumbaValueError("Stencil kernel with no accesses to " \
|
||||
"relatively indexed arrays.")
|
||||
|
||||
return start_lengths, end_lengths
|
||||
|
||||
def _add_index_offsets(self, index_list, index_offsets, new_body,
|
||||
scope, loc):
|
||||
""" Does the actual work of adding loop index variables to the
|
||||
relative index constants or variables.
|
||||
"""
|
||||
assert len(index_list) == len(index_offsets)
|
||||
|
||||
# shortcut if all values are integer
|
||||
if all([isinstance(v, int) for v in index_list+index_offsets]):
|
||||
# add offsets in all dimensions
|
||||
return list(map(add, index_list, index_offsets))
|
||||
|
||||
out_nodes = []
|
||||
index_vars = []
|
||||
for i in range(len(index_list)):
|
||||
# new_index = old_index + offset
|
||||
old_index_var = index_list[i]
|
||||
if isinstance(old_index_var, int):
|
||||
old_index_var = ir.Var(scope,
|
||||
mk_unique_var("old_index_var"), loc)
|
||||
self.typemap[old_index_var.name] = types.intp
|
||||
const_assign = ir.Assign(ir.Const(index_list[i], loc),
|
||||
old_index_var, loc)
|
||||
out_nodes.append(const_assign)
|
||||
|
||||
offset_var = index_offsets[i]
|
||||
if isinstance(offset_var, int):
|
||||
offset_var = ir.Var(scope,
|
||||
mk_unique_var("offset_var"), loc)
|
||||
self.typemap[offset_var.name] = types.intp
|
||||
const_assign = ir.Assign(ir.Const(index_offsets[i], loc),
|
||||
offset_var, loc)
|
||||
out_nodes.append(const_assign)
|
||||
|
||||
if (isinstance(old_index_var, slice)
|
||||
or isinstance(self.typemap[old_index_var.name],
|
||||
types.misc.SliceType)):
|
||||
# only one arg can be slice
|
||||
assert self.typemap[offset_var.name] == types.intp
|
||||
index_var = self._add_offset_to_slice(old_index_var, offset_var,
|
||||
out_nodes, scope, loc)
|
||||
index_vars.append(index_var)
|
||||
continue
|
||||
|
||||
if (isinstance(offset_var, slice)
|
||||
or isinstance(self.typemap[offset_var.name],
|
||||
types.misc.SliceType)):
|
||||
# only one arg can be slice
|
||||
assert self.typemap[old_index_var.name] == types.intp
|
||||
index_var = self._add_offset_to_slice(offset_var, old_index_var,
|
||||
out_nodes, scope, loc)
|
||||
index_vars.append(index_var)
|
||||
continue
|
||||
|
||||
index_var = ir.Var(scope,
|
||||
mk_unique_var("offset_stencil_index"), loc)
|
||||
self.typemap[index_var.name] = types.intp
|
||||
index_call = ir.Expr.binop(operator.add, old_index_var,
|
||||
offset_var, loc)
|
||||
self.calltypes[index_call] = self.typingctx.resolve_function_type(
|
||||
operator.add, (types.intp, types.intp), {})
|
||||
index_assign = ir.Assign(index_call, index_var, loc)
|
||||
out_nodes.append(index_assign)
|
||||
index_vars.append(index_var)
|
||||
|
||||
new_body.extend(out_nodes)
|
||||
return index_vars
|
||||
|
||||
def _add_offset_to_slice(self, slice_var, offset_var, out_nodes, scope,
|
||||
loc):
|
||||
if isinstance(slice_var, slice):
|
||||
f_text = """def f(offset):
|
||||
return slice({} + offset, {} + offset)
|
||||
""".format(slice_var.start, slice_var.stop)
|
||||
loc = {}
|
||||
exec(f_text, {}, loc)
|
||||
f = loc['f']
|
||||
args = [offset_var]
|
||||
arg_typs = (types.intp,)
|
||||
else:
|
||||
def f(old_slice, offset):
|
||||
return slice(old_slice.start + offset, old_slice.stop + offset)
|
||||
args = [slice_var, offset_var]
|
||||
slice_type = self.typemap[slice_var.name]
|
||||
arg_typs = (slice_type, types.intp,)
|
||||
_globals = self.func_ir.func_id.func.__globals__
|
||||
f_ir = compile_to_numba_ir(f, _globals, self.typingctx, self.targetctx,
|
||||
arg_typs, self.typemap, self.calltypes)
|
||||
_, block = f_ir.blocks.popitem()
|
||||
replace_arg_nodes(block, args)
|
||||
new_index = block.body[-2].value.value
|
||||
out_nodes.extend(block.body[:-2]) # ignore return nodes
|
||||
return new_index
|
||||
|
||||
def get_stencil_ir(sf, typingctx, args, scope, loc, input_dict, typemap,
|
||||
calltypes):
|
||||
"""get typed IR from stencil bytecode
|
||||
"""
|
||||
from numba.core.cpu import CPUContext
|
||||
from numba.core.registry import cpu_target
|
||||
from numba.core.annotations import type_annotations
|
||||
from numba.core.typed_passes import type_inference_stage
|
||||
|
||||
# get untyped IR
|
||||
stencil_func_ir = sf.kernel_ir.copy()
|
||||
# copy the IR nodes to avoid changing IR in the StencilFunc object
|
||||
stencil_blocks = copy.deepcopy(stencil_func_ir.blocks)
|
||||
stencil_func_ir.blocks = stencil_blocks
|
||||
|
||||
name_var_table = ir_utils.get_name_var_table(stencil_func_ir.blocks)
|
||||
if "out" in name_var_table:
|
||||
raise NumbaValueError("Cannot use the reserved word 'out' in stencil " \
|
||||
"kernels.")
|
||||
|
||||
# get typed IR with a dummy pipeline (similar to test_parfors.py)
|
||||
from numba.core.registry import cpu_target
|
||||
targetctx = cpu_target.target_context
|
||||
|
||||
tp = DummyPipeline(typingctx, targetctx, args, stencil_func_ir)
|
||||
|
||||
rewrites.rewrite_registry.apply('before-inference', tp.state)
|
||||
|
||||
tp.state.typemap, tp.state.return_type, tp.state.calltypes, _ = type_inference_stage(
|
||||
tp.state.typingctx, tp.state.targetctx, tp.state.func_ir,
|
||||
tp.state.args, None)
|
||||
|
||||
type_annotations.TypeAnnotation(
|
||||
func_ir=tp.state.func_ir,
|
||||
typemap=tp.state.typemap,
|
||||
calltypes=tp.state.calltypes,
|
||||
lifted=(),
|
||||
lifted_from=None,
|
||||
args=tp.state.args,
|
||||
return_type=tp.state.return_type,
|
||||
html_output=config.HTML)
|
||||
|
||||
# make block labels unique
|
||||
stencil_blocks = ir_utils.add_offset_to_labels(stencil_blocks,
|
||||
ir_utils.next_label())
|
||||
min_label = min(stencil_blocks.keys())
|
||||
max_label = max(stencil_blocks.keys())
|
||||
ir_utils._the_max_label.update(max_label)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("Initial stencil_blocks")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
# rename variables,
|
||||
var_dict = {}
|
||||
for v, typ in tp.state.typemap.items():
|
||||
new_var = ir.Var(scope, mk_unique_var(v), loc)
|
||||
var_dict[v] = new_var
|
||||
typemap[new_var.name] = typ # add new var type for overall function
|
||||
ir_utils.replace_vars(stencil_blocks, var_dict)
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("After replace_vars")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
# add call types to overall function
|
||||
for call, call_typ in tp.state.calltypes.items():
|
||||
calltypes[call] = call_typ
|
||||
|
||||
arg_to_arr_dict = {}
|
||||
# replace arg with arr
|
||||
for block in stencil_blocks.values():
|
||||
for stmt in block.body:
|
||||
if isinstance(stmt, ir.Assign) and isinstance(stmt.value, ir.Arg):
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("input_dict", input_dict, stmt.value.index,
|
||||
stmt.value.name, stmt.value.index in input_dict)
|
||||
arg_to_arr_dict[stmt.value.name] = input_dict[stmt.value.index].name
|
||||
stmt.value = input_dict[stmt.value.index]
|
||||
|
||||
if config.DEBUG_ARRAY_OPT >= 1:
|
||||
print("arg_to_arr_dict", arg_to_arr_dict)
|
||||
print("After replace arg with arr")
|
||||
ir_utils.dump_blocks(stencil_blocks)
|
||||
|
||||
ir_utils.remove_dels(stencil_blocks)
|
||||
stencil_func_ir.blocks = stencil_blocks
|
||||
return stencil_func_ir, sf.get_return_type(args)[0], arg_to_arr_dict
|
||||
|
||||
class DummyPipeline(object):
|
||||
def __init__(self, typingctx, targetctx, args, f_ir):
|
||||
from numba.core.compiler import StateDict
|
||||
self.state = StateDict()
|
||||
self.state.typingctx = typingctx
|
||||
self.state.targetctx = targetctx
|
||||
self.state.args = args
|
||||
self.state.func_ir = f_ir
|
||||
self.state.typemap = None
|
||||
self.state.return_type = None
|
||||
self.state.calltypes = None
|
||||
|
||||
|
||||
def _get_const_index_expr(stencil_ir, func_ir, index_var):
|
||||
"""
|
||||
infer index_var as constant if it is of a expression form like c-1 where c
|
||||
is a constant in the outer function.
|
||||
index_var is assumed to be inside stencil kernel
|
||||
"""
|
||||
const_val = guard(
|
||||
_get_const_index_expr_inner, stencil_ir, func_ir, index_var)
|
||||
if const_val is not None:
|
||||
return const_val
|
||||
return index_var
|
||||
|
||||
def _get_const_index_expr_inner(stencil_ir, func_ir, index_var):
|
||||
"""inner constant inference function that calls constant, unary and binary
|
||||
cases.
|
||||
"""
|
||||
require(isinstance(index_var, ir.Var))
|
||||
# case where the index is a const itself in outer function
|
||||
var_const = guard(_get_const_two_irs, stencil_ir, func_ir, index_var)
|
||||
if var_const is not None:
|
||||
return var_const
|
||||
# get index definition
|
||||
index_def = ir_utils.get_definition(stencil_ir, index_var)
|
||||
# match inner_var = unary(index_var)
|
||||
var_const = guard(
|
||||
_get_const_unary_expr, stencil_ir, func_ir, index_def)
|
||||
if var_const is not None:
|
||||
return var_const
|
||||
# match inner_var = arg1 + arg2
|
||||
var_const = guard(
|
||||
_get_const_binary_expr, stencil_ir, func_ir, index_def)
|
||||
if var_const is not None:
|
||||
return var_const
|
||||
raise GuardException
|
||||
|
||||
def _get_const_two_irs(ir1, ir2, var):
|
||||
"""get constant in either of two IRs if available
|
||||
otherwise, throw GuardException
|
||||
"""
|
||||
var_const = guard(find_const, ir1, var)
|
||||
if var_const is not None:
|
||||
return var_const
|
||||
var_const = guard(find_const, ir2, var)
|
||||
if var_const is not None:
|
||||
return var_const
|
||||
raise GuardException
|
||||
|
||||
def _get_const_unary_expr(stencil_ir, func_ir, index_def):
|
||||
"""evaluate constant unary expr if possible
|
||||
otherwise, raise GuardException
|
||||
"""
|
||||
require(isinstance(index_def, ir.Expr) and index_def.op == 'unary')
|
||||
inner_var = index_def.value
|
||||
# return -c as constant
|
||||
const_val = _get_const_index_expr_inner(stencil_ir, func_ir, inner_var)
|
||||
op = OPERATORS_TO_BUILTINS[index_def.fn]
|
||||
return eval("{}{}".format(op, const_val))
|
||||
|
||||
def _get_const_binary_expr(stencil_ir, func_ir, index_def):
|
||||
"""evaluate constant binary expr if possible
|
||||
otherwise, raise GuardException
|
||||
"""
|
||||
require(isinstance(index_def, ir.Expr) and index_def.op == 'binop')
|
||||
arg1 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.lhs)
|
||||
arg2 = _get_const_index_expr_inner(stencil_ir, func_ir, index_def.rhs)
|
||||
op = OPERATORS_TO_BUILTINS[index_def.fn]
|
||||
return eval("{}{}{}".format(arg1, op, arg2))
|
||||
Reference in New Issue
Block a user