blob: 23ce02e87c9471b1fdb92506d17eda7e78fba48f [file] [log] [blame]
#! /usr/bin/python
#
# Copyright (c) 2015 Advanced Micro Devices, Inc.
# All rights reserved.
#
# For use for simulation and test purposes only
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# Author: Steve Reinhardt
#
from __future__ import print_function
import sys, re
from m5.util import code_formatter
if len(sys.argv) != 4:
print("Error: need 3 args (file names)")
sys.exit(0)
header_code = code_formatter()
decoder_code = code_formatter()
exec_code = code_formatter()
###############
#
# Generate file prologs (includes etc.)
#
###############
header_code('''
#include "arch/hsail/insts/decl.hh"
#include "base/bitfield.hh"
#include "gpu-compute/hsail_code.hh"
#include "gpu-compute/wavefront.hh"
namespace HsailISA
{
''')
header_code.indent()
decoder_code('''
#include "arch/hsail/gpu_decoder.hh"
#include "arch/hsail/insts/branch.hh"
#include "arch/hsail/insts/decl.hh"
#include "arch/hsail/insts/gen_decl.hh"
#include "arch/hsail/insts/mem.hh"
#include "arch/hsail/insts/mem_impl.hh"
#include "gpu-compute/brig_object.hh"
namespace HsailISA
{
std::vector<GPUStaticInst*> Decoder::decodedInsts;
GPUStaticInst*
Decoder::decode(MachInst machInst)
{
using namespace Brig;
const BrigInstBase *ib = machInst.brigInstBase;
const BrigObject *obj = machInst.brigObj;
switch(ib->opcode) {
''')
decoder_code.indent()
decoder_code.indent()
exec_code('''
#include "arch/hsail/insts/gen_decl.hh"
#include "base/intmath.hh"
namespace HsailISA
{
''')
exec_code.indent()
###############
#
# Define code templates for class declarations (for header file)
#
###############
# Basic header template for an instruction stub.
header_template_stub = '''
class $class_name : public $base_class
{
public:
typedef $base_class Base;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
# Basic header template for an instruction with no template parameters.
header_template_nodt = '''
class $class_name : public $base_class
{
public:
typedef $base_class Base;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
# Basic header template for an instruction with a single DataType
# template parameter.
header_template_1dt = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
public:
typedef $base_class<DataType> Base;
typedef typename DataType::CType CType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
header_template_1dt_noexec = '''
template<typename DataType>
class $class_name : public $base_class<DataType>
{
public:
typedef $base_class<DataType> Base;
typedef typename DataType::CType CType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
};
'''
# Same as header_template_1dt, except the base class has a second
# template parameter NumSrcOperands to allow a variable number of
# source operands. Note that since this is implemented with an array,
# it only works for instructions where all sources are of the same
# type (like most arithmetics).
header_template_1dt_varsrcs = '''
template<typename DataType>
class $class_name : public $base_class<DataType, $num_srcs>
{
public:
typedef $base_class<DataType, $num_srcs> Base;
typedef typename DataType::CType CType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
# Header template for instruction with two DataType template
# parameters, one for the dest and one for the source. This is used
# by compare and convert.
header_template_2dt = '''
template<typename DestDataType, class SrcDataType>
class $class_name : public $base_class<DestDataType, SrcDataType>
{
public:
typedef $base_class<DestDataType, SrcDataType> Base;
typedef typename DestDataType::CType DestCType;
typedef typename SrcDataType::CType SrcCType;
$class_name(const Brig::BrigInstBase *ib, const BrigObject *obj)
: Base(ib, obj, "$opcode")
{
}
void execute(GPUDynInstPtr gpuDynInst);
};
'''
header_templates = {
'ArithInst': header_template_1dt_varsrcs,
'CmovInst': header_template_1dt,
'ClassInst': header_template_1dt,
'ShiftInst': header_template_1dt,
'ExtractInsertInst': header_template_1dt,
'CmpInst': header_template_2dt,
'CvtInst': header_template_2dt,
'PopcountInst': header_template_2dt,
'LdInst': '',
'StInst': '',
'SpecialInstNoSrc': header_template_nodt,
'SpecialInst1Src': header_template_nodt,
'SpecialInstNoSrcNoDest': '',
'Stub': header_template_stub,
}
###############
#
# Define code templates for exec functions
#
###############
# exec function body
exec_template_stub = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble());
}
'''
exec_template_nodt_nosrc = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef Base::DestCType DestCType;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestCType dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_nodt_1src = '''
void
$class_name::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef Base::DestCType DestCType;
typedef Base::SrcCType SrcCType;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
SrcCType src_val0 = this->src0.get<SrcCType>(w, lane);
DestCType dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_1dt_varsrcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<CType>(w, lane);
}
CType src_val[$num_srcs];
for (int i = 0; i < $num_srcs; ++i) {
src_val[i] = this->src[i].template get<CType>(w, lane);
}
dest_val = (CType)($expr);
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_1dt_3srcs = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef typename Base::Src0CType Src0T;
typedef typename Base::Src1CType Src1T;
typedef typename Base::Src2CType Src2T;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<CType>(w, lane);
}
Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
Src2T src_val2 = this->src2.template get<Src2T>(w, lane);
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_1dt_2src_1dest = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
typedef typename Base::DestCType DestT;
typedef CType Src0T;
typedef typename Base::Src1CType Src1T;
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestT dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<DestT>(w, lane);
}
Src0T src_val0 = this->src0.template get<Src0T>(w, lane);
Src1T src_val1 = this->src1.template get<Src1T>(w, lane);
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_shift = '''
template<typename DataType>
void
$class_name<DataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
if ($dest_is_src_flag) {
dest_val = this->dest.template get<CType>(w, lane);
}
CType src_val0 = this->src0.template get<CType>(w, lane);
uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane);
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_template_2dt = '''
template<typename DestDataType, class SrcDataType>
void
$class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
DestCType dest_val;
SrcCType src_val[$num_srcs];
for (int i = 0; i < $num_srcs; ++i) {
src_val[i] = this->src[i].template get<SrcCType>(w, lane);
}
dest_val = $expr;
this->dest.set(w, lane, dest_val);
}
}
}
'''
exec_templates = {
'ArithInst': exec_template_1dt_varsrcs,
'CmovInst': exec_template_1dt_3srcs,
'ExtractInsertInst': exec_template_1dt_3srcs,
'ClassInst': exec_template_1dt_2src_1dest,
'CmpInst': exec_template_2dt,
'CvtInst': exec_template_2dt,
'PopcountInst': exec_template_2dt,
'LdInst': '',
'StInst': '',
'SpecialInstNoSrc': exec_template_nodt_nosrc,
'SpecialInst1Src': exec_template_nodt_1src,
'SpecialInstNoSrcNoDest': '',
'Stub': exec_template_stub,
}
###############
#
# Define code templates for the decoder cases
#
###############
# decode template for nodt-opcode case
decode_nodt_template = '''
case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);'''
decode_case_prolog_class_inst = '''
case BRIG_OPCODE_$brig_opcode_upper:
{
//const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]);
BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType;
//switch (baseOp->kind) {
// case BRIG_OPERAND_REG:
// type = ((const BrigOperandReg*)baseOp)->type;
// break;
// case BRIG_OPERAND_IMMED:
// type = ((const BrigOperandImmed*)baseOp)->type;
// break;
// default:
// fatal("CLASS unrecognized kind of operand %d\\n",
// baseOp->kind);
//}
switch (type) {'''
# common prolog for 1dt- or 2dt-opcode case: switch on data type
decode_case_prolog = '''
case BRIG_OPCODE_$brig_opcode_upper:
{
switch (ib->type) {'''
# single-level decode case entry (for 1dt opcodes)
decode_case_entry = \
' case BRIG_TYPE_$type_name: return $constructor(ib, obj);'
decode_store_prolog = \
' case BRIG_TYPE_$type_name: {'
decode_store_case_epilog = '''
}'''
decode_store_case_entry = \
' return $constructor(ib, obj);'
# common epilog for type switch
decode_case_epilog = '''
default: fatal("$brig_opcode_upper: unrecognized type %d\\n",
ib->type);
}
}
break;'''
# Additional templates for nested decode on a second type field (for
# compare and convert). These are used in place of the
# decode_case_entry template to create a second-level switch on on the
# second type field inside each case of the first-level type switch.
# Because the name and location of the second type can vary, the Brig
# instruction type must be provided in $brig_type, and the name of the
# second type field must be provided in $type_field.
decode_case2_prolog = '''
case BRIG_TYPE_$type_name:
switch (((Brig$brig_type*)ib)->$type2_field) {'''
decode_case2_entry = \
' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);'
decode_case2_epilog = '''
default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n",
((Brig$brig_type*)ib)->$type2_field);
}
break;'''
# Figure out how many source operands an expr needs by looking for the
# highest-numbered srcN value referenced. Since sources are numbered
# starting at 0, the return value is N+1.
def num_src_operands(expr):
if expr.find('src2') != -1:
return 3
elif expr.find('src1') != -1:
return 2
elif expr.find('src0') != -1:
return 1
else:
return 0
###############
#
# Define final code generation methods
#
# The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for
# generating actual instructions.
#
###############
# Generate class declaration, exec function, and decode switch case
# for an brig_opcode with a single-level type switch. The 'types'
# parameter is a list or tuple of types for which the instruction
# should be instantiated.
def gen(brig_opcode, types=None, expr=None, base_class='ArithInst',
type2_info=None, constructor_prefix='new ', is_store=False):
brig_opcode_upper = brig_opcode.upper()
class_name = brig_opcode
opcode = class_name.lower()
if base_class == 'ArithInst':
# note that expr must be provided with ArithInst so we can
# derive num_srcs for the template
assert expr
if expr:
# Derive several bits of info from expr. If expr is not used,
# this info will be irrelevant.
num_srcs = num_src_operands(expr)
# if the RHS expression includes 'dest', then we're doing an RMW
# on the reg and we need to treat it like a source
dest_is_src = expr.find('dest') != -1
dest_is_src_flag = str(dest_is_src).lower() # for C++
if base_class in ['ShiftInst']:
expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']:
expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr)
else:
expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr)
expr = re.sub(r'\bdest\b', r'dest_val', expr)
# Strip template arguments off of base class before looking up
# appropriate templates
base_class_base = re.sub(r'<.*>$', '', base_class)
header_code(header_templates[base_class_base])
if base_class.startswith('SpecialInst') or base_class.startswith('Stub'):
exec_code(exec_templates[base_class_base])
elif base_class.startswith('ShiftInst'):
header_code(exec_template_shift)
else:
header_code(exec_templates[base_class_base])
if not types or isinstance(types, str):
# Just a single type
constructor = constructor_prefix + class_name
decoder_code(decode_nodt_template)
else:
# multiple types, need at least one level of decode
if brig_opcode == 'Class':
decoder_code(decode_case_prolog_class_inst)
else:
decoder_code(decode_case_prolog)
if not type2_info:
if not is_store:
# single list of types, to basic one-level decode
for type_name in types:
full_class_name = '%s<%s>' % (class_name, type_name.upper())
constructor = constructor_prefix + full_class_name
decoder_code(decode_case_entry)
else:
# single list of types, to basic one-level decode
for type_name in types:
decoder_code(decode_store_prolog)
type_size = int(re.findall(r'[0-9]+', type_name)[0])
src_size = 32
type_type = type_name[0]
full_class_name = '%s<%s,%s>' % (class_name, \
type_name.upper(), \
'%s%d' % \
(type_type.upper(), \
type_size))
constructor = constructor_prefix + full_class_name
decoder_code(decode_store_case_entry)
decoder_code(decode_store_case_epilog)
else:
# need secondary type switch (convert, compare)
# unpack extra info on second switch
(type2_field, types2) = type2_info
brig_type = 'Inst%s' % brig_opcode
for type_name in types:
decoder_code(decode_case2_prolog)
fmt = '%s<%s,%%s>' % (class_name, type_name.upper())
for type2_name in types2:
full_class_name = fmt % type2_name.upper()
constructor = constructor_prefix + full_class_name
decoder_code(decode_case2_entry)
decoder_code(decode_case2_epilog)
decoder_code(decode_case_epilog)
###############
#
# Generate instructions
#
###############
# handy abbreviations for common sets of types
# arithmetic ops are typically defined only on 32- and 64-bit sizes
arith_int_types = ('S32', 'U32', 'S64', 'U64')
arith_float_types = ('F32', 'F64')
arith_types = arith_int_types + arith_float_types
bit_types = ('B1', 'B32', 'B64')
all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types
# I think you might be able to do 'f16' memory ops too, but we'll
# ignore them for now.
mem_types = all_int_types + arith_float_types
mem_atom_types = all_int_types + ('B32', 'B64')
##### Arithmetic & logical operations
gen('Add', arith_types, 'src0 + src1')
gen('Sub', arith_types, 'src0 - src1')
gen('Mul', arith_types, 'src0 * src1')
gen('Div', arith_types, 'src0 / src1')
gen('Min', arith_types, 'std::min(src0, src1)')
gen('Max', arith_types, 'std::max(src0, src1)')
gen('Gcnmin', arith_types, 'std::min(src0, src1)')
gen('CopySign', arith_float_types,
'src1 < 0 ? -std::abs(src0) : std::abs(src0)')
gen('Sqrt', arith_float_types, 'sqrt(src0)')
gen('Floor', arith_float_types, 'floor(src0)')
# "fast" sqrt... same as slow for us
gen('Nsqrt', arith_float_types, 'sqrt(src0)')
gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)')
gen('Nrcp', arith_float_types, '1.0/src0')
gen('Fract', arith_float_types,
'(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)')
gen('Ncos', arith_float_types, 'cos(src0)');
gen('Nsin', arith_float_types, 'sin(src0)');
gen('And', bit_types, 'src0 & src1')
gen('Or', bit_types, 'src0 | src1')
gen('Xor', bit_types, 'src0 ^ src1')
gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)')
gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \
('sourceType', ('B32', 'B64')))
gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst')
gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst')
# gen('Mul_hi', types=('s32','u32', '??'))
# gen('Mul24', types=('s32','u32', '??'))
gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)')
gen('Abs', arith_types, 'std::abs(src0)')
gen('Neg', arith_types, '-src0')
gen('Mov', bit_types + arith_types, 'src0')
gen('Not', bit_types, 'heynot(src0)')
# mad and fma differ only in rounding behavior, which we don't emulate
# also there's an integer form of mad, but not of fma
gen('Mad', arith_types, 'src0 * src1 + src2')
gen('Fma', arith_float_types, 'src0 * src1 + src2')
#native floating point operations
gen('Nfma', arith_float_types, 'src0 * src1 + src2')
gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst')
gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))')
gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))')
# see base/bitfield.hh
gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)',
'ExtractInsertInst')
gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)',
'ExtractInsertInst')
##### Compare
gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)',
'CmpInst', ('sourceType', arith_types + bit_types))
gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst')
##### Conversion
# Conversion operations are only defined on B1, not B32 or B64
cvt_types = ('B1',) + mem_types
gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types))
##### Load & Store
gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode')
gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode',
is_store=True)
gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode')
gen('AtomicNoRet', mem_atom_types, base_class='StInst',
constructor_prefix='decode')
gen('Cbr', base_class = 'LdInst', constructor_prefix='decode')
gen('Br', base_class = 'LdInst', constructor_prefix='decode')
##### Special operations
def gen_special(brig_opcode, expr, dest_type='U32'):
num_srcs = num_src_operands(expr)
if num_srcs == 0:
base_class = 'SpecialInstNoSrc<%s>' % dest_type
elif num_srcs == 1:
base_class = 'SpecialInst1Src<%s>' % dest_type
else:
assert false
gen(brig_opcode, None, expr, base_class)
gen_special('WorkItemId', 'w->workItemId[src0][lane]')
gen_special('WorkItemAbsId',
'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])')
gen_special('WorkGroupId', 'w->workGroupId[src0]')
gen_special('WorkGroupSize', 'w->workGroupSz[src0]')
gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]')
gen_special('GridSize', 'w->gridSz[src0]')
gen_special('GridGroups',
'divCeil(w->gridSz[src0],w->workGroupSz[src0])')
gen_special('LaneId', 'lane')
gen_special('WaveId', 'w->wfId')
gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64')
# gen_special('CU'', ')
gen('Ret', base_class='SpecialInstNoSrcNoDest')
gen('Barrier', base_class='SpecialInstNoSrcNoDest')
gen('MemFence', base_class='SpecialInstNoSrcNoDest')
# Map magic instructions to the BrigSyscall opcode
# Magic instructions are defined in magic.hh
#
# In the future, real HSA kernel system calls can be implemented and coexist
# with magic instructions.
gen('Call', base_class='SpecialInstNoSrcNoDest')
# Stubs for unimplemented instructions:
# These may need to be implemented at some point in the future, but
# for now we just match the instructions with their operands.
#
# By defining stubs for these instructions, we can work with
# applications that have them in dead/unused code paths.
#
# Needed for rocm-hcc compilations for HSA backends since
# builtins-hsail library is `cat`d onto the generated kernels.
# The builtins-hsail library consists of handcoded hsail functions
# that __might__ be needed by the rocm-hcc compiler in certain binaries.
gen('Bitmask', base_class='Stub')
gen('Bitrev', base_class='Stub')
gen('Firstbit', base_class='Stub')
gen('Lastbit', base_class='Stub')
gen('Unpacklo', base_class='Stub')
gen('Unpackhi', base_class='Stub')
gen('Pack', base_class='Stub')
gen('Unpack', base_class='Stub')
gen('Lerp', base_class='Stub')
gen('Packcvt', base_class='Stub')
gen('Unpackcvt', base_class='Stub')
gen('Sad', base_class='Stub')
gen('Sadhi', base_class='Stub')
gen('Activelanecount', base_class='Stub')
gen('Activelaneid', base_class='Stub')
gen('Activelanemask', base_class='Stub')
gen('Activelanepermute', base_class='Stub')
gen('Groupbaseptr', base_class='Stub')
gen('Signalnoret', base_class='Stub')
###############
#
# Generate file epilogs
#
###############
header_code('''
template<>
inline void
Abs<U32>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;
src_val = this->src[0].template get<CType>(w, lane);
dest_val = (CType)(src_val);
this->dest.set(w, lane, dest_val);
}
}
}
template<>
inline void
Abs<U64>::execute(GPUDynInstPtr gpuDynInst)
{
Wavefront *w = gpuDynInst->wavefront();
const VectorMask &mask = w->getPred();
for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) {
if (mask[lane]) {
CType dest_val;
CType src_val;
src_val = this->src[0].template get<CType>(w, lane);
dest_val = (CType)(src_val);
this->dest.set(w, lane, dest_val);
}
}
}
''')
header_code.dedent()
header_code('''
} // namespace HsailISA
''')
# close off main decode switch
decoder_code.dedent()
decoder_code.dedent()
decoder_code('''
default: fatal("unrecognized Brig opcode %d\\n", ib->opcode);
} // end switch(ib->opcode)
} // end decode()
} // namespace HsailISA
''')
exec_code.dedent()
exec_code('''
} // namespace HsailISA
''')
###############
#
# Output accumulated code to files
#
###############
header_code.write(sys.argv[1])
decoder_code.write(sys.argv[2])
exec_code.write(sys.argv[3])