| #! /usr/bin/python |
| |
| # |
| # Copyright (c) 2015 Advanced Micro Devices, Inc. |
| # All rights reserved. |
| # |
| # For use for simulation and test purposes only |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions are met: |
| # |
| # 1. Redistributions of source code must retain the above copyright notice, |
| # this list of conditions and the following disclaimer. |
| # |
| # 2. Redistributions in binary form must reproduce the above copyright notice, |
| # this list of conditions and the following disclaimer in the documentation |
| # and/or other materials provided with the distribution. |
| # |
| # 3. Neither the name of the copyright holder nor the names of its contributors |
| # may be used to endorse or promote products derived from this software |
| # without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| # POSSIBILITY OF SUCH DAMAGE. |
| # |
| # Author: Steve Reinhardt |
| # |
| |
| from __future__ import print_function |
| |
| import sys, re |
| |
| from m5.util import code_formatter |
| |
| if len(sys.argv) != 4: |
| print("Error: need 3 args (file names)") |
| sys.exit(0) |
| |
| header_code = code_formatter() |
| decoder_code = code_formatter() |
| exec_code = code_formatter() |
| |
| ############### |
| # |
| # Generate file prologs (includes etc.) |
| # |
| ############### |
| |
| header_code(''' |
| #include "arch/hsail/insts/decl.hh" |
| #include "base/bitfield.hh" |
| #include "gpu-compute/hsail_code.hh" |
| #include "gpu-compute/wavefront.hh" |
| |
| namespace HsailISA |
| { |
| ''') |
| header_code.indent() |
| |
| decoder_code(''' |
| #include "arch/hsail/gpu_decoder.hh" |
| #include "arch/hsail/insts/branch.hh" |
| #include "arch/hsail/insts/decl.hh" |
| #include "arch/hsail/insts/gen_decl.hh" |
| #include "arch/hsail/insts/mem.hh" |
| #include "arch/hsail/insts/mem_impl.hh" |
| #include "gpu-compute/brig_object.hh" |
| |
| namespace HsailISA |
| { |
| std::vector<GPUStaticInst*> Decoder::decodedInsts; |
| |
| GPUStaticInst* |
| Decoder::decode(MachInst machInst) |
| { |
| using namespace Brig; |
| |
| const BrigInstBase *ib = machInst.brigInstBase; |
| const BrigObject *obj = machInst.brigObj; |
| |
| switch(ib->opcode) { |
| ''') |
| decoder_code.indent() |
| decoder_code.indent() |
| |
| exec_code(''' |
| #include "arch/hsail/insts/gen_decl.hh" |
| #include "base/intmath.hh" |
| |
| namespace HsailISA |
| { |
| ''') |
| exec_code.indent() |
| |
| ############### |
| # |
| # Define code templates for class declarations (for header file) |
| # |
| ############### |
| |
| # Basic header template for an instruction stub. |
| header_template_stub = ''' |
| class $class_name : public $base_class |
| { |
| public: |
| typedef $base_class Base; |
| |
| $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "$opcode") |
| { |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst); |
| }; |
| |
| ''' |
| |
| # Basic header template for an instruction with no template parameters. |
| header_template_nodt = ''' |
| class $class_name : public $base_class |
| { |
| public: |
| typedef $base_class Base; |
| |
| $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "$opcode") |
| { |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst); |
| }; |
| |
| ''' |
| |
| # Basic header template for an instruction with a single DataType |
| # template parameter. |
| header_template_1dt = ''' |
| template<typename DataType> |
| class $class_name : public $base_class<DataType> |
| { |
| public: |
| typedef $base_class<DataType> Base; |
| typedef typename DataType::CType CType; |
| |
| $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "$opcode") |
| { |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst); |
| }; |
| |
| ''' |
| |
| header_template_1dt_noexec = ''' |
| template<typename DataType> |
| class $class_name : public $base_class<DataType> |
| { |
| public: |
| typedef $base_class<DataType> Base; |
| typedef typename DataType::CType CType; |
| |
| $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "$opcode") |
| { |
| } |
| }; |
| |
| ''' |
| |
| # Same as header_template_1dt, except the base class has a second |
| # template parameter NumSrcOperands to allow a variable number of |
| # source operands. Note that since this is implemented with an array, |
| # it only works for instructions where all sources are of the same |
| # type (like most arithmetics). |
| header_template_1dt_varsrcs = ''' |
| template<typename DataType> |
| class $class_name : public $base_class<DataType, $num_srcs> |
| { |
| public: |
| typedef $base_class<DataType, $num_srcs> Base; |
| typedef typename DataType::CType CType; |
| |
| $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "$opcode") |
| { |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst); |
| }; |
| |
| ''' |
| |
| # Header template for instruction with two DataType template |
| # parameters, one for the dest and one for the source. This is used |
| # by compare and convert. |
| header_template_2dt = ''' |
| template<typename DestDataType, class SrcDataType> |
| class $class_name : public $base_class<DestDataType, SrcDataType> |
| { |
| public: |
| typedef $base_class<DestDataType, SrcDataType> Base; |
| typedef typename DestDataType::CType DestCType; |
| typedef typename SrcDataType::CType SrcCType; |
| |
| $class_name(const Brig::BrigInstBase *ib, const BrigObject *obj) |
| : Base(ib, obj, "$opcode") |
| { |
| } |
| |
| void execute(GPUDynInstPtr gpuDynInst); |
| }; |
| |
| ''' |
| |
| header_templates = { |
| 'ArithInst': header_template_1dt_varsrcs, |
| 'CmovInst': header_template_1dt, |
| 'ClassInst': header_template_1dt, |
| 'ShiftInst': header_template_1dt, |
| 'ExtractInsertInst': header_template_1dt, |
| 'CmpInst': header_template_2dt, |
| 'CvtInst': header_template_2dt, |
| 'PopcountInst': header_template_2dt, |
| 'LdInst': '', |
| 'StInst': '', |
| 'SpecialInstNoSrc': header_template_nodt, |
| 'SpecialInst1Src': header_template_nodt, |
| 'SpecialInstNoSrcNoDest': '', |
| 'Stub': header_template_stub, |
| } |
| |
| ############### |
| # |
| # Define code templates for exec functions |
| # |
| ############### |
| |
| # exec function body |
| exec_template_stub = ''' |
| void |
| $class_name::execute(GPUDynInstPtr gpuDynInst) |
| { |
| fatal("instruction unimplemented %s\\n", gpuDynInst->disassemble()); |
| } |
| |
| ''' |
| exec_template_nodt_nosrc = ''' |
| void |
| $class_name::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| typedef Base::DestCType DestCType; |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| DestCType dest_val = $expr; |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| ''' |
| |
| exec_template_nodt_1src = ''' |
| void |
| $class_name::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| typedef Base::DestCType DestCType; |
| typedef Base::SrcCType SrcCType; |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| SrcCType src_val0 = this->src0.get<SrcCType>(w, lane); |
| DestCType dest_val = $expr; |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| ''' |
| |
| exec_template_1dt_varsrcs = ''' |
| template<typename DataType> |
| void |
| $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| CType dest_val; |
| if ($dest_is_src_flag) { |
| dest_val = this->dest.template get<CType>(w, lane); |
| } |
| |
| CType src_val[$num_srcs]; |
| |
| for (int i = 0; i < $num_srcs; ++i) { |
| src_val[i] = this->src[i].template get<CType>(w, lane); |
| } |
| |
| dest_val = (CType)($expr); |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| ''' |
| |
| exec_template_1dt_3srcs = ''' |
| template<typename DataType> |
| void |
| $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| typedef typename Base::Src0CType Src0T; |
| typedef typename Base::Src1CType Src1T; |
| typedef typename Base::Src2CType Src2T; |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| CType dest_val; |
| |
| if ($dest_is_src_flag) { |
| dest_val = this->dest.template get<CType>(w, lane); |
| } |
| |
| Src0T src_val0 = this->src0.template get<Src0T>(w, lane); |
| Src1T src_val1 = this->src1.template get<Src1T>(w, lane); |
| Src2T src_val2 = this->src2.template get<Src2T>(w, lane); |
| |
| dest_val = $expr; |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| ''' |
| |
| exec_template_1dt_2src_1dest = ''' |
| template<typename DataType> |
| void |
| $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| typedef typename Base::DestCType DestT; |
| typedef CType Src0T; |
| typedef typename Base::Src1CType Src1T; |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| DestT dest_val; |
| if ($dest_is_src_flag) { |
| dest_val = this->dest.template get<DestT>(w, lane); |
| } |
| Src0T src_val0 = this->src0.template get<Src0T>(w, lane); |
| Src1T src_val1 = this->src1.template get<Src1T>(w, lane); |
| |
| dest_val = $expr; |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| ''' |
| |
| exec_template_shift = ''' |
| template<typename DataType> |
| void |
| $class_name<DataType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| const VectorMask &mask = w->getPred(); |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| CType dest_val; |
| |
| if ($dest_is_src_flag) { |
| dest_val = this->dest.template get<CType>(w, lane); |
| } |
| |
| CType src_val0 = this->src0.template get<CType>(w, lane); |
| uint32_t src_val1 = this->src1.template get<uint32_t>(w, lane); |
| |
| dest_val = $expr; |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| ''' |
| |
| exec_template_2dt = ''' |
| template<typename DestDataType, class SrcDataType> |
| void |
| $class_name<DestDataType, SrcDataType>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| DestCType dest_val; |
| SrcCType src_val[$num_srcs]; |
| |
| for (int i = 0; i < $num_srcs; ++i) { |
| src_val[i] = this->src[i].template get<SrcCType>(w, lane); |
| } |
| |
| dest_val = $expr; |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| ''' |
| |
| exec_templates = { |
| 'ArithInst': exec_template_1dt_varsrcs, |
| 'CmovInst': exec_template_1dt_3srcs, |
| 'ExtractInsertInst': exec_template_1dt_3srcs, |
| 'ClassInst': exec_template_1dt_2src_1dest, |
| 'CmpInst': exec_template_2dt, |
| 'CvtInst': exec_template_2dt, |
| 'PopcountInst': exec_template_2dt, |
| 'LdInst': '', |
| 'StInst': '', |
| 'SpecialInstNoSrc': exec_template_nodt_nosrc, |
| 'SpecialInst1Src': exec_template_nodt_1src, |
| 'SpecialInstNoSrcNoDest': '', |
| 'Stub': exec_template_stub, |
| } |
| |
| ############### |
| # |
| # Define code templates for the decoder cases |
| # |
| ############### |
| |
| # decode template for nodt-opcode case |
| decode_nodt_template = ''' |
| case BRIG_OPCODE_$brig_opcode_upper: return $constructor(ib, obj);''' |
| |
| decode_case_prolog_class_inst = ''' |
| case BRIG_OPCODE_$brig_opcode_upper: |
| { |
| //const BrigOperandBase *baseOp = obj->getOperand(ib->operands[1]); |
| BrigType16_t type = ((BrigInstSourceType*)ib)->sourceType; |
| //switch (baseOp->kind) { |
| // case BRIG_OPERAND_REG: |
| // type = ((const BrigOperandReg*)baseOp)->type; |
| // break; |
| // case BRIG_OPERAND_IMMED: |
| // type = ((const BrigOperandImmed*)baseOp)->type; |
| // break; |
| // default: |
| // fatal("CLASS unrecognized kind of operand %d\\n", |
| // baseOp->kind); |
| //} |
| switch (type) {''' |
| |
| # common prolog for 1dt- or 2dt-opcode case: switch on data type |
| decode_case_prolog = ''' |
| case BRIG_OPCODE_$brig_opcode_upper: |
| { |
| switch (ib->type) {''' |
| |
| # single-level decode case entry (for 1dt opcodes) |
| decode_case_entry = \ |
| ' case BRIG_TYPE_$type_name: return $constructor(ib, obj);' |
| |
| decode_store_prolog = \ |
| ' case BRIG_TYPE_$type_name: {' |
| |
| decode_store_case_epilog = ''' |
| }''' |
| |
| decode_store_case_entry = \ |
| ' return $constructor(ib, obj);' |
| |
| # common epilog for type switch |
| decode_case_epilog = ''' |
| default: fatal("$brig_opcode_upper: unrecognized type %d\\n", |
| ib->type); |
| } |
| } |
| break;''' |
| |
| # Additional templates for nested decode on a second type field (for |
| # compare and convert). These are used in place of the |
| # decode_case_entry template to create a second-level switch on on the |
| # second type field inside each case of the first-level type switch. |
| # Because the name and location of the second type can vary, the Brig |
| # instruction type must be provided in $brig_type, and the name of the |
| # second type field must be provided in $type_field. |
| decode_case2_prolog = ''' |
| case BRIG_TYPE_$type_name: |
| switch (((Brig$brig_type*)ib)->$type2_field) {''' |
| |
| decode_case2_entry = \ |
| ' case BRIG_TYPE_$type2_name: return $constructor(ib, obj);' |
| |
| decode_case2_epilog = ''' |
| default: fatal("$brig_opcode_upper: unrecognized $type2_field %d\\n", |
| ((Brig$brig_type*)ib)->$type2_field); |
| } |
| break;''' |
| |
| # Figure out how many source operands an expr needs by looking for the |
| # highest-numbered srcN value referenced. Since sources are numbered |
| # starting at 0, the return value is N+1. |
| def num_src_operands(expr): |
| if expr.find('src2') != -1: |
| return 3 |
| elif expr.find('src1') != -1: |
| return 2 |
| elif expr.find('src0') != -1: |
| return 1 |
| else: |
| return 0 |
| |
| ############### |
| # |
| # Define final code generation methods |
| # |
| # The gen_nodt, and gen_1dt, and gen_2dt methods are the interface for |
| # generating actual instructions. |
| # |
| ############### |
| |
| # Generate class declaration, exec function, and decode switch case |
| # for an brig_opcode with a single-level type switch. The 'types' |
| # parameter is a list or tuple of types for which the instruction |
| # should be instantiated. |
| def gen(brig_opcode, types=None, expr=None, base_class='ArithInst', |
| type2_info=None, constructor_prefix='new ', is_store=False): |
| brig_opcode_upper = brig_opcode.upper() |
| class_name = brig_opcode |
| opcode = class_name.lower() |
| |
| if base_class == 'ArithInst': |
| # note that expr must be provided with ArithInst so we can |
| # derive num_srcs for the template |
| assert expr |
| |
| if expr: |
| # Derive several bits of info from expr. If expr is not used, |
| # this info will be irrelevant. |
| num_srcs = num_src_operands(expr) |
| # if the RHS expression includes 'dest', then we're doing an RMW |
| # on the reg and we need to treat it like a source |
| dest_is_src = expr.find('dest') != -1 |
| dest_is_src_flag = str(dest_is_src).lower() # for C++ |
| if base_class in ['ShiftInst']: |
| expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr) |
| elif base_class in ['ArithInst', 'CmpInst', 'CvtInst', 'PopcountInst']: |
| expr = re.sub(r'\bsrc(\d)\b', r'src_val[\1]', expr) |
| else: |
| expr = re.sub(r'\bsrc(\d)\b', r'src_val\1', expr) |
| expr = re.sub(r'\bdest\b', r'dest_val', expr) |
| |
| # Strip template arguments off of base class before looking up |
| # appropriate templates |
| base_class_base = re.sub(r'<.*>$', '', base_class) |
| header_code(header_templates[base_class_base]) |
| |
| if base_class.startswith('SpecialInst') or base_class.startswith('Stub'): |
| exec_code(exec_templates[base_class_base]) |
| elif base_class.startswith('ShiftInst'): |
| header_code(exec_template_shift) |
| else: |
| header_code(exec_templates[base_class_base]) |
| |
| if not types or isinstance(types, str): |
| # Just a single type |
| constructor = constructor_prefix + class_name |
| decoder_code(decode_nodt_template) |
| else: |
| # multiple types, need at least one level of decode |
| if brig_opcode == 'Class': |
| decoder_code(decode_case_prolog_class_inst) |
| else: |
| decoder_code(decode_case_prolog) |
| if not type2_info: |
| if not is_store: |
| # single list of types, to basic one-level decode |
| for type_name in types: |
| full_class_name = '%s<%s>' % (class_name, type_name.upper()) |
| constructor = constructor_prefix + full_class_name |
| decoder_code(decode_case_entry) |
| else: |
| # single list of types, to basic one-level decode |
| for type_name in types: |
| decoder_code(decode_store_prolog) |
| type_size = int(re.findall(r'[0-9]+', type_name)[0]) |
| src_size = 32 |
| type_type = type_name[0] |
| full_class_name = '%s<%s,%s>' % (class_name, \ |
| type_name.upper(), \ |
| '%s%d' % \ |
| (type_type.upper(), \ |
| type_size)) |
| constructor = constructor_prefix + full_class_name |
| decoder_code(decode_store_case_entry) |
| decoder_code(decode_store_case_epilog) |
| else: |
| # need secondary type switch (convert, compare) |
| # unpack extra info on second switch |
| (type2_field, types2) = type2_info |
| brig_type = 'Inst%s' % brig_opcode |
| for type_name in types: |
| decoder_code(decode_case2_prolog) |
| fmt = '%s<%s,%%s>' % (class_name, type_name.upper()) |
| for type2_name in types2: |
| full_class_name = fmt % type2_name.upper() |
| constructor = constructor_prefix + full_class_name |
| decoder_code(decode_case2_entry) |
| |
| decoder_code(decode_case2_epilog) |
| |
| decoder_code(decode_case_epilog) |
| |
| ############### |
| # |
| # Generate instructions |
| # |
| ############### |
| |
| # handy abbreviations for common sets of types |
| |
| # arithmetic ops are typically defined only on 32- and 64-bit sizes |
| arith_int_types = ('S32', 'U32', 'S64', 'U64') |
| arith_float_types = ('F32', 'F64') |
| arith_types = arith_int_types + arith_float_types |
| |
| bit_types = ('B1', 'B32', 'B64') |
| |
| all_int_types = ('S8', 'U8', 'S16', 'U16') + arith_int_types |
| |
| # I think you might be able to do 'f16' memory ops too, but we'll |
| # ignore them for now. |
| mem_types = all_int_types + arith_float_types |
| mem_atom_types = all_int_types + ('B32', 'B64') |
| |
| ##### Arithmetic & logical operations |
| gen('Add', arith_types, 'src0 + src1') |
| gen('Sub', arith_types, 'src0 - src1') |
| gen('Mul', arith_types, 'src0 * src1') |
| gen('Div', arith_types, 'src0 / src1') |
| gen('Min', arith_types, 'std::min(src0, src1)') |
| gen('Max', arith_types, 'std::max(src0, src1)') |
| gen('Gcnmin', arith_types, 'std::min(src0, src1)') |
| |
| gen('CopySign', arith_float_types, |
| 'src1 < 0 ? -std::abs(src0) : std::abs(src0)') |
| gen('Sqrt', arith_float_types, 'sqrt(src0)') |
| gen('Floor', arith_float_types, 'floor(src0)') |
| |
| # "fast" sqrt... same as slow for us |
| gen('Nsqrt', arith_float_types, 'sqrt(src0)') |
| gen('Nrsqrt', arith_float_types, '1.0/sqrt(src0)') |
| gen('Nrcp', arith_float_types, '1.0/src0') |
| gen('Fract', arith_float_types, |
| '(src0 >= 0.0)?(src0-floor(src0)):(floor(src0)-src0)') |
| |
| gen('Ncos', arith_float_types, 'cos(src0)'); |
| gen('Nsin', arith_float_types, 'sin(src0)'); |
| |
| gen('And', bit_types, 'src0 & src1') |
| gen('Or', bit_types, 'src0 | src1') |
| gen('Xor', bit_types, 'src0 ^ src1') |
| |
| gen('Bitselect', bit_types, '(src1 & src0) | (src2 & ~(uint64_t)src0)') |
| gen('Popcount', ('U32',), '__builtin_popcount(src0)', 'PopcountInst', \ |
| ('sourceType', ('B32', 'B64'))) |
| |
| gen('Shl', arith_int_types, 'src0 << (unsigned)src1', 'ShiftInst') |
| gen('Shr', arith_int_types, 'src0 >> (unsigned)src1', 'ShiftInst') |
| |
| # gen('Mul_hi', types=('s32','u32', '??')) |
| # gen('Mul24', types=('s32','u32', '??')) |
| gen('Rem', arith_int_types, 'src0 - ((src0 / src1) * src1)') |
| |
| gen('Abs', arith_types, 'std::abs(src0)') |
| gen('Neg', arith_types, '-src0') |
| |
| gen('Mov', bit_types + arith_types, 'src0') |
| gen('Not', bit_types, 'heynot(src0)') |
| |
| # mad and fma differ only in rounding behavior, which we don't emulate |
| # also there's an integer form of mad, but not of fma |
| gen('Mad', arith_types, 'src0 * src1 + src2') |
| gen('Fma', arith_float_types, 'src0 * src1 + src2') |
| |
| #native floating point operations |
| gen('Nfma', arith_float_types, 'src0 * src1 + src2') |
| |
| gen('Cmov', bit_types, 'src0 ? src1 : src2', 'CmovInst') |
| gen('BitAlign', bit_types, '(src0 << src2)|(src1 >> (32 - src2))') |
| gen('ByteAlign', bit_types, '(src0 << 8 * src2)|(src1 >> (32 - 8 * src2))') |
| |
| # see base/bitfield.hh |
| gen('BitExtract', arith_int_types, 'bits(src0, src1, src1 + src2 - 1)', |
| 'ExtractInsertInst') |
| |
| gen('BitInsert', arith_int_types, 'insertBits(dest, src1, src2, src0)', |
| 'ExtractInsertInst') |
| |
| ##### Compare |
| gen('Cmp', ('B1', 'S32', 'U32', 'F32'), 'compare(src0, src1, this->cmpOp)', |
| 'CmpInst', ('sourceType', arith_types + bit_types)) |
| gen('Class', arith_float_types, 'fpclassify(src0,src1)','ClassInst') |
| |
| ##### Conversion |
| |
| # Conversion operations are only defined on B1, not B32 or B64 |
| cvt_types = ('B1',) + mem_types |
| |
| gen('Cvt', cvt_types, 'src0', 'CvtInst', ('sourceType', cvt_types)) |
| |
| |
| ##### Load & Store |
| gen('Lda', mem_types, base_class = 'LdInst', constructor_prefix='decode') |
| gen('Ld', mem_types, base_class = 'LdInst', constructor_prefix='decode') |
| gen('St', mem_types, base_class = 'StInst', constructor_prefix='decode', |
| is_store=True) |
| gen('Atomic', mem_atom_types, base_class='StInst', constructor_prefix='decode') |
| gen('AtomicNoRet', mem_atom_types, base_class='StInst', |
| constructor_prefix='decode') |
| |
| gen('Cbr', base_class = 'LdInst', constructor_prefix='decode') |
| gen('Br', base_class = 'LdInst', constructor_prefix='decode') |
| |
| ##### Special operations |
| def gen_special(brig_opcode, expr, dest_type='U32'): |
| num_srcs = num_src_operands(expr) |
| if num_srcs == 0: |
| base_class = 'SpecialInstNoSrc<%s>' % dest_type |
| elif num_srcs == 1: |
| base_class = 'SpecialInst1Src<%s>' % dest_type |
| else: |
| assert false |
| |
| gen(brig_opcode, None, expr, base_class) |
| |
| gen_special('WorkItemId', 'w->workItemId[src0][lane]') |
| gen_special('WorkItemAbsId', |
| 'w->workItemId[src0][lane] + (w->workGroupId[src0] * w->workGroupSz[src0])') |
| gen_special('WorkGroupId', 'w->workGroupId[src0]') |
| gen_special('WorkGroupSize', 'w->workGroupSz[src0]') |
| gen_special('CurrentWorkGroupSize', 'w->workGroupSz[src0]') |
| gen_special('GridSize', 'w->gridSz[src0]') |
| gen_special('GridGroups', |
| 'divCeil(w->gridSz[src0],w->workGroupSz[src0])') |
| gen_special('LaneId', 'lane') |
| gen_special('WaveId', 'w->wfId') |
| gen_special('Clock', 'w->computeUnit->shader->tick_cnt', 'U64') |
| |
| # gen_special('CU'', ') |
| |
| gen('Ret', base_class='SpecialInstNoSrcNoDest') |
| gen('Barrier', base_class='SpecialInstNoSrcNoDest') |
| gen('MemFence', base_class='SpecialInstNoSrcNoDest') |
| |
| # Map magic instructions to the BrigSyscall opcode |
| # Magic instructions are defined in magic.hh |
| # |
| # In the future, real HSA kernel system calls can be implemented and coexist |
| # with magic instructions. |
| gen('Call', base_class='SpecialInstNoSrcNoDest') |
| |
| # Stubs for unimplemented instructions: |
| # These may need to be implemented at some point in the future, but |
| # for now we just match the instructions with their operands. |
| # |
| # By defining stubs for these instructions, we can work with |
| # applications that have them in dead/unused code paths. |
| # |
| # Needed for rocm-hcc compilations for HSA backends since |
| # builtins-hsail library is `cat`d onto the generated kernels. |
| # The builtins-hsail library consists of handcoded hsail functions |
| # that __might__ be needed by the rocm-hcc compiler in certain binaries. |
| gen('Bitmask', base_class='Stub') |
| gen('Bitrev', base_class='Stub') |
| gen('Firstbit', base_class='Stub') |
| gen('Lastbit', base_class='Stub') |
| gen('Unpacklo', base_class='Stub') |
| gen('Unpackhi', base_class='Stub') |
| gen('Pack', base_class='Stub') |
| gen('Unpack', base_class='Stub') |
| gen('Lerp', base_class='Stub') |
| gen('Packcvt', base_class='Stub') |
| gen('Unpackcvt', base_class='Stub') |
| gen('Sad', base_class='Stub') |
| gen('Sadhi', base_class='Stub') |
| gen('Activelanecount', base_class='Stub') |
| gen('Activelaneid', base_class='Stub') |
| gen('Activelanemask', base_class='Stub') |
| gen('Activelanepermute', base_class='Stub') |
| gen('Groupbaseptr', base_class='Stub') |
| gen('Signalnoret', base_class='Stub') |
| |
| ############### |
| # |
| # Generate file epilogs |
| # |
| ############### |
| header_code(''' |
| template<> |
| inline void |
| Abs<U32>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| CType dest_val; |
| CType src_val; |
| |
| src_val = this->src[0].template get<CType>(w, lane); |
| |
| dest_val = (CType)(src_val); |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| |
| template<> |
| inline void |
| Abs<U64>::execute(GPUDynInstPtr gpuDynInst) |
| { |
| Wavefront *w = gpuDynInst->wavefront(); |
| |
| const VectorMask &mask = w->getPred(); |
| |
| for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { |
| if (mask[lane]) { |
| CType dest_val; |
| CType src_val; |
| |
| src_val = this->src[0].template get<CType>(w, lane); |
| |
| dest_val = (CType)(src_val); |
| |
| this->dest.set(w, lane, dest_val); |
| } |
| } |
| } |
| ''') |
| |
| header_code.dedent() |
| header_code(''' |
| } // namespace HsailISA |
| ''') |
| |
| # close off main decode switch |
| decoder_code.dedent() |
| decoder_code.dedent() |
| decoder_code(''' |
| default: fatal("unrecognized Brig opcode %d\\n", ib->opcode); |
| } // end switch(ib->opcode) |
| } // end decode() |
| } // namespace HsailISA |
| ''') |
| |
| exec_code.dedent() |
| exec_code(''' |
| } // namespace HsailISA |
| ''') |
| |
| ############### |
| # |
| # Output accumulated code to files |
| # |
| ############### |
| header_code.write(sys.argv[1]) |
| decoder_code.write(sys.argv[2]) |
| exec_code.write(sys.argv[3]) |