| /* |
| * Copyright (c) 2012-2015 Advanced Micro Devices, Inc. |
| * All rights reserved. |
| * |
| * For use for simulation and test purposes only |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright notice, |
| * this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright notice, |
| * this list of conditions and the following disclaimer in the documentation |
| * and/or other materials provided with the distribution. |
| * |
| * 3. Neither the name of the copyright holder nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE |
| * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| * POSSIBILITY OF SUCH DAMAGE. |
| * |
| * Author: Anthony Gutierrez |
| */ |
| |
| #include "gpu-compute/cl_driver.hh" |
| |
| #include <memory> |
| |
| #include "base/intmath.hh" |
| #include "cpu/thread_context.hh" |
| #include "gpu-compute/dispatcher.hh" |
| #include "gpu-compute/hsa_code.hh" |
| #include "gpu-compute/hsa_kernel_info.hh" |
| #include "gpu-compute/hsa_object.hh" |
| #include "params/ClDriver.hh" |
| #include "sim/process.hh" |
| #include "sim/syscall_emul_buf.hh" |
| |
| ClDriver::ClDriver(ClDriverParams *p) |
| : EmulatedDriver(p), hsaCode(0) |
| { |
| for (const auto &codeFile : p->codefile) |
| codeFiles.push_back(&codeFile); |
| |
| maxFuncArgsSize = 0; |
| |
| for (int i = 0; i < codeFiles.size(); ++i) { |
| HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]); |
| |
| for (int k = 0; k < obj->numKernels(); ++k) { |
| assert(obj->getKernel(k)); |
| kernels.push_back(obj->getKernel(k)); |
| kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData); |
| int kern_funcargs_size = kernels.back()->funcarg_size; |
| maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ? |
| kern_funcargs_size : maxFuncArgsSize; |
| } |
| } |
| |
| int name_offs = 0; |
| int code_offs = 0; |
| |
| for (int i = 0; i < kernels.size(); ++i) { |
| kernelInfo.push_back(HsaKernelInfo()); |
| HsaCode *k = kernels[i]; |
| |
| k->generateHsaKernelInfo(&kernelInfo[i]); |
| |
| kernelInfo[i].name_offs = name_offs; |
| kernelInfo[i].code_offs = code_offs; |
| |
| name_offs += k->name().size() + 1; |
| code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst); |
| } |
| } |
| |
| void |
| ClDriver::handshake(GpuDispatcher *_dispatcher) |
| { |
| dispatcher = _dispatcher; |
| dispatcher->setFuncargsSize(maxFuncArgsSize); |
| } |
| |
| int |
| ClDriver::open(Process *p, ThreadContext *tc, int mode, int flags) |
| { |
| std::shared_ptr<DeviceFDEntry> fdp; |
| fdp = std::make_shared<DeviceFDEntry>(this, filename); |
| int tgt_fd = p->fds->allocFD(fdp); |
| return tgt_fd; |
| } |
| |
| int |
| ClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req) |
| { |
| int index = 2; |
| Addr buf_addr = process->getSyscallArg(tc, index); |
| |
| switch (req) { |
| case HSA_GET_SIZES: |
| { |
| TypedBufferArg<HsaDriverSizes> sizes(buf_addr); |
| sizes->num_kernels = kernels.size(); |
| sizes->string_table_size = 0; |
| sizes->code_size = 0; |
| sizes->readonly_size = 0; |
| |
| if (kernels.size() > 0) { |
| // all kernels will share the same read-only memory |
| sizes->readonly_size = |
| kernels[0]->getSize(HsaCode::MemorySegment::READONLY); |
| // check our assumption |
| for (int i = 1; i<kernels.size(); ++i) { |
| assert(sizes->readonly_size == |
| kernels[i]->getSize(HsaCode::MemorySegment::READONLY)); |
| } |
| } |
| |
| for (int i = 0; i < kernels.size(); ++i) { |
| HsaCode *k = kernels[i]; |
| // add one for terminating '\0' |
| sizes->string_table_size += k->name().size() + 1; |
| sizes->code_size += |
| k->numInsts() * sizeof(TheGpuISA::RawMachInst); |
| } |
| |
| sizes.copyOut(tc->getMemProxy()); |
| } |
| break; |
| |
| case HSA_GET_KINFO: |
| { |
| TypedBufferArg<HsaKernelInfo> |
| kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size()); |
| |
| for (int i = 0; i < kernels.size(); ++i) { |
| HsaKernelInfo *ki = &kinfo[i]; |
| ki->name_offs = kernelInfo[i].name_offs; |
| ki->code_offs = kernelInfo[i].code_offs; |
| ki->sRegCount = kernelInfo[i].sRegCount; |
| ki->dRegCount = kernelInfo[i].dRegCount; |
| ki->cRegCount = kernelInfo[i].cRegCount; |
| ki->static_lds_size = kernelInfo[i].static_lds_size; |
| ki->private_mem_size = kernelInfo[i].private_mem_size; |
| ki->spill_mem_size = kernelInfo[i].spill_mem_size; |
| } |
| |
| kinfo.copyOut(tc->getMemProxy()); |
| } |
| break; |
| |
| case HSA_GET_STRINGS: |
| { |
| int string_table_size = 0; |
| for (int i = 0; i < kernels.size(); ++i) { |
| HsaCode *k = kernels[i]; |
| string_table_size += k->name().size() + 1; |
| } |
| |
| BufferArg buf(buf_addr, string_table_size); |
| char *bufp = (char*)buf.bufferPtr(); |
| |
| for (int i = 0; i < kernels.size(); ++i) { |
| HsaCode *k = kernels[i]; |
| const char *n = k->name().c_str(); |
| |
| // idiomatic string copy |
| while ((*bufp++ = *n++)); |
| } |
| |
| assert(bufp - (char *)buf.bufferPtr() == string_table_size); |
| |
| buf.copyOut(tc->getMemProxy()); |
| } |
| break; |
| |
| case HSA_GET_READONLY_DATA: |
| { |
| // we can pick any kernel --- they share the same |
| // readonly segment (this assumption is checked in GET_SIZES) |
| uint64_t size = |
| kernels.back()->getSize(HsaCode::MemorySegment::READONLY); |
| BufferArg data(buf_addr, size); |
| char *datap = (char *)data.bufferPtr(); |
| memcpy(datap, |
| kernels.back()->readonly_data, |
| size); |
| data.copyOut(tc->getMemProxy()); |
| } |
| break; |
| |
| case HSA_GET_CODE: |
| { |
| // set hsaCode pointer |
| hsaCode = buf_addr; |
| int code_size = 0; |
| |
| for (int i = 0; i < kernels.size(); ++i) { |
| HsaCode *k = kernels[i]; |
| code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst); |
| } |
| |
| TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size); |
| TheGpuISA::RawMachInst *bufp = buf; |
| |
| int buf_idx = 0; |
| |
| for (int i = 0; i < kernels.size(); ++i) { |
| HsaCode *k = kernels[i]; |
| |
| for (int j = 0; j < k->numInsts(); ++j) { |
| bufp[buf_idx] = k->insts()->at(j); |
| ++buf_idx; |
| } |
| } |
| |
| buf.copyOut(tc->getMemProxy()); |
| } |
| break; |
| |
| case HSA_GET_CU_CNT: |
| { |
| BufferArg buf(buf_addr, sizeof(uint32_t)); |
| *((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs(); |
| buf.copyOut(tc->getMemProxy()); |
| } |
| break; |
| |
| case HSA_GET_VSZ: |
| { |
| BufferArg buf(buf_addr, sizeof(uint32_t)); |
| *((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize(); |
| buf.copyOut(tc->getMemProxy()); |
| } |
| break; |
| case HSA_GET_HW_STATIC_CONTEXT_SIZE: |
| { |
| BufferArg buf(buf_addr, sizeof(uint32_t)); |
| *((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize(); |
| buf.copyOut(tc->getMemProxy()); |
| } |
| break; |
| |
| default: |
| fatal("ClDriver: bad ioctl %d\n", req); |
| } |
| |
| return 0; |
| } |
| |
| const char* |
| ClDriver::codeOffToKernelName(uint64_t code_ptr) |
| { |
| assert(hsaCode); |
| uint32_t code_offs = code_ptr - hsaCode; |
| |
| for (int i = 0; i < kernels.size(); ++i) { |
| if (code_offs == kernelInfo[i].code_offs) { |
| return kernels[i]->name().c_str(); |
| } |
| } |
| |
| return nullptr; |
| } |
| |
| ClDriver* |
| ClDriverParams::create() |
| { |
| return new ClDriver(this); |
| } |