blob: 119091fc5eccc51540f9dc1766774134f784ee14 [file] [log] [blame] [edit]
/*
* Copyright (c) 2012-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Anthony Gutierrez
*/
#include "gpu-compute/cl_driver.hh"
#include <memory>
#include "base/intmath.hh"
#include "cpu/thread_context.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/hsa_code.hh"
#include "gpu-compute/hsa_kernel_info.hh"
#include "gpu-compute/hsa_object.hh"
#include "params/ClDriver.hh"
#include "sim/process.hh"
#include "sim/syscall_emul_buf.hh"
ClDriver::ClDriver(ClDriverParams *p)
: EmulatedDriver(p), hsaCode(0)
{
for (const auto &codeFile : p->codefile)
codeFiles.push_back(&codeFile);
maxFuncArgsSize = 0;
for (int i = 0; i < codeFiles.size(); ++i) {
HsaObject *obj = HsaObject::createHsaObject(*codeFiles[i]);
for (int k = 0; k < obj->numKernels(); ++k) {
assert(obj->getKernel(k));
kernels.push_back(obj->getKernel(k));
kernels.back()->setReadonlyData((uint8_t*)obj->readonlyData);
int kern_funcargs_size = kernels.back()->funcarg_size;
maxFuncArgsSize = maxFuncArgsSize < kern_funcargs_size ?
kern_funcargs_size : maxFuncArgsSize;
}
}
int name_offs = 0;
int code_offs = 0;
for (int i = 0; i < kernels.size(); ++i) {
kernelInfo.push_back(HsaKernelInfo());
HsaCode *k = kernels[i];
k->generateHsaKernelInfo(&kernelInfo[i]);
kernelInfo[i].name_offs = name_offs;
kernelInfo[i].code_offs = code_offs;
name_offs += k->name().size() + 1;
code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
}
}
void
ClDriver::handshake(GpuDispatcher *_dispatcher)
{
dispatcher = _dispatcher;
dispatcher->setFuncargsSize(maxFuncArgsSize);
}
int
ClDriver::open(Process *p, ThreadContext *tc, int mode, int flags)
{
std::shared_ptr<DeviceFDEntry> fdp;
fdp = std::make_shared<DeviceFDEntry>(this, filename);
int tgt_fd = p->fds->allocFD(fdp);
return tgt_fd;
}
int
ClDriver::ioctl(Process *process, ThreadContext *tc, unsigned req)
{
int index = 2;
Addr buf_addr = process->getSyscallArg(tc, index);
switch (req) {
case HSA_GET_SIZES:
{
TypedBufferArg<HsaDriverSizes> sizes(buf_addr);
sizes->num_kernels = kernels.size();
sizes->string_table_size = 0;
sizes->code_size = 0;
sizes->readonly_size = 0;
if (kernels.size() > 0) {
// all kernels will share the same read-only memory
sizes->readonly_size =
kernels[0]->getSize(HsaCode::MemorySegment::READONLY);
// check our assumption
for (int i = 1; i<kernels.size(); ++i) {
assert(sizes->readonly_size ==
kernels[i]->getSize(HsaCode::MemorySegment::READONLY));
}
}
for (int i = 0; i < kernels.size(); ++i) {
HsaCode *k = kernels[i];
// add one for terminating '\0'
sizes->string_table_size += k->name().size() + 1;
sizes->code_size +=
k->numInsts() * sizeof(TheGpuISA::RawMachInst);
}
sizes.copyOut(tc->getMemProxy());
}
break;
case HSA_GET_KINFO:
{
TypedBufferArg<HsaKernelInfo>
kinfo(buf_addr, sizeof(HsaKernelInfo) * kernels.size());
for (int i = 0; i < kernels.size(); ++i) {
HsaKernelInfo *ki = &kinfo[i];
ki->name_offs = kernelInfo[i].name_offs;
ki->code_offs = kernelInfo[i].code_offs;
ki->sRegCount = kernelInfo[i].sRegCount;
ki->dRegCount = kernelInfo[i].dRegCount;
ki->cRegCount = kernelInfo[i].cRegCount;
ki->static_lds_size = kernelInfo[i].static_lds_size;
ki->private_mem_size = kernelInfo[i].private_mem_size;
ki->spill_mem_size = kernelInfo[i].spill_mem_size;
}
kinfo.copyOut(tc->getMemProxy());
}
break;
case HSA_GET_STRINGS:
{
int string_table_size = 0;
for (int i = 0; i < kernels.size(); ++i) {
HsaCode *k = kernels[i];
string_table_size += k->name().size() + 1;
}
BufferArg buf(buf_addr, string_table_size);
char *bufp = (char*)buf.bufferPtr();
for (int i = 0; i < kernels.size(); ++i) {
HsaCode *k = kernels[i];
const char *n = k->name().c_str();
// idiomatic string copy
while ((*bufp++ = *n++));
}
assert(bufp - (char *)buf.bufferPtr() == string_table_size);
buf.copyOut(tc->getMemProxy());
}
break;
case HSA_GET_READONLY_DATA:
{
// we can pick any kernel --- they share the same
// readonly segment (this assumption is checked in GET_SIZES)
uint64_t size =
kernels.back()->getSize(HsaCode::MemorySegment::READONLY);
BufferArg data(buf_addr, size);
char *datap = (char *)data.bufferPtr();
memcpy(datap,
kernels.back()->readonly_data,
size);
data.copyOut(tc->getMemProxy());
}
break;
case HSA_GET_CODE:
{
// set hsaCode pointer
hsaCode = buf_addr;
int code_size = 0;
for (int i = 0; i < kernels.size(); ++i) {
HsaCode *k = kernels[i];
code_size += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
}
TypedBufferArg<TheGpuISA::RawMachInst> buf(buf_addr, code_size);
TheGpuISA::RawMachInst *bufp = buf;
int buf_idx = 0;
for (int i = 0; i < kernels.size(); ++i) {
HsaCode *k = kernels[i];
for (int j = 0; j < k->numInsts(); ++j) {
bufp[buf_idx] = k->insts()->at(j);
++buf_idx;
}
}
buf.copyOut(tc->getMemProxy());
}
break;
case HSA_GET_CU_CNT:
{
BufferArg buf(buf_addr, sizeof(uint32_t));
*((uint32_t*)buf.bufferPtr()) = dispatcher->getNumCUs();
buf.copyOut(tc->getMemProxy());
}
break;
case HSA_GET_VSZ:
{
BufferArg buf(buf_addr, sizeof(uint32_t));
*((uint32_t*)buf.bufferPtr()) = dispatcher->wfSize();
buf.copyOut(tc->getMemProxy());
}
break;
case HSA_GET_HW_STATIC_CONTEXT_SIZE:
{
BufferArg buf(buf_addr, sizeof(uint32_t));
*((uint32_t*)buf.bufferPtr()) = dispatcher->getStaticContextSize();
buf.copyOut(tc->getMemProxy());
}
break;
default:
fatal("ClDriver: bad ioctl %d\n", req);
}
return 0;
}
const char*
ClDriver::codeOffToKernelName(uint64_t code_ptr)
{
assert(hsaCode);
uint32_t code_offs = code_ptr - hsaCode;
for (int i = 0; i < kernels.size(); ++i) {
if (code_offs == kernelInfo[i].code_offs) {
return kernels[i]->name().c_str();
}
}
return nullptr;
}
ClDriver*
ClDriverParams::create()
{
return new ClDriver(this);
}