| /* |
| * Copyright (C) 2008 Nicolai Haehnle. |
| * |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining |
| * a copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sublicense, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial |
| * portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
| * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
| * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| */ |
| |
| /** |
| * @file |
| * |
| * "Not-quite SSA" and Dead-Code Elimination. |
| * |
| * @note This code uses SWIZZLE_NIL in a source register to indicate that |
| * the corresponding component is ignored by the corresponding instruction. |
| */ |
| |
| #include "radeon_nqssadce.h" |
| |
| |
| /** |
| * Return the @ref register_state for the given register (or 0 for untracked |
| * registers, i.e. constants). |
| */ |
| static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index) |
| { |
| switch(file) { |
| case PROGRAM_TEMPORARY: return &s->Temps[index]; |
| case PROGRAM_OUTPUT: return &s->Outputs[index]; |
| default: return 0; |
| } |
| } |
| |
| |
| /** |
| * Left multiplication of a register with a swizzle |
| * |
| * @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles. |
| */ |
| static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg) |
| { |
| struct prog_src_register tmp = srcreg; |
| int i; |
| tmp.Swizzle = 0; |
| tmp.NegateBase = 0; |
| for(i = 0; i < 4; ++i) { |
| GLuint swz = GET_SWZ(swizzle, i); |
| if (swz < 4) { |
| tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3); |
| tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i; |
| } else { |
| tmp.Swizzle |= swz << (i*3); |
| } |
| } |
| return tmp; |
| } |
| |
| |
| static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s, |
| struct prog_instruction *inst, GLint src, GLuint sourced) |
| { |
| int i; |
| GLuint deswz_source = 0; |
| |
| for(i = 0; i < 4; ++i) { |
| if (GET_BIT(sourced, i)) { |
| GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i); |
| deswz_source |= 1 << swz; |
| } else { |
| inst->SrcReg[src].Swizzle &= ~(7 << (3*i)); |
| inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); |
| } |
| } |
| |
| if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) { |
| struct prog_dst_register dstreg = inst->DstReg; |
| dstreg.File = PROGRAM_TEMPORARY; |
| dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); |
| dstreg.WriteMask = sourced; |
| |
| s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]); |
| |
| inst = s->Program->Instructions + s->IP; |
| inst->SrcReg[src].File = PROGRAM_TEMPORARY; |
| inst->SrcReg[src].Index = dstreg.Index; |
| inst->SrcReg[src].Swizzle = 0; |
| inst->SrcReg[src].NegateBase = 0; |
| inst->SrcReg[src].Abs = 0; |
| inst->SrcReg[src].NegateAbs = 0; |
| for(i = 0; i < 4; ++i) { |
| if (GET_BIT(sourced, i)) |
| inst->SrcReg[src].Swizzle |= i << (3*i); |
| else |
| inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i); |
| } |
| deswz_source = sourced; |
| } |
| |
| struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index); |
| if (regstate) |
| regstate->Sourced |= deswz_source & 0xf; |
| |
| return inst; |
| } |
| |
| |
| static void rewrite_depth_out(struct prog_instruction *inst) |
| { |
| if (inst->DstReg.WriteMask & WRITEMASK_Z) { |
| inst->DstReg.WriteMask = WRITEMASK_W; |
| } else { |
| inst->DstReg.WriteMask = 0; |
| return; |
| } |
| |
| switch (inst->Opcode) { |
| case OPCODE_FRC: |
| case OPCODE_MOV: |
| inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); |
| break; |
| case OPCODE_ADD: |
| case OPCODE_MAX: |
| case OPCODE_MIN: |
| case OPCODE_MUL: |
| inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); |
| inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); |
| break; |
| case OPCODE_CMP: |
| case OPCODE_MAD: |
| inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]); |
| inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]); |
| inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]); |
| break; |
| default: |
| // Scalar instructions needn't be reswizzled |
| break; |
| } |
| } |
| |
| static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex) |
| { |
| int nsrc = _mesa_num_inst_src_regs(inst->Opcode); |
| int i; |
| for(i = 0; i < nsrc; ++i) |
| if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex) |
| inst->SrcReg[i].Index = newindex; |
| } |
| |
| static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex) |
| { |
| GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY); |
| int ip; |
| for(ip = 0; ip < s->IP; ++ip) { |
| struct prog_instruction* inst = s->Program->Instructions + ip; |
| if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex) |
| inst->DstReg.Index = newindex; |
| unalias_srcregs(inst, oldindex, newindex); |
| } |
| unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex); |
| } |
| |
| |
| /** |
| * Handle one instruction. |
| */ |
| static void process_instruction(struct nqssadce_state* s) |
| { |
| struct prog_instruction *inst = s->Program->Instructions + s->IP; |
| |
| if (inst->Opcode == OPCODE_END) |
| return; |
| |
| if (inst->Opcode != OPCODE_KIL) { |
| if (s->Descr->RewriteDepthOut) { |
| if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPR) |
| rewrite_depth_out(inst); |
| } |
| |
| struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index); |
| if (!regstate) { |
| _mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n", |
| inst->DstReg.File, inst->DstReg.Index); |
| return; |
| } |
| |
| inst->DstReg.WriteMask &= regstate->Sourced; |
| regstate->Sourced &= ~inst->DstReg.WriteMask; |
| |
| if (inst->DstReg.WriteMask == 0) { |
| _mesa_delete_instructions(s->Program, s->IP, 1); |
| return; |
| } |
| |
| if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced) |
| unalias_temporary(s, inst->DstReg.Index); |
| } |
| |
| /* Attention: Due to swizzle emulation code, the following |
| * might change the instruction stream under us, so we have |
| * to be careful with the inst pointer. */ |
| switch (inst->Opcode) { |
| case OPCODE_DDX: |
| case OPCODE_DDY: |
| case OPCODE_FRC: |
| case OPCODE_MOV: |
| inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); |
| break; |
| case OPCODE_ADD: |
| case OPCODE_MAX: |
| case OPCODE_MIN: |
| case OPCODE_MUL: |
| inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); |
| inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); |
| break; |
| case OPCODE_CMP: |
| case OPCODE_MAD: |
| inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask); |
| inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask); |
| inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask); |
| break; |
| case OPCODE_COS: |
| case OPCODE_EX2: |
| case OPCODE_LG2: |
| case OPCODE_RCP: |
| case OPCODE_RSQ: |
| case OPCODE_SIN: |
| inst = track_used_srcreg(s, inst, 0, 0x1); |
| break; |
| case OPCODE_DP3: |
| inst = track_used_srcreg(s, inst, 0, 0x7); |
| inst = track_used_srcreg(s, inst, 1, 0x7); |
| break; |
| case OPCODE_DP4: |
| inst = track_used_srcreg(s, inst, 0, 0xf); |
| inst = track_used_srcreg(s, inst, 1, 0xf); |
| break; |
| case OPCODE_KIL: |
| case OPCODE_TEX: |
| case OPCODE_TXB: |
| case OPCODE_TXP: |
| inst = track_used_srcreg(s, inst, 0, 0xf); |
| break; |
| default: |
| _mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode); |
| return; |
| } |
| } |
| |
| |
| void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr) |
| { |
| struct nqssadce_state s; |
| |
| _mesa_bzero(&s, sizeof(s)); |
| s.Ctx = ctx; |
| s.Program = p; |
| s.Descr = descr; |
| s.Descr->Init(&s); |
| s.IP = p->NumInstructions; |
| |
| while(s.IP > 0) { |
| s.IP--; |
| process_instruction(&s); |
| } |
| } |