/*
* Copyright (c) 2010-2015 Advanced Micro Devices, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Author: Blake Hechtman
*/
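// The TCC is the GPU's shared L2 cache. It services read, write-through, and
// atomic requests from the TCP/SQC L1 caches above it and communicates with
// the directory/memory controller (the NB) below it.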
machine(MachineType:TCC, "TCC Cache")
: CacheMemory * L2cache;
bool WB; /* is this cache writeback? */
Cycles l2_request_latency := 50;
Cycles l2_response_latency := 20;
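// l2_request_latency models the delay to forward a request to the NB;
// l2_response_latency models the delay to return a response to the cores.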
// From the TCPs or SQCs
MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request";
// To the Cores. TCC deals only with TCPs/SQCs.
MessageBuffer * responseToCore, network="To", virtual_network="3", vnet_type="response";
// From the NB
MessageBuffer * probeFromNB, network="From", virtual_network="0", vnet_type="request";
MessageBuffer * responseFromNB, network="From", virtual_network="2", vnet_type="response";
// To the NB
MessageBuffer * requestToNB, network="To", virtual_network="0", vnet_type="request";
MessageBuffer * responseToNB, network="To", virtual_network="2", vnet_type="response";
MessageBuffer * unblockToNB, network="To", virtual_network="4", vnet_type="unblock";
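// Internal trigger queue, used to signal completion of outstanding atomics
// (see the AtomicDone/AtomicNotDone events below).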
MessageBuffer * triggerQueue;
{
// EVENTS
enumeration(Event, desc="TCC Events") {
// Requests coming from the Cores
RdBlk, desc="RdBlk event";
RdBypassEvict, desc="Bypass L2 on reads. Evict if cache block already allocated";
WrVicBlk, desc="L1 Write Through";
WrVicBlkBack, desc="L1 Write Through (dirty cache)";
WrVicBlkEvict, desc="L1 Write Through (dirty cache) and evict";
Atomic, desc="Atomic Op";
AtomicDone, desc="AtomicOps Complete";
AtomicNotDone, desc="AtomicOps not Complete";
Data, desc="data message";
// Coming from this TCC
L2_Repl, desc="L2 Replacement";
// Probes
PrbInv, desc="Invalidating probe";
// Coming from Memory Controller
WBAck, desc="writethrough ack from memory";
Bypass, desc="Bypass the entire L2 cache";
}
// STATES
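// M and W are only reachable when the cache is configured as writeback (WB);
// V is a clean valid copy; IV, WI, and A are transient states waiting on a
// data fill, a writethrough ack, and atomic return data, respectively.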
state_declaration(State, desc="TCC State", default="TCC_State_I") {
M, AccessPermission:Read_Write, desc="Modified (dirty cache only)";
W, AccessPermission:Read_Write, desc="Written (dirty cache only)";
V, AccessPermission:Read_Only, desc="Valid";
I, AccessPermission:Invalid, desc="Invalid";
IV, AccessPermission:Busy, desc="Waiting for Data";
WI, AccessPermission:Busy, desc="Waiting on Writethrough Ack";
A, AccessPermission:Busy, desc="Invalid, waiting on atomic Data";
}
enumeration(RequestType, desc="To communicate stats from transitions to recordStats") {
DataArrayRead, desc="Read the data array";
DataArrayWrite, desc="Write the data array";
TagArrayRead, desc="Read the tag array";
TagArrayWrite, desc="Write the tag array";
}
// STRUCTURES
structure(Entry, desc="...", interface="AbstractCacheEntry") {
State CacheState, desc="cache state";
bool Dirty, desc="Is the data dirty (diff from memory?)";
DataBlock DataBlk, desc="Data for the block";
WriteMask writeMask, desc="Dirty byte mask";
}
structure(TBE, desc="...") {
State TBEState, desc="Transient state";
DataBlock DataBlk, desc="data for the block";
bool Dirty, desc="Is the data dirty?";
bool Shared, desc="Victim hit by shared probe";
MachineID From, desc="Waiting for writeback from...";
NetDest Destination, desc="Data destination";
int numAtomics, desc="number remaining atomics";
int atomicDoneCnt, desc="number AtomicDones triggered";
bool isGLCSet, desc="Bypass L1 Cache";
bool isSLCSet, desc="Bypass L1 and L2 Cache";
}
structure(TBETable, external="yes") {
TBE lookup(Addr);
void allocate(Addr);
void deallocate(Addr);
bool isPresent(Addr);
}
TBETable TBEs, template="<TCC_TBE>", constructor="m_number_of_TBEs";
void set_cache_entry(AbstractCacheEntry b);
void unset_cache_entry();
void set_tbe(TBE b);
void unset_tbe();
void wakeUpAllBuffers();
void wakeUpBuffers(Addr a);
void wakeUpAllBuffers(Addr a);
MachineID mapAddressToMachine(Addr addr, MachineType mtype);
// FUNCTION DEFINITIONS
Tick clockEdge();
Entry getCacheEntry(Addr addr), return_by_pointer="yes" {
return static_cast(Entry, "pointer", L2cache.lookup(addr));
}
DataBlock getDataBlock(Addr addr), return_by_ref="yes" {
return getCacheEntry(addr).DataBlk;
}
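// A request can be serviced without a replacement if the tag is already
// present or a free way is available in the target set.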
bool presentOrAvail(Addr addr) {
return L2cache.isTagPresent(addr) || L2cache.cacheAvail(addr);
}
State getState(TBE tbe, Entry cache_entry, Addr addr) {
if (is_valid(tbe)) {
return tbe.TBEState;
} else if (is_valid(cache_entry)) {
return cache_entry.CacheState;
}
return State:I;
}
void setState(TBE tbe, Entry cache_entry, Addr addr, State state) {
if (is_valid(tbe)) {
tbe.TBEState := state;
}
if (is_valid(cache_entry)) {
cache_entry.CacheState := state;
}
}
void functionalRead(Addr addr, Packet *pkt) {
TBE tbe := TBEs.lookup(addr);
if(is_valid(tbe)) {
testAndRead(addr, tbe.DataBlk, pkt);
} else {
functionalMemoryRead(pkt);
}
}
int functionalWrite(Addr addr, Packet *pkt) {
int num_functional_writes := 0;
TBE tbe := TBEs.lookup(addr);
if(is_valid(tbe)) {
num_functional_writes := num_functional_writes +
testAndWrite(addr, tbe.DataBlk, pkt);
}
num_functional_writes := num_functional_writes +
functionalMemoryWrite(pkt);
return num_functional_writes;
}
AccessPermission getAccessPermission(Addr addr) {
TBE tbe := TBEs.lookup(addr);
if(is_valid(tbe)) {
return TCC_State_to_permission(tbe.TBEState);
}
Entry cache_entry := getCacheEntry(addr);
if(is_valid(cache_entry)) {
return TCC_State_to_permission(cache_entry.CacheState);
}
return AccessPermission:NotPresent;
}
void setAccessPermission(Entry cache_entry, Addr addr, State state) {
if (is_valid(cache_entry)) {
cache_entry.changePermission(TCC_State_to_permission(state));
}
}
void recordRequestType(RequestType request_type, Addr addr) {
if (request_type == RequestType:DataArrayRead) {
L2cache.recordRequestType(CacheRequestType:DataArrayRead, addr);
} else if (request_type == RequestType:DataArrayWrite) {
L2cache.recordRequestType(CacheRequestType:DataArrayWrite, addr);
} else if (request_type == RequestType:TagArrayRead) {
L2cache.recordRequestType(CacheRequestType:TagArrayRead, addr);
} else if (request_type == RequestType:TagArrayWrite) {
L2cache.recordRequestType(CacheRequestType:TagArrayWrite, addr);
}
}
bool checkResourceAvailable(RequestType request_type, Addr addr) {
if (request_type == RequestType:DataArrayRead) {
return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
} else if (request_type == RequestType:DataArrayWrite) {
return L2cache.checkResourceAvailable(CacheResourceType:DataArray, addr);
} else if (request_type == RequestType:TagArrayRead) {
return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
} else if (request_type == RequestType:TagArrayWrite) {
return L2cache.checkResourceAvailable(CacheResourceType:TagArray, addr);
} else {
error("Invalid RequestType type in checkResourceAvailable");
return true;
}
}
// ** OUT_PORTS **
// Three classes of ports
// Class 1: downward facing network links to NB
out_port(requestToNB_out, CPURequestMsg, requestToNB);
out_port(responseToNB_out, ResponseMsg, responseToNB);
out_port(unblockToNB_out, UnblockMsg, unblockToNB);
// Class 2: upward facing ports to GPU cores
out_port(responseToCore_out, ResponseMsg, responseToCore);
// Class 3: local trigger queue
out_port(triggerQueue_out, TriggerMsg, triggerQueue);
//
// request queue going to NB
//
// ** IN_PORTS **
in_port(triggerQueue_in, TriggerMsg, triggerQueue) {
if (triggerQueue_in.isReady(clockEdge())) {
peek(triggerQueue_in, TriggerMsg) {
TBE tbe := TBEs.lookup(in_msg.addr);
Entry cache_entry := getCacheEntry(in_msg.addr);
// There is a possible race where multiple AtomicDone triggers can be
// sent if another Atomic to the same address is issued after the
// AtomicDone is triggered but before the message arrives here. For
// that case we count the number of AtomicDones in flight for this
// address and only call AtomicDone to deallocate the TBE when it is
// the last in flight message.
if (tbe.numAtomics == 0 && tbe.atomicDoneCnt == 1) {
trigger(Event:AtomicDone, in_msg.addr, cache_entry, tbe);
} else {
trigger(Event:AtomicNotDone, in_msg.addr, cache_entry, tbe);
}
}
}
}
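// Responses from the NB: data fills (NBSysResp), writethrough acks
// (NBSysWBAck), and atomic return data. Responses for SLC requests bypass
// the cache entirely.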
in_port(responseFromNB_in, ResponseMsg, responseFromNB) {
if (responseFromNB_in.isReady(clockEdge())) {
peek(responseFromNB_in, ResponseMsg, block_on="addr") {
TBE tbe := TBEs.lookup(in_msg.addr);
Entry cache_entry := getCacheEntry(in_msg.addr);
bool is_slc_set := false;
if (!is_invalid(tbe)) {
is_slc_set := tbe.isSLCSet;
}
if (is_slc_set) {
// If the SLC bit is set, the response needs to bypass the cache
// and should not be allocated an entry.
trigger(Event:Bypass, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceResponseType:NBSysResp) {
if(presentOrAvail(in_msg.addr)) {
trigger(Event:Data, in_msg.addr, cache_entry, tbe);
} else {
Addr victim := L2cache.cacheProbe(in_msg.addr);
trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
}
} else if (in_msg.Type == CoherenceResponseType:NBSysWBAck) {
trigger(Event:WBAck, in_msg.addr, cache_entry, tbe);
} else {
error("Unexpected Response Message to Core");
}
}
}
}
// Finally handling incoming requests (from TCP) and probes (from NB).
in_port(probeNetwork_in, NBProbeRequestMsg, probeFromNB) {
if (probeNetwork_in.isReady(clockEdge())) {
peek(probeNetwork_in, NBProbeRequestMsg) {
DPRINTF(RubySlicc, "%s\n", in_msg);
Entry cache_entry := getCacheEntry(in_msg.addr);
TBE tbe := TBEs.lookup(in_msg.addr);
trigger(Event:PrbInv, in_msg.addr, cache_entry, tbe);
}
}
}
in_port(coreRequestNetwork_in, CPURequestMsg, requestFromTCP, rank=0) {
if (coreRequestNetwork_in.isReady(clockEdge())) {
peek(coreRequestNetwork_in, CPURequestMsg) {
TBE tbe := TBEs.lookup(in_msg.addr);
Entry cache_entry := getCacheEntry(in_msg.addr);
if (in_msg.Type == CoherenceRequestType:WriteThrough) {
if (in_msg.isSLCSet) {
// If the SLC bit is set, the request should bypass the cache: if a
// cache entry already exists, evict it; otherwise perform a normal
// write-through. The cache entry is allocated only on the response,
// and the bypass is handled there.
if(presentOrAvail(in_msg.addr)) {
trigger(Event:WrVicBlkEvict, in_msg.addr, cache_entry, tbe);
} else {
trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
}
} else if(WB) {
if(presentOrAvail(in_msg.addr)) {
trigger(Event:WrVicBlkBack, in_msg.addr, cache_entry, tbe);
} else {
Addr victim := L2cache.cacheProbe(in_msg.addr);
trigger(Event:L2_Repl, victim, getCacheEntry(victim), TBEs.lookup(victim));
}
} else {
trigger(Event:WrVicBlk, in_msg.addr, cache_entry, tbe);
}
} else if (in_msg.Type == CoherenceRequestType:Atomic) {
// Atomic requests do not currently support GLC/SLC bit handling.
// The assert ensures the SLC bit is not set, so the request does
// not expect to bypass the cache.
assert(!in_msg.isSLCSet);
trigger(Event:Atomic, in_msg.addr, cache_entry, tbe);
} else if (in_msg.Type == CoherenceRequestType:RdBlk) {
if (in_msg.isSLCSet) {
// If the SLC bit is set, the request needs to go directly to memory.
// If a cache block already exists, then evict it.
trigger(Event:RdBypassEvict, in_msg.addr, cache_entry, tbe);
} else {
trigger(Event:RdBlk, in_msg.addr, cache_entry, tbe);
}
} else {
DPRINTF(RubySlicc, "%s\n", in_msg);
error("Unexpected Response Message to Core");
}
}
}
}
// BEGIN ACTIONS
action(i_invL2, "i", desc="invalidate TCC cache block") {
if (is_valid(cache_entry)) {
L2cache.deallocate(address);
}
unset_cache_entry();
}
action(sd_sendData, "sd", desc="send Shared response") {
peek(coreRequestNetwork_in, CPURequestMsg) {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:TDSysResp;
out_msg.Sender := machineID;
out_msg.Destination.add(in_msg.Requestor);
out_msg.DataBlk := cache_entry.DataBlk;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.Dirty := false;
out_msg.State := CoherenceState:Shared;
out_msg.isGLCSet := in_msg.isGLCSet;
out_msg.isSLCSet := in_msg.isSLCSet;
DPRINTF(RubySlicc, "%s\n", out_msg);
}
}
}
action(sdr_sendDataResponse, "sdr", desc="send Shared response") {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:TDSysResp;
out_msg.Sender := machineID;
out_msg.Destination := tbe.Destination;
out_msg.DataBlk := cache_entry.DataBlk;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.Dirty := false;
out_msg.State := CoherenceState:Shared;
DPRINTF(RubySlicc, "%s\n", out_msg);
peek(responseFromNB_in, ResponseMsg) {
out_msg.isGLCSet := tbe.isGLCSet;
out_msg.isSLCSet := tbe.isSLCSet;
}
}
enqueue(unblockToNB_out, UnblockMsg, 1) {
out_msg.addr := address;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.MessageSize := MessageSizeType:Unblock_Control;
peek(responseFromNB_in, ResponseMsg) {
out_msg.isGLCSet := tbe.isGLCSet;
out_msg.isSLCSet := tbe.isSLCSet;
}
DPRINTF(RubySlicc, "%s\n", out_msg);
}
}
action(rb_bypassDone, "rb", desc="bypass L2 on read response") {
peek(responseFromNB_in, ResponseMsg) {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:TDSysResp;
out_msg.Sender := machineID;
out_msg.Destination := tbe.Destination;
out_msg.DataBlk := in_msg.DataBlk;
out_msg.MessageSize := MessageSizeType:Response_Data;
out_msg.Dirty := false;
out_msg.State := CoherenceState:Shared;
out_msg.isGLCSet := tbe.isGLCSet;
out_msg.isSLCSet := tbe.isSLCSet;
DPRINTF(RubySlicc, "%s\n", out_msg);
}
enqueue(unblockToNB_out, UnblockMsg, 1) {
out_msg.addr := address;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.MessageSize := MessageSizeType:Unblock_Control;
DPRINTF(RubySlicc, "%s\n", out_msg);
}
}
}
action(rd_requestData, "r", desc="Miss in L2, pass on") {
if(tbe.Destination.count()==1){
peek(coreRequestNetwork_in, CPURequestMsg) {
enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
out_msg.addr := address;
out_msg.Type := in_msg.Type;
out_msg.Requestor := machineID;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.Shared := false; // unneeded for this request
out_msg.MessageSize := in_msg.MessageSize;
out_msg.isGLCSet := tbe.isGLCSet;
out_msg.isSLCSet := tbe.isSLCSet;
DPRINTF(RubySlicc, "%s\n", out_msg);
}
}
}
}
action(w_sendResponseWBAck, "w", desc="send WB Ack") {
peek(responseFromNB_in, ResponseMsg) {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:TDSysWBAck;
out_msg.Destination.clear();
out_msg.Destination.add(in_msg.WTRequestor);
out_msg.Sender := machineID;
out_msg.MessageSize := MessageSizeType:Writeback_Control;
out_msg.instSeqNum := in_msg.instSeqNum;
}
}
}
action(swb_sendWBAck, "swb", desc="send WB Ack") {
peek(coreRequestNetwork_in, CPURequestMsg) {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:TDSysWBAck;
out_msg.Destination.clear();
out_msg.Destination.add(in_msg.Requestor);
out_msg.Sender := machineID;
out_msg.MessageSize := MessageSizeType:Writeback_Control;
out_msg.instSeqNum := in_msg.instSeqNum;
}
}
}
action(ar_sendAtomicResponse, "ar", desc="send Atomic Ack") {
peek(responseFromNB_in, ResponseMsg) {
enqueue(responseToCore_out, ResponseMsg, l2_response_latency) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:TDSysResp;
out_msg.Destination.add(in_msg.WTRequestor);
out_msg.Sender := machineID;
out_msg.MessageSize := in_msg.MessageSize;
out_msg.DataBlk := in_msg.DataBlk;
out_msg.isGLCSet := tbe.isGLCSet;
out_msg.isSLCSet := tbe.isSLCSet;
}
}
}
action(a_allocateBlock, "a", desc="allocate TCC block") {
if (is_invalid(cache_entry)) {
set_cache_entry(L2cache.allocate(address, new Entry));
cache_entry.writeMask.clear();
}
}
action(p_profileMiss, "pm", desc="Profile cache miss") {
L2cache.profileDemandMiss();
}
action(p_profileHit, "ph", desc="Profile cache hit") {
L2cache.profileDemandHit();
}
action(t_allocateTBE, "t", desc="allocate TBE Entry") {
if (is_invalid(tbe)) {
check_allocate(TBEs);
TBEs.allocate(address);
set_tbe(TBEs.lookup(address));
tbe.Destination.clear();
tbe.numAtomics := 0;
tbe.atomicDoneCnt := 0;
}
if (coreRequestNetwork_in.isReady(clockEdge())) {
peek(coreRequestNetwork_in, CPURequestMsg) {
if(in_msg.Type == CoherenceRequestType:RdBlk || in_msg.Type == CoherenceRequestType:Atomic){
tbe.Destination.add(in_msg.Requestor);
}
tbe.isGLCSet := in_msg.isGLCSet;
tbe.isSLCSet := in_msg.isSLCSet;
}
}
}
action(dt_deallocateTBE, "dt", desc="Deallocate TBE entry") {
tbe.Destination.clear();
TBEs.deallocate(address);
unset_tbe();
}
action(wcb_writeCacheBlock, "wcb", desc="write data to TCC") {
peek(responseFromNB_in, ResponseMsg) {
cache_entry.DataBlk := in_msg.DataBlk;
DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
}
}
action(wdb_writeDirtyBytes, "wdb", desc="write dirty bytes to TCC") {
peek(coreRequestNetwork_in, CPURequestMsg) {
cache_entry.DataBlk.copyPartial(in_msg.DataBlk,in_msg.writeMask);
cache_entry.writeMask.orMask(in_msg.writeMask);
DPRINTF(RubySlicc, "Writing to TCC: %s\n", in_msg);
}
}
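// wt_writeThrough forwards the dirty bytes of the incoming core request to
// the NB, whereas wb_writeBack (below) writes back the locally cached dirty
// block using the entry's accumulated write mask.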
action(wt_writeThrough, "wt", desc="write through data to memory") {
peek(coreRequestNetwork_in, CPURequestMsg) {
enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
out_msg.addr := address;
out_msg.Requestor := machineID;
out_msg.WTRequestor := in_msg.Requestor;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.MessageSize := MessageSizeType:Data;
out_msg.Type := CoherenceRequestType:WriteThrough;
out_msg.Dirty := true;
out_msg.DataBlk := in_msg.DataBlk;
out_msg.writeMask.orMask(in_msg.writeMask);
out_msg.instSeqNum := in_msg.instSeqNum;
}
}
}
action(wb_writeBack, "wb", desc="write back data") {
enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
out_msg.addr := address;
out_msg.Requestor := machineID;
out_msg.WTRequestor := machineID;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.MessageSize := MessageSizeType:Data;
out_msg.Type := CoherenceRequestType:WriteThrough;
out_msg.Dirty := true;
out_msg.DataBlk := cache_entry.DataBlk;
out_msg.writeMask.orMask(cache_entry.writeMask);
}
}
action(at_atomicThrough, "at", desc="send atomic request through to memory") {
peek(coreRequestNetwork_in, CPURequestMsg) {
enqueue(requestToNB_out, CPURequestMsg, l2_request_latency) {
out_msg.addr := address;
out_msg.Requestor := machineID;
out_msg.WTRequestor := in_msg.Requestor;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.MessageSize := MessageSizeType:Data;
out_msg.Type := CoherenceRequestType:Atomic;
out_msg.Dirty := true;
out_msg.writeMask.orMask(in_msg.writeMask);
}
}
}
action(pi_sendProbeResponseInv, "pi", desc="send probe ack inv, no data") {
enqueue(responseToNB_out, ResponseMsg, 1) {
out_msg.addr := address;
out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes
out_msg.Sender := machineID;
out_msg.Destination.add(mapAddressToMachine(address, MachineType:Directory));
out_msg.Dirty := false;
out_msg.Hit := false;
out_msg.Ntsl := true;
out_msg.State := CoherenceState:NA;
out_msg.MessageSize := MessageSizeType:Response_Control;
}
}
action(ut_updateTag, "ut", desc="update Tag (i.e. set MRU)") {
L2cache.setMRU(address);
}
action(p_popRequestQueue, "p", desc="pop request queue") {
coreRequestNetwork_in.dequeue(clockEdge());
}
action(pr_popResponseQueue, "pr", desc="pop response queue") {
responseFromNB_in.dequeue(clockEdge());
}
action(pp_popProbeQueue, "pp", desc="pop probe queue") {
probeNetwork_in.dequeue(clockEdge());
}
action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") {
stall_and_wait(coreRequestNetwork_in, address);
}
action(wada_wakeUpAllDependentsAddr, "wada", desc="Wake up any requests waiting for this address") {
wakeUpAllBuffers(address);
}
action(z_stall, "z", desc="stall") {
// built-in
}
action(ina_incrementNumAtomics, "ina", desc="inc num atomics") {
tbe.numAtomics := tbe.numAtomics + 1;
}
action(dna_decrementNumAtomics, "dna", desc="dec num atomics") {
tbe.numAtomics := tbe.numAtomics - 1;
if (tbe.numAtomics==0) {
enqueue(triggerQueue_out, TriggerMsg, 1) {
tbe.atomicDoneCnt := tbe.atomicDoneCnt + 1;
out_msg.addr := address;
out_msg.Type := TriggerType:AtomicDone;
peek(responseFromNB_in, ResponseMsg) {
out_msg.isGLCSet := tbe.isGLCSet;
out_msg.isSLCSet := tbe.isSLCSet;
}
}
}
}
action(dadc_decrementAtomicDoneCnt, "dadc", desc="decrement atomic done count") {
tbe.atomicDoneCnt := tbe.atomicDoneCnt - 1;
}
action(ptr_popTriggerQueue, "ptr", desc="pop Trigger") {
triggerQueue_in.dequeue(clockEdge());
}
// END ACTIONS
// BEGIN TRANSITIONS
// transitions from base
// Assumptions for ArrayRead/Write
// TBE checked before tags
// Data Read/Write requires Tag Read
// Stalling transitions do NOT check the tag array...and if they do,
// they can cause a resource stall deadlock!
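// Example flow (a sketch inferred from the transitions below): a read miss
// takes I --RdBlk--> IV, the request is forwarded to the NB via
// rd_requestData, and the returning NBSysResp data takes IV --Data--> V,
// allocating the block and responding to the requesting TCP/SQC.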
transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
// by putting the stalled requests in a buffer, we reduce resource contention
// since they won't try again every cycle and will instead only try again once
// woken up
st_stallAndWaitRequest;
}
transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) { //TagArrayRead} {
// by putting the stalled requests in a buffer, we reduce resource contention
// since they won't try again every cycle and will instead only try again once
// woken up
st_stallAndWaitRequest;
}
transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} {
// by putting the stalled requests in a buffer, we reduce resource contention
// since they won't try again every cycle and will instead only try again once
// woken up
st_stallAndWaitRequest;
}
transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} {
p_profileHit;
sd_sendData;
ut_updateTag;
p_popRequestQueue;
}
transition(W, RdBlk, WI) {TagArrayRead, DataArrayRead} {
p_profileHit;
t_allocateTBE;
wb_writeBack;
p_popRequestQueue;
}
transition(I, RdBlk, IV) {TagArrayRead} {
p_profileMiss;
t_allocateTBE;
rd_requestData;
p_popRequestQueue;
}
transition(IV, RdBlk) {
p_profileMiss;
t_allocateTBE;
rd_requestData;
p_popRequestQueue;
}
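// SLC (bypass) reads: the request is forwarded to memory without allocating
// a block; the response is handled by the Bypass event (see
// transition(I, Bypass, I) below), so no entry is ever installed.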
transition(I, RdBypassEvict) {TagArrayRead} {
p_profileMiss;
t_allocateTBE;
rd_requestData;
p_popRequestQueue;
}
// Transition to be called when a read request with SLC flag set arrives at
// entry in state W. It evicts and invalidates the cache entry before
// forwarding the request to global memory
transition(W, RdBypassEvict, I) {TagArrayRead} {
p_profileMiss;
t_allocateTBE;
wb_writeBack;
i_invL2;
rd_requestData;
p_popRequestQueue;
}
// Transition to be called when a read request with SLC flag set arrives at
// entry in state M. It evicts and invalidates the cache entry before
// forwarding the request to global memory.
transition(M, RdBypassEvict, I) {TagArrayRead} {
p_profileMiss;
t_allocateTBE;
wb_writeBack;
i_invL2;
rd_requestData;
p_popRequestQueue;
}
// Transition to be called when a read request with SLC flag set arrives at
// entry in state V. It invalidates the cache entry before forwarding the
// request to global memory.
transition(V, RdBypassEvict, I) {TagArrayRead} {
p_profileMiss;
t_allocateTBE;
i_invL2;
rd_requestData;
p_popRequestQueue;
}
// Transition to be called when a read request with SLC flag arrives at entry
// in transient state. The request stalls until the pending transition is complete.
transition({WI, IV}, RdBypassEvict) {
st_stallAndWaitRequest;
}
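// Atomics are always performed at memory: any locally valid copy is
// invalidated, the request is sent through via at_atomicThrough, and state A
// collects the returned data until all outstanding atomics complete.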
transition(V, Atomic, A) {TagArrayRead} {
p_profileHit;
i_invL2;
t_allocateTBE;
at_atomicThrough;
ina_incrementNumAtomics;
p_popRequestQueue;
}
transition(I, Atomic, A) {TagArrayRead} {
p_profileMiss;
i_invL2;
t_allocateTBE;
at_atomicThrough;
ina_incrementNumAtomics;
p_popRequestQueue;
}
transition(A, Atomic) {
p_profileMiss;
// by putting the stalled requests in a buffer, we reduce resource contention
// since they won't try again every cycle and will instead only try again once
// woken up
st_stallAndWaitRequest;
}
transition({M, W}, Atomic, WI) {TagArrayRead} {
p_profileHit;
t_allocateTBE;
wb_writeBack;
p_popRequestQueue;
}
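// Write-throughs: WrVicBlk forwards the data straight to the NB (and also
// updates a hitting block), while WrVicBlkBack (writeback mode) installs the
// dirty bytes locally and only acks the core.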
transition(I, WrVicBlk) {TagArrayRead} {
p_profileMiss;
wt_writeThrough;
p_popRequestQueue;
}
transition(V, WrVicBlk) {TagArrayRead, DataArrayWrite} {
p_profileHit;
ut_updateTag;
wdb_writeDirtyBytes;
wt_writeThrough;
p_popRequestQueue;
}
transition({V, M}, WrVicBlkBack, M) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
p_profileHit;
ut_updateTag;
swb_sendWBAck;
wdb_writeDirtyBytes;
p_popRequestQueue;
}
transition(W, WrVicBlkBack) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
p_profileHit;
ut_updateTag;
swb_sendWBAck;
wdb_writeDirtyBytes;
p_popRequestQueue;
}
transition(I, WrVicBlkBack, W) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
p_profileMiss;
a_allocateBlock;
ut_updateTag;
swb_sendWBAck;
wdb_writeDirtyBytes;
p_popRequestQueue;
}
// Transition to be called when a write request with SLC bit set arrives at an
// entry with state V. The entry has to be evicted and invalidated before the
// request is forwarded to global memory
transition(V, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
p_profileMiss;
ut_updateTag;
t_allocateTBE;
wt_writeThrough;
i_invL2;
p_popRequestQueue;
}
// Transition to be called when a write request with SLC bit set arrives at an
// entry with state W. The entry has to be evicted and invalidated before the
// request is forwarded to global memory.
transition(W, WrVicBlkEvict, I) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
p_profileMiss;
ut_updateTag;
wdb_writeDirtyBytes;
t_allocateTBE;
wb_writeBack;
i_invL2;
p_popRequestQueue;
}
transition({W, M}, L2_Repl, WI) {TagArrayRead, DataArrayRead} {
t_allocateTBE;
wb_writeBack;
i_invL2;
}
transition({I, V}, L2_Repl, I) {TagArrayRead, TagArrayWrite} {
i_invL2;
}
transition({A, IV, WI}, L2_Repl) {
i_invL2;
}
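// Probes: invalidating probes are acked without data. In writeback mode a
// probed M block moves to W so the dirty data is retained locally until it
// is eventually written back.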
transition({I, V}, PrbInv, I) {TagArrayRead, TagArrayWrite} {
pi_sendProbeResponseInv;
pp_popProbeQueue;
}
transition(M, PrbInv, W) {TagArrayRead, TagArrayWrite} {
pi_sendProbeResponseInv;
pp_popProbeQueue;
}
transition(W, PrbInv) {TagArrayRead} {
pi_sendProbeResponseInv;
pp_popProbeQueue;
}
transition({A, IV, WI}, PrbInv) {
pi_sendProbeResponseInv;
pp_popProbeQueue;
}
// Transition to be called when the response for a request with SLC bit set
// arrives. The request has to be forwarded to the core that needs it while
// making sure no entry is allocated.
transition(I, Bypass, I) {
rb_bypassDone;
pr_popResponseQueue;
wada_wakeUpAllDependentsAddr;
dt_deallocateTBE;
}
transition(IV, Data, V) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
a_allocateBlock;
ut_updateTag;
wcb_writeCacheBlock;
sdr_sendDataResponse;
pr_popResponseQueue;
wada_wakeUpAllDependentsAddr;
dt_deallocateTBE;
}
transition(A, Data) {TagArrayRead, TagArrayWrite, DataArrayWrite} {
a_allocateBlock;
ar_sendAtomicResponse;
dna_decrementNumAtomics;
pr_popResponseQueue;
}
transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} {
dt_deallocateTBE;
wada_wakeUpAllDependentsAddr;
ptr_popTriggerQueue;
}
transition(A, AtomicNotDone) {TagArrayRead} {
dadc_decrementAtomicDoneCnt;
ptr_popTriggerQueue;
}
// M, W should not see WBAck as the cache is in WB mode
// WBAcks do not need to check tags
transition({I, V, IV, A}, WBAck) {
w_sendResponseWBAck;
pr_popResponseQueue;
}
transition(WI, WBAck, I) {
dt_deallocateTBE;
wada_wakeUpAllDependentsAddr;
pr_popResponseQueue;
}
}