/*
* Copyright (c) 2018 Inria
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Daniel Carvalho
*/
/** @file
* Implementation of the BDI cache compressor.
*/
#include "mem/cache/compressors/bdi.hh"
#include <algorithm>
#include <climits>
#include <cstring>
#include <type_traits>
#include "debug/CacheComp.hh"
#include "params/BDI.hh"
// Number of bytes in a qword
#define BYTES_PER_QWORD 8
// Declare BDI encoding names
const char* BDI::ENCODING_NAMES[] =
{"Zero", "Repeated_Values", "Base8_1", "Base8_2", "Base8_4", "Base4_1",
"Base4_2", "Base2_1", "Uncompressed"};
BDI::BDICompData::BDICompData(const uint8_t encoding)
: CompressionData(), _encoding(encoding)
{
}
uint8_t
BDI::BDICompData::getEncoding() const
{
return _encoding;
}
std::string
BDI::BDICompData::getName() const
{
return ENCODING_NAMES[_encoding];
}
BDI::BDICompDataZeros::BDICompDataZeros()
: BDICompData(ZERO)
{
// Calculate compressed size
calculateCompressedSize();
}
uint64_t
BDI::BDICompDataZeros::access(const int index) const
{
return 0;
}
void
BDI::BDICompDataZeros::calculateCompressedSize()
{
// Number of bits used by Encoding
std::size_t size = encodingBits;
setSizeBits(size);
}
BDI::BDICompDataRep::BDICompDataRep(const uint64_t rep_value)
: BDICompData(REP_VALUES)
{
// Set base value
base = rep_value;
// Calculate compressed size
calculateCompressedSize();
}
uint64_t
BDI::BDICompDataRep::access(const int index) const
{
return base;
}
void
BDI::BDICompDataRep::calculateCompressedSize()
{
// Number of bits used by Encoding
std::size_t size = encodingBits;
// Number of bits used by Base
size += sizeof(base)*CHAR_BIT;
setSizeBits(size);
}
BDI::BDICompDataUncompressed::BDICompDataUncompressed(
const uint64_t* data, const std::size_t blk_size)
: BDICompData(UNCOMPRESSED), blkSize(blk_size),
_data(data, data + blk_size/BYTES_PER_QWORD)
{
// Calculate compressed size
calculateCompressedSize();
}
uint64_t
BDI::BDICompDataUncompressed::access(const int index) const
{
return _data[index];
}
void
BDI::BDICompDataUncompressed::calculateCompressedSize()
{
// Number of bits used by Encoding
std::size_t size = encodingBits;
// Number of bits used by uncompressed line
size += blkSize*CHAR_BIT;
setSizeBits(size);
}
template <class TB, class TD>
BDI::BDICompDataBaseDelta<TB, TD>::BDICompDataBaseDelta(const uint8_t encoding,
const std::size_t blk_size, const std::size_t max_num_bases)
: BDICompData(encoding), maxNumBases(max_num_bases)
{
// Reserve the maximum possible size for the vectors
bases.reserve(maxNumBases);
bitMask.reserve(blk_size/sizeof(TD));
deltas.reserve(blk_size/sizeof(TD));
// Push virtual base 0 to bases list
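// (Clarifying note: the implicit zero base lets small absolute values be
// encoded as deltas without consuming the explicit base)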
bases.push_back(0);
}
template <class TB, class TD>
void
BDI::BDICompDataBaseDelta<TB, TD>::calculateCompressedSize()
{
// Number of bits used by Encoding
std::size_t size = encodingBits;
// Number of bits used by BitMask
size += bitMask.size()*std::ceil(std::log2(maxNumBases));
// Number of bits used by Bases. bases[0] is implicit in a hardware
// implementation, therefore its size is 0
size += (maxNumBases-1)*sizeof(TB)*CHAR_BIT;
// Number of bits used by Deltas
size += deltas.size()*sizeof(TD)*CHAR_BIT;
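// Worked example (illustrative, assuming the default of two bases): for
// a 64-byte line compressed as Base8_1 (TB = uint64_t, TD = int8_t) this
// is encodingBits + 8*1 (bit mask) + 1*8*8 (explicit base) + 8*8 (deltas)
// = encodingBits + 136 bits, versus encodingBits + 512 bits for the
// uncompressed line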
CompressionData::setSizeBits(size);
}
template <class TB, class TD>
bool
BDI::BDICompDataBaseDelta<TB, TD>::addBase(const TB base)
{
// Can't add base if reached limit of number of bases
if (bases.size() >= maxNumBases) {
return false;
}
// Push new base to end of bases list
bases.push_back(base);
// The new delta is 0, as it is the difference between the new base and itself
addDelta(bases.size() - 1, 0);
return true;
}
template <class TB, class TD>
void
BDI::BDICompDataBaseDelta<TB, TD>::addDelta(const std::size_t base_index,
const TD delta)
{
// Insert new delta with respect to the given base
bitMask.push_back(base_index);
// Insert new delta
deltas.push_back(delta);
}
template <class TB, class TD>
bool
BDI::BDICompDataBaseDelta<TB, TD>::compress(const uint64_t* data,
const std::size_t blk_size)
{
// Parse through data in a sizeof(TB) granularity
for (std::size_t byte_start = 0; byte_start < blk_size;
byte_start += sizeof(TB))
{
// Get current value
TB curValue;
std::memcpy(&curValue, ((uint8_t*)data) + byte_start,
sizeof(TB));
// Iterate through all bases to search for a valid delta
bool foundDelta = false;
for (int base_index = 0; base_index < bases.size(); base_index++) {
// Calculate delta relative to currently parsed base
typename std::make_signed<TB>::type delta = curValue -
bases[base_index];
// Check if the delta is within the limits of the delta size. If
// that is the case, add delta to compressed data and keep parsing
// the input data
typename std::make_signed<TB>::type limit =
ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TD))*CHAR_BIT+1);
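// For illustration (added example): with TD = int8_t the shift amount is
// (8 - 1) * 8 + 1 = 57, so limit = ULLONG_MAX >> 57 = 127 and only
// deltas in [-127, 127] are accepted; with TD = int16_t the limit is
// 32767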
if ((delta >= -limit) && (delta <= limit)) {
addDelta(base_index, delta);
foundDelta = true;
break;
}
}
// If no existing base can accommodate this value, add it as a new base
// and insert its respective delta of 0. If the new base can't be added,
// the base limit has been reached, so the line is not compressible with
// the given encoding
if (!foundDelta && !addBase(curValue)) {
return false;
}
}
// Calculate compressed size
calculateCompressedSize();
return true;
}
template <class TB, class TD>
uint64_t
BDI::BDICompDataBaseDelta<TB, TD>::access(const int index) const
{
// We decompress all base-delta pairs that form the 64-bit entry
// corresponding to the given 64-bit-array index
uint64_t value = 0;
// Get relationship between the size of an uint64_t base and size of TB
const std::size_t size_diff = sizeof(uint64_t)/sizeof(TB);
// Mask for a base entry
const uint64_t mask = ULLONG_MAX>>((BYTES_PER_QWORD-sizeof(TB))*CHAR_BIT);
// Size, in bits, of a base entry. Can't be const because the compiler
// would fold it into a 64-bit constant, which would generate a
// shift-size compilation error
int base_size_bits = sizeof(TB)*CHAR_BIT;
// Concatenate all base-delta entries until they form a 64-bit entry
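// For example (illustrative): with Base2_1 (TB = uint16_t), size_diff is
// 4, so deltas[4*index] through deltas[4*index + 3] are reconstructed,
// and the entry at 4*index + 3 ends up in the most significant 16 bits
// of the returned qword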
for (int delta_index = size_diff * (index + 1) - 1;
delta_index >= (int)(size_diff * index); delta_index--) {
// Get base and delta entries corresponding to the current delta
assert(delta_index < deltas.size());
const TD delta = deltas[delta_index];
const int base_index = bitMask[delta_index];
assert(base_index < bases.size());
const TB base = bases[base_index];
// Concatenate decompressed value
value <<= base_size_bits;
value |= static_cast<uint64_t>((base + delta) & mask);
}
return value;
}
BDI::BDI(const Params *p)
: BaseCacheCompressor(p), useMoreCompressors(p->use_more_compressors),
qwordsPerCacheLine(blkSize/BYTES_PER_QWORD)
{
static_assert(sizeof(ENCODING_NAMES)/sizeof(char*) == NUM_ENCODINGS,
"Number of encodings doesn't match the number of names");
}
bool
BDI::isZeroPackable(const uint64_t* data) const
{
return std::all_of(data, data + qwordsPerCacheLine,
[](const uint64_t entry){ return entry == 0; });
}
bool
BDI::isSameValuePackable(const uint64_t* data) const
{
// We don't want to copy the whole array to the lambda expression
const uint64_t rep_value = data[0];
return std::all_of(data, data + qwordsPerCacheLine,
[rep_value](const uint64_t entry)
{return entry == rep_value;});
}
template <class TB, class TD>
std::unique_ptr<BDI::BDICompData>
BDI::tryCompress(const uint64_t* data, const uint8_t encoding) const
{
// Instantiate compressor
auto temp_data = std::unique_ptr<BDICompDataBaseDelta<TB, TD>>(
new BDICompDataBaseDelta<TB, TD>(encoding, blkSize));
// Try compressing. Return nullptr if compressor can't compress given line
if (temp_data->compress(data, blkSize)) {
return std::move(temp_data);
} else {
return std::unique_ptr<BDICompData>{};
}
}
void
BDI::decompress(const BaseCacheCompressor::CompressionData* comp_data,
uint64_t* data)
{
// Decompress and go back to host endianness
for (std::size_t i = 0; i < qwordsPerCacheLine; i++)
data[i] = static_cast<const BDICompData*>(comp_data)->access(i);
}
std::unique_ptr<BaseCacheCompressor::CompressionData>
BDI::compress(const uint64_t* data, Cycles& comp_lat, Cycles& decomp_lat)
{
std::unique_ptr<BDICompData> bdi_data;
// Check if it is a zero line
if (isZeroPackable(data)) {
bdi_data = std::unique_ptr<BDICompData>(new BDICompDataZeros());
// Set compression latency (compare 1 qword per cycle)
comp_lat = Cycles(qwordsPerCacheLine);
// Check if all values in the line are the same
} else if (isSameValuePackable(data)) {
bdi_data = std::unique_ptr<BDICompData>(new BDICompDataRep(data[0]));
// Set compression latency (compare 1 qword per cycle)
comp_lat = Cycles(qwordsPerCacheLine);
} else {
// Initialize compressed data as an uncompressed instance
bdi_data = std::unique_ptr<BDICompData>(
new BDICompDataUncompressed(data, blkSize));
// Base-size to delta-size ratio. Used to speed up the run by skipping
// combinations whose size would be worse than the current best. It
// does not reflect the compression ratio, as it does not take the
// metadata into account.
int base_delta_ratio = 2;
// Check which base-delta size combination is the best. This is
// optimized by giving priority to trying the compressor that would
// generate the smallest compression size. This way we waste less
// simulation time by not doing all possible combinations
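// (Added note: under this ordering, ratio 8 tries Base8_1; ratio 4
// tries Base8_2 and Base4_1; ratio 2 tries Base8_4, Base4_2 and
// Base2_1, subject to the useMoreCompressors filter below)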
for (int ratio = 8; ratio >= base_delta_ratio; ratio/=2) {
for (int base_size = 8; base_size >= ratio; base_size/=2) {
// If using more compressors, try every delta size from 1 byte up
// to half the base size; otherwise only try the largest possible
// delta size (half the base size). Instantiating only one delta
// size per base size uses less area and energy, at the cost of
// lower compression efficiency
const int delta_size = base_size/ratio;
if (!useMoreCompressors && delta_size != base_size/2) {
continue;
}
std::unique_ptr<BDICompData> temp_bdi_data;
// Get the compression result for the current combination
if ((base_size == 8)&&(delta_size == 4)) {
temp_bdi_data = tryCompress<uint64_t, int32_t>(data,
BASE8_4);
} else if ((base_size == 8)&&(delta_size == 2)) {
temp_bdi_data = tryCompress<uint64_t, int16_t>(data,
BASE8_2);
} else if ((base_size == 8)&&(delta_size == 1)) {
temp_bdi_data = tryCompress<uint64_t, int8_t>(data,
BASE8_1);
} else if ((base_size == 4)&&(delta_size == 2)) {
temp_bdi_data = tryCompress<uint32_t, int16_t>(data,
BASE4_2);
} else if ((base_size == 4)&&(delta_size == 1)) {
temp_bdi_data = tryCompress<uint32_t, int8_t>(data,
BASE4_1);
} else if ((base_size == 2)&&(delta_size == 1)) {
temp_bdi_data = tryCompress<uint16_t, int8_t>(data,
BASE2_1);
} else {
fatal("Invalid combination of base and delta sizes.");
}
// If compressor was successful, check if new compression
// improves the compression factor
if (temp_bdi_data &&
(bdi_data->getSizeBits() > temp_bdi_data->getSizeBits()))
{
bdi_data = std::move(temp_bdi_data);
base_delta_ratio = ratio;
}
// Clear temp pointer
temp_bdi_data.reset(nullptr);
}
}
// Set compression latency. A successful compressor will stop all
// other compressors when it knows no other will generate a better
// compression. However, this requires either hard-coding, or a complex
// function that can calculate the exact size of every compressor for
// every cache line size. We settle for a conservative
// optimization: assume that all compressors with a given base-size to
// delta-size ratio (no-metadata ratio) must wait for each other.
// In the worst case scenario the data is left uncompressed, so
// it loses the time of the worst base size (2 bytes per cycle)
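// (Added example: for a 64-byte line that remains uncompressed,
// base_delta_ratio stays at 2, giving comp_lat = Cycles(64/2) = 32,
// before the extra packing cycle added below)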
comp_lat = Cycles(blkSize/base_delta_ratio);
}
// Update stats
encodingStats[bdi_data->getEncoding()]++;
// Pack compression results (1 extra cycle)
comp_lat += Cycles(1);
// Set decompression latency (latency of an adder)
decomp_lat = Cycles(1);
// Print debug information
DPRINTF(CacheComp, "BDI: Compressed cache line to encoding %s (%d bits)\n",
bdi_data->getName(), bdi_data->getSizeBits());
return std::move(bdi_data);
}
void
BDI::regStats()
{
BaseCacheCompressor::regStats();
// We store the frequency of each encoding
encodingStats
.init(NUM_ENCODINGS)
.name(name() + ".encoding")
.desc("Number of data entries that were compressed to this encoding.")
;
for (unsigned i = 0; i < NUM_ENCODINGS; ++i) {
encodingStats.subname(i, ENCODING_NAMES[i]);
encodingStats.subdesc(i, "Number of data entries that match " \
"encoding " + std::string(ENCODING_NAMES[i]));
}
}
BDI*
BDIParams::create()
{
return new BDI(this);
}
#undef BYTES_PER_QWORD