blob: 7ffc12a41c73f801c19beb216a5b2db10335cb9e [file] [log] [blame]
// See LICENSE for license details.
//**************************************************************************
// Quicksort benchmark
//--------------------------------------------------------------------------
//
// This benchmark uses quicksort to sort an array of integers. The
// implementation is largely adapted from Numerical Recipes for C. The
// input data (and reference data) should be generated using the
// qsort_gendata.pl perl script and dumped to a file named
// dataset1.h
#include "util.h"
#include <string.h>
#include <limits.h>
//--------------------------------------------------------------------------
// Input/Reference Data
#define type unsigned int
#include "dataset1.h"
#define LOG_BASE 8
#define BASE (1 << LOG_BASE)
#if 0
# define fetch_add(ptr, inc) __sync_fetch_and_add(ptr, inc)
#else
# define fetch_add(ptr, inc) ((*(ptr) += (inc)) - (inc))
#endif
void sort(size_t n, type* arrIn, type* scratchIn)
{
size_t log_exp = 0;
size_t buckets[BASE];
size_t *bucket = buckets;
asm("":"+r"(bucket));
type *arr = arrIn, *scratch = scratchIn, *p;
size_t *b;
while (log_exp < CHAR_BIT * sizeof(type))
{
for (b = bucket; b < bucket + BASE; b++)
*b = 0;
for (p = arr; p < &arr[n-3]; p += 4)
{
type a0 = p[0];
type a1 = p[1];
type a2 = p[2];
type a3 = p[3];
fetch_add(&bucket[(a0 >> log_exp) % BASE], 1);
fetch_add(&bucket[(a1 >> log_exp) % BASE], 1);
fetch_add(&bucket[(a2 >> log_exp) % BASE], 1);
fetch_add(&bucket[(a3 >> log_exp) % BASE], 1);
}
for ( ; p < &arr[n]; p++)
bucket[(*p >> log_exp) % BASE]++;
size_t prev = bucket[0];
prev += fetch_add(&bucket[1], prev);
for (b = &bucket[2]; b < bucket + BASE; b += 2)
{
prev += fetch_add(&b[0], prev);
prev += fetch_add(&b[1], prev);
}
static_assert(BASE % 2 == 0);
for (p = &arr[n-1]; p >= &arr[3]; p -= 4)
{
type a0 = p[-0];
type a1 = p[-1];
type a2 = p[-2];
type a3 = p[-3];
size_t* pb0 = &bucket[(a0 >> log_exp) % BASE];
size_t* pb1 = &bucket[(a1 >> log_exp) % BASE];
size_t* pb2 = &bucket[(a2 >> log_exp) % BASE];
size_t* pb3 = &bucket[(a3 >> log_exp) % BASE];
type* s0 = scratch + fetch_add(pb0, -1);
type* s1 = scratch + fetch_add(pb1, -1);
type* s2 = scratch + fetch_add(pb2, -1);
type* s3 = scratch + fetch_add(pb3, -1);
s0[-1] = a0;
s1[-1] = a1;
s2[-1] = a2;
s3[-1] = a3;
}
for ( ; p >= &arr[0]; p--)
scratch[--bucket[(*p >> log_exp) % BASE]] = *p;
type* tmp = arr;
arr = scratch;
scratch = tmp;
log_exp += LOG_BASE;
}
if (arr != arrIn)
memcpy(arr, scratch, n*sizeof(type));
}
//--------------------------------------------------------------------------
// Main
int main( int argc, char* argv[] )
{
static type scratch[DATA_SIZE];
#if PREALLOCATE
// If needed we preallocate everything in the caches
sort(DATA_SIZE, verify_data, scratch);
if (verify(DATA_SIZE, input_data, input_data))
return 1;
#endif
// Do the sort
setStats(1);
sort(DATA_SIZE, input_data, scratch);
setStats(0);
// Check the results
return verify( DATA_SIZE, input_data, verify_data );
}