| // See LICENSE for license details. |
| |
| //************************************************************************** |
| // Quicksort benchmark |
| //-------------------------------------------------------------------------- |
| // |
| // This benchmark uses quicksort to sort an array of integers. The |
| // implementation is largely adapted from Numerical Recipes for C. The |
| // input data (and reference data) should be generated using the |
| // qsort_gendata.pl perl script and dumped to a file named |
| // dataset1.h |
| |
| #include "util.h" |
| #include <string.h> |
| #include <limits.h> |
| |
| //-------------------------------------------------------------------------- |
| // Input/Reference Data |
| |
| #define type unsigned int |
| #include "dataset1.h" |
| |
| #define LOG_BASE 8 |
| #define BASE (1 << LOG_BASE) |
| |
| #if 0 |
| # define fetch_add(ptr, inc) __sync_fetch_and_add(ptr, inc) |
| #else |
| # define fetch_add(ptr, inc) ((*(ptr) += (inc)) - (inc)) |
| #endif |
| |
| void sort(size_t n, type* arrIn, type* scratchIn) |
| { |
| size_t log_exp = 0; |
| size_t buckets[BASE]; |
| size_t *bucket = buckets; |
| asm("":"+r"(bucket)); |
| type *arr = arrIn, *scratch = scratchIn, *p; |
| size_t *b; |
| |
| while (log_exp < CHAR_BIT * sizeof(type)) |
| { |
| for (b = bucket; b < bucket + BASE; b++) |
| *b = 0; |
| |
| for (p = arr; p < &arr[n-3]; p += 4) |
| { |
| type a0 = p[0]; |
| type a1 = p[1]; |
| type a2 = p[2]; |
| type a3 = p[3]; |
| fetch_add(&bucket[(a0 >> log_exp) % BASE], 1); |
| fetch_add(&bucket[(a1 >> log_exp) % BASE], 1); |
| fetch_add(&bucket[(a2 >> log_exp) % BASE], 1); |
| fetch_add(&bucket[(a3 >> log_exp) % BASE], 1); |
| } |
| for ( ; p < &arr[n]; p++) |
| bucket[(*p >> log_exp) % BASE]++; |
| |
| size_t prev = bucket[0]; |
| prev += fetch_add(&bucket[1], prev); |
| for (b = &bucket[2]; b < bucket + BASE; b += 2) |
| { |
| prev += fetch_add(&b[0], prev); |
| prev += fetch_add(&b[1], prev); |
| } |
| static_assert(BASE % 2 == 0); |
| |
| for (p = &arr[n-1]; p >= &arr[3]; p -= 4) |
| { |
| type a0 = p[-0]; |
| type a1 = p[-1]; |
| type a2 = p[-2]; |
| type a3 = p[-3]; |
| size_t* pb0 = &bucket[(a0 >> log_exp) % BASE]; |
| size_t* pb1 = &bucket[(a1 >> log_exp) % BASE]; |
| size_t* pb2 = &bucket[(a2 >> log_exp) % BASE]; |
| size_t* pb3 = &bucket[(a3 >> log_exp) % BASE]; |
| type* s0 = scratch + fetch_add(pb0, -1); |
| type* s1 = scratch + fetch_add(pb1, -1); |
| type* s2 = scratch + fetch_add(pb2, -1); |
| type* s3 = scratch + fetch_add(pb3, -1); |
| s0[-1] = a0; |
| s1[-1] = a1; |
| s2[-1] = a2; |
| s3[-1] = a3; |
| } |
| for ( ; p >= &arr[0]; p--) |
| scratch[--bucket[(*p >> log_exp) % BASE]] = *p; |
| |
| type* tmp = arr; |
| arr = scratch; |
| scratch = tmp; |
| |
| log_exp += LOG_BASE; |
| } |
| if (arr != arrIn) |
| memcpy(arr, scratch, n*sizeof(type)); |
| } |
| |
| //-------------------------------------------------------------------------- |
| // Main |
| |
| int main( int argc, char* argv[] ) |
| { |
| static type scratch[DATA_SIZE]; |
| |
| #if PREALLOCATE |
| // If needed we preallocate everything in the caches |
| sort(DATA_SIZE, verify_data, scratch); |
| if (verify(DATA_SIZE, input_data, input_data)) |
| return 1; |
| #endif |
| |
| // Do the sort |
| setStats(1); |
| sort(DATA_SIZE, input_data, scratch); |
| setStats(0); |
| |
| // Check the results |
| return verify( DATA_SIZE, input_data, verify_data ); |
| } |