blob: 3b687eb3eabb41163474e8004d64f171d9a7d5f6 [file] [log] [blame]
/*
Copyright 2005-2010 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#include "common.h"
#include "tbb/tick_count.h"
#include "tbb/task.h"
#include "tbb/task_scheduler_init.h"
#include <cstdlib>
#include <cstdio>
#include <cstring>
// The performance of this example can be significantly better when
// the objects are allocated by the scalable_allocator instead of the
// default "operator new". The reason is that the scalable_allocator
// typically packs small objects more tightly than the default "operator new",
// resulting in a smaller memory footprint, and thus more efficient use of
// cache and virtual memory. Also the scalable_allocator works faster for
// multi-threaded allocations.
//
// Pass -stdmalloc as the 1st command line parameter to use the default "operator new"
// and see the performance difference.
#include "tbb/scalable_allocator.h"
using namespace std;
static double Pi = 3.14159265358979;
const bool tbbmalloc = true;
const bool stdmalloc = false;
template<bool use_tbbmalloc>
class TreeMaker {
class SubTreeCreationTask: public tbb::task {
TreeNode*& my_root;
bool is_continuation;
typedef TreeMaker<use_tbbmalloc> MyTreeMaker;
public:
SubTreeCreationTask( TreeNode*& root, long number_of_nodes ) : my_root(root), is_continuation(false) {
my_root = MyTreeMaker::allocate_node();
my_root->node_count = number_of_nodes;
my_root->value = Value(Pi*number_of_nodes);
}
tbb::task* execute() {
tbb::task* next = NULL;
if( !is_continuation ) {
long subtree_size = my_root->node_count - 1;
if( subtree_size<1000 ) { /* grainsize */
my_root->left = MyTreeMaker::do_in_one_thread(subtree_size/2);
my_root->right = MyTreeMaker::do_in_one_thread(subtree_size - subtree_size/2);
} else {
// Create tasks before spawning any of them.
tbb::task* a = new( allocate_child() ) SubTreeCreationTask(my_root->left,subtree_size/2);
tbb::task* b = new( allocate_child() ) SubTreeCreationTask(my_root->right,subtree_size - subtree_size/2);
recycle_as_continuation();
is_continuation = true;
set_ref_count(2);
spawn(*b);
next = a;
}
}
return next;
}
};
public:
static TreeNode* allocate_node() {
return use_tbbmalloc? tbb::scalable_allocator<TreeNode>().allocate(1) : new TreeNode;
}
static TreeNode* do_in_one_thread( long number_of_nodes ) {
if( number_of_nodes==0 ) {
return NULL;
} else {
TreeNode* n = allocate_node();
n->node_count = number_of_nodes;
n->value = Value(Pi*number_of_nodes);
--number_of_nodes;
n->left = do_in_one_thread( number_of_nodes/2 );
n->right = do_in_one_thread( number_of_nodes - number_of_nodes/2 );
return n;
}
}
static TreeNode* do_in_parallel( long number_of_nodes ) {
TreeNode* root_node;
SubTreeCreationTask& a = *new(tbb::task::allocate_root()) SubTreeCreationTask(root_node, number_of_nodes);
tbb::task::spawn_root_and_wait(a);
return root_node;
}
static TreeNode* create_and_time( long number_of_nodes ) {
tbb::tick_count t0, t1;
TreeNode* root = allocate_node();
root->node_count = number_of_nodes;
root->value = Value(Pi*number_of_nodes);
--number_of_nodes;
t0 = tbb::tick_count::now();
root->left = do_in_one_thread( number_of_nodes/2 );
t1 = tbb::tick_count::now();
printf ("%24s: time = %.1f msec\n", "half created serially", (t1-t0).seconds()*1000);
t0 = tbb::tick_count::now();
root->right = do_in_parallel( number_of_nodes - number_of_nodes/2 );
t1 = tbb::tick_count::now();
printf ("%24s: time = %.1f msec\n", "half done in parallel", (t1-t0).seconds()*1000);
return root;
}
};
int main( int argc, char *argv[] ) {
// Parse command line parameters
// The format is: <exe_name> [-stdmalloc] [num_of_nodes [num_of_threads]]
bool use_tbbmalloc = true;
int arg_idx = 1;
if( argc>1 && strcmp(argv[1], "-stdmalloc")==0 ) {
use_tbbmalloc = false;
arg_idx = 2;
}
long number_of_nodes = argc>arg_idx ? strtol(argv[arg_idx],0,0) : 10000000;
++arg_idx;
int nthread = argc>arg_idx ? strtol(argv[arg_idx],0,0) : tbb::task_scheduler_init::automatic;
// Start up scheduler
// For production, no argument should be provided to the constructor, so that
// the application gets the number of threads that are physically available.
tbb::task_scheduler_init init(nthread);
TreeNode* root;
if( use_tbbmalloc ) {
printf("Tree creation using TBB scalable allocator\n");
root = TreeMaker<tbbmalloc>::create_and_time( number_of_nodes );
} else {
printf("Tree creation using standard operator new\n");
root = TreeMaker<stdmalloc>::create_and_time( number_of_nodes );
}
// Warm up caches
SerialSumTree(root);
printf("Calculations:\n");
const char* which;
for( int i=0; i<3; ++i ) {
tbb::tick_count t0 = tbb::tick_count::now();
Value result;
switch( i ) {
case 0:
which = "SerialSumTree";
result = SerialSumTree(root);
break;
case 1:
which = "SimpleParallelSumTree";
result = SimpleParallelSumTree(root);
break;
case 2:
which = "OptimizedParallelSumTree";
result = OptimizedParallelSumTree(root);
break;
}
tbb::tick_count t1 = tbb::tick_count::now();
printf ("%24s: time = %.1f msec, sum=%g\n", which, (t1-t0).seconds()*1000, result);
}
return 0;
}