#include <cstdio>
#include <iostream>

#include <tbb/blocked_range.h>
#include <tbb/parallel_reduce.h>
#include <tbb/task_scheduler_init.h>

#define N 1001

using namespace std;
using namespace tbb;

/**
 * Per-element term of the reduction.
 *
 * Currently the identity: returns the float that x points to.
 *
 * @param x Pointer to the element to read. It is dereferenced
 *          unconditionally, so it must not be null. Taken as pointer-to-const
 *          because the element is only read; existing `float *` callers
 *          convert implicitly.
 * @return The value stored at *x.
 */
float func( const float *x )
{
	return *x;
}

/**
 * Body object for tbb::parallel_reduce that accumulates func() over the
 * elements of a float array addressed by a blocked_range<int>.
 *
 * The class provides the three pieces parallel_reduce expects from a body:
 * a splitting constructor, operator() to process a sub-range, and join()
 * to merge the result of a split-off body back into this one.
 *
 * The array is not owned; the caller must keep it alive while the
 * reduction runs.
 */
class Task {
	private:
		// Array being reduced (borrowed, never freed here).
		float *local_a;

	public:
		// Running total of func() over every index this body has processed.
		float sum;

		// Builds the root body over array _a with an empty total.
		Task( float _a[] ) : local_a( _a ), sum( 0.f ) {}

		// Splitting constructor: the new body shares the same array but
		// starts from a zero total, so join() can simply add the partial sums.
		Task( Task &subtask, split ) : local_a( subtask.local_a ), sum( 0.f ) {}

		// Adds func() of every element with index in [r.begin(), r.end())
		// to sum. Also prints the sub-range bounds to stdout.
		void operator()( const blocked_range<int> &r ) {
			std::printf("%08d - %08d\n",r.begin(),r.end());

			// Accumulate in a local and store once. Start from the current
			// sum (not zero) because the same body may be applied to more
			// than one sub-range before it is joined.
			float partial = sum;
			const int last = r.end();
			for ( int i=r.begin(); i!=last; ++i ) {
				partial += func( &local_a[i] );
			}
			sum = partial;
		}

		// Merges the total of a split-off body into this one.
		void join( const Task &subtask )
		{
			sum += subtask.sum;
		}
};

float par_func(int x, int y, float *a)
{
	Task task(a);

	parallel_reduce(blocked_range<int>(x,y,N/10), task);

	return task.sum;
}

/**
 * Fills an N-element array with 0, 1, ..., N-1, reduces it with par_func
 * and prints the result as "Total: <value>".
 *
 * @return 0 always.
 */
int main()
{
	// TBB scheduler object constructed with the argument 2; it stays alive
	// until main returns.
	task_scheduler_init init(2);

	float a[N];

	for ( int i=0; i<N; i++ ) {
		a[i] = static_cast<float>( i );
	}

	// Run the reduction BEFORE starting the output statement. If the call
	// sits inside the << chain, C++17 evaluates the chain left to right, so
	// "Total: " would be emitted ahead of the range lines the reduction
	// prints (and before C++17 the order is unspecified).
	const float total = par_func(0,N,a);

	cout << endl << "Total: " << total << endl;

	return 0;
}

