#include <stdio.h>
#include <stdlib.h>

#include <pthread.h>

#include "mylib_barrier.h"


// Compile-time configuration. Expression-valued macros are parenthesized so
// that they expand safely inside larger expressions (e.g. x % SIZE).
#define NUM_THREADS  4          // worker threads (main is one extra barrier participant)
#define NUM_ITER     5          // number of summation rounds driven by main
#define SIZE         (2048*8)   // number of elements in a[]
#define N_JOBS       8          // array segments placed in the queue each round
#define TERMINATE    (-1)       // sentinel stored in q.njobs to make the workers exit

// Input array whose elements are summed by the worker threads.
// It is (re)filled by initialize() before every round.
int a[SIZE];

// Queue of the array segments still to be added up.
// Job k covers the half-open index range [from[k], to[k]) of a[].
// Workers take jobs from the top: they decrement njobs and use that slot.
typedef struct {
  int from[N_JOBS];   // first index (inclusive) of each segment
  int to[N_JOBS];     // end index (exclusive) of each segment
  int njobs;          // jobs still pending; TERMINATE tells the workers to exit
} queue;

// Shared job queue, refilled by initialize() and drained by the workers.
queue q;
// Protects q while the worker threads inspect or pop jobs.
pthread_mutex_t mutex_q;

// Barrier shared by the NUM_THREADS workers and the master (main) thread.
mylib_barrier_t bar;

// Running total of a[] for the current round; workers add their partial sums.
int final_sum;
// Protects final_sum while the workers accumulate into it.
pthread_mutex_t mutex_sum;


// This funcion is executed by each thread. 
// A double barrier is executed at the end of each thread summation. 
// The first barrier to allow the master thread to safely reinizialize the data structures, along 
// whith the queue.
// The second barrier to allow all the threads to wait for the master inizialization phase, before starting the 
// computation.
void *partial_sum(void *threadid)
{
  int *ptr = (int *) threadid;
  int i, f, t, local_sum;
  int done;

  //int iter = 0;

  while (1) {
    pthread_mutex_lock(&mutex_q);
    if (q.njobs == TERMINATE) {
      pthread_mutex_unlock(&mutex_q);
      break;
    }
    pthread_mutex_unlock(&mutex_q);

    done = 0;
    while (!done) {
      // access the queue to self-schedule a job
      pthread_mutex_lock(&mutex_q);
      if (q.njobs == 0)
	done = 1;
      else {
	q.njobs--;
	f = q.from[q.njobs];
	t = q.to[q.njobs];
      }
      pthread_mutex_unlock(&mutex_q);

      if (!done) {
	local_sum = 0;
	for (i=f; i<t; i++)
	  local_sum += a[i];

	pthread_mutex_lock (&mutex_sum);
	final_sum += local_sum;
	pthread_mutex_unlock (&mutex_sum);

	//printf("From thread #%d (local_sum=%d from=%d to=%d)\n", *ptr, local_sum, f, t);
      }
    }

    mylib_barrier(&bar, NUM_THREADS+1);  // final barrier 
    //printf("From thread #%d (exit final barrier=%d)\n", *ptr, iter); 
    //iter++;

    mylib_barrier(&bar, NUM_THREADS+1); // init barrier 
    //printf("From thread #%d (exit init barrier=%d  a[0]=%d)\n", *ptr, iter, a[0]); 
  }
  pthread_exit(NULL);
}



// Initializes the array a[] and the task queue consumed by the worker threads.
//
// seed: base value used to fill a[] (a[i] = seed + 2*i).
//       If seed == TERMINATE (-1), a[] and the segments are left untouched and
//       the queue is prepared for termination by setting q.njobs = TERMINATE.
//
// Otherwise the index range [0, SIZE) is split into N_JOBS contiguous
// segments and q.njobs is reset to N_JOBS.
//
// No locking is done here: in this file main() calls it either before the
// workers are created or between the two barriers of a round.
void initialize(int seed) {
   int i, delta;

   if (seed == TERMINATE) {
     q.njobs = TERMINATE;
     return;
   }

   // Initialize a[]
   for (i=0; i<SIZE; i++)
     a[i] = seed + i*2;

   // Initialize the queue segments
   delta = (SIZE)/N_JOBS;
   for (i=0; i<N_JOBS; i++) {
     q.from[i] = i * delta;
     q.to[i] = q.from[i] + delta;
   }
   // The last segment absorbs any remainder, so that no trailing element of
   // a[] is skipped when SIZE is not a multiple of N_JOBS.
   q.to[N_JOBS-1] = SIZE;

   q.njobs = N_JOBS;
}




// Master thread.
//
// Creates NUM_THREADS workers running partial_sum(), then drives NUM_ITER
// summation rounds. For each round it waits on the "final" barrier, prints the
// accumulated sum, prepares the next round (or requests termination after the
// last one) and releases the workers through the "start" barrier.
// Finally it joins the workers and destroys the mutexes.
//
// Returns 0 on success; exits with EXIT_FAILURE if a thread cannot be created
// or joined.
int main (int argc, char *argv[])
{
   pthread_t threads[NUM_THREADS];
   int myid[NUM_THREADS];
   int rc;
   int t;
   int iter;

   (void) argc;   // command-line arguments are not used
   (void) argv;

   pthread_mutex_init(&mutex_sum, NULL);
   pthread_mutex_init(&mutex_q, NULL);

   // The first round must be fully set up before any worker starts.
   mylib_init_barrier(&bar);
   initialize(100);
   final_sum = 0;

   for (t=0; t<NUM_THREADS; t++) {
     myid[t] = t;
     printf("In main: creating thread %d\n", t);
     rc = pthread_create(&threads[t], NULL, partial_sum, (void *) &myid[t]);
     if (rc) {
       fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc);
       exit(EXIT_FAILURE);
     }
   }

   // All rounds but the last: collect the result, then set up the next round.
   for (iter=0; iter < NUM_ITER-1; iter++) {
     mylib_barrier(&bar, NUM_THREADS+1);  // final barrier
     printf("FINAL SUM: %d\n", final_sum);
     fflush(stdout);

     // Workers are parked between the two barriers, so the shared data can
     // be modified here without taking the mutexes.
     initialize(101+iter);
     final_sum = 0;

     mylib_barrier(&bar, NUM_THREADS+1);  // start barrier
   }

   // Last round: collect the result, then ask the workers to exit.
   mylib_barrier(&bar, NUM_THREADS+1);  // final barrier
   printf("FINAL SUM: %d\n", final_sum);
   fflush(stdout);

   initialize(TERMINATE);  // termination

   mylib_barrier(&bar, NUM_THREADS+1);  // start barrier

   for (t=0; t < NUM_THREADS; t++) {
     rc = pthread_join(threads[t], NULL);
     if (rc) {
       fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc);
       exit(EXIT_FAILURE);
     }
     printf("Joined thread %d\n", t);
   }

   pthread_mutex_destroy(&mutex_sum);
   pthread_mutex_destroy(&mutex_q);

   // Every worker has been joined, so a plain return ends the process cleanly.
   return 0;
}

