
    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/time.h>

    #include <hmpi.h>

    #include "bounds.h"
    #include "cholesky_i.c"
    #include "cholesky_data.c"
    #include "counter.h"

    /*
     * Print msg on stdout, flush it so that it is not lost, and ask MPI
     * to abort every process in MPI_COMM_WORLD with the given error code.
     */
    static void cholesky_main_fail(const char *msg, int code)
    {
        printf("%s", msg);
        fflush(stdout);
        MPI_Abort(MPI_COMM_WORLD, code);
    }

    /*
     * Allocate room for count elements of elem_size bytes each.
     *
     * A request for zero elements is rounded up to one element, because
     * malloc(0) is allowed to return NULL and that must not be mistaken
     * for an out-of-memory condition.  If the allocation fails (or the
     * byte count would overflow size_t), msg is printed and the run is
     * aborted with the given code.
     *
     * The caller owns the returned memory and releases it with free().
     */
    static void *cholesky_main_alloc(size_t count, size_t elem_size,
                                     const char *msg, int code)
    {
        void *ptr = NULL;

        if (count == 0)
        {
            count = 1;
        }

        if (count <= ((size_t)-1) / elem_size)
        {
            ptr = malloc(count * elem_size);
        }

        if (ptr == NULL)
        {
            cholesky_main_fail(msg, code);
        }

        return ptr;
    }

    /*
     * Driver of the striped parallel factorisation.
     *
     * The last rank of MPI_COMM_WORLD does not take part: it is split off
     * with MPI_UNDEFINED and finalises immediately.  The remaining
     * p = size - 1 ranks form choleskycomm.  Rank 0 asks
     * HMPI_Partition_set for a distribution np[] which is broadcast to
     * all ranks and post-processed by GetStripes; rank `me` then owns
     * np[me] rows of N doubles each.
     *
     * For every step i the rank reported by GetPivotProcessor computes
     * l11 = sqrt of its current diagonal entry and broadcasts it.  Every
     * rank that still owns rows below the pivot row scales column i of
     * those rows by 1/l11, the scaled column is collected on all ranks
     * with MPI_Allgatherv into l21, and the owned rows are updated from
     * l21.
     *
     * Rank 0 finally prints the wall-clock time of the whole run minus
     * the time spent in the closing barrier.
     *
     * Returns 0 on success; every failure aborts through MPI_Abort.
     */
    int main(int argc, char **argv)
    {
        int p, size, myNp, i, j, k, x, y, rc, me;
        int myi = 0;               /* local index of the next pivot row */
        int color = 0;
        int steps = NSTEPS;
        int *np, *cumnp, *bounds, *counts, *displs;
        int *mlimits = NULL;
        double *speeds = NULL;
        double *A, *l21, *sendbuf;
        double l11 = 0.0;
        double barrier_time;
        MPI_Comm choleskycomm;
        struct timeval start, end;

        gettimeofday(&start, NULL);

        rc = MPI_Init(&argc, &argv);
        if (rc != MPI_SUCCESS)
        {
            cholesky_main_fail(
                "MAIN:Problems initializing MPI "
                "...Exiting...\n",
                -1
            );
        }

        rc = MPI_Comm_rank(MPI_COMM_WORLD, &me);
        if (rc != MPI_SUCCESS)
        {
            cholesky_main_fail(
                "MAIN:Problems getting rank in MPI_COMM_WORLD "
                "...Exiting...\n",
                -2
            );
        }

        rc = MPI_Comm_size(MPI_COMM_WORLD, &size);
        if (rc != MPI_SUCCESS)
        {
            cholesky_main_fail(
                "MAIN:Problems getting size of MPI_COMM_WORLD"
                "...Exiting...\n",
                -3
            );
        }

        /*
         * The last rank is left out of the working communicator.
         */
        if (me == (size - 1))
        {
            color = MPI_UNDEFINED;
        }

        rc = MPI_Comm_split(MPI_COMM_WORLD, color, me, &choleskycomm);
        if (rc != MPI_SUCCESS)
        {
            cholesky_main_fail(
                "Problems with MPI_Comm_split for MPI_COMM_WORLD\n",
                -4
            );
        }

        if (me == (size - 1))
        {
            MPI_Finalize();

            return 0;
        }

        p = size - 1;

        np = cholesky_main_alloc(
                 (size_t)p, sizeof *np, "Cannot allocate np\n", -5
        );

        /*
         * Fill in the bounds here.  Only the first p entries of the
         * table are used; running with more working processes than the
         * table has entries would leave bounds partly uninitialised, so
         * that configuration is rejected.
         */
        {
            const int known_bounds[] = {
                bcsserver,
                bcssparc01,
                bzaphod,
                bpg1cluster01,
                bpg1cluster02,
                bpg1cluster03,
                bpg1cluster04,
                bcsultra05,
                bcsultra06,
                bcsultra07,
                bcsultra08
            };
            const int nknown =
                (int)(sizeof known_bounds / sizeof known_bounds[0]);

            if (p > nknown)
            {
                cholesky_main_fail(
                    "More working processes than entries in the bounds table\n",
                    -20
                );
            }

            bounds = cholesky_main_alloc(
                         (size_t)p,
                         sizeof *bounds,
                         "No memory to allocate bounds\n",
                         -19
            );

            for (j = 0; (j < p) && (j < nknown); j++)
            {
                bounds[j] = known_bounds[j];
            }
        }

        /*
         * Only rank 0 computes the partition.
         */
        if (me == 0)
        {
            speeds = cholesky_main_alloc(
                         (size_t)p * (size_t)steps,
                         sizeof *speeds,
                         "No memory to allocate speeds\n",
                         -6
            );

            mlimits = cholesky_main_alloc(
                          (size_t)p * (size_t)steps,
                          sizeof *mlimits,
                          "No memory to allocate mlimits\n",
                          -7
            );

            Fill_input_parameters(p, steps, speeds, mlimits);

            if (VERBOSE > 0)
            {
                Display_input_parameters(p, steps, speeds, mlimits);
            }

            rc = HMPI_Partition_set(
                     p,
                     steps,
                     speeds,
                     mlimits,
                     bounds,
                     N*N,
                     NULL,
                     0,
                     0,
                     -1,
                     NULL,
                     NULL,
                     np
            );

            if (rc != HMPI_OK)
            {
                cholesky_main_fail("Problems partitioning\n", -8);
            }
        }

        /*
         * Broadcast the partition parameter np to
         * all the processors
         */
        rc = MPI_Bcast(np, p, MPI_INT, 0, choleskycomm);
        if (rc != MPI_SUCCESS)
        {
            cholesky_main_fail(
                "Problems broadcasting partition parameter\n", -9
            );
        }

        rc = GetStripes(p, N, bounds, np);
        if (rc != HMPI_OK)
        {
            cholesky_main_fail(
                "Problems getting striped partitioning\n", -10
            );
        }

        /* speeds and mlimits are NULL on every rank but 0. */
        free(speeds);
        free(mlimits);
        free(bounds);

        if ((VERBOSE > 0) && (me == 0))
        {
            printf("Allocations are: \n");

            for (i = 0; i < p; i++)
            {
                printf("%d ", np[i]);
            }

            printf("\n");
        }

        myNp = np[me];

        A = cholesky_main_alloc(
                (size_t)(N) * (size_t)myNp,
                sizeof *A,
                "Cannot allocate A\n",
                -11
        );

        InitializeMatrices(N*myNp, A);

        /*
         * cumnp[r] is the number of rows owned by ranks 0 .. r-1.
         */
        cumnp = cholesky_main_alloc(
                    (size_t)p + 1,
                    sizeof *cumnp,
                    "Can't allocate cumnp during the execution of the algorithm\n",
                    -12
        );

        cumnp[0] = 0;
        for (j = 0; j < p; j++)
        {
            cumnp[j + 1] = cumnp[j] + np[j];
        }

        /*
         * Work buffers reused by every step: l21 holds at most N - 1
         * gathered entries and sendbuf at most myNp local entries.
         */
        l21 = cholesky_main_alloc(
                  (N > 1) ? (size_t)(N - 1) : 0,
                  sizeof *l21,
                  "Cannot allocate l21\n",
                  -16
        );

        sendbuf = cholesky_main_alloc(
                      (size_t)myNp,
                      sizeof *sendbuf,
                      "Cannot allocate l21buf\n",
                      -16
        );

        counts = cholesky_main_alloc(
                     (size_t)p, sizeof *counts, "Cannot allocate counts\n", -17
        );

        displs = cholesky_main_alloc(
                     (size_t)p, sizeof *displs, "Cannot allocate displs\n", -18
        );

        /*
         * Start of the execution of the algorithm
         */
        for (i = 0; i < N; i++)
        {
            int PivotProcessor;
            int first;     /* first local row lying below the pivot row */
            int nactive;   /* number of local rows lying below it       */

            rc = GetPivotProcessor(i, p, cumnp, &PivotProcessor);
            if (rc != HMPI_OK)
            {
                cholesky_main_fail("Invalid pivot\n", -13);
            }

            if ((i+1) == N)
            {
                break;
            }

            if (PivotProcessor == me)
            {
                l11 = sqrt(A[myi*N + i]);
                myi++;
            }
            else
            {
                myi = 0;
            }

            rc = MPI_Bcast(&l11, 1, MPI_DOUBLE, PivotProcessor, choleskycomm);
            if (rc != MPI_SUCCESS)
            {
                cholesky_main_fail("Problems broadcasting l11\n", -15);
            }

            if ((VERBOSE > 0) && (me == 0))
            {
                printf("Step%d, Pivot Processor=%d, l11=%0.6f\n", i, PivotProcessor, l11);
            }

            /*
             * The pivot rank contributes its rows after the pivot row,
             * ranks behind the pivot contribute all their rows and ranks
             * in front of the pivot contribute nothing.
             */
            if (PivotProcessor == me)
            {
                first = myi;
            }
            else if (me > PivotProcessor)
            {
                first = 0;
            }
            else
            {
                first = myNp;
            }

            nactive = myNp - first;

            /*
             * Scale column i of the active rows and pack it for the
             * gather.
             */
            for (j = first; j < myNp; j++)
            {
                A[j*N + i] = (double)A[j*N + i] / (double)l11;
                sendbuf[j - first] = A[j*N + i];
            }

            if ((VERBOSE > 0) && (PivotProcessor == me))
            {
                printf("The contents of A21 are:\n");
                for (j = first; j < myNp; j++)
                {
                    printf("%0.6f ", A[j*N + i]);
                }
                printf("\n");
            }

            /*
             * Receive layout, identical on every rank.  The pivot count
             * is the number of its rows after global row i; this equals
             * myNp - myi on the pivot as long as GetPivotProcessor picks
             * the rank r with cumnp[r] <= i < cumnp[r+1].
             */
            for (j = 0; j < p; j++)
            {
                if (j < PivotProcessor)
                {
                    counts[j] = 0;
                }
                else if (j == PivotProcessor)
                {
                    counts[j] = cumnp[PivotProcessor + 1] - (i+1);
                }
                else
                {
                    counts[j] = np[j];
                }

                displs[j] = (j == 0) ? 0 : (displs[j - 1] + counts[j - 1]);
            }

            rc = MPI_Allgatherv(
                     sendbuf,
                     nactive,
                     MPI_DOUBLE,
                     l21,
                     counts,
                     displs,
                     MPI_DOUBLE,
                     choleskycomm
            );

            if (rc != MPI_SUCCESS)
            {
                cholesky_main_fail("Problems gathering l21\n", -21);
            }

            if ((VERBOSE > 0) && (me == 0))
            {
                printf("The contents of l21 are:\n");
                for (j = 0; j < (N - i - 1); j++)
                {
                    printf("%0.6f ", l21[j]);
                }
                printf("\n");
            }

            /*
             * Update the trailing part of the active rows; this rank's
             * own entries start at displs[me] inside l21.
             *
             * NOTE(review): the product overwrites A[j][k] instead of
             * being subtracted from it -- confirm this is intended.
             */
            for (j = first; j < myNp; j++)
            {
                const double lj = l21[displs[me] + (j - first)];

                for (k = i+1; k < N; k++)
                {
                    A[j*N + k] = lj*l21[k - i - 1];
                }
            }
        }

        free(l21);
        free(sendbuf);
        free(counts);
        free(displs);

        /*
         * Print the contents of the matrices
         */
        if (VERBOSE > 0)
        {
            for (x = 0; x < myNp; x++)
            {
                for (y = 0; y < N; y++)
                {
                    printf("me = %d, A[%d][%d]=%0.4f\n", me, x, y, A[x*N+y]);
                }
            }
        }

        free(np);
        free(A);
        free(cumnp);

        /*
         * Time the closing barrier so that it can be excluded from the
         * reported execution time.
         */
        {
            double i_barrier_time, f_barrier_time;

            i_barrier_time = MPI_Wtime();

            rc = MPI_Barrier(choleskycomm);
            if (rc != MPI_SUCCESS)
            {
                cholesky_main_fail("Problems with MPI_Barrier\n", -22);
            }

            f_barrier_time = MPI_Wtime();

            barrier_time = f_barrier_time - i_barrier_time;
        }

        rc = MPI_Comm_free(&choleskycomm);
        if (rc != MPI_SUCCESS)
        {
            cholesky_main_fail(
                "MAIN:Problems freeing MXM comm "
                "...Exiting...\n",
                -13
            );
        }

        gettimeofday(&end, NULL);

        /*
         * Print Execution time
         */
        if (me == 0)
        {
            double tstart = start.tv_sec + (start.tv_usec/1e6);
            double tend = end.tv_sec + (end.tv_usec/1e6);

            printf(
              "N=%d, t(sec)=%0.9f\n",
              N,
              (tend - tstart - barrier_time)
            );
        }

        MPI_Finalize();

        return 0;
    }

  /*-----------------------------------------------------*/
