
    #include <math.h>
    #include <stdio.h>
    #include <sys/time.h>

    #include <hmpi.h>

    #include "counter.h"
    #include "mxm_i.c"
    #include "mxm_recon.c"

    /*
     * Driver for a parallel matrix multiplication (C += A x B) on top of
     * HMPI/MPI, using a one-dimensional row distribution.
     *
     * Steps performed, in order:
     *   1. Initialise the HMPI runtime and, if requested, run Do_recon() and
     *      print the measured performances on the host.
     *   2. Create an HMPI group from the MPC_NetType_ParallelAxB model; the
     *      host supplies the model parameters (N, recon_r, recon_t).
     *   3. Processes left out of the group call HMPI_Finalize(0).
     *      NOTE(review): the code that follows uses `gid` unconditionally, so
     *      HMPI_Finalize is presumably expected not to return - confirm.
     *   4. Group members obtain their speeds, partition N rows between them
     *      with HMPI_Partition_set, allocate their local slices of A, B and C,
     *      and run N broadcast-and-update steps.
     *   5. The host prints the elapsed wall-clock time minus the time spent
     *      in the final barrier.
     *
     * Parameters:
     *   argc, argv - forwarded to HMPI_Init.
     *
     * Returns:
     *   0 on normal completion, or the MPI error code if the final barrier
     *   fails. Other failures are reported through HMPI_Finalize(code) or
     *   MPI_Abort.
     */
    int main(int argc, char **argv)
    {
        int p, myNp, i, x, y, rc, *d, me;
        double *speeds;
        double *A, *B, *C;
        double barrier_time;
        struct timeval start, end;
        double *temp;
        size_t local_elems;

        HMPI_Group gid;
        /*
         * Only the host fills these in. They start out as 0/NULL so that
         * every other process holds well-defined values and the later free()
         * is always safe.
         */
        int param_count = 0;
        int *model_params = NULL;

        MPI_Comm *mxmcomm;

        gettimeofday(&start, NULL);

        rc = HMPI_Init(&argc, &argv);

        if (rc != HMPI_OK)
        {
            printf(
                "Problems initializing HMPI runtime "
                "...Exiting...\n"
            );

            HMPI_Finalize(-1);
        }

        /*
         * NOTE(review): HMPI_Is_recon_required is tested without call
         * parentheses. If it names a function this condition is always true;
         * its declaration is not visible here, so it is left untouched.
         */
        if (HMPI_Is_recon_required)
        {
            int gsize;
            double *gperf;

            rc = Do_recon();

            if (rc != HMPI_OK)
            {
                printf(
                    "MAIN:Problems reconning the network"
                    "...Exiting...\n"
                );

                HMPI_Finalize(-2);
            }

            gsize = HMPI_Group_size(HMPI_COMM_WORLD_GROUP);

            gperf = malloc(sizeof *gperf * gsize);

            if (gperf == NULL)
            {
                printf("Cannot allocate performances\n");
                HMPI_Finalize(-6);
            }

            /* The return value was stored but never inspected originally. */
            HMPI_Group_performances(HMPI_COMM_WORLD_GROUP, gperf);

            if (HMPI_Is_host())
            {
                printf("Performances are \n");
                for (i = 0; i < gsize; i++)
                {
                    printf("%0.6f ", gperf[i]);
                }
                printf("\n");
            }

            free(gperf);
        }

        if (HMPI_Is_host())
        {
            param_count = 3;

            /* Element type is int; the original sized this with sizeof(int*). */
            model_params = malloc(sizeof *model_params * param_count);

            if (model_params == NULL)
            {
                printf(
                    "HOST:Problems allocating model params"
                    "...Exiting...\n"
                );

                HMPI_Finalize(-3);
            }

            model_params[0] = N;
            model_params[1] = recon_r;
            model_params[2] = recon_t;
        }

        if (HMPI_Is_member(HMPI_HOST_GROUP))
        {
            HMPI_Debug(1);

            HMPI_Group_auto_create(
                &gid,
                &MPC_NetType_ParallelAxB,
                model_params,
                param_count
            );
        }

        if (HMPI_Is_free())
        {
            HMPI_Group_auto_create(
                &gid,
                &MPC_NetType_ParallelAxB,
                NULL,
                0
            );
        }

        /* Processes that are still free were not selected for the group. */
        if (HMPI_Is_free())
        {
            HMPI_Finalize(0);
        }

        mxmcomm = (MPI_Comm*)HMPI_Get_comm(&gid);

        if (mxmcomm == NULL)
        {
            printf("communicator given by the Get_comm operation is NULL\n");
            HMPI_Finalize(-1);
        }

        me = HMPI_Group_rank(&gid);

        /*
         * Broadcast the number of processes in the group to all members.
         * Only the parent queries and prints it; every member then takes
         * part in the same broadcast rooted at rank 0.
         */
        if (HMPI_Is_parent(&gid))
        {
            p = HMPI_Group_size(&gid);

            printf("Optimal number of processes = %d\n", p);
        }

        rc = MPI_Bcast(&p, 1, MPI_INT, 0, *mxmcomm);

        if (rc != MPI_SUCCESS)
        {
            printf("Problems broadcasting partition parameter\n");
            MPI_Abort(MPI_COMM_WORLD, -9);
        }

        speeds = malloc(sizeof *speeds * p);

        if (speeds == NULL)
        {
            printf(
                "Problems allocating the speeds of processors"
                "...Exiting...\n"
            );

            HMPI_Finalize(-3);
        }

        HMPI_Group_performances(&gid, speeds);

        if ((VERBOSE > 0) && HMPI_Is_parent(&gid))
        {
            printf("Member speeds are: ");

            for (i = 0; i < p; i++)
            {
                printf("%0.2f ", speeds[i]);
            }

            printf("\n");
        }

        /* d[k] receives the share of the N rows assigned to group rank k. */
        d = malloc(sizeof *d * p);

        if (d == NULL)
        {
            printf(
                "Problems allocating distribution parameter d"
                "...Exiting...\n"
            );

            HMPI_Finalize(-4);
        }

        rc = HMPI_Partition_set(
                 p,
                 1,
                 speeds,
                 NULL,
                 NULL,
                 N,
                 NULL,
                 0,
                 0,
                 -1,
                 NULL,
                 NULL,
                 d
        );

        if (rc != HMPI_OK)
        {
            printf("Problems partitioning\n");
            HMPI_Finalize(-5);
        }

        /* NULL on every process except the host, so this is always safe. */
        free(model_params);
        model_params = NULL;

        free(speeds);

        if ((VERBOSE > 0) && HMPI_Is_parent(&gid))
        {
            printf("Distribution parameters are :");

            for (i = 0; i < p; i++)
            {
                printf("%d ", d[i]);
            }

            printf("\n");
        }

        myNp = d[me];

        /*
         * Each process stores myNp rows of N columns for A, B and C. The
         * element count is computed in size_t so the product cannot overflow
         * int before it reaches malloc.
         */
        local_elems = (size_t)N * (size_t)myNp;

        A = malloc(sizeof *A * local_elems);

        if (A == NULL)
        {
            printf("Cannot allocate A, N=%d, myNp=%d, me=%d\n", N, myNp, me);
            HMPI_Finalize(-6);
        }

        B = malloc(sizeof *B * local_elems);

        if (B == NULL)
        {
            printf("Cannot allocate B, N=%d, myNp=%d, me=%d\n", N, myNp, me);
            HMPI_Finalize(-7);
        }

        C = malloc(sizeof *C * local_elems);

        if (C == NULL)
        {
            printf("Cannot allocate C, N=%d, myNp=%d, me=%d \n", N, myNp, me);
            HMPI_Finalize(-8);
        }

        /*
         * Initialization can be expensive.
         * NOTE(review): the original comment says it is "ignored", but this
         * call sits inside the region timed by gettimeofday - confirm intent.
         */
        InitializeMatrices(
            N*myNp,
            A,
            B,
            C
        );

        /* Receive buffer for the row of B that is broadcast at each step. */
        temp = malloc(sizeof *temp * N);

        if (temp == NULL)
        {
            printf("Cannot allocate temp\n");
            HMPI_Finalize(-9);
        }

        for (i = 0; i < N; i++)
        {
            int PivotProcessor;

            /* Presumably the rank owning global row i - verify against helper. */
            PivotProcessor = GetPivotProcessor(
                                i,
                                N,
                                p,
                                d
            );

            if ((VERBOSE > 0) && (me == 0))
            {
                printf("Step %d, pivot processor is %d\n", i, PivotProcessor);
            }

            /*
             * Broadcast the pivot row. The owner first copies its local row
             * into temp; all members then join one broadcast rooted at the
             * owner.
             */
            if (PivotProcessor == me)
            {
                /* Turn global row i into an index within this process's slice. */
                int myrow = i;

                for (x = 0; x < me; x++)
                {
                    myrow -= d[x];
                }

                for (x = 0; x < N; x++)
                {
                    temp[x] = B[myrow*N + x];
                }
            }

            rc = MPI_Bcast(
                    temp,
                    N,
                    MPI_DOUBLE,
                    PivotProcessor,
                    *mxmcomm
            );

            if (rc != MPI_SUCCESS)
            {
                printf("Problems broadcasting pivot row\n");
                MPI_Abort(MPI_COMM_WORLD, -12);
            }

            /* Rank-1 update of the local rows: C[x][:] += A[x][i] * B[i][:]. */
            for (x = 0; x < myNp; x++)
            {
                for (y = 0; y < N; y++)
                {
                    C[x*N + y] += A[x*N + i] * temp[y];
                }
            }
        }

        /*
         * Print the local part of the result matrix C
         */
        if (VERBOSE > 0)
        {
            for (x = 0; x < myNp; x++)
            {
                for (y = 0; y < N; y++)
                {
                    printf("C[%d][%d]=%0.4f\n", x, y, C[x*N+y]);
                }
            }
        }

        free(temp);
        free(d);
        free(A);
        free(B);
        free(C);

        /*
         * Synchronise the group and measure how long the barrier takes, so
         * that the wait can be subtracted from the reported time.
         */
        {
            double i_barrier_time, f_barrier_time;

            i_barrier_time = MPI_Wtime();

            rc = MPI_Barrier(*mxmcomm);

            if (rc != MPI_SUCCESS)
            {
                /* NOTE(review): leaves without finalising HMPI, unlike other paths. */
                return rc;
            }

            f_barrier_time = MPI_Wtime();

            barrier_time = f_barrier_time - i_barrier_time;
        }

        if (HMPI_Is_member(&gid))
        {
            HMPI_Group_free(&gid);
        }

        gettimeofday(&end, NULL);

        if (HMPI_Is_host())
        {
            /* Convert the (seconds, microseconds) pairs to seconds. */
            double tstart = start.tv_sec + (start.tv_usec / 1.0e6);
            double tend = end.tv_sec + (end.tv_usec / 1.0e6);

            printf(
                "N=%d, t(sec)=%0.9f\n",
                N,
                (tend - tstart - barrier_time)
            );
        }

        HMPI_Finalize(0);

        return 0;
    }
