
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include <hmpi.h>

#include "f2c.h"
#include "counter.h"
#include "simpleGrid.c"
#include "pdgemm_i.c"
#include "pmatgeninc.c"
#include "pdmatgen.c"

/* Non-zero: main fills the local parts of A, B and C with pdmatgen_ before the multiply. */
#define  NEED_INITIALIZATION   1
/* Non-zero: the host prints the processor performances and its local part of C. */
#define  VERBOSE               0

/*
 * Drop any max macro that an earlier include may have defined and install
 * our own.  Note that the macro can evaluate an argument twice, so arguments
 * must be free of side effects.
 */
#ifdef  max
#undef  max
#endif  /* max */
#define max(a, b)       ((a) < (b) ? (b) : (a))

/* Constant 1.0; its address is passed to pdgemm_ as both scalar arguments. */
static doublereal c_b106 = 1.;

/*
 * Report a failed descinit_ call and shut HMPI down.
 *
 * matrix_name - label of the matrix whose descriptor was being built
 * info        - value returned through descinit_'s last argument;
 *               zero means success, anything else is treated as an error
 *
 * NOTE(review): like the allocation checks in main, this relies on
 * HMPI_Finalize not returning to the caller - confirm against the HMPI docs.
 */
static void mxm_check_descinit(const char *matrix_name, integer info)
{
    if (info != 0)
    {
       printf(
          "descinit_ failed for matrix %s, info=%ld\n",
          matrix_name,
          (long)info
       );
       HMPI_Finalize(-1);
    }
}

/*
 * Driver: recon the network, create an HMPI group modelled by
 * HMPI_NetType_simpleGrid, build a BLACS grid on the group's communicator,
 * generate the distributed matrices A, B and C, multiply them with pdgemm_
 * and let the host print the elapsed wall-clock time and the speed.
 *
 * The problem parameters (m, n, k, nb, p, q, recon_n, c__0, c__1) and
 * Do_recon() are not defined in this function; they come from the files
 * included at the top of this translation unit.
 */
int main( int argc, char **argv)
{
    extern /* Subroutine */ int descinit_(integer *, integer *, integer *,
            integer *, integer *, integer *, integer *, integer *, integer *,
            integer *);

    extern /* Subroutine */ int blacs_get__(integer *, integer *, integer *);
    extern /* Subroutine */ int blacs_exit__(integer *);
    extern /* Subroutine */ int blacs_pinfo__(integer *, integer *),
            pdpblasinfo_(char *, integer *, integer *, integer *, integer *,
            integer *, integer *, integer *, doublereal *, integer *, integer
            *, ftnlen);
    extern /* Subroutine */ int pdgemm_(char *, char *, integer *, integer *,
            integer *, doublereal *, doublereal *, integer *, integer *,
            integer *, doublereal *, integer *, integer *, integer *,
            doublereal *, doublereal *, integer *, integer *, integer *,
            ftnlen, ftnlen);

    extern integer numroc_(integer *, integer *, integer *, integer *,
            integer *);
    extern /* Subroutine */ int blacs_gridinit__(integer *, char *, integer *,
             integer *, ftnlen), blacs_gridexit__(integer *);

    static integer i, kp, kq, mp, nq, info, i__1;
    static integer desca[9], descb[9], descc[9], mycol, ictxt, myrow;

    static integer iaseed, ibseed, icseed;

    static doublereal *a, *b, *c;

    MPI_Comm mxmcomm;

    HMPI_Group gid;
    int param_count = 0, modelp[2] = {0, 0};

    /* Element counts of the local buffers, computed in size_t. */
    size_t a_elems, b_elems, c_elems;

    struct timeval start, end;

    /* The reported time covers everything from here to the group release. */
    gettimeofday(&start, NULL);

    // Initialize HMPI runtime
    HMPI_Init(&argc, &argv);

    {
       int nump  = HMPI_Get_number_of_processors();
       recon_n = n/sqrt(nump);
       int rc = Do_recon();

       if (rc != HMPI_OK)
       {
          printf(
             "MAIN:Problems reconning the network"
             "...Exiting...\n"
          );

          HMPI_Finalize(-2);
       }

       if (VERBOSE > 0)
       {
          int num_of_processors = HMPI_Get_number_of_processors();
          double *gperf = malloc(sizeof *gperf * (size_t)num_of_processors);

          if (gperf == NULL)
          {
             printf("Cannot allocate gperf\n");
             HMPI_Finalize(-1);
          }

          HMPI_Get_processors_info(
             gperf
          );

          if (HMPI_Is_host())
          {
             int proc;

             printf("Number of processors is = %d\n", num_of_processors);

             printf("Processor performances are\n");

             for (proc = 0; proc < num_of_processors; proc++)
             {
                 printf("%0.1f ", gperf[proc]);
             }

             printf("\n");
          }

          free(gperf);
       }
    }

    // Create HMPI group; only the host supplies the model parameters
    if (HMPI_Is_host())
    {
       param_count = 2;
       modelp[0] = p;
       modelp[1] = q;
    }

    if (HMPI_Is_member(HMPI_HOST_GROUP))
    {
       HMPI_Group_create(
           &gid,
           &HMPI_NetType_simpleGrid,
           modelp,
           param_count
       );
    }

    if (HMPI_Is_free())
    {
       HMPI_Group_create(
           &gid,
           &HMPI_NetType_simpleGrid,
           NULL,
           0
       );
    }

    // Processes still free after the group creation take no part in the
    // computation.
    // NOTE(review): the code below uses gid unconditionally, so this relies
    // on HMPI_Finalize not returning - confirm against the HMPI docs.
    if (HMPI_Is_free())
    {
       HMPI_Finalize(0);
    }

    mxmcomm = *(MPI_Comm*)HMPI_Get_comm(&gid);

    /*
     * Translate mxmcomm to a BLACS handle
     */
    ictxt = Csys2blacs_handle(mxmcomm);

    /*
     * Form BLACS context based on mxmcomm
     */
    Cblacs_gridinit(&ictxt, "r", p, q);
    blacs_gridinfo__(&ictxt, &p, &q, &myrow, &mycol);

    // Local row/column counts of the distributed matrices on this process
    mp = numroc_(&m, &nb, &myrow, &c__0, &p);
    kp = numroc_(&k, &nb, &myrow, &c__0, &p);
    kq = numroc_(&k, &nb, &mycol, &c__0, &q);
    nq = numroc_(&n, &nb, &mycol, &c__0, &q);

    // Initialize the array descriptor for the matrix A, B and C
    i__1 = max(1,mp);
    descinit_(desca, &m, &k, &nb, &nb, &c__0, &c__0, &ictxt, &i__1, &info);
    mxm_check_descinit("a", info);
    i__1 = max(1,kp);
    descinit_(descb, &k, &n, &nb, &nb, &c__0, &c__0, &ictxt, &i__1, &info);
    mxm_check_descinit("b", info);
    i__1 = max(1,mp);
    descinit_(descc, &m, &n, &nb, &nb, &c__0, &c__0, &ictxt, &i__1, &info);
    mxm_check_descinit("c", info);

    // Size the local buffers as (leading dimension) x (local columns).
    // At least one column is always allocated: a process that owns no
    // columns would otherwise request zero bytes, and malloc(0) may return
    // NULL, which would be misreported as an allocation failure.
    a_elems = (size_t)desca[8] * (size_t)max(1, kq);
    b_elems = (size_t)descb[8] * (size_t)max(1, nq);
    c_elems = (size_t)descc[8] * (size_t)max(1, nq);

    a = malloc(sizeof *a * a_elems);

    if (a == NULL)
    {
       printf("Cannot allocate a\n");
       HMPI_Finalize(-1);
    }

    b = malloc(sizeof *b * b_elems);

    if (b == NULL)
    {
       printf("Cannot allocate b\n");
       HMPI_Finalize(-1);
    }

    c = malloc(sizeof *c * c_elems);

    if (c == NULL)
    {
       printf("Cannot allocate c\n");
       HMPI_Finalize(-1);
    }

    //
    // Initialize the matrices A, B, and C
    if (NEED_INITIALIZATION)
    {
       iaseed = 100;
       pdmatgen_(&ictxt, "No transpose", "No transpose", &desca[2],
                 &desca[3], &desca[4], &desca[5], a, &desca[8],
                 &desca[6], &desca[7], &iaseed, &c__0, &mp, &c__0, &kq,
                 &myrow, &mycol, &p, &q, (ftnlen)12, (ftnlen)12);

       ibseed = 200;
       pdmatgen_(&ictxt, "No transpose", "No transpose", &descb[2],
                 &descb[3], &descb[4], &descb[5], b, &descb[8],
                 &descb[6], &descb[7], &ibseed, &c__0, &kp, &c__0,
                 &nq, &myrow, &mycol, &p, &q, (ftnlen)12, (ftnlen)12);

       icseed = 300;
       pdmatgen_(&ictxt, "No transpose", "No transpose", &descc[2],
                 &descc[3], &descc[4], &descc[5], c, &descc[8],
                 &descc[6], &descc[7], &icseed, &c__0, &mp, &c__0, &nq,
                 &myrow, &mycol, &p, &q, (ftnlen)12, (ftnlen)12);
    }

    // Parallel multiply; c_b106 (1.0) is passed as both scalar arguments
    pdgemm_("No transpose", "No transpose", &m, &n, &k, &c_b106,
            a, &c__1, &c__1, desca,
            b, &c__1, &c__1, descb, &c_b106,
            c, &c__1, &c__1, descc, (ftnlen)12, (ftnlen)12);

    //
    // Check the results
    if (VERBOSE)
    {
       if (HMPI_Is_host())
       {
          for (i = 0; i < (descc[8]*nq); i++)
          {
              // f2c's integer may be long; cast so the format always matches
              printf("C[%ld]=%0.6f\n", (long)i, c[i]);
          }
       }
    }

    free(a);
    free(b);
    free(c);

    blacs_gridexit__(&ictxt);

    if (HMPI_Is_member(&gid))
    {
       HMPI_Group_free(
           &gid
       );
    }

    gettimeofday(&end, NULL);

    /*
     * Print Execution time
     */
    if (HMPI_Is_host())
    {
       double tstart = start.tv_sec + (start.tv_usec/1.0e6);
       double tend = end.tv_sec + (end.tv_usec/1.0e6);
       // 2*m*n*k operations scaled by 1e-6 (0.0001 * 0.01); the leading 2.0
       // keeps the whole product in floating point
       double speed_mflops = ((double)(2.0*m*0.0001*n*0.01*k)/(double)(tend - tstart));

       printf(
          "N=%ld, g=%ld, p=%ld, q=%ld, t(sec)=%0.9f, speed(MFlops)=%0.6f\n",
          (long)m,
          (long)nb,
          (long)p,
          (long)q,
          (tend - tstart),
          speed_mflops
       );
    }

    HMPI_Finalize(0);

    return 0;
}
