
  #ifndef _MXM_HH
  #define _MXM_HH

  #include "counter.h"

  /*
   * Integer constants carrying the "recon" prefix.
   * NOTE(review): presumably the problem dimensions used by the
   * recon benchmark -- confirm against the code that consumes them.
   */
  #define recon_n 200
  #define recon_r 50
  #define recon_t 20

  /*
   * Diagnostic verbosity level; starts at 0.
   * Declared static, so every translation unit that includes this
   * header works with its own private copy of the variable.
   */
  static int VERBOSE = 0;

  /*
   * Constant number for MXM multiplication: every matrix element is
   * initialized to this value.
   *
   * The whole expansion is parenthesized so the macro always behaves
   * as a single primary expression, e.g. as the operand of sizeof or
   * next to a postfix operator.
   */
  #define MXM_CONSTANT_NUMBER ((double)2.00)

  /*
   * Matrix buffers for recon.
   * Three file-local pointers to double; with static storage and no
   * initializer they start out as null pointers.
   */
  static double *a;
  static double *b;
  static double *c;

  /*
   * Matrix buffers used for the multiplication itself.
   * Three file-local pointers to double; with static storage and no
   * initializer they start out as null pointers.
   */
  static double *A;
  static double *B;
  static double *C;

  /*
   * Heterogeneous row and column distributions.
   * The block panel that is distributed block-cyclically should not
   * have more than this number of rows and of columns.
   * Both pointers are file-local and start out null.
   */
  static int *block_allocations, *Generalised_block;

  /*
   * File-local integer state, all starting at 0.
   * NOTE(review): presumably the starting row (istart), starting
   * column (jstart) and this process's allocation (myalloc) --
   * confirm against the code that sets them.
   */
  static int istart = 0, jstart = 0, myalloc = 0;

  /*
   * Global barrier time; starts at zero.
   * NOTE(review): unit and accumulation policy are decided by the
   * code that updates it -- not visible from this header.
   */
  static double barrier_time = 0.0;

  /*
   * Flag telling whether recon is required; initialized to 1.
   * Recon is not needed when the performances of the processors are
   * already known and the network is stable.
   */
  static int HMPI_Is_recon_required = 1;

  /*************************************************
   *                                               *
   *            HELPER FUNCTIONS                   *
   *                                               *
   *************************************************/

  /*
   * Create the net and start multiplying.
   *
   * Takes no arguments. Declared with (void) rather than an empty
   * parameter list so that, in C, this is a real prototype and the
   * compiler rejects calls that pass arguments.
   *
   * Returns an int; NOTE(review): the meaning of the value is set by
   * the definition, which is not part of this header.
   */
  int Execute_algorithm(void);

  /*
   * Perform the mxm multiplication.
   *
   * gid - HMPI group handle; passed as pointer-to-const, so the
   *       callee does not modify it through this pointer.
   *
   * Returns an int; NOTE(review): the meaning of the value is set by
   * the definition, which is not part of this header.
   */
  int Perform_mxm(const HMPI_Group *gid);

  /*
   * Routine that creates the 2-D grid network of processors used for
   * the matrix multiplication.
   *
   * gid     - HMPI group handle (pointer-to-const).
   * a, b, c - matrix buffers of doubles.
   *           NOTE(review): presumably the two operands and the
   *           result -- confirm against the definition.
   *
   * Returns an int; NOTE(review): the meaning of the value is set by
   * the definition, which is not part of this header.
   */
  int mxm(const HMPI_Group *gid, double *a, double *b, double *c);

  /*
   * Grid computation step that uses row and column communicators
   * instead of subnets.
   * This routine still has to be enhanced to take the granularity
   * into account.
   *
   * gid            - HMPI group handle (pointer-to-const).
   * grid_comm      - pointer to the MPI communicator of the grid.
   * my_coordinates - read-only coordinates of the calling process.
   *                  NOTE(review): presumably a (row, column) pair --
   *                  confirm against the definition.
   * a, b, c        - matrix buffers of doubles.
   *
   * Returns an int; NOTE(review): the meaning of the value is set by
   * the definition, which is not part of this header.
   */
  int Grid_computations(const HMPI_Group *gid,
                        MPI_Comm *grid_comm,
                        const int *my_coordinates,
                        double *a,
                        double *b,
                        double *c);

  /*
   * Rank translation helper.
   * NOTE(review): presumably maps rank pij of grid_comm to the
   * matching rank in local_comm and stores it in *root -- confirm
   * against the definition.
   *
   * pij        - integer rank to translate.
   * grid_comm  - pointer to an MPI communicator.
   * local_comm - pointer to an MPI communicator.
   * root       - pointer to an int (non-const, so it may be written).
   *
   * Returns an int; meaning defined by the implementation.
   */
  int Translate_from_rank(int pij,
                          MPI_Comm *grid_comm,
                          MPI_Comm *local_comm,
                          int *root);

  /*
   * Run the recon step.
   *
   * Takes no arguments. Declared with (void) rather than an empty
   * parameter list so that, in C, this is a real prototype and the
   * compiler rejects calls that pass arguments.
   *
   * Returns an int; NOTE(review): the meaning of the value is set by
   * the definition, which is not part of this header.
   */
  int Do_recon(void);

  /*
   * Load distribution helper.
   *
   * p                 - integer count.
   *                     NOTE(review): presumably the number of
   *                     processors -- confirm.
   * perf              - read-only array of doubles.
   *                     NOTE(review): presumably one performance
   *                     figure per processor -- confirm.
   * block_allocations - int array the callee may write to (this
   *                     parameter name hides the file-scope variable
   *                     of the same name inside the prototype).
   *
   * Returns an int; meaning defined by the implementation.
   */
  int Distribute_load(int p, const double *perf, int *block_allocations);

  /*
   * Performance function taking three int arguments and returning
   * an int.
   * NOTE(review): the roles of x, y and z are not documented here --
   * confirm against the definition.
   */
  int Perf_func(int x, int y, int z);

  /*
   * Input routine for recon operating on three buffers of doubles.
   * NOTE(review): presumably fills a, b and c with the recon input
   * data -- confirm against the definition.
   *
   * Returns an int; meaning defined by the implementation.
   */
  int Input_recon(double *a, double *b, double *c);

   /*
    * Benchmark callback with untyped input and output.
    *
    * input_p  - read-only opaque input (pointer to const void).
    * num_of_p - integer count.
    *            NOTE(review): presumably the number of items behind
    *            input_p -- confirm against the definition.
    * output_p - opaque output location the callee may write to.
    *
    * Returns nothing.
    */
   void Benchmark_function(const void *input_p, int num_of_p, void *output_p);


  #endif /* _MXM_HH */
