
  #ifndef _MXM_HH
  #define _MXM_HH

  #include <mpi.h>
  #include <stdio.h>
  #include "counter.h"

  /*
   * H(a, b, c, d, p, q): the element of the flat array `h` at offset
   *
   *     a*p*q*q + b*p*q + c*q + d
   *
   * i.e. a four-index lookup with strides p*q*q, p*q, q and 1.
   *
   * `h` is not a macro parameter: it is resolved at the point of use, so
   * the macro indexes whichever `h` is in scope there.
   *
   * Every argument is parenthesized so that expression arguments such as
   * H(i + 1, ...) address the intended element. Arguments are expanded
   * more than once (q up to four times), so they must be free of side
   * effects.
   */
  #define  H(a, b, c, d, p, q) \
      h[((a) * (p) * (q) * (q) + (b) * (p) * (q) + (c) * (q) + (d))]

  /*
   * Verbosity of diagnostics. Initialised to 0.
   *
   * Declared static in a header, so every translation unit that includes
   * this file gets its own private copy.
   * NOTE(review): the meaning of non-zero values is not visible here;
   * presumably larger values enable more output -- confirm.
   */
  static int MPI_VERBOSE = 0;

  /*
   * The size of a dimension of a square matrix. Initialised to 0.
   *
   * NOTE(review): presumably A, B and C are n x n and n is assigned before
   * they are allocated -- confirm against the code that sets it.
   */
  static int n = 0;

  /*
   * Initialised to 2.
   * NOTE(review): presumably the number of coordinates that identify a
   * processor (two, matching the 2-D grid described for mxm()) -- confirm.
   */
  static int COORDN = 2;

  /*
   * Constant number for MXM multiplication.
   * All the matrix elements are initialized to this number.
   *
   * The whole expansion is parenthesized so the macro behaves as a single
   * primary expression wherever it is used (for example as the operand of
   * sizeof or next to a postfix operator).
   */
  #define  MXM_CONSTANT_NUMBER ((double)2.00)

  /*
   * Matrices used for multiplication.
   *
   * File-scope pointers without an initializer, so they start out as null
   * pointers; no storage is attached to them in this header.
   * NOTE(review): presumably C receives the product of A and B -- confirm.
   */
  static double *A, *B, *C;

  /*
   * Pointers to int, initially null (no initializer at file scope).
   * NOTE(review): presumably the per-processor row and column allocations;
   * the same names appear as parameters of Distribute_load() and
   * Determine_distribution_parameters() -- confirm who allocates and
   * fills them.
   */
  static int *row_allocations;
  static int *column_allocations;

  /*
   * All four are initialised to 0.
   * NOTE(review): presumably istart/myialloc are the first row and the
   * number of rows owned by the calling process, and jstart/myjalloc the
   * same for columns -- confirm.
   */
  static int istart = 0;
  static int myialloc = 0; 
  static int jstart = 0;
  static int myjalloc = 0;

  /*
   * Global barrier time. Initialised to 0.
   *
   * NOTE(review): presumably the measured cost of a barrier, obtained from
   * BARRIER_ITERATIONS (10) timed barriers -- confirm; the time unit is
   * not visible in this header.
   */
  static double barrier_time = 0;
  #define BARRIER_ITERATIONS 10

  /*
   * Generalised_block points to arrays of two ints (i.e. rows of an
   * N x 2 table); h, w and trow point to int. All start out null.
   *
   * `h` is the array the H() macro indexes wherever no other `h` is in
   * scope.
   * NOTE(review): h, w and trow share their names with the output
   * parameters of Determine_distribution_parameters(); presumably they
   * hold heights, widths and top rows of the distributed rectangles --
   * confirm.
   */
  static int (*Generalised_block)[2];
  static int *h;
  static int *w;
  static int *trow;

  /*
   * HMPI_MSG_TAG expands to 0xff (255).
   * NOTE(review): presumably the tag passed to the MPI send/receive calls
   * of the algorithm -- confirm.
   *
   * algo_comm is an MPI communicator handle and algo_comm_rank an int.
   * NOTE(review): presumably the communicator the algorithm runs on and
   * the calling process's rank in it -- confirm where they are assigned.
   */
  #define HMPI_MSG_TAG  0xff
  static MPI_Comm algo_comm;
  static int algo_comm_rank;

  /*
   * Timing state.
   *
   * starts/ends are a pair of struct timeval timestamps. They are static,
   * like every other object defined in this header, so that including the
   * header from more than one translation unit does not produce duplicate
   * external definitions of `starts` and `ends` at link time.
   * NOTE(review): struct timeval comes from <sys/time.h>, which this
   * header does not include directly -- presumably it arrives through
   * counter.h; confirm.
   *
   * computation_time and communication_time are initialised to 0.0.
   * NOTE(review): presumably running totals derived from starts/ends --
   * confirm; the time unit is not visible in this header.
   */
  static struct timeval starts, ends;
  static double computation_time = 0.0;
  static double communication_time = 0.0;

  /*************************************************
   *                                               *
   *            HELPER FUNCTIONS                   *
   *                                               *
   *************************************************/

  /*
   * Create the net and start multiplying.
   *
   * acomm: pointer to an MPI communicator.
   *        NOTE(review): presumably the communicator spanning the
   *        processes that take part -- confirm against the definition.
   *
   * Returns an int; whether it is a status code, and which values mean
   * success, is not visible from this header -- TODO confirm.
   */
  int Execute_algorithm(MPI_Comm* acomm);

  /*
   * Perform the mxm multiplication.
   *
   * acomm: pointer to an MPI communicator.
   *        NOTE(review): presumably the communicator the multiplication
   *        runs on -- confirm against the definition.
   *
   * Returns an int; its meaning is not visible from this header --
   * TODO confirm.
   */
  int Perform_mxm
  (
     MPI_Comm* acomm
  );

  /*
   * The routine that creates the 2-D grid network
   * of processors which are used for matrix
   * multiplication.
   *
   * acomm:   pointer to an MPI communicator.
   * a, b, c: pointers to double, none of them const-qualified.
   *          NOTE(review): presumably a and b are the operand matrices
   *          and c receives the result -- confirm, including the
   *          expected sizes and layout.
   *
   * Returns an int; its meaning is not visible from this header --
   * TODO confirm.
   */
  int
  mxm
  (
     MPI_Comm* acomm,
     double *a,
     double *b,
     double *c
  );

  /*
   * Uses row and column communicators instead
   * of subnets.
   * This routine has to be enhanced to take into
   * account the granularity.
   *
   * acomm:          pointer to an MPI communicator.
   * my_coordinates: read-only int array (const-qualified).
   *                 NOTE(review): presumably the calling process's
   *                 coordinates in the processor grid -- confirm the
   *                 number of entries expected.
   * a, b, c:        pointers to double, none of them const-qualified.
   *                 NOTE(review): presumably operands a, b and
   *                 result c -- confirm.
   *
   * Returns an int; its meaning is not visible from this header --
   * TODO confirm.
   */
  int
  Grid_computations
  (
     MPI_Comm* acomm,
     const int* my_coordinates,
     double *a,
     double *b,
     double *c
  );

  /*
   * pnum:   an int.
   * ppar:   read-only int array (const-qualified).
   * pcoord: writable int array.
   *
   * NOTE(review): presumably converts the process rank pnum into grid
   * coordinates stored in pcoord, with ppar describing the grid
   * dimensions -- confirm against the definition, including the
   * required array lengths.
   */
  void rank2coord
  (
     int pnum,
     const int *ppar,
     int *pcoord
  );

  /*
   * pcoord: read-only int array (const-qualified).
   * ppar:   read-only int array (const-qualified).
   *
   * Returns an int.
   * NOTE(review): presumably the inverse of rank2coord() -- the rank of
   * the process at coordinates pcoord in the grid described by ppar --
   * confirm against the definition. Note that the parameter order
   * (pcoord, ppar) differs from rank2coord's (pnum, ppar, pcoord).
   */
  int coord2rank
  (
    const int *pcoord,
    const int *ppar
  );

  /*
   * Takes the top and bottom rows of two row ranges (range 1 and
   * range 2) and returns an int.
   *
   * NOTE(review): presumably returns the number of rows the two ranges
   * have in common -- confirm against the definition, including whether
   * the bottom rows are inclusive and what is returned when the ranges
   * do not overlap.
   */
  int Common_height
  (
      int top_row_1,
      int bottom_row_1,
      int top_row_2,
      int bottom_row_2
  );

  /*
   * p, q:               ints.
   *                     NOTE(review): presumably the dimensions of the
   *                     p x q processor grid -- confirm.
   * row_allocations,
   * column_allocations: read-only int arrays (const-qualified).
   * w, h, trow:         writable int arrays.
   *                     NOTE(review): presumably outputs (widths,
   *                     heights, top rows) -- confirm their required
   *                     lengths.
   *
   * The parameter names row_allocations, column_allocations, w, h and
   * trow are the same as file-scope objects declared in this header and
   * hide them inside the function; in particular the H() macro, which
   * refers to `h` by name, binds to the parameter there.
   *
   * Returns an int; its meaning is not visible from this header --
   * TODO confirm.
   */
  int Determine_distribution_parameters
  (
      int p,
      int q,
      const int *row_allocations,
      const int *column_allocations,
      int *w,
      int *h,
      int *trow
  );

  /*
   * p, q:                          ints.
   *                                NOTE(review): presumably the
   *                                dimensions of the processor grid --
   *                                confirm.
   * Generalised_block_size_row,
   * Generalised_block_size_column: ints.
   *                                NOTE(review): presumably the extent
   *                                of the generalised block in rows and
   *                                columns -- confirm.
   * row_allocations,
   * column_allocations:            writable int arrays.
   *                                NOTE(review): presumably filled in by
   *                                this routine -- confirm their
   *                                required lengths.
   *
   * The last two parameter names hide the file-scope objects of the same
   * names inside the function.
   *
   * Returns an int; its meaning is not visible from this header --
   * TODO confirm.
   */
  int Distribute_load
  (
      int p,
      int q,
      int Generalised_block_size_row,
      int Generalised_block_size_column,
      int *row_allocations,
      int *column_allocations
  );

  /*
   * Pair of int fields, I and J.
   * NOTE(review): presumably the (row, column) position of a processor in
   * the processor grid -- confirm against the code that fills it in.
   */
  typedef struct {
      int I;
      int J;
  } Processor;
  /*
   * Pair of int fields, i and j.
   * NOTE(review): presumably the (row, column) index of a matrix block --
   * confirm against the code that fills it in.
   */
  typedef struct {
      int i;
      int j;
  } Block;

  /*
   * x, y: ints.
   * b:    writable pointer to a Block.
   *
   * NOTE(review): presumably stores in *b the block that contains matrix
   * element (x, y) -- confirm against the definition.
   */
  void
  GetBlock(int x, int y, Block *b);

  /*
   * b: pointer to a Block (not const-qualified).
   * p: writable pointer to a Processor.
   *
   * NOTE(review): presumably stores in *p the processor that owns block
   * *b -- confirm against the definition, including whether *b is
   * modified.
   */
  void
  GetProcessor(Block *b, Processor *p);

  #endif /* _MXM_HH */
