
   #include "mxm_i.h"

   /*-----------------------------------------------------*/

   /*
    * Fill the three matrices used by the recon benchmark.
    *
    *   a - recon_r x recon_n elements, every element set to 2.0
    *   b - recon_n x recon_t elements, every element set to 2.0
    *   c - recon_r x recon_t elements, every element set to 0.0
    *
    * All three are addressed as flat row-major arrays.
    * Always returns HMPI_OK.
    */
   int
   Input_recon
   (
      double *a,
      double *b,
      double *c
   )
   {
      int row;
      int col;

      /* Left operand. */
      for (row = 0; row < recon_r; ++row)
      {
          for (col = 0; col < recon_n; ++col)
          {
              a[row*recon_n + col] = 2.0;
          }
      }

      /* Right operand. */
      for (row = 0; row < recon_n; ++row)
      {
          for (col = 0; col < recon_t; ++col)
          {
              b[row*recon_t + col] = 2.0;
          }
      }

      /* Result matrix starts out cleared. */
      for (row = 0; row < recon_r; ++row)
      {
          for (col = 0; col < recon_t; ++col)
          {
              c[row*recon_t + col] = 0.0;
          }
      }

      return HMPI_OK;
   }

   /*-----------------------------------------------------*/

   /*
    * Benchmark kernel: multiplies the file-level matrices a (x rows,
    * y columns) and b (y rows, z columns) and accumulates the product
    * into c (x rows, z columns).
    *
    * All matrices are flat row-major arrays, so the row stride of a is
    * its column count y, the same layout Input_recon uses when it
    * fills a.
    *
    * Note that c is accumulated into (+=); it is not cleared here.
    *
    * Always returns HMPI_OK.
    */
   int Perf_func
   (
       int x, /* --r-- */
       int y, /* --n-- */
       int z  /* --t-- */
   )
   {
      /*
       * Multiplication of r*n and n*t matrices.
       */
      int i,j,k;
      for (i = 0; i < x; i++)
      {
          for (j = 0; j < z; j++)
          {
              for (k = 0; k < y; k++)
              {
                  /*
                   * Row i of a starts at i*y (y elements per row).
                   * Indexing with i*x reads the wrong elements and
                   * runs past the end of a whenever x > y.
                   */
                  c[i*z + j] += a[i*y + k] * b[k*z + j];
              }
          }
      }

      return HMPI_OK;
   }

   /*-----------------------------------------------------*/

   /*
    * Adapter that runs Perf_func on an untyped parameter block.
    *
    *   input_p  - read as an array of at least three ints, passed to
    *              Perf_func in order as (x, y, z)
    *   num_of_p - not used by this function
    *   output_p - written as a single int: the value returned by
    *              Perf_func
    */
   void Benchmark_function
   (
      const void* input_p,
      int num_of_p,
      void* output_p
   )
   {
      const int *dims = (const int*)input_p;
      int *status_out = (int*)output_p;

      *status_out = Perf_func(dims[0], dims[1], dims[2]);
   }

   /*-----------------------------------------------------*/

   int Do_recon()
   {
       a = (double*)malloc(
                    sizeof(double)
                    *
                    (recon_r*recon_n)
       );

       b = (double*)malloc(
                    sizeof(double)
                    *
                    (recon_n*recon_t)
       );

       c = (double*)malloc(
                    sizeof(double)
                    *
                    (recon_r*recon_t)
       );

       Input_recon(
         a,
         b,
         c
       );

       if (HMPI_Is_member(HMPI_COMM_WORLD_GROUP))
       {
            int rc;
            int output_p;
            int input_p[3] =
                {
                   recon_r,
                   recon_n,
                   recon_t
            };

            rc = HMPI_Recon(
                     &Benchmark_function,
                     input_p,
                     3,
                     &output_p
            );

            if  (rc != HMPI_OK)
            {
                printf("Panic: HMPI_Recon failed\n");
                return rc;
            }
       }

       free(a);
       free(b);
       free(c);

       return HMPI_OK;
   }

  /*-----------------------------------------------------*/

  /*
   * Serialise the performance-model parameters into model_params.
   *
   * Layout written, in order:
   *   [0]        p
   *   [1]        n
   *   [2]        r
   *   [3]        Generalised_block_size
   *   next p     elements of w
   *   next p*p   elements of h
   *   next p*p   elements of trow
   *
   * model_count is accepted but not consulted; the caller must supply
   * a buffer of at least 4 + p + 2*p*p ints.
   *
   * Always returns HMPI_OK.
   */
  int Pack_model_parameters
  (
     int Generalised_block_size,
     int *w,
     int *h,
     int *trow,
     int *model_params,
     int model_count
  )
  {
     int *out = model_params;
     const int *src;
     int row, col, idx;

     (void)model_count;

     /* Fixed header. */
     *out++ = p;
     *out++ = n;
     *out++ = r;
     *out++ = Generalised_block_size;

     /* p entries of w. */
     for (idx = 0; idx < p; ++idx)
     {
        *out++ = w[idx];
     }

     /* p*p entries of h, in storage order. */
     src = h;
     for (row = 0; row < p; ++row)
     {
        for (col = 0; col < p; ++col)
        {
           *out++ = *src++;
        }
     }

     /* p*p entries of trow, in storage order. */
     src = trow;
     for (row = 0; row < p; ++row)
     {
        for (col = 0; col < p; ++col)
        {
           *out++ = *src++;
        }
     }

     return HMPI_OK;
  }

  /*-----------------------------------------------------*/

  /*
   * Search for the generalised block size with the smallest time
   * reported by HMPI_Timeof and broadcast it to all processes.
   *
   * Steps:
   *   1. Allocate the work arrays (model_params, w, h, trow,
   *      row_allocations, column_allocations, dperf, iperf).
   *   2. Read processor performances with HMPI_Get_processors_info
   *      and convert them to ints.
   *   3. On the host only: for every block size from p to n/r that
   *      divides n/r evenly, build the distribution, pack the model
   *      parameters and time the model; remember the fastest size in
   *      Optimal_generalised_block_size.
   *   4. Broadcast Optimal_generalised_block_size from rank 0.
   *
   * If no block size passes the divisibility test,
   * Optimal_generalised_block_size is broadcast unchanged.
   *
   * Returns HMPI_OK on success, -1 on allocation failure, or the error
   * code of the failing HMPI/MPI call. Every allocation made here is
   * released on all paths, and the file-level pointers are reset to
   * NULL afterwards.
   *
   * NOTE(review): when the host leaves the search loop with an error
   * it returns without reaching MPI_Bcast (as the code always did);
   * other processes may still be waiting in that broadcast - confirm
   * how callers are expected to handle this.
   */
  int Timeof()
  {
     int rc = HMPI_OK;
     int i, me;

     int *model_params = NULL;
     int model_count;
     double *dperf = NULL;
     int *iperf = NULL;

     /* p*p computed in size_t so the byte counts cannot overflow int. */
     size_t grid_cells = (size_t)p * (size_t)p;

     me = HMPI_Group_rank(HMPI_COMM_WORLD_GROUP);

     model_count = 1+1+1+1+p+(p*p)+(p*p);

     /*
      * Allocate everything first and check once: each pointer then
      * holds either a fresh block or NULL, so the common cleanup
      * below can free all of them unconditionally.
      */
     model_params       = (int*)malloc(sizeof(int) * (size_t)model_count);
     w                  = (int*)malloc(sizeof(int) * (size_t)p);
     h                  = (int*)malloc(sizeof(int) * grid_cells);
     trow               = (int*)malloc(sizeof(int) * grid_cells);
     row_allocations    = (int*)malloc(sizeof(int) * grid_cells);
     column_allocations = (int*)malloc(sizeof(int) * (size_t)p);
     dperf              = (double*)malloc(sizeof(double) * grid_cells);
     iperf              = (int*)malloc(sizeof(int) * grid_cells);

     if (model_params == NULL
         || w == NULL
         || h == NULL
         || trow == NULL
         || row_allocations == NULL
         || column_allocations == NULL
         || dperf == NULL
         || iperf == NULL
     )
     {
        rc = -1;
        goto cleanup;
     }

     rc = HMPI_Get_processors_info(
              dperf
     );

     if (rc != HMPI_OK)
     {
        goto cleanup;
     }

     /* Performances are handled as truncated integers from here on. */
     for (i = 0; i < (p*p); i++)
     {
        iperf[i] = (int)dperf[i];
     }

     if (HMPI_Is_host())
     {
        printf("Processor performances after HMPI_Recon are: ");

        for (i = 0; i < (p*p); i++)
        {
            printf("%d ", iperf[i]);
        }

        printf("\n");
     }

     if (HMPI_Is_host())
     {
        int bsize;
        double time, min_time = 1.7976931348623157E+308;

        HMPI_Debug(1);

        for (bsize = p; bsize <= n/r; bsize++)
        {
           /* The generalised blocks should fit
            * the square matrix
            */
           int fitted = (n/r)%bsize;

           if (fitted != 0)
           {
              continue;
           }

           printf("=========Block size=%d=============\n", bsize);

           Generalised_block = (int(*)[2])malloc(
                                sizeof(int[2])
                                *
                                (size_t)bsize * (size_t)bsize
           );

           if (Generalised_block == NULL)
           {
              rc = -1;
              goto cleanup;
           }

           rc = Distribute_load(
                  p,
                  iperf,
                  bsize,
                  row_allocations,
                  column_allocations
           );

           if (rc == HMPI_OK)
           {
              rc = Determine_distribution_parameters(
                     p,
                     row_allocations,
                     column_allocations,
                     w,
                     h,
                     trow
              );
           }

           if (rc == HMPI_OK)
           {
              rc = Pack_model_parameters(
                       bsize,
                       w,
                       h,
                       trow,
                       model_params,
                       model_count
              );
           }

           if (rc != HMPI_OK)
           {
              /* Release this iteration's block before bailing out. */
              free(Generalised_block);
              Generalised_block = NULL;
              goto cleanup;
           }

           time = HMPI_Timeof(
                      &MPC_NetType_ParallelAxB,
                      model_params,
                      model_count
           );

           printf(
              "TIMEOF: time=%0.6f, bsize=%d\n", 
              time,
              bsize
           );

           if (time < min_time)
           {
              Optimal_generalised_block_size = bsize;
              min_time = time;
           }

           free(Generalised_block);
           Generalised_block = NULL;

           printf("===================================\n");
        }
     }

     rc = MPI_Bcast(
             &Optimal_generalised_block_size,
             1,
             MPI_INT,
             0,
             HMPI_COMM_WORLD
     );

     if (rc != MPI_SUCCESS)
     {
        goto cleanup;
     }

     if (HMPI_Is_host())
     {
        printf("\n\n");
        printf("me=%d, Optimal generalised block size = %d\n", me, Optimal_generalised_block_size);
        printf("\n\n");
     }

     rc = HMPI_OK;

  cleanup:
     /* free(NULL) is a no-op, so partial allocations are handled. */
     free(row_allocations);
     free(column_allocations);
     free(model_params);

     free(dperf);
     free(iperf);

     free(h);
     free(w);
     free(trow);

     /* The file-level pointers must not keep dangling addresses. */
     row_allocations = NULL;
     column_allocations = NULL;
     h = NULL;
     w = NULL;
     trow = NULL;

     return rc;
  }

  /*-----------------------------------------------------*/

   /*
    * Number of elements shared by two row ranges.
    *
    * Range 1 is [top_row_1, bottom_row_1] and range 2 is
    * [top_row_2, bottom_row_2]. The result is the count of
    * overlapping rows, computed as a bottom-minus-top difference,
    * multiplied by width_1. Ranges that do not overlap give 0.
    */
   int Get_number_of_elements_to_be_transferred
   (
       int top_row_1,
       int bottom_row_1,
       int top_row_2,
       int bottom_row_2,
       int width_1
   )
   {
       int shared_rows = 0;

       if (top_row_1 >= top_row_2 && bottom_row_1 <= bottom_row_2)
       {
          /* Range 1 lies inside range 2. */
          shared_rows = bottom_row_1 - top_row_1;
       }
       else if (top_row_1 <= top_row_2 && bottom_row_1 >= bottom_row_2)
       {
          /* Range 2 lies inside range 1. */
          shared_rows = bottom_row_2 - top_row_2;
       }
       else if (top_row_1 <= top_row_2
                && bottom_row_1 >= top_row_2
                && bottom_row_1 <= bottom_row_2)
       {
          /* Range 1 starts first and its tail reaches into range 2. */
          shared_rows = bottom_row_1 - top_row_2;
       }
       else if (top_row_1 >= top_row_2
                && top_row_1 <= bottom_row_2
                && bottom_row_1 >= bottom_row_2)
       {
          /* Range 2 starts first and its tail reaches into range 1. */
          shared_rows = bottom_row_2 - top_row_1;
       }
       /* Any other arrangement has no overlap: shared_rows stays 0. */

       return shared_rows*width_1;
   }

  /*-----------------------------------------------------*/

  /*
   * Store the coordinates (x, y) in the block descriptor *b:
   * x goes to b->I and y goes to b->J.
   *
   * Always returns 0.
   */
  int
  GetBlock(int x, int y, Block *b)
  {
     Block *target = b;

     target->I = x;
     target->J = y;

     return 0;
  }

  /*-----------------------------------------------------*/

  /*
   * Look up which processor owns block *b.
   *
   * Generalised_block is read as a flat table with
   * Generalised_block_size entries per row; the entry at
   * (b->I, b->J) holds two ints that are copied to p->I and p->J.
   *
   * Always returns 0.
   */
  int
  GetProcessor(Block *b, int Generalised_block_size, Processor* p)
  {
     /* Locate the two-int entry once, then copy both coordinates. */
     const int *owner =
        Generalised_block[((b->I)*Generalised_block_size) + (b->J)];

     p->I = owner[0];
     p->J = owner[1];

     return 0;
  }

  /*-----------------------------------------------------*/

