
   #include "mxm_i.h"

   /*-----------------------------------------------------*/

   /*
    * Store `value` in every element of a rows x cols matrix kept
    * row by row in `matrix` (element (row, col) is matrix[row*cols + col]).
    */
   static void
   Fill_recon_matrix
   (
      double *matrix,
      int rows,
      int cols,
      double value
   )
   {
      int row, col;

      for (row = 0; row < rows; row++)
      {
          for (col = 0; col < cols; col++)
          {
              matrix[row*cols + col] = value;
          }
      }
   }

   /*
    * Initialise the three matrices used by the recon benchmark.
    *
    *   a - recon_r x recon_n elements, every element set to 2.0
    *   b - recon_n x recon_t elements, every element set to 2.0
    *   c - recon_r x recon_t elements, every element set to 0.0
    *
    * The caller supplies buffers large enough for these sizes.
    * Always returns HMPI_OK.
    */
   int
   Input_recon
   (
      double *a,
      double *b,
      double *c
   )
   {
      Fill_recon_matrix(a, recon_r, recon_n, 2.0);
      Fill_recon_matrix(b, recon_n, recon_t, 2.0);
      Fill_recon_matrix(c, recon_r, recon_t, 0.0);

      return HMPI_OK;
   }

   /*-----------------------------------------------------*/

   /*
    * Benchmark kernel: multiply the file-scope matrix a (x rows, y columns)
    * by the file-scope matrix b (y rows, z columns) and accumulate the
    * products into the file-scope matrix c (x rows, z columns).
    *
    * All three matrices are stored row by row, so the row stride of a
    * is y, and the row stride of b and c is z. c is accumulated into
    * (+=), not overwritten.
    *
    * Always returns HMPI_OK.
    */
   int Perf_func
   (
       int x, /* --r-- */
       int y, /* --n-- */
       int z  /* --t-- */
   )
   {
      /*
       * Multiplication of r*n and n*t matrices.
       */
      int i,j,k;
      for (i = 0; i < x; i++)
      {
          for (j = 0; j < z; j++)
          {
              for (k = 0; k < y; k++)
              {
                  /*
                   * Row i of a starts at i*y (y elements per row).
                   * Using i*x here would read the wrong elements and
                   * run past the end of a whenever x > y.
                   */
                  c[i*z + j] += a[i*y + k] * b[k*z + j];
              }
          }
      }

      return HMPI_OK;
   }

   /*-----------------------------------------------------*/

   /*
    * Adapter that lets Perf_func be handed to HMPI_Recon as a benchmark.
    *
    *   input_p  - read as an array of at least three ints, passed to
    *              Perf_func in order as (x, y, z)
    *   num_of_p - not used by this function
    *   output_p - written as a single int: the value Perf_func returned
    */
   void Benchmark_function
   (
      const void* input_p,
      int num_of_p,
      void* output_p
   )
   {
      const int *dims = (const int *)input_p;
      int *status_out = (int *)output_p;

      *status_out = Perf_func(dims[0], dims[1], dims[2]);
   }

   /*-----------------------------------------------------*/

   int Do_recon()
   {
       a = (double*)malloc(
                    sizeof(double)
                    *
                    (recon_r*recon_n)
       );

       b = (double*)malloc(
                    sizeof(double)
                    *
                    (recon_n*recon_t)
       );

       c = (double*)malloc(
                    sizeof(double)
                    *
                    (recon_r*recon_t)
       );

       Input_recon(
         a,
         b,
         c
       );

       if (HMPI_Is_member(HMPI_COMM_WORLD_GROUP))
       {
            int rc;
            int output_p;
            int input_p[3] =
                {
                   recon_r,
                   recon_n,
                   recon_t
            };

            rc = HMPI_Recon(
                     &Benchmark_function,
                     input_p,
                     3,
                     &output_p
            );

            if  (rc != HMPI_OK)
            {
                printf("Panic: HMPI_Recon failed\n");
                return rc;
            }
       }

       if (HMPI_Is_host())
       {
          printf("Recon finished\n");
       }

       free(a);
       free(b);
       free(c);

       return HMPI_OK;
   }

  /*-----------------------------------------------------*/

  /*
   * Flatten the performance-model arguments into model_params.
   *
   * Layout written (indices into model_params):
   *   [0]                 p   (file-scope value)
   *   [1]                 q   (file-scope value)
   *   [2]                 r   (file-scope value)
   *   [3]                 n   (file-scope value)
   *   [4]                 Generalised_block_size_row
   *   [5]                 Generalised_block_size_col
   *   [6 .. 6+q-1]        w[0 .. q-1]
   *   [6+q .. ]           h[0 .. p*q*p*q-1]
   *
   * model_count is the number of ints model_params can hold.
   *
   * Returns HMPI_OK on success, or -1 without writing anything when
   * model_count is too small for the layout above.
   */
  int Pack_model_parameters
  (
     int Generalised_block_size_row,
     int Generalised_block_size_col,
     int *w,
     int *h,
     int *model_params,
     int model_count
  )
  {
     /* Number of scalar entries stored ahead of the w and h arrays. */
     enum { SCALAR_COUNT = 6 };

     const int h_count = p*q*p*q;
     int i;

     /* Refuse to write past the end of the caller's buffer. */
     if (model_count < (SCALAR_COUNT + q + h_count))
     {
        return -1;
     }

     model_params[0] = p;
     model_params[1] = q;
     model_params[2] = r;
     model_params[3] = n;
     model_params[4] = Generalised_block_size_row;
     model_params[5] = Generalised_block_size_col;

     for (i = 0; i < q; i++)
     {
        model_params[SCALAR_COUNT + i] = w[i];
     }

     for (i = 0; i < h_count; i++)
     {
        model_params[SCALAR_COUNT + q + i] = h[i];
     }

     return HMPI_OK;
  }

  /*-----------------------------------------------------*/

  /*
   * Search for the generalised block size with the smallest predicted
   * execution time and share it with every process.
   *
   * Steps, as performed below:
   *   1. Allocate the model parameter buffer, the file-scope work arrays
   *      (w, h, trow, row_allocations, column_allocations) and two local
   *      performance arrays.
   *   2. Fetch processor performances with HMPI_Get_processors_info and
   *      convert them to int.
   *   3. On the host only: for every (row, column) block size pair from
   *      (p, q) up to (n, n) that divides n evenly, call Distribute_load,
   *      Determine_distribution_parameters, Pack_model_parameters and
   *      HMPI_Timeof, remembering the pair with the smallest time in
   *      Optimal_generalised_block_size_row / _col.
   *   4. Broadcast the two optimal sizes from root 0 over HMPI_COMM_WORLD.
   *
   * Every buffer allocated here is released on every exit path.
   *
   * Returns HMPI_OK on success, -1 on allocation failure, or the error
   * code of the HMPI/MPI call that failed.
   *
   * NOTE(review): when the host leaves early because of an error inside
   * the search loop, it never reaches the MPI_Bcast calls; the remaining
   * processes presumably block in MPI_Bcast. That pre-existing behaviour
   * is unchanged here - confirm how callers are expected to recover.
   */
  int Timeof()
  {
     int rc, i;

     int *model_params;
     int model_count;
     double *dperf;
     int *iperf;

     /* Six scalar entries, then q widths, then p*q*p*q heights. */
     model_count = 6 + q + (p*q*p*q);

     model_params = (int*)malloc(sizeof(int) * model_count);
     w = (int*)malloc(sizeof(int) * q);
     h = (int*)malloc(sizeof(int) * (p*q*p*q));
     trow = (int*)malloc(sizeof(int) * (p*q));
     row_allocations = (int*)malloc(sizeof(int) * (p*q));
     column_allocations = (int*)malloc(sizeof(int) * q);
     dperf = (double*)malloc(sizeof(double) * (p*q));
     iperf = (int*)malloc(sizeof(int) * (p*q));

     /*
      * Every pointer has been assigned above, so the cleanup code can
      * free all of them unconditionally (free(NULL) is a no-op).
      */
     if ((model_params == NULL)
         || (w == NULL)
         || (h == NULL)
         || (trow == NULL)
         || (row_allocations == NULL)
         || (column_allocations == NULL)
         || (dperf == NULL)
         || (iperf == NULL)
     )
     {
        rc = -1;
        goto cleanup;
     }

     rc = HMPI_Get_processors_info(
              dperf
     );

     if (rc != HMPI_OK)
     {
        goto cleanup;
     }

     for (i = 0; i < (p*q); i++)
     {
        /* Performances are used as integers from here on. */
        iperf[i] = (int)dperf[i];
     }

     if (HMPI_Is_host())
     {
        printf("Updated processor performances are: ");

        for (i = 0; i < (p*q); i++)
        {
            printf("%d ", iperf[i]);
        }

        printf("\n");
     }

     if (HMPI_Is_host())
     {
        int bsize_r, bsize_c;

        /* Start from the largest finite double so any estimate is smaller. */
        double estimated_time, min_time = 1.7976931348623157E+308;

        for (bsize_r = p; bsize_r <= n; bsize_r++)
        {
           for (bsize_c = q; bsize_c <= n; bsize_c++)
           {
               /*
                * The generalised blocks should fit
                * the square matrix
                */
               int rfitted = (n)%bsize_r;
               int cfitted = (n)%bsize_c;

               if ((rfitted != 0) || (cfitted != 0))
               {
                  continue;
               }

               printf("=========row block size=%d, column block size=%d=============\n",
                      bsize_r,
                      bsize_c
               );

               Generalised_block = (int(*)[2])malloc(
                                    sizeof(int[2])
                                    *
                                    (bsize_r)*(bsize_c)
               );

               if (Generalised_block == NULL)
               {
                  rc = -1;
                  goto cleanup;
               }

               rc = Distribute_load(
                      p,
                      q,
                      iperf,
                      bsize_r,
                      bsize_c,
                      row_allocations,
                      column_allocations
               );

               if (rc == HMPI_OK)
               {
                  rc = Determine_distribution_parameters(
                         p,
                         q,
                         row_allocations,
                         column_allocations,
                         w,
                         h,
                         trow
                  );
               }

               if (rc == HMPI_OK)
               {
                  rc = Pack_model_parameters(
                           bsize_r,
                           bsize_c,
                           w,
                           h,
                           model_params,
                           model_count
                  );
               }

               if (rc != HMPI_OK)
               {
                  /* Release this iteration's block map before bailing out. */
                  free(Generalised_block);
                  Generalised_block = NULL;
                  goto cleanup;
               }

               estimated_time = HMPI_Timeof(
                                    &MPC_NetType_ParallelAxB,
                                    model_params,
                                    model_count
               );

               printf(
                  "TIMEOF: time=%0.6f\n",
                  estimated_time
               );

               if (estimated_time < min_time)
               {
                  Optimal_generalised_block_size_row = bsize_r;
                  Optimal_generalised_block_size_col = bsize_c;
                  min_time = estimated_time;
               }

               free(Generalised_block);
               Generalised_block = NULL;

               printf("===================================\n");
            }
        }
     }

     if (HMPI_Is_host())
     {
        printf("\n\n");
        printf("Optimal generalised block size row = %d, Optimal generalised block size col = %d\n", 
           Optimal_generalised_block_size_row,
           Optimal_generalised_block_size_col
        );
        printf("\n\n");
     }

     rc = MPI_Bcast(
             &Optimal_generalised_block_size_row,
             1,
             MPI_INT,
             0,
             HMPI_COMM_WORLD
     );

     if (rc != MPI_SUCCESS)
     {
        goto cleanup;
     }

     rc = MPI_Bcast(
             &Optimal_generalised_block_size_col,
             1,
             MPI_INT,
             0,
             HMPI_COMM_WORLD
     );

     if (rc != MPI_SUCCESS)
     {
        goto cleanup;
     }

     rc = HMPI_OK;

  cleanup:
     free(row_allocations);
     free(column_allocations);
     free(model_params);

     free(dperf);
     free(iperf);

     free(h);
     free(w);
     free(trow);

     /* The file-scope pointers outlive this call; do not leave them dangling. */
     row_allocations = NULL;
     column_allocations = NULL;
     h = NULL;
     w = NULL;
     trow = NULL;

     return rc;
  }

  /*-----------------------------------------------------*/

  /*
   * Height shared by two row ranges.
   *
   * Range 1 spans top_row_1..bottom_row_1 and range 2 spans
   * top_row_2..bottom_row_2. The checks are applied in this order:
   *   - range 1 lies inside range 2: height of range 1
   *   - range 2 lies inside range 1: height of range 2
   *   - range 1 starts first and ends inside range 2:
   *       bottom_row_1 - top_row_2
   *   - range 1 starts inside range 2 and ends last:
   *       bottom_row_2 - top_row_1
   *   - anything else: 0
   */
  int Common_height
  (
      int top_row_1,
      int bottom_row_1,
      int top_row_2,
      int bottom_row_2
  )
  {
      const int starts_at_or_after  = (top_row_1 >= top_row_2);
      const int starts_at_or_before = (top_row_1 <= top_row_2);
      const int ends_at_or_before   = (bottom_row_1 <= bottom_row_2);
      const int ends_at_or_after    = (bottom_row_1 >= bottom_row_2);

      /* Range 1 is contained in range 2 */
      if (starts_at_or_after && ends_at_or_before)
      {
         return bottom_row_1 - top_row_1;
      }

      /* Range 2 is contained in range 1 */
      if (starts_at_or_before && ends_at_or_after)
      {
         return bottom_row_2 - top_row_2;
      }

      /* Range 1 begins first and its tail reaches into range 2 */
      if (starts_at_or_before
          && ends_at_or_before
          && (bottom_row_1 >= top_row_2)
      )
      {
         return bottom_row_1 - top_row_2;
      }

      /* Range 1 begins inside range 2 and runs past its end */
      if (starts_at_or_after
          && ends_at_or_after
          && (top_row_1 <= bottom_row_2)
      )
      {
         return bottom_row_2 - top_row_1;
      }

      /* Every remaining arrangement has nothing in common */
      return 0;
  }

  /*-----------------------------------------------------*/

   /*
    * Number of elements in the rows shared by two row ranges.
    *
    * The shared row count is chosen by the first matching case:
    *   - range 1 inside range 2: bottom_row_1 - top_row_1
    *   - range 2 inside range 1: bottom_row_2 - top_row_2
    *   - range 1 starts first and ends inside range 2:
    *       bottom_row_1 - top_row_2
    *   - range 1 starts inside range 2 and ends last:
    *       bottom_row_2 - top_row_1
    *   - otherwise: 0
    *
    * Returns that row count multiplied by width_1.
    */
   int Get_number_of_elements_to_be_transferred
   (
       int top_row_1,
       int bottom_row_1,
       int top_row_2,
       int bottom_row_2,
       int width_1
   )
   {
       /* Stays 0 when none of the overlap cases below applies */
       int shared_rows = 0;

       if ((top_row_1 >= top_row_2) && (bottom_row_1 <= bottom_row_2))
       {
          /* Range 1 is contained in range 2 */
          shared_rows = bottom_row_1 - top_row_1;
       }
       else if ((top_row_1 <= top_row_2) && (bottom_row_1 >= bottom_row_2))
       {
          /* Range 2 is contained in range 1 */
          shared_rows = bottom_row_2 - top_row_2;
       }
       else if ((top_row_1 <= top_row_2)
                && (bottom_row_1 >= top_row_2)
                && (bottom_row_1 <= bottom_row_2)
       )
       {
          /* Range 1 comes first and overlaps the start of range 2 */
          shared_rows = bottom_row_1 - top_row_2;
       }
       else if ((top_row_1 >= top_row_2)
                && (top_row_1 <= bottom_row_2)
                && (bottom_row_1 >= bottom_row_2)
       )
       {
          /* Range 1 comes second and overlaps the end of range 2 */
          shared_rows = bottom_row_2 - top_row_1;
       }

       return shared_rows * width_1;
   }

  /*-----------------------------------------------------*/

  /*
   * Store the coordinates (x, y) in the block descriptor:
   * x goes to b->I and y goes to b->J.
   *
   * Always returns 0.
   */
  int
  GetBlock(int x, int y, Block *b)
  {
     /* First coordinate */
     b->I = x;

     /* Second coordinate */
     b->J = y;

     return 0;
  }

  /*-----------------------------------------------------*/

  /*
   * Look up the processor recorded for block b in the file-scope
   * Generalised_block table.
   *
   * The table is indexed by (b->I * Generalised_block_size_col + b->J);
   * entry [0] is copied to p->I and entry [1] to p->J.
   *
   * Always returns 0.
   */
  int
  GetProcessor(Block *b, int Generalised_block_size_col, Processor* p)
  {
     /* Position of the block inside the flattened table */
     const int slot = ((b->I)*Generalised_block_size_col) + (b->J);

     p->I = Generalised_block[slot][0];
     p->J = Generalised_block[slot][1];

     return 0;
  }

  /*-----------------------------------------------------*/
