
   /***********************************************************/
   /* hmpi - Implementation of Interfaces of                  */
   /*        the HMPI library                                 */
   /*                                                         */
   /* Revision history                                        */
   /* 15-10-2007  --      Added new group auto interfaces     */
   /*                     hmpi_group_pauto_create             */
   /*                     hmpi_group_heuristic_pauto_create   */
   /***********************************************************/

   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>

   #include <hmpi.h>
   #include <hmpi_internal_funcs.h>

   #define  HMPI_MAX_INITIAL_PROC_ARRMTS   16
   #define  HMPI_MSG_TAG                   0xff

   /*-----------------------------------------------------*/

   /*
    * HMPI_Get_process_arrangements_recursively_i() --
    * Depth-first enumeration of process arrangements. Coordinate 'd' is
    * given every value 1..gsize in turn; deeper coordinates are filled
    * by recursion. Each complete combination is appended to (*parr)
    * unless HMPI_Test_size_i() returns non-zero for it or the optional
    * heuristic function returns 0 for it.
    *
    * ARGUMENTS:
    *   d                  - index of the coordinate set at this level
    *   model              - not used by this function; kept for the interface
    *   hfunc              - optional filter, may be NULL; a return value
    *                        of 0 discards the combination
    *   model_parameters   - user model parameters (model_param_count ints)
    *   gsize              - upper bound for every coordinate
    *   numcoord           - number of coordinates per arrangement
    *   coord              - scratch array of numcoord ints
    *   parr               - in/out: heap array holding the accepted
    *                        arrangements, numcoord ints each; it is
    *                        reallocated as needed
    *   nparr              - in/out: number of arrangements stored
    *   max_nparr          - in/out: capacity of (*parr) in arrangements
    *
    * RETURN CODES:
    *   HMPI_OK on success, MPC_ERR_NOMEM when an allocation fails. When
    *   growing (*parr) fails the old buffer is released and (*parr) is
    *   set to NULL.
    */
   int
   HMPI_Get_process_arrangements_recursively_i
   (
       int d,
       const HMPI_Model* model,
       HMPI_Heuristic_function hfunc,
       const int* model_parameters,
       int model_param_count,
       int gsize,
       int numcoord,
       int *coord,
       int **parr,
       int *nparr,
       int *max_nparr
   )
   {
       int i, y, rc;

       /*
        * coord[] has numcoord entries, so d == numcoord must stop here
        * too; otherwise coord[numcoord] would be written (this can
        * happen for numcoord == 0).
        */
       if (d >= numcoord)
       {
          return HMPI_OK;
       }

       for (y = 1; y <= gsize; y++)
       {
           coord[d] = y;

           if ((d+1) != numcoord)
           {
              /*
               * Not the last coordinate yet: fill the remaining ones.
               */
              rc = HMPI_Get_process_arrangements_recursively_i(
                       d+1,
                       model,
                       hfunc,
                       model_parameters,
                       model_param_count,
                       gsize,
                       numcoord,
                       coord,
                       parr,
                       nparr,
                       max_nparr
              );

              if (rc != HMPI_OK)
              {
                 return rc;
              }

              continue;
           }

           /*
            * We have a complete combination here.
            * Test the combination against the number of
            * processes available.
            */
           if (HMPI_Test_size_i(gsize, numcoord, coord))
           {
              continue;
           }

           /*
            * User may have provided a heuristic function to reduce
            * the number of process arrangements evaluated. The model
            * parameter vector is only needed to call it, so it is
            * only built when a heuristic is present.
            */
           if (hfunc != NULL)
           {
              int totalp, paramc;
              int *modelp;

              for (totalp = 1, i = 0; i < numcoord; i++)
              {
                 totalp *= coord[i];
              }

              /*
               * Layout: user parameters, coordinates, totalp speeds,
               * totalp*totalp link data, totalp link data flags.
               */
              paramc = model_param_count + numcoord + totalp + totalp*totalp + totalp;

              modelp = (int*)malloc(sizeof(int) * paramc);

              if (modelp == NULL)
              {
                 return MPC_ERR_NOMEM;
              }

              for (i = 0; i < model_param_count; i++)
              {
                 modelp[i] = model_parameters[i];
              }

              for (i = 0; i < numcoord; i++)
              {
                 modelp[model_param_count+i] = coord[i];
              }

              /*
               * Get the speeds
               */
              for (i = 0; i < totalp; i++)
              {
                 modelp[model_param_count+numcoord+i] = (int)HMPI_Comm_world_performances[i];
              }

              /*
               * Initialize the communication link data
               */
              for (i = 0; i < totalp*totalp; i++)
              {
                 modelp[model_param_count+numcoord+totalp+i] = 0;
              }

              /*
               * Initialize the communication link data flags
               */
              for (i = 0; i < totalp; i++)
              {
                 modelp[model_param_count+numcoord+totalp+totalp*totalp+i] = 0;
              }

              rc = (*hfunc)(
                   numcoord,
                   coord,
                   modelp,
                   paramc
              );

              free(modelp);

              if (rc == 0)
              {
                 continue;
              }
           }

           /*
            * Record the accepted arrangement.
            */
           for (i = 0; i < numcoord; i++)
           {
              (*parr)[(*nparr)*numcoord + i] = coord[i];
           }

           (*nparr)++;

           /*
            * Grow once the array is full so the next append always
            * has room.
            */
           if ((*nparr) >= (*max_nparr))
           {
              int new_max_nparr = (*max_nparr) * 2;
              int *grown = (int*)realloc(
                                 (*parr),
                                 sizeof(int)
                                 *
                                 numcoord
                                 *
                                 new_max_nparr
              );

              if (grown == NULL)
              {
                 /*
                  * realloc leaves the old block allocated on failure;
                  * release it so nothing is leaked and leave a NULL
                  * that the caller can safely pass to free().
                  */
                 free(*parr);
                 (*parr) = NULL;

                 return MPC_ERR_NOMEM;
              }

              (*parr) = grown;
              (*max_nparr) = new_max_nparr;
           }
       }

       return HMPI_OK;
   }

   /*-----------------------------------------------------*/

   /*
    * HMPI_Get_process_arrangements_i() --
    * Build the list of candidate process arrangements for a model.
    *
    * ARGUMENTS:
    *   model              - model whose numcoord gives the number of
    *                        coordinates per arrangement
    *   hfunc              - optional heuristic filter, may be NULL
    *   model_parameters   - user model parameters (model_param_count ints)
    *   parr               - out: heap array of (*nparr) arrangements,
    *                        numcoord ints each; the caller frees it
    *   nparr              - out: number of arrangements found
    *
    * RETURN CODES:
    *   HMPI_OK on success. On any failure the error code of the failing
    *   step is returned, (*parr) is NULL and (*nparr) is 0, so the
    *   caller has nothing to release.
    */
   int
   HMPI_Get_process_arrangements_i
   (
       const HMPI_Model* model,
       HMPI_Heuristic_function hfunc,
       const int* model_parameters,
       int model_param_count,
       int **parr,
       int *nparr
   )
   {
       int rc;
       int numcoord, *coord;
       int max_nparr = HMPI_MAX_INITIAL_PROC_ARRMTS;

       /*
        * Number of processes that are FREE and the
        * parent of the group
        */
       int gsize = HMPI_Group_size(HMPI_COMM_WORLD_GROUP);

       numcoord = model->numcoord;

       *nparr = 0;
       (*parr) = NULL;

       coord = (int*)malloc(
                     sizeof(int)
                     *
                     numcoord
       );

       if (coord == NULL)
       {
          return MPC_ERR_NOMEM;
       }

       (*parr) = (int*)malloc(
                       sizeof(int)
                       *
                       numcoord
                       *
                       max_nparr
       );

       if ((*parr) == NULL)
       {
          /*
           * Do not leak the scratch coordinate array.
           */
          free(coord);

          return MPC_ERR_NOMEM;
       }

       /*
        * Evaluate for all possible combinations of
        * coordinates.
        *
        * For example if the total number of processes
        * in MPI_COMM_WORLD are 3 and the number of dimensions
        * in the topology is 3, then the possible
        * combinations are
        * 1 1 1
        * 1 1 2
        * 1 1 3
        * 1 2 1
        * 1 3 1
        * 2 1 1
        * 3 1 1
        */
       rc = HMPI_Get_process_arrangements_recursively_i(
                0,
                model,
                hfunc,
                model_parameters,
                model_param_count,
                gsize,
                numcoord,
                coord,
                parr,
                nparr,
                &max_nparr
       );

       free(coord);

       if (rc != HMPI_OK)
       {
          /*
           * A partial list is of no use to the caller; release it here
           * so that an error return never hands out memory.
           * free(NULL) is a no-op, so this is safe whatever state the
           * enumeration left (*parr) in.
           */
          free(*parr);
          (*parr) = NULL;
          *nparr = 0;
       }

       return rc;
   }

   /*-----------------------------------------------------*/

   /*
    * HMPI_Group_pauto_create() --
    * Create an HMPI group whose process arrangement is chosen by the
    * HMPI runtime system rather than by the caller. No heuristic
    * function is supplied, so the shared implementation
    * HMPI_Group_pauto_create_i() is invoked with a NULL heuristic.
    *
    * ARGUMENTS: - see HMPI documentation
    *
    * RETURN CODES: - see HMPI documentation
    */
   int
   HMPI_Group_pauto_create
   (
       HMPI_Group* gid,
       const HMPI_Model* model,
       const int* model_parameters,
       int model_param_count
   )
   {
       int status;

       status = HMPI_Group_pauto_create_i(gid, model, NULL, model_parameters, model_param_count);

       return status;
   }

   /*-----------------------------------------------------*/

   /*
    * HMPI_Group_heuristic_pauto_create() --
    * Create an HMPI group whose process arrangement is chosen by the
    * HMPI runtime system. The application programmer supplies a
    * heuristic function that is handed on to the shared implementation
    * HMPI_Group_pauto_create_i(), where it is used to reduce the number
    * of process arrangements evaluated.
    *
    * ARGUMENTS: - see HMPI documentation
    *
    * RETURN CODES: - see HMPI documentation
    */
   int
   HMPI_Group_heuristic_pauto_create
   (
       HMPI_Group* gid,
       const HMPI_Model* model,
       HMPI_Heuristic_function hfunc,
       const int* model_parameters,
       int model_param_count
   )
   {
       int status;

       status = HMPI_Group_pauto_create_i(gid, model, hfunc, model_parameters, model_param_count);

       return status;
   }

   /*-----------------------------------------------------*/

   int
   HMPI_Group_pauto_create_i
   (
       HMPI_Group* gid,
       const HMPI_Model* model,
       HMPI_Heuristic_function hfunc,
       const int* model_parameters,
       int model_param_count
   )
   {
       int i, j, rc;

       int totalp;
       int *modelp, paramc;

       int numcoord = model->numcoord;

       /*
       * Process arrangements
       * and Number of process arrangements
       * at the host process
       */
       int *parr, nparr;

       /*
       * All the timeofs
       */
       double *alltimeofs;

       if (HMPI_Is_member(HMPI_PROC_WORLD_GROUP))
       {
          MPI_Comm *hmpiproccomm;
          int p, me, *dparr;

          /*
          * Process arrangements
          * and Number of process arrangements
          * at each process
          */
          int mynparr, *myparr;

          /*
          * Timeofs calculated by each processor
          */
          double *timeofs;

          p = HMPI_Group_size(HMPI_PROC_WORLD_GROUP);
          me = HMPI_Group_rank(HMPI_PROC_WORLD_GROUP);

          hmpiproccomm = (MPI_Comm*)HMPI_Get_comm(HMPI_PROC_WORLD_GROUP);

          if (hmpiproccomm == NULL)
          {
             return HMPI_ERR_INTERNAL;
          }

          if (HMPI_Is_parent(HMPI_PROC_WORLD_GROUP))
          {
             /*
             * Evaluate the process arrangements
             */
             rc = HMPI_Get_process_arrangements_i(
                      model,
                      hfunc,
                      model_parameters,
                      model_param_count,
                      &parr,
                      &nparr             
             );

             if (rc != HMPI_OK)
             {
                return rc;
             }

             /*
             * Allocate the process arrangements to the slaves
             */
             dparr = (int*)malloc(
                           sizeof(int)
                           *
                           p
             );

             if (dparr == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             rc = HMPI_Partition_set(
                      p,
                      1,
                      HMPI_Comp_world_performances,
                      NULL,
                      NULL,
                      nparr,
                      NULL,
                      0,
                      0,
                      -1,
                      NULL,
                      NULL,
                      dparr
             );

             if (rc != HMPI_OK)
             {
                return rc; 
             }

             mynparr = dparr[me];

             /*
             * Broadcast the number of process arrangements
             */
             rc = MPI_Bcast(
                     dparr,
                     p,
                     MPI_INT,
                     HMPI_Group_parent(HMPI_PROC_WORLD_GROUP),
                     *hmpiproccomm
             );
   
             if (rc != MPI_SUCCESS)
             {
                return rc;
             }
          }
          else
          {
             dparr = (int*)malloc(
                           sizeof(int)
                           *
                           p
             );

             if (dparr == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             /*
             * Receive the number of process arrangements
             */
             rc = MPI_Bcast(
                     dparr,
                     p,
                     MPI_INT,
                     HMPI_Group_parent(HMPI_PROC_WORLD_GROUP),
                     *hmpiproccomm
             );
   
             if (rc != MPI_SUCCESS)
             {
                return rc;
             }

             mynparr = dparr[me];
          }

          /*
          * Receive the process arrangements
          */
          if (HMPI_Is_parent(HMPI_PROC_WORLD_GROUP))
          {
             int cdispl = 0;
             int *counts, *displs;

             counts = (int*)malloc(
                              sizeof(int)
                              *
                              p
             );

             if (counts == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             displs = (int*)malloc(
                              sizeof(int)
                              *
                              p
             );

             if (displs == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             for (i = 0; i < p; i++)
             {
                counts[i] = dparr[i]*numcoord;
                displs[i] = cdispl;
                cdispl += dparr[i]*numcoord;
             }

             myparr = (int*)malloc(
                            sizeof(int)
                            *
                            mynparr*numcoord
             );

             if (myparr == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             rc = MPI_Scatterv(
                     parr,
                     counts,
                     displs,
                     MPI_INT,
                     myparr,
                     mynparr*numcoord,
                     MPI_INT,
                     HMPI_Group_parent(HMPI_PROC_WORLD_GROUP),
                     *hmpiproccomm           
             );

             if (rc != MPI_SUCCESS)
             {
                return rc;
             }

             free(counts);
             free(displs);
          }
          else
          {
             myparr = (int*)malloc(
                            sizeof(int)
                            *
                            mynparr*numcoord
             );

             if (myparr == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             rc = MPI_Scatterv(
                     NULL,
                     NULL,
                     NULL,
                     MPI_INT,
                     myparr,
                     mynparr*numcoord,
                     MPI_INT,
                     HMPI_Group_parent(HMPI_PROC_WORLD_GROUP),          
                     *hmpiproccomm           
             );

             if (rc != MPI_SUCCESS)
             {
                return rc;
             }
          }

          /*
          * Evaluate the timeof for each process arrangement
          */
          timeofs = (double*)malloc(
                             sizeof(double)
                             *
                             mynparr
          );

          if (timeofs == NULL)
          {
             return MPC_ERR_NOMEM;
          }

          for (i = 0; i < mynparr; i++)
          {
              /*
               * Set the model parameters.
               * Evaluate model for the combination
               */
              for (totalp = 1, j = 0; j < numcoord; j++)
              {
                 totalp *= myparr[i*numcoord + j];
              }

              paramc = model_param_count + numcoord + totalp + totalp*totalp + totalp;

              modelp = (int*)malloc(
                             sizeof(int)
                             *
                             paramc
              );

              if (modelp == NULL)
              {
                 return MPC_ERR_NOMEM;
              }

              for (j = 0; j < model_param_count; j++)
              {
                 modelp[j] = model_parameters[j];
              }

              for (j = 0; j < numcoord; j++)
              {
                 modelp[model_param_count+j] = myparr[i*numcoord + j];
              }

              /*
               * Get the speeds
               */
              for (j = 0; j < totalp; j++)
              {
                 modelp[model_param_count+numcoord+j] = (int)HMPI_Comm_world_performances[j];
              }

              /*
               * Initialize the communication link data
               */
              for (j = 0; j < totalp*totalp; j++)
              {
                 modelp[model_param_count+numcoord+totalp+j] = 0;
              }

              /*
               * Initialize the communication link data flags
               */
              for (j = 0; j < totalp; j++)
              {
                 modelp[model_param_count+numcoord+totalp+totalp*totalp+j] = 0;
              }

              timeofs[i] = HMPI_Timeof(
                               model,
                               modelp,
                               paramc
              );

              free(modelp);
          }

          /*
          * Gather the times
          */
          if (HMPI_Is_parent(HMPI_PROC_WORLD_GROUP))
          {
             int cdispl = 0;
             int *displs;

             displs = (int*)malloc(
                              sizeof(int)
                              *
                              p
             );

             if (displs == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             for (i = 0; i < p; i++)
             {
                displs[i] = cdispl;
                cdispl += dparr[i];
             }

             alltimeofs = (double*)malloc(
                                   sizeof(double)
                                   *
                                   nparr
             );

             if (alltimeofs == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             rc = MPI_Gatherv(
                     timeofs,
                     mynparr,
                     MPI_DOUBLE,
                     alltimeofs,
                     dparr,
                     displs,
                     MPI_DOUBLE,
                     HMPI_Group_parent(HMPI_PROC_WORLD_GROUP),
                     *hmpiproccomm           
             );

             if (rc != MPI_SUCCESS)
             {
                return rc;
             }

             free(displs);
          }
          else
          {
             rc = MPI_Gatherv(
                     timeofs,
                     mynparr,
                     MPI_DOUBLE,
                     NULL,
                     NULL,
                     NULL,
                     MPI_DOUBLE,
                     HMPI_Group_parent(HMPI_PROC_WORLD_GROUP),          
                     *hmpiproccomm           
             );

             if (rc != MPI_SUCCESS)
             {
                return rc;
             }
          }

          free(myparr);
          free(timeofs);
          free(dparr);
       }

       if (HMPI_Is_member(HMPI_FREE_GROUP))
       {
          rc =  HMPI_Group_create(
                    gid,
                    model,
                    NULL,
                    0
          );

          if (rc != HMPI_OK)
          {
             return rc;
          }

          return HMPI_OK;
       }

       if (HMPI_Is_parent(HMPI_PROC_WORLD_GROUP))
       {
          int *optimal_proc_arrangement;
          double mintime;

          optimal_proc_arrangement = (int*)calloc(
                                           numcoord,
                                           sizeof(int)
          );

          if (optimal_proc_arrangement == NULL)
          {
             return MPC_ERR_NOMEM;
          }

          mintime = alltimeofs[0];
          for (j = 0; j < numcoord; j++)
          {
              optimal_proc_arrangement[j] = parr[j];
          }

          if (HMPI_Debug_flag)
          {
             printf("HMPI===> HMPI_GROUP_PAUTO_CREATE: Process arrangement [%d] ( ", 0);

             for (j = 0; j < numcoord; j++)
             {
                 printf("%d ", parr[j]);
             }

              printf(") TIME=%0.9f\n", alltimeofs[0]);
          }

          for (i = 1; i < nparr; i++)
          {
              if (HMPI_Debug_flag)
              {
                 printf("HMPI===> HMPI_GROUP_PAUTO_CREATE: Process arrangement [%d] ( ", i);

                 for (j = 0; j < numcoord; j++)
                 {
                    printf("%d ", parr[i*numcoord+j]);
                 }

                 printf(") TIME=%0.9f\n", alltimeofs[i]);
              }

              if (alltimeofs[i] < mintime)
              {
                 mintime = alltimeofs[i];
                 for (j = 0; j < numcoord; j++)
                 {
                    optimal_proc_arrangement[j] = parr[i*numcoord+j];
                 }
              }
          }

          if (HMPI_Debug_flag)
          {
              printf("HMPI===> HMPI_GROUP_PAUTO_CREATE: Optimal process arrangement ( ");
              for (j = 0; j < numcoord; j++)
              {
                 printf("%d ", optimal_proc_arrangement[j]);
              }
              printf(")\n");
              printf("HMPI===> HMPI_GROUP_PAUTO_CREATE: Estimated execution time %f\n", mintime);
          }

          free(alltimeofs);

          /*
          * Now create the group with the optimal
          * process arrangement
          */
          for (totalp = 1, i = 0; i < numcoord; i++)
          {
              totalp *= optimal_proc_arrangement[i];
          }

          paramc = model_param_count + numcoord + totalp + totalp*totalp + totalp;

          modelp = (int*)malloc(
                         sizeof(int)
                         *
                         paramc
          );

          if (modelp == NULL)
          {
             return MPC_ERR_NOMEM;
          }

          for (i = 0; i < model_param_count; i++)
          {
              modelp[i] = model_parameters[i];
          }

          for (i = 0; i < numcoord; i++)
          {
              modelp[model_param_count+i] = optimal_proc_arrangement[i];
          }

          /*
           * Get the speeds
           */
          for (i = 0; i < totalp; i++)
          {
              modelp[model_param_count+numcoord+i] = (int)HMPI_Comm_world_performances[i];
          }

          /*
          *  Initilaize the communication link data
          */
          for (i = 0; i < totalp*totalp; i++)
          {
              modelp[model_param_count+numcoord+totalp+i] = 0;
          }

          /*
          *  Initilaize the communication link data flags
          */
          for (i = 0; i < totalp; i++)
          {
              modelp[model_param_count+numcoord+totalp+totalp*totalp+i] = 0;
          }
          
          rc = HMPI_Group_create(
                   gid,
                   model,
                   modelp,
                   paramc
          );

          if (rc != HMPI_OK)
          {
             return rc;
          }

          /*
           * Fetch the Net structure from the map
           * using the network identifier.
           * Fill in the optimal parameters
           */
          {
             HMPI_Net* pnet;
             int index_of_net;
   
             pnet = HMPI_Get_net_from_map(
                        gid,
                        &index_of_net
             );
   
             if (pnet == NULL)
             {
                return HMPI_ERR_GROUP_NOT_EXIST;
             }
   
             pnet->numcoord = numcoord;
   
             pnet->coord = (int*)malloc(
                                 sizeof(int)
                                 *
                                 numcoord
             );

             if (pnet->coord == NULL)
             {
                return MPC_ERR_NOMEM;
             }

             for (i = 0; i < numcoord; i++)
             {
                pnet->coord[i] = optimal_proc_arrangement[i];
             }

             pnet->time = mintime;
          }

          free(parr);
          free(optimal_proc_arrangement);
          free(modelp);
       }
       
       return HMPI_OK;
   }   

   /*-----------------------------------------------------*/

