

   /************************************************/
   /* Implementation of Partitioning Interfaces of */
   /* Sets using processor graphs with speed and   */
   /* memory. The elements of the set have weights.*/
   /*                                              */
   /* Revision history                             */
   /* 01-07-2003  --      Initial version          */
   /************************************************/

   #include <math.h>
   #include <hmpi.h>

   /*-----------------------------------------------------*/

   /*
    * Partitions a non-ordered set of n weighted elements over p processors
    * whose speeds are given as functions of the problem size.
    *
    * The ideal sum of weights for every processor (Size_of_bin) and the
    * corresponding speeds (speeds_opt) are obtained from
    * __HMPI_Speed_function_of_problem_size() for the total weight of the
    * set.  The elements are then visited in descending order of weight.
    * Each element goes to the bin in which it still fits and leaves the
    * least room; when it fits nowhere it goes to the bin whose ideal size
    * is then missed by the smallest amount.
    *
    * Parameters:
    *   p, pn, speeds, psizes - passed unchanged to
    *                           __HMPI_Speed_function_of_problem_size().
    *   n              - number of elements in the set.
    *   w              - weights of the elements (n entries).
    *   type_of_metric - USER_SPECIFIED or SYSTEM_DEFINED; looked at only
    *                    when metric is not NULL.
    *   umf            - user callback invoked for USER_SPECIFIED.
    *   metric         - when not NULL, receives the value of the metric.
    *   np             - output (n entries): np[e] is the processor chosen
    *                    for element e.
    *
    * Returns HMPI_OK on success, MPC_ERR_NOMEM when an allocation fails,
    * HMPI_ERR_METRIC for an unknown type_of_metric, or the error code
    * returned by __HMPI_Speed_function_of_problem_size().  np is written
    * only on success.  All temporary buffers are released on every path.
    *
    * NOTE(review): the packing loop assumes p >= 1 whenever n >= 1.
    */
   int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       int n,
       const int *w,
       int type_of_metric,
       User_defined_metric umf,
       double *metric,
       int *np
   )
   {
       int *rearranged_weights = NULL;
       int *rearrangedw = NULL;
       int *allocations = NULL;
       int *Size_of_bin = NULL;
       int *Current_bin_capacity = NULL;
       double *speeds_opt = NULL;
       int sumw = 0;
       int i, j, rc;
       int temp, swapped;

       for (i = 0; i < n; i++)
       {
          sumw += w[i];
       }

       Size_of_bin = (int*)malloc(sizeof(int) * p);

       if (Size_of_bin == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       speeds_opt = (double*)malloc(sizeof(double) * p);

       if (speeds_opt == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /*
        * Assume the application programmer represents
        * speeds as function of problem size and
        * problem size is measured in terms of the weights
        * of the elements.
        */
       rc = __HMPI_Speed_function_of_problem_size(
                p,
                pn,
                speeds,
                psizes,
                sumw,
                speeds_opt,
                Size_of_bin
       );

       if (rc != HMPI_OK)
       {
          goto cleanup;
       }

       rearranged_weights = (int*)malloc(sizeof(int) * n);

       if (rearranged_weights == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       rearrangedw = (int*)malloc(sizeof(int) * n);

       if (rearrangedw == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /*
        * rearranged_weights[] holds the weights in descending order and
        * rearrangedw[k] remembers which original element sits at sorted
        * position k.
        */
       for (i = 0; i < n; i++)
       {
           rearrangedw[i] = i;
           rearranged_weights[i] = w[i];
       }

       /*
        * Stable bubble sort (equal weights keep their original order);
        * stop as soon as a full pass performs no swap.
        */
       do
       {
           swapped = 0;

           for (j = 1; j < n; j++)
           {
               if (rearranged_weights[j-1] < rearranged_weights[j])
               {
                  temp = rearranged_weights[j-1];
                  rearranged_weights[j-1] = rearranged_weights[j];
                  rearranged_weights[j] = temp;

                  temp = rearrangedw[j-1];
                  rearrangedw[j-1] = rearrangedw[j];
                  rearrangedw[j] = temp;

                  swapped = 1;
               }
           }
       }
       while (swapped);

       allocations = (int*)malloc(sizeof(int) * n);

       if (allocations == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       Current_bin_capacity = (int*)malloc(sizeof(int) * p);

       if (Current_bin_capacity == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       for (i = 0; i < p; i++)
       {
          Current_bin_capacity[i] = 0;
       }

       for (i = 0; i < n; i++)
       {
           /*
            * allocations[i] is later stored at np[rearrangedw[i]], so the
            * weight used here must be the one at sorted position i.
            */
           int weight = rearranged_weights[i];
           int waste = INT_MAX;
           int chosen = -1;

           /* Best fit: the bin where the element fits most tightly. */
           for (j = 0; j < p; j++)
           {
               int room = Size_of_bin[j] - (Current_bin_capacity[j] + weight);

               if ((room >= 0) && (room < waste))
               {
                  chosen = j;
                  waste = room;
               }
           }

           /*
            * The element fits nowhere: take the bin whose ideal size
            * is missed by the smallest absolute amount.
            */
           if (chosen == -1)
           {
              waste = INT_MAX;

              for (j = 0; j < p; j++)
              {
                  int miss = Size_of_bin[j] - (Current_bin_capacity[j] + weight);

                  if (miss < 0)
                  {
                     miss = -miss;
                  }

                  if (miss < waste)
                  {
                     chosen = j;
                     waste = miss;
                  }
              }
           }

           allocations[i] = chosen;
           Current_bin_capacity[chosen] += weight;
       }

       /*
        * The ideal sum of weights is given by
        * elements of array Size_of_bin and the
        * actual sum of weights is calculated for array elements
        * of Current_bin_capacity.
        */
       if (metric != NULL)
       {
          switch (type_of_metric)
          {
             case USER_SPECIFIED:
               {
                  *metric = (*umf)(
                             p,
                             speeds_opt,
                             Current_bin_capacity,
                             Size_of_bin
                  );
               }
               break;
             case SYSTEM_DEFINED:
               {
                  *metric = __HMPI_System_defined_metric(
                            p,
                            speeds_opt,
                            Current_bin_capacity,
                            Size_of_bin
                  );
               }
               break;
             default:
               {
                  rc = HMPI_ERR_METRIC;
                  goto cleanup;
               }
          }
       }

       /* Translate sorted positions back to the caller's element order. */
       for (i = 0; i < n; i++)
       {
          np[rearrangedw[i]] = allocations[i];
       }

       rc = HMPI_OK;

   cleanup:
       free(Size_of_bin);
       free(Current_bin_capacity);
       free(rearrangedw);
       free(rearranged_weights);
       free(allocations);
       free(speeds_opt);

       return rc;
   }

   /*-----------------------------------------------------*/

   /*
    * Partitions a non-ordered set of n weighted elements over p processors
    * whose speeds are functions of the problem size, where processor j may
    * receive at most mlimits[j] elements.
    *
    * The ideal sum of weights per processor (Size_of_bin) and the matching
    * speeds (speeds_opt) come from __HMPI_Speed_function_of_problem_size()
    * for the total weight of the set.  Elements are handled in descending
    * order of weight.
    *
    *  - If the limits add up to exactly n, every processor receives
    *    exactly mlimits[j] elements: processors are taken in increasing
    *    order of their limit and filled with consecutive elements of the
    *    sorted sequence.
    *  - Otherwise each element goes to the open bin with the most free
    *    room among those it fits into, or, when it fits into none, to the
    *    open bin it overflows the least.  A bin is closed once it holds
    *    mlimits[j] elements; bins with a limit of zero (or less) start
    *    closed.
    *
    * Parameters:
    *   p, pn, speeds, psizes - passed unchanged to
    *                           __HMPI_Speed_function_of_problem_size().
    *   mlimits        - per-processor upper bound on the element count
    *                    (p entries).
    *   n              - number of elements in the set.
    *   w              - weights of the elements (n entries).
    *   type_of_metric - USER_SPECIFIED or SYSTEM_DEFINED; looked at only
    *                    when metric is not NULL.
    *   umf            - user callback invoked for USER_SPECIFIED.
    *   metric         - when not NULL, receives the value of the metric.
    *   np             - output (n entries): np[e] is the processor chosen
    *                    for element e.
    *
    * Returns HMPI_OK on success, HMPI_ERR_MLIMITS when the limits add up
    * to less than n, MPC_ERR_NOMEM when an allocation fails,
    * HMPI_ERR_METRIC for an unknown type_of_metric, HMPI_ERR_INTERNAL when
    * no admissible bin can be found, or the error code returned by
    * __HMPI_Speed_function_of_problem_size().  np is written only on
    * success.  All temporary buffers are released on every path.
    */
   int __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       const int *mlimits,
       int n,
       const int *w,
       int type_of_metric,
       User_defined_metric umf,
       double *metric,
       int *np
   )
   {
       int i, j, k, rc;
       int temp, swapped;
       int total_limits = 0;
       int sumw = 0;
       int *rearranged_weights = NULL;
       int *rearrangedw = NULL;
       int *allocations = NULL;
       int *Size_of_bin = NULL, *Current_bin_capacity = NULL;
       int *Open = NULL, *Number_in_bin = NULL;
       int *shortlist = NULL;
       int *rearranged_mlimits = NULL, *rearrangedp = NULL;
       double *speeds_opt = NULL;

       for (i = 0; i < p; i++)
       {
          total_limits += mlimits[i];
       }

       if (total_limits < n)
       {
          printf(
            "The number of elements in the set"
            " is greater than the sum of numbers of elements"
            " the processors can hold or"
            " Partitioning cannot be done with the restrictions"
            " provided\n"
          );

          return HMPI_ERR_MLIMITS;
       }

       for (i = 0; i < n; i++)
       {
          sumw += w[i];
       }

       Size_of_bin = (int*)malloc(sizeof(int) * p);

       if (Size_of_bin == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       speeds_opt = (double*)malloc(sizeof(double) * p);

       if (speeds_opt == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /*
        * Assume the application programmer represents
        * speeds as function of problem size and
        * problem size is measured in terms of the weights
        * of the elements.
        */
       rc = __HMPI_Speed_function_of_problem_size(
                p,
                pn,
                speeds,
                psizes,
                sumw,
                speeds_opt,
                Size_of_bin
       );

       if (rc != HMPI_OK)
       {
          goto cleanup;
       }

       rearranged_weights = (int*)malloc(sizeof(int) * n);

       if (rearranged_weights == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       rearrangedw = (int*)malloc(sizeof(int) * n);

       if (rearrangedw == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /*
        * rearranged_weights[] holds the weights in descending order and
        * rearrangedw[k] remembers which original element sits at sorted
        * position k.
        */
       for (i = 0; i < n; i++)
       {
           rearrangedw[i] = i;
           rearranged_weights[i] = w[i];
       }

       /*
        * Stable bubble sort (equal weights keep their original order);
        * stop as soon as a full pass performs no swap.
        */
       do
       {
           swapped = 0;

           for (j = 1; j < n; j++)
           {
               if (rearranged_weights[j-1] < rearranged_weights[j])
               {
                  temp = rearranged_weights[j-1];
                  rearranged_weights[j-1] = rearranged_weights[j];
                  rearranged_weights[j] = temp;

                  temp = rearrangedw[j-1];
                  rearrangedw[j-1] = rearrangedw[j];
                  rearrangedw[j] = temp;

                  swapped = 1;
               }
           }
       }
       while (swapped);

       allocations = (int*)malloc(sizeof(int) * n);

       if (allocations == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       Current_bin_capacity = (int*)malloc(sizeof(int) * p);

       if (Current_bin_capacity == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       for (i = 0; i < p; i++)
       {
          Current_bin_capacity[i] = 0;
       }

       if (total_limits == n)
       {
          int ind = 0;

          rearrangedp = (int*)malloc(sizeof(int) * p);

          if (rearrangedp == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto cleanup;
          }

          rearranged_mlimits = (int*)malloc(sizeof(int) * p);

          if (rearranged_mlimits == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto cleanup;
          }

          for (i = 0; i < p; i++)
          {
              rearrangedp[i] = i;
              rearranged_mlimits[i] = mlimits[i];
          }

          /* Stable bubble sort of the processors by increasing limit. */
          do
          {
              swapped = 0;

              for (j = 1; j < p; j++)
              {
                  if (rearranged_mlimits[j-1] > rearranged_mlimits[j])
                  {
                     temp = rearrangedp[j-1];
                     rearrangedp[j-1] = rearrangedp[j];
                     rearrangedp[j] = temp;

                     temp = rearranged_mlimits[j-1];
                     rearranged_mlimits[j-1] = rearranged_mlimits[j];
                     rearranged_mlimits[j] = temp;

                     swapped = 1;
                  }
              }
          }
          while (swapped);

          /*
           * This looks like a NP-hard problem.
           * We know the number of elements in each subset
           * given by the upper bound.
           * We provide a naive implementation here:
           * the processors are taken in increasing order of their
           * upper bounds and the weights in decreasing order.
           * The (ind < n) test only guards the buffers against
           * inconsistent limits.
           */
          for (i = 0; i < p; i++)
          {
             for (j = 0; (j < rearranged_mlimits[i]) && (ind < n); j++)
             {
                 allocations[ind] = rearrangedp[i];
                 Current_bin_capacity[rearrangedp[i]] += rearranged_weights[ind];
                 ind++;
             }
          }
       }
       else
       {
          Open = (int*)malloc(sizeof(int) * p);

          if (Open == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto cleanup;
          }

          Number_in_bin = (int*)malloc(sizeof(int) * p);

          if (Number_in_bin == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto cleanup;
          }

          shortlist = (int*)malloc(sizeof(int) * p);

          if (shortlist == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto cleanup;
          }

          for (i = 0; i < p; i++)
          {
             /* A bin that may hold no element at all is never open. */
             Open[i] = (mlimits[i] > 0) ? 1 : 0;
             Number_in_bin[i] = 0;
          }

          for (i = 0; i < n; i++)
          {
              /*
               * allocations[i] is later stored at np[rearrangedw[i]], so
               * the weight used here must be the one at sorted position i.
               */
              int weight = rearranged_weights[i];
              int nslist = 0;
              int chosen = -1;

              /* Open bins into which the element still fits. */
              for (j = 0; j < p; j++)
              {
                  if ((Open[j] == 1)
                      && ((Current_bin_capacity[j] + weight) <= Size_of_bin[j]))
                  {
                     shortlist[nslist++] = j;
                  }
              }

              if (nslist > 0)
              {
                 /*
                  * Take the short-listed bin with the most free room;
                  * on ties the later bin wins.
                  */
                 int most_room = Size_of_bin[shortlist[0]]
                                 -
                                 Current_bin_capacity[shortlist[0]];

                 chosen = shortlist[0];

                 for (k = 1; k < nslist; k++)
                 {
                     int room = Size_of_bin[shortlist[k]]
                                -
                                Current_bin_capacity[shortlist[k]];

                     if (room >= most_room)
                     {
                        most_room = room;
                        chosen = shortlist[k];
                     }
                 }
              }
              else
              {
                 /*
                  * The element fits nowhere: take the open bin that
                  * is overflowed the least; on ties the earlier bin wins.
                  */
                 int waste = INT_MAX;

                 for (j = 0; j < p; j++)
                 {
                     if (Open[j] == 1)
                     {
                        int wastej = Current_bin_capacity[j]
                                     +
                                     weight
                                     -
                                     Size_of_bin[j];

                        if (wastej < waste)
                        {
                           chosen = j;
                           waste = wastej;
                        }
                     }
                 }
              }

              /*
               * Neither case is expected: the limits add up to more than
               * n, so an open bin with spare slots should always exist.
               */
              if ((chosen == -1)
                  || ((Number_in_bin[chosen] + 1) > mlimits[chosen]))
              {
                 printf("HMPI===> __HMPI_Sum_of_weights_for_nonordered_set_speed_functions_with_mlimits:"
                        " error in code, must not come into this part\n"
                 );

                 rc = HMPI_ERR_INTERNAL;
                 goto cleanup;
              }

              allocations[i] = chosen;
              Number_in_bin[chosen]++;
              Current_bin_capacity[chosen] += weight;

              /* The bin is full: take it out of further consideration. */
              if (Number_in_bin[chosen] == mlimits[chosen])
              {
                 Open[chosen] = 0;
              }
          }

          if ((metric != NULL) && HMPI_Debug_flag)
          {
             printf("Speeds opt are: \n");

             for (i = 0; i < p; i++)
             {
                printf("%0.1f ", speeds_opt[i]);
             }

             printf("\n");

             printf("Current bin capacities are: \n");

             for (i = 0; i < p; i++)
             {
                printf("%d ", Current_bin_capacity[i]);
             }

             printf("\n");

             printf("Sizes of bin are: \n");

             for (i = 0; i < p; i++)
             {
                printf("%d ", Size_of_bin[i]);
             }

             printf("\n");
          }
       }

       /*
        * The ideal sum of weights is given by
        * elements of array Size_of_bin and the
        * actual sum of weights is calculated for array elements
        * of Current_bin_capacity.
        */
       if (metric != NULL)
       {
          switch (type_of_metric)
          {
             case USER_SPECIFIED:
               {
                  *metric = (*umf)(
                             p,
                             speeds_opt,
                             Current_bin_capacity,
                             Size_of_bin
                  );
               }
               break;
             case SYSTEM_DEFINED:
               {
                  *metric = __HMPI_System_defined_metric(
                            p,
                            speeds_opt,
                            Current_bin_capacity,
                            Size_of_bin
                  );
               }
               break;
             default:
               {
                  rc = HMPI_ERR_METRIC;
                  goto cleanup;
               }
          }
       }

       /* Translate sorted positions back to the caller's element order. */
       for (i = 0; i < n; i++)
       {
           np[rearrangedw[i]] = allocations[i];
       }

       rc = HMPI_OK;

   cleanup:
       free(shortlist);
       free(Open);
       free(Number_in_bin);
       free(rearranged_mlimits);
       free(rearrangedp);
       free(Size_of_bin);
       free(Current_bin_capacity);
       free(rearranged_weights);
       free(rearrangedw);
       free(allocations);
       free(speeds_opt);

       return rc;
   }

   /*-----------------------------------------------------*/

   /*
    * Partitions an ordered set of n weighted elements into p contiguous
    * subsets, one per processor, for processors whose speeds are functions
    * of the problem size.
    *
    * The ideal sum of weights per processor (Size_of_bin) and the matching
    * speeds (speeds_opt) come from __HMPI_Speed_function_of_problem_size()
    * for the total weight of the set.  wallocationsc[] holds the running
    * totals of Size_of_bin, i.e. the ideal borders between processors.
    * The elements are walked in order; an element whose cumulative weight
    * falls into the range of processor j is given to j, except for an
    * element that straddles the border between j-1 and j, which goes to
    * whichever of the two gives the smaller waste.
    *
    * Parameters:
    *   p, pn, speeds, psizes - passed unchanged to
    *                           __HMPI_Speed_function_of_problem_size().
    *   n              - number of elements in the set.
    *   w              - weights of the elements (n entries), in set order.
    *   type_of_metric - USER_SPECIFIED or SYSTEM_DEFINED; looked at only
    *                    when metric is not NULL.
    *   umf            - user callback invoked for USER_SPECIFIED.
    *   metric         - when not NULL, receives the value of the metric.
    *   np             - output (p entries): np[j] is the number of
    *                    elements given to processor j.
    *
    * Returns HMPI_OK on success, MPC_ERR_NOMEM when an allocation fails,
    * HMPI_ERR_METRIC for an unknown type_of_metric, or the error code
    * returned by __HMPI_Speed_function_of_problem_size().  All temporary
    * buffers are released on every path.
    */
   int __HMPI_Sum_of_weights_for_ordered_set_speed_functions
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       int n,
       const int *w,
       int type_of_metric,
       User_defined_metric umf,
       double *metric,
       int *np
   )
   {
       int sumw = 0;
       int sumcum = 0;
       int i, j, rc, prev_proc;
       int *wallocationsc = NULL;
       int *Size_of_bin = NULL, *Current_bin_capacity = NULL;
       double *speeds_opt = NULL;

       for (i = 0; i < n; i++)
       {
          sumw += w[i];
       }

       Size_of_bin = (int*)malloc(sizeof(int) * p);

       if (Size_of_bin == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       speeds_opt = (double*)malloc(sizeof(double) * p);

       if (speeds_opt == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /*
        * Assume the application programmer represents
        * speeds as function of problem size and
        * problem size is measured in terms of the weights
        * of the elements.
        */
       rc = __HMPI_Speed_function_of_problem_size(
                p,
                pn,
                speeds,
                psizes,
                sumw,
                speeds_opt,
                Size_of_bin
       );

       if (rc != HMPI_OK)
       {
          goto cleanup;
       }

       wallocationsc = (int*)malloc(sizeof(int) * (p+1));

       if (wallocationsc == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /* wallocationsc[j] = ideal total weight of processors 0..j-1. */
       wallocationsc[0] = 0;
       for (i = 1; i <= p; i++)
       {
           wallocationsc[i] = wallocationsc[i-1] + Size_of_bin[i-1];
       }

       Current_bin_capacity = (int*)malloc(sizeof(int) * p);

       if (Current_bin_capacity == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       for (i = 0; i < p; i++)
       {
           Current_bin_capacity[i] = 0;
           np[i] = 0;
       }

       /*
        * prev_proc is the processor whose range has most recently been
        * entered.  It must survive from one element to the next:
        * resetting it per element would make every element beyond the
        * first processor look like a border element.
        */
       prev_proc = 0;

       for (i = 0; i < n; i++)
       {
           int placed = 0;
           int prefix = sumcum;     /* total weight of elements 0..i-1 */

           sumcum += w[i];

           for (j = 0; j < p; j++)
           {
               int Wastej_1, Wastej;

               if ((sumcum <= wallocationsc[j])
                   || (sumcum > wallocationsc[j+1]))
               {
                  continue;
               }

               placed = 1;

               /*
                * No border is crossed: the range of j has already been
                * entered, or the preceding elements exactly fill the
                * processors before j.  The first processor has no
                * left neighbour, so it never has a border element.
                */
               if ((j == 0)
                   || (prev_proc == j)
                   || (prefix == wallocationsc[j]))
               {
                  prev_proc = j;
                  np[j]++;
                  Current_bin_capacity[j] += w[i];
                  break;
               }

               /*
                * This is a border element.
                * Wastej_1: distance of j-1 from its ideal size if the
                *           element goes to j-1.
                * Wastej:   distance between the preceding elements and
                *           the ideal border if the element goes to j.
                */
               Wastej_1 = Size_of_bin[j-1]
                          -
                          (Current_bin_capacity[j-1] + w[i]);

               if (Wastej_1 < 0)
               {
                  Wastej_1 = -Wastej_1;
               }

               Wastej = prefix - wallocationsc[j];

               if (Wastej < 0)
               {
                  Wastej = -Wastej;
               }

               if (Wastej_1 <= Wastej)
               {
                  np[j-1]++;
                  Current_bin_capacity[j-1] += w[i];
               }
               else
               {
                  np[j]++;
                  Current_bin_capacity[j] += w[i];
               }

               /* Whatever the decision, later elements belong to j. */
               prev_proc = j;
               break;
           }

           /*
            * No range matched: the cumulative weight is either not yet
            * positive (leading zero weights) or already beyond the sum
            * of ideal sizes.  Keep the element instead of dropping it.
            */
           if (!placed && (p > 0))
           {
              int fallback = (sumcum > wallocationsc[p]) ? (p - 1) : 0;

              np[fallback]++;
              Current_bin_capacity[fallback] += w[i];

              if (fallback > prev_proc)
              {
                 prev_proc = fallback;
              }
           }
       }

       /*
        * The ideal sum of weights is given by
        * elements of array Size_of_bin and the
        * actual sum of weights is calculated for array elements
        * of Current_bin_capacity.
        */
       if (metric != NULL)
       {
          switch (type_of_metric)
          {
             case USER_SPECIFIED:
               {
                  *metric = (*umf)(
                             p,
                             speeds_opt,
                             Current_bin_capacity,
                             Size_of_bin
                  );
               }
               break;
             case SYSTEM_DEFINED:
               {
                  *metric = __HMPI_System_defined_metric(
                            p,
                            speeds_opt,
                            Current_bin_capacity,
                            Size_of_bin
                  );
               }
               break;
             default:
               {
                  rc = HMPI_ERR_METRIC;
                  goto cleanup;
               }
          }
       }

       rc = HMPI_OK;

   cleanup:
       free(wallocationsc);
       free(Size_of_bin);
       free(Current_bin_capacity);
       free(speeds_opt);

       return rc;
   }

   /*-----------------------------------------------------*/

   int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       const int *mlimits,
       int n,
       const int *w,
       int type_of_metric,
       User_defined_metric umf,
       double *metric,
       int *np
   )
   {
       int i, j, k, rc, indl;
       int total_limits = 0;
       int mlimits_apply = 0;
       int x, y, l, m, opt_start;
       int wastei, sumtmp, wastef;
       int sumw;
       int *Size_of_bin;
       int total_sub_mlimits;
       double *speeds_opt;

       for (i = 0; i < p; i++)
       {
           total_limits += mlimits[i];
       }

       if (total_limits == n)
       {
          for (i = 0; i < p; i++)
          {
              np[i] = mlimits[i];
          }

          return HMPI_OK;
       }

       if (total_limits < n)
       {
          printf(
            "The number of elements in the set"
            " is greater than the sum of numbers of elements"
            " the processors can hold or"
            " Partitioning cannot be done with the restrictions"
            " provided\n"
          );

          return HMPI_ERR_MLIMITS;
       }

       for (i = 0; i < n; i++)
       {
          sumw += w[i];
       }

       Size_of_bin = (int*)malloc(
                           sizeof(int)
                           *
                           p
       );

       if (Size_of_bin == NULL)
       {
          return MPC_ERR_NOMEM;
       }

       speeds_opt = (double*)malloc(
                             sizeof(double)
                             *
                             p
       );

       if (speeds_opt == NULL)
       {
          return MPC_ERR_NOMEM;
       }

       /*
        * Assume the application programmer represents
        * speeds as function of problem size and 
        * problem size is measured in terms of the weights
        * of the elements.
        */
       rc = __HMPI_Speed_function_of_problem_size(
                p,
                pn,
                speeds,
                psizes,
                sumw,
                speeds_opt,
                Size_of_bin
       );

       if (rc != HMPI_OK)
       {
          return rc;
       }

       /*
        * Assume the application programmer represents
        * speeds as function of problem size and 
        * problem size is measured in terms of the weights
        * of the elements.
        */
       rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions(
                p,
                pn,
                speeds,
                psizes,
                n,
                w,
                type_of_metric,
                umf,
                metric,
                np
       );

       if (rc != HMPI_OK)
       {
          return rc;
       }

       for (i = 0; i < p; i++)
       {
           if (np[i] > mlimits[i])
           {
              mlimits_apply = 1;
              break;
           }
       }

       if (mlimits_apply == 0)
       {
          return HMPI_OK;
       }

       for (i = 0; i < p; i++)
       {
           if (np[i] <= mlimits[i])
           {
              continue;
           }

	   /*
	    * We try to distribute the remaining 
	    * elements to the processors following it
	    */
           if (i == 0)
           {
              int reduced_set_size;
              np[i] = mlimits[i];
              reduced_set_size = n - np[i];

              if (HMPI_Debug_flag)
              {
                 printf("HMPI===> __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: mlimits = %d, Reduced set size = %d\n", mlimits[i], reduced_set_size);
              }
                 
              free(Size_of_bin);
              free(speeds_opt);

	      return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits(
                     p-1,
                     pn,
	             (speeds + pn),
		     (psizes + pn),
		     (mlimits + 1),
                     reduced_set_size,
		     (w + np[i]),
                     type_of_metric,
                     umf,
                     metric,
		     np + 1
              );
           }

	   /*
	    * If this is the last processor,
	    * we try to distribute the remaining 
	    * elements to the processors preceding it
	    */
	   if (i == (p - 1))
           {
              int reduced_set_size = 0;
                 
              for (j = 0; j < i; j++)
              {
                  reduced_set_size += np[j];
              }

              reduced_set_size += (np[i] - mlimits[i]);
              np[i] = mlimits[i];

              free(Size_of_bin);
              free(speeds_opt);

	      return __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits(
                     p-1,
                     pn,
	             speeds,
		     psizes,
		     mlimits,
                     reduced_set_size,
		     w,
                     type_of_metric,
                     umf,
                     metric, 
		     np
              );
	   }

           if (HMPI_Debug_flag)
           {
              printf(
                "HMPI===> "
                "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: "
                "Processor %d has upper bound exceeded\n",
                i
              );

              printf("HMPI===> Allocations are: \n");
              for (k = 0; k < p; k++)
              {
                 printf("%d ", np[k]); 
              }
              printf("\n");

              printf("HMPI===> element limits are:\n");
              for (k = 0; k < p; k++)
              {
                 printf("%d ", mlimits[k]); 
              }
              printf("\n");
           }

           for (k = i+1, total_sub_mlimits = 0; k < p; k++)
           {
               total_sub_mlimits += mlimits[k];
           }

	   /*
	    * Find the maximum subsequence of elements, the number
	    * of elements being equal to mlimits[i] and packing these
	    * element into bin i generates least amount of waste
	    */
           l = 0;
	   for (k = 0; k < i; k++)
           {
               l += np[k];
           }

           indl = l;

           do 
           {
               wastei = INT_MAX;

	       for (x = indl; x < (indl+np[i]); x++)
               {
                  if (((indl+np[i]) - x) < mlimits[i])
                  {
                     break;
                  }

	          sumtmp = 0;

	          for (y = 0; y < mlimits[i]; y++)
                  {
                      sumtmp += w[x+y];
                  }

                  wastef = fabs(sumtmp - Size_of_bin[i]);

                  if (HMPI_Debug_flag)
                  {
                     printf("x=%d, wastef=%d, Size of bin=%d   ", x, wastef, Size_of_bin[i]);
                  }

	          if (wastef < wastei)
                  {
                     wastei = wastef;
	             opt_start = x;
                  }
               }
 
               if (HMPI_Debug_flag)
               {
                  printf("\n");
               }

	       l = opt_start;
	       m = l + mlimits[i];

               indl++;
           }
           while ((n-m) > total_sub_mlimits);

           np[i] = mlimits[i];

           if (HMPI_Debug_flag)
           {
              printf(
                "HMPI===> "
                "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits: "
                "Total number of elements=%d,"
                "Number of elements to be redistributed before=%d,"
                " elements after the element %d to be redistributed\n",
                n,
                l,
                m
              );
           }

	   /*
	    * spread the elements {0, 1, ..., l-1}
	    * amongst the processors before i
	    */
	   rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits(
                    i,
                    pn,
		    speeds,
                    psizes,
		    mlimits,
		    l,
                    w,
                    type_of_metric,
                    umf,
                    metric, 
		    np
           );

	   if (rc != HMPI_OK)
           {
              return rc;
           }

	   /*
	    * spread the elements {m+1, m+2, ..., n-1}
	    * amongst the processors following i
	    */
	   rc = __HMPI_Sum_of_weights_for_ordered_set_speed_functions_with_mlimits(
                    p-(i+1),
                    pn,
		    (speeds+(i+1)*pn),
                    (psizes+(i+1)*pn),
		    (mlimits+i+1),
                    (n-l-mlimits[i]),
		    (w+l+mlimits[i]),
                    type_of_metric,
                    umf,
                    metric, 
		    (np+i+1)
           );

	   if (rc != HMPI_OK)
           {
              return rc;
           }

           free(Size_of_bin);
           free(speeds_opt);

           break;
       }

       return HMPI_OK;
   }

   /*-----------------------------------------------------*/

   /*
    * Partitions an ordered set of n weighted elements over p processors,
    * where the processors themselves may be reordered.
    *
    * Outline:
    *   1. The total weight of the set is handed to
    *      __HMPI_Speed_function_of_problem_size(), which fills speeds_opt
    *      and Size_of_bin (presumably the speed of each processor at its
    *      optimal share and the sum of weights it should ideally receive
    *      -- confirm against that routine).
    *   2. The processors are sorted by non-increasing Size_of_bin; the
    *      speeds and the original processor indices are permuted along.
    *   3. The elements are visited in order. Each one goes to the bin whose
    *      interval of cumulative bin sizes contains the running sum of
    *      weights; an element that straddles two bins goes to whichever of
    *      the two yields the smaller waste.
    *
    * Parameters:
    *   p, pn, speeds, psizes
    *            forwarded unchanged to
    *            __HMPI_Speed_function_of_problem_size(); p is also the
    *            number of processors (bins).
    *   n, w     number of elements and their weights.
    *   type_of_metric
    *            USER_SPECIFIED or SYSTEM_DEFINED; only inspected when
    *            metric is not NULL.
    *   umf      metric callback used when type_of_metric is USER_SPECIFIED.
    *   metric   if not NULL, receives the value of the selected metric.
    *   np       output of 2*p ints: for each position i of the sorted
    *            order, np[2*i] is the original processor index and
    *            np[2*i+1] the number of elements allocated to it.
    *
    * Returns HMPI_OK on success, MPC_ERR_NOMEM if an allocation fails,
    * HMPI_ERR_METRIC for an unknown type_of_metric (np is left untouched in
    * that case), or the error code reported by
    * __HMPI_Speed_function_of_problem_size().
    *
    * All temporary buffers are released on every exit path.
    */
   int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       int n,
       const int *w,
       int type_of_metric,
       User_defined_metric umf,
       double *metric,
       int *np
   )
   {
       int sumw = 0;
       int sumcum = 0;
       int ind, i, j, prev_proc;
       int rc = HMPI_OK;
       int *wallocationsc = NULL, *allocations = NULL;
       int *Size_of_bin = NULL, *Current_bin_capacity = NULL;
       double *speeds_opt = NULL, *rearranged_speeds_opt = NULL;
       int *rearranged_size_of_bin = NULL;
       int *rearrangedp = NULL;
       int temp, temp_number;
       double temp_speed;

       for (i = 0; i < n; i++)
       {
          sumw += w[i];
       }

       Size_of_bin = (int*)malloc(sizeof(int) * p);

       if (Size_of_bin == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       speeds_opt = (double*)malloc(sizeof(double) * p);

       if (speeds_opt == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       rearranged_speeds_opt = (double*)malloc(sizeof(double) * p);

       if (rearranged_speeds_opt == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /*
        * Assume the application programmer represents
        * speeds as function of problem size and
        * problem size is measured in terms of the weights
        * of the elements.
        */
       rc = __HMPI_Speed_function_of_problem_size(
                p,
                pn,
                speeds,
                psizes,
                sumw,
                speeds_opt,
                Size_of_bin
       );

       if (rc != HMPI_OK)
       {
          goto cleanup;
       }

       rearranged_size_of_bin = (int*)malloc(sizeof(int) * p);

       if (rearranged_size_of_bin == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       rearrangedp = (int*)malloc(sizeof(int) * p);

       if (rearrangedp == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       for (i = 0; i < p; i++)
       {
           rearrangedp[i] = i;
           rearranged_size_of_bin[i] = Size_of_bin[i];
           rearranged_speeds_opt[i] = speeds_opt[i];
       }

       /*
        * Bubble sort the processors by non-increasing bin size.
        * The speeds and the original processor numbers follow
        * the same permutation.
        */
       for (i = 0; i < p; i++)
       {
           for (j = 1; j < p; j++)
           {
              if (rearranged_size_of_bin[j-1] < rearranged_size_of_bin[j])
              {
                 temp = rearranged_size_of_bin[j-1];
                 rearranged_size_of_bin[j-1] = rearranged_size_of_bin[j];
                 rearranged_size_of_bin[j] = temp;

                 /*
                  * The speeds are doubles: they must be swapped through
                  * a double temporary, otherwise the fractional part is
                  * lost.
                  */
                 temp_speed = rearranged_speeds_opt[j-1];
                 rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j];
                 rearranged_speeds_opt[j] = temp_speed;

                 temp_number = rearrangedp[j-1];
                 rearrangedp[j-1] = rearrangedp[j];
                 rearrangedp[j] = temp_number;
              }
           }
       }

       /*
        * wallocationsc[j] is the sum of the sizes of the sorted
        * bins 0 .. j-1, so bin j covers the interval
        * (wallocationsc[j], wallocationsc[j+1]].
        */
       wallocationsc = (int*)malloc(sizeof(int) * (p+1));

       if (wallocationsc == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       wallocationsc[0] = 0;
       for (i = 1; i <= p; i++)
       {
           wallocationsc[i] = wallocationsc[i-1] + rearranged_size_of_bin[i-1];
       }

       Current_bin_capacity = (int*)malloc(sizeof(int) * p);

       if (Current_bin_capacity == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       allocations = (int*)malloc(sizeof(int) * p);

       if (allocations == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       for (i = 0; i < p; i++)
       {
           Current_bin_capacity[i] = 0;
           allocations[i] = 0;
       }

       for (i = 0; i < n; i++)
       {
           prev_proc = 0;
           sumcum += w[i];

           for (j = 0; j < p; j++)
           {
               int Wastej_1, Wastej;

               if ((sumcum > wallocationsc[j])
                   && (sumcum <= wallocationsc[j+1])
               )
               {
                  /*
                   * prev_proc is reset to 0 for every element, so
                   * this branch is taken exactly when j is 0.
                   */
                  if (prev_proc == j)
                  {
                     allocations[j]++;
                     Current_bin_capacity[j] += w[i];
                     break;
                  }

                  /*
                   * The elements preceding the current one
                   * exactly fit into partition (j-1)
                   */
                  if ((sumcum - w[i]) == wallocationsc[j])
                  {
                     prev_proc = j;
                     allocations[j]++;
                     Current_bin_capacity[j] += w[i];
                     break;
                  }

                  /*
                   * This is a border element (j >= 1 here, the
                   * case j == 0 is handled above).
                   * The waste is calculated if this element
                   * goes to j-1 or to j.
                   */
                  Wastej_1 = fabs(
                                 rearranged_size_of_bin[j-1]
                                 -
                                 (
                                   Current_bin_capacity[j-1]
                                   +
                                   w[i]
                                 )
                  );

                  Wastej = fabs( (
                                  sumw - wallocationsc[j]
                                )
                                -
                                (
                                  sumw - sumcum + w[i]
                                )
                  );

                  if (Wastej_1 <= Wastej)
                  {
                     allocations[j-1]++;
                     Current_bin_capacity[j-1] += w[i];
                  }
                  else
                  {
                     allocations[j]++;
                     Current_bin_capacity[j] += w[i];
                  }
               }
           }
       }

       /*
        * The ideal sums of weights (rearranged_size_of_bin) and
        * the actual sums of weights (Current_bin_capacity) are
        * passed to the metric function together with the speeds,
        * all in the sorted processor order.
        */
       if (metric != NULL)
       {
          switch (type_of_metric)
          {
             case USER_SPECIFIED:
               {
                  *metric = (*umf)(
                             p,
                             rearranged_speeds_opt,
                             Current_bin_capacity,
                             rearranged_size_of_bin
                  );
               }
               break;
             case SYSTEM_DEFINED:
               {
                  *metric = __HMPI_System_defined_metric(
                            p,
                            rearranged_speeds_opt,
                            Current_bin_capacity,
                            rearranged_size_of_bin
                  );
               }
               break;
             default:
               {
                  rc = HMPI_ERR_METRIC;
                  goto cleanup;
               }
          }
       }

       /*
        * Result: (original processor number, number of elements)
        * pairs in the sorted processor order.
        */
       for (ind = 0, i = 0; i < p; i++)
       {
          np[ind++] = rearrangedp[i];
          np[ind++] = allocations[i];
       }

       rc = HMPI_OK;

   cleanup:

       /* free(NULL) is a no-op, so buffers never allocated are safe here */
       free(wallocationsc);
       free(Size_of_bin);
       free(rearranged_size_of_bin);
       free(rearrangedp);
       free(Current_bin_capacity);
       free(speeds_opt);
       free(rearranged_speeds_opt);
       free(allocations);

       return rc;
   }

   /*-----------------------------------------------------*/

   /*
    * Partitions an ordered set of n weighted elements over p processors,
    * where the processors may be reordered and processor i can hold at
    * most mlimits[i] elements.
    *
    * Outline:
    *   1. If the limits add up to exactly n, every processor simply
    *      receives mlimits[i] elements (original processor order, metric
    *      not computed). If they add up to less than n, no partitioning
    *      is possible and HMPI_ERR_MLIMITS is returned.
    *   2. Otherwise the total weight is handed to
    *      __HMPI_Speed_function_of_problem_size(), which fills speeds_opt
    *      and Size_of_bin.
    *   3. The processors are sorted by non-increasing Size_of_bin; speeds,
    *      element limits and original processor indices are permuted
    *      along.
    *   4. __HMPI_Apply_mlimits_to_ordered_sum_of_weights() computes the
    *      number of elements per (sorted) processor.
    *   5. If metric is not NULL, the actual sum of weights per processor is
    *      accumulated from w in element order and the selected metric
    *      function is evaluated.
    *
    * Parameters:
    *   p, pn, speeds, psizes
    *            forwarded unchanged to
    *            __HMPI_Speed_function_of_problem_size(); p is also the
    *            number of processors.
    *   mlimits  upper bound on the number of elements per processor.
    *   n, w     number of elements and their weights.
    *   type_of_metric
    *            USER_SPECIFIED or SYSTEM_DEFINED; only inspected when
    *            metric is not NULL.
    *   umf      metric callback used when type_of_metric is USER_SPECIFIED.
    *   metric   if not NULL, receives the value of the selected metric.
    *   np       output of 2*p ints: (original processor index, number of
    *            elements) pairs.
    *
    * Returns HMPI_OK on success, HMPI_ERR_MLIMITS if the limits cannot
    * accommodate n elements, MPC_ERR_NOMEM if an allocation fails,
    * HMPI_ERR_METRIC for an unknown type_of_metric (np is left untouched in
    * that case), or the error code reported by one of the helper routines.
    *
    * All temporary buffers are released on every exit path.
    */
   int __HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       const int *mlimits,
       int n,
       const int *w,
       int type_of_metric,
       User_defined_metric umf,
       double *metric,
       int *np
   )
   {
       int sumw = 0;
       int *allocations = NULL;
       int *Size_of_bin = NULL, *rearranged_size_of_bin = NULL;
       double *speeds_opt = NULL, *rearranged_speeds_opt = NULL;
       int *rearrangedp = NULL;
       int *rearranged_mlimits = NULL;
       int *Current_bin_capacity = NULL;
       double temp_speed;
       int temp_number;
       int i, j, ind;
       int rc = HMPI_OK;
       int total_limits = 0;

       for (i = 0; i < p; i++)
       {
          total_limits += mlimits[i];
       }

       /*
        * The limits leave no freedom: every processor
        * gets exactly as many elements as it can hold.
        */
       if (total_limits == n)
       {
          for (i = 0, ind = 0; i < p; i++)
          {
              np[ind++] = i;
              np[ind++] = mlimits[i];
          }

          return HMPI_OK;
       }

       if (total_limits < n)
       {
          printf(
            "The number of elements in the set"
            " is greater than the sum of numbers of elements"
            " the processors can hold or"
            " Partitioning cannot be done with the restrictions"
            " provided\n"
          );

          return HMPI_ERR_MLIMITS;
       }

       for (i = 0; i < n; i++)
       {
          sumw += w[i];
       }

       Size_of_bin = (int*)malloc(sizeof(int) * p);

       if (Size_of_bin == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       speeds_opt = (double*)malloc(sizeof(double) * p);

       if (speeds_opt == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /*
        * Assume the application programmer represents
        * speeds as function of problem size and
        * problem size is measured in terms of the weights
        * of the elements.
        */
       rc = __HMPI_Speed_function_of_problem_size(
                p,
                pn,
                speeds,
                psizes,
                sumw,
                speeds_opt,
                Size_of_bin
       );

       if (rc != HMPI_OK)
       {
          goto cleanup;
       }

       rearranged_speeds_opt = (double*)malloc(sizeof(double) * p);

       if (rearranged_speeds_opt == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       rearranged_size_of_bin = (int*)malloc(sizeof(int) * p);

       if (rearranged_size_of_bin == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       rearrangedp = (int*)malloc(sizeof(int) * p);

       if (rearrangedp == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       rearranged_mlimits = (int*)malloc(sizeof(int) * p);

       if (rearranged_mlimits == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       for (i = 0; i < p; i++)
       {
           rearrangedp[i] = i;
           rearranged_mlimits[i] = mlimits[i];
           rearranged_size_of_bin[i] = Size_of_bin[i];
           rearranged_speeds_opt[i] = speeds_opt[i];
       }

       /*
        * Bubble sort the processors by non-increasing bin size.
        * Speeds, processor numbers and element limits follow
        * the same permutation.
        */
       for (i = 0; i < p; i++)
       {
           for (j = 1; j < p; j++)
           {
              if (rearranged_size_of_bin[j-1] < rearranged_size_of_bin[j])
              {
                 temp_number = rearranged_size_of_bin[j-1];
                 rearranged_size_of_bin[j-1] = rearranged_size_of_bin[j];
                 rearranged_size_of_bin[j] = temp_number;

                 temp_speed = rearranged_speeds_opt[j-1];
                 rearranged_speeds_opt[j-1] = rearranged_speeds_opt[j];
                 rearranged_speeds_opt[j] = temp_speed;

                 temp_number = rearrangedp[j-1];
                 rearrangedp[j-1] = rearrangedp[j];
                 rearrangedp[j] = temp_number;

                 temp_number = rearranged_mlimits[j-1];
                 rearranged_mlimits[j-1] = rearranged_mlimits[j];
                 rearranged_mlimits[j] = temp_number;
              }
           }
       }

       allocations = (int*)malloc(sizeof(int) * p);

       if (allocations == NULL)
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       for (i = 0; i < p; i++)
       {
           allocations[i] = 0;
       }

       if (HMPI_Debug_flag)
       {
          printf(
             "HMPI===> "
             "__HMPI_Sum_of_weights_for_ordered_set_speed_functions_processor_reordering_with_mlimits:"
             " Optimal speeds are: \n"
          );

          for (i = 0; i < p; i++)
          {
             printf("%.1f ", rearranged_speeds_opt[i]);
          }

          printf("\n");
       }

       /*
        * No metric is requested from the helper; it is
        * evaluated below from the resulting allocations.
        */
       rc = __HMPI_Apply_mlimits_to_ordered_sum_of_weights(
                  p,
                  n,
                  rearranged_speeds_opt,
                  rearranged_mlimits,
                  w,
                  -1,
                  NULL,
                  NULL,
                  allocations
       );

       if (rc != HMPI_OK)
       {
          goto cleanup;
       }

       /*
        * Calculate the metric
        */
       if (metric != NULL)
       {
          Current_bin_capacity = (int*)malloc(sizeof(int) * p);

          if (Current_bin_capacity == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto cleanup;
          }

          for (i = 0; i < p; i++)
          {
             Current_bin_capacity[i] = 0;
          }

          /*
           * Processor i (in sorted order) owns the next
           * allocations[i] consecutive elements of w.
           */
          for (i = 0, ind = 0; i < p; i++)
          {
             for (j = 0; j < allocations[i]; j++)
             {
                Current_bin_capacity[i] += w[ind++];
             }
          }

          /*
           * The ideal sums of weights (rearranged_size_of_bin)
           * and the actual sums of weights (Current_bin_capacity)
           * are passed to the metric function together with the
           * speeds, all in the sorted processor order.
           */
          switch (type_of_metric)
          {
             case USER_SPECIFIED:
               {
                  *metric = (*umf)(
                            p,
                            rearranged_speeds_opt,
                            Current_bin_capacity,
                            rearranged_size_of_bin
                  );
               }
               break;
             case SYSTEM_DEFINED:
               {
                  *metric = __HMPI_System_defined_metric(
                            p,
                            rearranged_speeds_opt,
                            Current_bin_capacity,
                            rearranged_size_of_bin
                  );
               }
               break;
             default:
               {
                  rc = HMPI_ERR_METRIC;
                  goto cleanup;
               }
          }
       }

       /*
        * Result: (original processor number, number of elements)
        * pairs in the sorted processor order.
        */
       for (ind = 0, i = 0; i < p; i++)
       {
          np[ind++] = rearrangedp[i];
          np[ind++] = allocations[i];
       }

       rc = HMPI_OK;

   cleanup:

       /* free(NULL) is a no-op, so buffers never allocated are safe here */
       free(Current_bin_capacity);
       free(Size_of_bin);
       free(rearranged_size_of_bin);
       free(rearrangedp);
       free(speeds_opt);
       free(rearranged_speeds_opt);
       free(rearranged_mlimits);
       free(allocations);

       return rc;
   }

   /*-----------------------------------------------------*/

