
   /************************************************/
   /* Implementation of Partitioning Interfaces of */
   /* Sets using processor graphs with speed and   */
   /* memory                                       */
   /*                                              */
   /* Revision history                             */
   /* 01-07-2003  --      Initial version          */
   /************************************************/

   #include <stdio.h>
   #include <stdlib.h>
   #include <string.h>

   #include <math.h>
   #include <hmpi.h>

   static int _HMPI_Bisection_count = 0;

   /*-----------------------------------------------------*/

   /*
    * Adjusts an existing distribution so that it accounts for n
    * elements, sharing the difference between n and the current
    * total among the processors in proportion to their speeds.
    *
    * n   - total number of elements to distribute
    * p   - number of processors (length of s and npd)
    * s   - one speed value per processor
    * npd - in/out: current allocation of each processor; on success
    *       every entry is increased (total below n) or decreased
    *       (total at or above n) by that processor's share
    *
    * Returns HMPI_OK on success, MPC_ERR_NOMEM if the scratch array
    * cannot be allocated, or the code returned by
    * __HMPI_Number_of_elements_proportional_to_speed on failure.
    * The scratch array is released on every path.
    */
   int
   __HMPI_Distribute_with_single_number_for_speed
   (
      int n,
      int p,
      const double *s,
      double *npd
   )
   {
       int i, left, rc, sum = 0;
       int *npp = (int*)malloc(sizeof(int) * p);

       if (npp == NULL)
       {
          return MPC_ERR_NOMEM;
       }

       /*
        * sum is deliberately an int: each addition converts the
        * running total back to an integer, so the fractional part of
        * every npd[i] is dropped as it is accumulated.
        */
       for (i = 0; i < p; i++)
       {
           sum += npd[i];
       }

       left = n - sum;

       /*
        * Share out the absolute difference; whether it is added or
        * removed depends on the sign of left.
        */
       rc = __HMPI_Number_of_elements_proportional_to_speed(
                  p,
                  (left > 0) ? left : (sum - n),
                  s,
                  npp
       );

       if (rc == HMPI_OK)
       {
          for (i = 0; i < p; i++)
          {
              if (left > 0)
              {
                 npd[i] += npp[i];
              }
              else
              {
                 npd[i] -= npp[i];
              }
          }
       }

       free(npp);

       return rc;
   }

   /*-----------------------------------------------------*/

   /*
    * One step of the recursive bisection used to find a line through
    * the origin whose intersections with the processors' piecewise
    * linear speed functions add up to n elements.
    *
    * p          - number of processors
    * pn         - number of points describing each speed function;
    *              processor i owns speeds[i*pn .. i*pn+pn-1] and
    *              psizes[i*pn .. i*pn+pn-1]
    * speeds     - speed values of all processors
    * psizes     - problem sizes at which the speeds were measured
    * n          - number of elements to distribute
    * slopei     - slope of the first line bounding the search region
    * slopef     - slope of the second line bounding the search region
    * speeds_opt - out: speed of each processor at the chosen point
    * npd        - out: (fractional) number of elements per processor
    *
    * The function recurses with a narrowed pair of slopes until the
    * distribution sums to n or the file-level step counter
    * _HMPI_Bisection_count reaches HMPI_MAX_BISECTION_STEPS.
    *
    * Returns HMPI_OK on success, MPC_ERR_NOMEM if the scratch array
    * cannot be allocated, or the error reported by
    * __HMPI_Distribute_with_single_number_for_speed. The scratch
    * array is released on every path.
    */
   int __HMPI_Recursive_bisection_middle_region
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       int n,
       double slopei,
       double slopef,
       double *speeds_opt,
       double *npd
   )
   {
       int i, j, rc;
       double sumd;
       double slope = 0.0;   /* stays 0 when slope_1_plus_2 is exactly 0 */
       double slope_1_plus_2;
       double *s = (double*)malloc(sizeof(double) * p);

       if (s == NULL)
       {
          return MPC_ERR_NOMEM;
       }

       /*
        * Combine the two slopes with the tangent addition formula and
        * then take the tangent of half the resulting angle; this is
        * the slope of the line used for the current step.
        */
       slope_1_plus_2 = (double)(slopei + slopef)
                        /
                        (double)(1 - slopei*slopef);

       if (slope_1_plus_2 > 0)
       {
          slope = (double)(sqrt(1 + pow(slope_1_plus_2, 2)) - 1)
                  /
                  (double)slope_1_plus_2;
       }
       else if (slope_1_plus_2 < 0)
       {
          slope = (double)(sqrt(1 + pow(slope_1_plus_2, 2)) + 1)
                  /
                  (double)(-slope_1_plus_2);
       }

       if (HMPI_Debug_flag)
       {
          printf(
            "HMPI===> __HMPI_Recursive_bisection_middle_region: Slope is %0.10f, %0.10f, %0.10f\n", 
            slopei, slopef, slope
          );
       }

       for (i = 0; i < p; i++)
       {
           const int base = i*pn;
           /*
            * x/y start at 0 so that the fallback below never reads
            * indeterminate values when there is no segment to test.
            */
           double x = 0.0, y = 0.0;
           double xj = (double)speeds[base] / (double)slope;
           int intersection_found = 0;

           /*
            * The line crosses the constant part of the speed function
            * that precedes the first measured point.
            */
           if ((xj > 0) && (xj <= psizes[base]))
           {
              npd[i] = xj;
              s[i] = (double)speeds[base];
              speeds_opt[i] = s[i];
              continue;
           }

           for (j = 0; j < (pn - 1); j++)
           {
               const double sp_lo = speeds[base + j];
               const double sp_hi = speeds[base + j + 1];
               const int sz_lo = psizes[base + j];
               const int sz_hi = psizes[base + j + 1];
               double ci, denom;

               /*
                * If speed is an increasing function there are two
                * candidate equations, y=a*x+c and y=a*x-c; the second
                * is evaluated only when the first does not give a
                * strictly positive point.
                *
                * If speed is a decreasing function there is a single
                * equation, y=-a*x+c.
                */
               if (sp_hi > sp_lo)
               {
                  ci = (double)(sp_hi*sz_lo - sp_lo*sz_hi)
                       /
                       (double)(sz_lo - sz_hi);

                  denom = slope
                          -
                          ((double)(sp_hi - sp_lo)
                           /
                           (double)(sz_hi - sz_lo));

                  x = ci / denom;
                  y = slope * x;

                  if (HMPI_Debug_flag)
                  {
                     /* x is a double, so it must be printed with %f */
                     printf(
                       "HMPI===> __HMPI_Recursive_bisection_middle_region:"
                       " equation of the form y=a*x+c"
                       " x=%.3f, y=%.3f\n",
                       x, y
                     );
                  }

                  /*
                   * x and y must both be positive
                   * Values of x=0, y=0 not acceptable
                   */
                  if (!((x > 0) && (y > 0)))
                  {
                     ci = (double)(sp_hi*sz_lo
                                   -
                                   2*sp_lo*sz_lo
                                   +
                                   sp_lo*sz_hi)
                          /
                          (double)(sz_lo - sz_hi);

                     denom = ((double)(sp_hi - sp_lo)
                              /
                              (double)(sz_lo - sz_hi))
                             -
                             slope;

                     x = ci / denom;
                     y = slope * x;

                     if (HMPI_Debug_flag)
                     {
                        printf(
                          "HMPI===> __HMPI_Recursive_bisection_middle_region:"
                          " equation of the form y=a*x-c"
                          " x=%.3f, y=%.3f\n",
                          x, y
                        );
                     }

                     if (!((x > 0) && (y > 0)) && HMPI_Debug_flag)
                     {
                        printf(
                          "HMPI===> __HMPI_Recursive_bisection_middle_region:"
                          " Processor %d speed functions do not fit the "
                          "restrictions\n",
                          i
                        );
                     }
                  }
               }
               else
               {
                  ci = (double)(sp_hi*sz_lo - sp_lo*sz_hi)
                       /
                       (double)(sz_lo - sz_hi);

                  denom = slope
                          +
                          ((double)(sp_hi - sp_lo)
                           /
                           (double)(sz_lo - sz_hi));

                  x = ci / denom;
                  y = slope * x;

                  if (HMPI_Debug_flag)
                  {
                     printf(
                       "HMPI===> __HMPI_Recursive_bisection_middle_region:"
                       " equation of the form y=-a*x+c"
                       " x=%.3f, y=%.3f\n",
                       x, y
                     );
                  }

                  if (!((x > 0) && (y > 0)) && HMPI_Debug_flag)
                  {
                     printf(
                       "HMPI===> __HMPI_Recursive_bisection_middle_region:"
                       " Processor %d speed functions do not fit the "
                       "restrictions\n",
                       i
                     );
                  }
               }

               /*
                * Accept the point when it lies inside this segment:
                * x between the two problem sizes and y between the
                * two speeds (upper bound sp_lo, lower bound sp_hi).
                */
               if ((x >= sz_lo) && (x <= sz_hi)
                   && (y <= sp_lo) && (y >= sp_hi))
               {
                  npd[i] = x;
                  s[i] = y;
                  speeds_opt[i] = y;
                  intersection_found = 1;
                  break;
               }
           }

           /*
            * No segment contained the point: fall back to the values
            * computed for the last segment examined.
            */
           if (intersection_found == 0)
           {
              npd[i] = x;
              s[i] = y;
              speeds_opt[i] = y;
           }
       }

       /*
        * If the sum is equal to n, we have a 
        * perfect fit.
        */
       sumd = 0.0;
       for (i = 0; i < p; i++)
       {
           sumd += npd[i];
       }

       if ((floor(sumd) == n) || (ceil(sumd) == n))
       {
          int sum = 0;

          for (i = 0; i < p; i++)
          {
              sum += floor(npd[i]);
          }

          /*
           * Callers take floor(npd[i]); bump entries one at a time
           * until those floors add up to n.
           */
          for (i = 0; (sum != n) && (i < p); i++)
          {
              npd[i] = npd[i] + 1;

              sum = 0;

              for (j = 0; j < p; j++)
              {
                  sum += floor(npd[j]);
              }
          }

          free(s);

          return HMPI_OK;
       }

       if (HMPI_Debug_flag)
       {
          printf("HMPI===> __HMPI_Recursive_bisection_middle_region: Sum is %0.6f\n", sumd);
       }

       /*
        * After HMPI_MAX_BISECTION_STEPS steps we have not arrived at a
        * solution (the original note here states that bisection needs
        * no more than 53 iterations to reach full floating-point
        * precision). Distribute the rest of the elements of the set
        * using the speeds at the current point.
        */
       if (_HMPI_Bisection_count == HMPI_MAX_BISECTION_STEPS)
       {
          for (i = 0; i < p; i++)
          {
              speeds_opt[i] = s[i];
          }

          rc = __HMPI_Distribute_with_single_number_for_speed(
                     n, 
                     p, 
                     s,
                     npd
          );

          free(s);

          return rc;
       }

       free(s);

       _HMPI_Bisection_count++;

       /*
        * Too many elements: continue between slopei and the current
        * slope. Otherwise continue between the current slope and
        * slopef.
        */
       if (sumd > n)
       {
          return __HMPI_Recursive_bisection_middle_region(
                     p,
                     pn,
                     speeds,
                     psizes,
                     n,
                     slopei,
                     slope,
                     speeds_opt,
                     npd
          );
       }

       return __HMPI_Recursive_bisection_middle_region(
                  p,
                  pn,
                  speeds,
                  psizes,
                  n,
                  slope,
                  slopef,
                  speeds_opt,
                  npd
       );
   }
   
   /*-----------------------------------------------------*/

   /*
    * Partitions n elements over p processors using their speed
    * functions while respecting a per-processor upper bound on the
    * number of elements it can store.
    *
    * p          - number of processors
    * pn         - number of points describing each speed function
    * speeds     - speed values, pn per processor
    * psizes     - problem sizes, pn per processor
    * bounds     - maximum number of elements each processor may hold
    * n          - number of elements to distribute
    * speeds_opt - out: speed of each processor at its allocation
    * np         - out: number of elements given to each processor
    *
    * The unconstrained partition is computed first. The first
    * processor found above its bound is pinned to that bound and the
    * remaining elements are partitioned recursively over the other
    * p-1 processors. speeds_opt of the pinned processor keeps the
    * value produced by the unconstrained call.
    *
    * Returns HMPI_OK on success, HMPI_ERR_PARTITION_SET when the
    * bounds add up to less than n, MPC_ERR_NOMEM on allocation
    * failure, or the error of a nested call. All temporary arrays are
    * released on every path.
    */
   int __HMPI_Speed_function_of_problem_size_with_mlimits
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       const int *bounds,
       int n,
       double *speeds_opt,
       int *np
   )
   {
       int i, j, k, rc;
       int pinned = -1;          /* index of the processor over its bound */
       int ind = 0;
       int sind = 0;
       int mind = 0;
       int nm;
       double sumd = 0;
       double *speedsm = NULL;
       double *speedsm_opt = NULL;
       int *psizesm = NULL;
       int *npm = NULL;
       int *boundsm = NULL;

       _HMPI_Bisection_count  = 0;

       for (i = 0; i < p; i++)
       {
           sumd += bounds[i];
       }

       if (sumd < n)
       {
          printf(
            "Problem size %d cannot be solved, memory bounds "
            "on the number of elements that can be stored by "
            "each processor exceeded\n", n);
          return HMPI_ERR_PARTITION_SET;
       }

       /* The bounds leave no freedom: every processor is filled. */
       if (sumd == n)
       {
          for (i = 0; i < p; i++)
          {
              np[i] = bounds[i];
          }

          return HMPI_OK;
       }

       rc = __HMPI_Speed_function_of_problem_size(
                p,
                pn,
                speeds,
                psizes,
                n,
                speeds_opt,
                np
       );

       if (rc != HMPI_OK)
       {
          return rc;
       }

       for (i = 0; i < p; i++)
       {
           if (np[i] > bounds[i])
           {
              np[i] = bounds[i];
              pinned = i;
              break;
           }
       }

       if (pinned < 0)
       {
          return HMPI_OK;
       }

       /* Elements still to be placed on the other p-1 processors. */
       nm = n - bounds[pinned];

       speedsm     = (double*)malloc(sizeof(double) * (p-1) * pn);
       speedsm_opt = (double*)malloc(sizeof(double) * (p-1) * pn);
       psizesm     = (int*)malloc(sizeof(int) * (p-1) * pn);
       npm         = (int*)malloc(sizeof(int) * (p-1));
       boundsm     = (int*)malloc(sizeof(int) * (p-1));

       if ((speedsm == NULL)
           || (speedsm_opt == NULL)
           || (psizesm == NULL)
           || (npm == NULL)
           || (boundsm == NULL))
       {
          rc = MPC_ERR_NOMEM;
          goto cleanup;
       }

       /* Build the reduced problem without the pinned processor. */
       for (j = 0; j < p; j++)
       {
           if (j == pinned)
           {
              continue;
           }

           npm[ind] = np[j];
           boundsm[ind] = bounds[j];
           ind++;

           for (k = 0; k < pn; k++)
           {
               speedsm[sind++] = speeds[j*pn + k];
               psizesm[mind++] = psizes[j*pn + k];
           }
       }

       rc = __HMPI_Speed_function_of_problem_size_with_mlimits(
                p-1,
                pn,
                speedsm,
                psizesm,
                boundsm,
                nm,
                speedsm_opt,
                npm
       );

       if (rc != HMPI_OK)
       {
          goto cleanup;
       }

       /* Scatter the reduced solution back to the original indices. */
       for (ind = 0, j = 0; j < p; j++)
       {
           if (j == pinned)
           {
              continue;
           }

           np[j] = npm[ind];
           speeds_opt[j] = speedsm_opt[ind];
           ind++;
       }

   cleanup:
       free(speedsm);
       free(speedsm_opt);
       free(psizesm);
       free(boundsm);
       free(npm);

       return rc;
   }

   /*-----------------------------------------------------*/

   /*
    * Adjusts npd[0..p-1] in place so that the floors of its entries
    * add up to n: if they do not already, entries are incremented by
    * one, in index order, until the sum of floors equals n or every
    * entry has been incremented once.
    */
   static void _HMPI_Bump_until_floor_sum_is_n
   (
       int p,
       int n,
       double *npd
   )
   {
       int i, j;
       int sum = 0;

       for (i = 0; i < p; i++)
       {
           sum += floor(npd[i]);
       }

       for (i = 0; (sum != n) && (i < p); i++)
       {
           npd[i] = npd[i] + 1;

           sum = 0;

           for (j = 0; j < p; j++)
           {
               sum += floor(npd[j]);
           }
       }
   }

   /*
    * Partitions n elements over p processors whose speeds are given
    * as functions of problem size.
    *
    * p          - number of processors
    * pn         - number of points describing each speed function;
    *              processor i owns speeds[i*pn .. i*pn+pn-1] and
    *              psizes[i*pn .. i*pn+pn-1]
    * speeds     - speed values of all processors
    * psizes     - problem sizes at which the speeds were measured
    * n          - number of elements to distribute
    * speeds_opt - out: speed of each processor at its allocation
    * np         - out: number of elements given to each processor
    *
    * Two lines through the origin are tried first: the steepest one
    * through a first point and the flattest one through a last point
    * of the speed functions. If n lies strictly between the totals
    * they produce, __HMPI_Recursive_bisection_middle_region searches
    * the region between them.
    *
    * NOTE(review): the second phase reads index pn-2 of each speed
    * function, so it presumably requires pn >= 2 - confirm with
    * callers.
    *
    * Returns HMPI_OK on success, MPC_ERR_NOMEM on allocation failure,
    * or the error of a nested call. Scratch arrays are released on
    * every path.
    */
   int __HMPI_Speed_function_of_problem_size
   (
       int p,
       int pn,
       const double *speeds,
       const int *psizes,
       int n,
       double *speeds_opt,
       int *np
   )
   {
       int rc = HMPI_OK;
       int ind, i;
       double temp, sumd;
       double slopei, slopef;
       double line_slope;
       double *s = NULL;
       double *npd = (double*)malloc(sizeof(double) * p);

       if (npd == NULL)
       {
          return MPC_ERR_NOMEM;
       }

       _HMPI_Bisection_count = 0;

       /*
        * Start with a straight line with maximum slope
        * at first point
        */
       temp = (double)speeds[0] / (double)psizes[0];
       ind = 0;
       for (i = 1; i < p; i++)
       {
           double temps = (double)speeds[i*pn] / (double)psizes[i*pn];

           if (temp < temps)
           {
              temp = temps;
              ind = i;
           }
       }

       /*
        * Solve the equations 
        * y = (maximum slope)*x and 
        * y0 = c0, y1 = c1, ... for points
        * x0, x1, x2, ...,xp-1
        *
        * speeds_opt[ind] is written as well so that every output
        * entry is defined when the function returns from this phase.
        */
       npd[ind] = psizes[ind*pn];
       speeds_opt[ind] = speeds[ind*pn];
       for (i = 0; i < p; i++)
       {
           if (i == ind)
           {
              continue;
           }

           npd[i] = ((double)psizes[ind*pn] / (double)speeds[ind*pn])
                    *
                    speeds[i*pn];
           speeds_opt[i] = speeds[i*pn];
       }

       /*
        * If the sum is equal to n, we have a 
        * perfect fit.
        */
       sumd = 0.0;
       for (i = 0; i < p; i++)
       {
           sumd += npd[i];
       }

       if ((floor(sumd) == n) || (ceil(sumd) == n))
       {
          _HMPI_Bump_until_floor_sum_is_n(p, n, npd);

          for (i = 0; i < p; i++)
          {
              np[i] = floor(npd[i]);
          }

          goto done;
       }

       /*
        * Even the steepest line yields more than n elements:
        * distribute in proportion to the speeds at the first points.
        */
       if (sumd > n)
       {
          s = (double*)malloc(sizeof(double) * p);

          if (s == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto done;
          }

          for (i = 0; i < p; i++)
          {
              s[i] = speeds[i*pn];
          }

          rc = __HMPI_Number_of_elements_proportional_to_speed(
                      p,
                      n,
                      s,
                      np
          );

          goto done;
       }

       slopei = (double)speeds[ind*pn] / (double)psizes[ind*pn];

       /*
        * Start with a straight line passing
        * with minimum slope at last point
        */
       temp = (double)speeds[pn - 1] / (double)psizes[pn - 1];
       ind = 0;
       for (i = 1; i < p; i++)
       {
           double temps = (double)speeds[i*pn + pn - 1]
                          /
                          (double)psizes[i*pn + pn - 1];

           if (temp > temps)
           {
              temp = temps;
              ind = i;
           }
       }

       /*
        * Solve the equations for the last point
        * y = (minimum slope)*x and for the processors
        * y0 = b0*x+c0, y1 = b1*x1+c1, ... for points
        * x0, x1, x2, ...,xp-1
        */
       npd[ind] = psizes[ind*pn + pn - 1];
       speeds_opt[ind] = speeds[ind*pn + pn - 1];

       /* npd[ind] is not modified below, so this slope is invariant. */
       line_slope = (double)speeds[ind*pn + pn - 1] / (double)npd[ind];

       for (i = 0; i < p; i++)
       {
           const int last = i*pn + pn - 1;
           const int prev = i*pn + pn - 2;
           double ci, denom, x;

           if (i == ind)
           {
              continue;
           }

           /* Intersection with the last segment of processor i. */
           ci = (double)(speeds[last]*psizes[prev]
                         -
                         speeds[prev]*psizes[last])
                /
                (double)(psizes[prev] - psizes[last]);

           denom = line_slope
                   +
                   ((double)(speeds[last] - speeds[prev])
                    /
                    (double)(psizes[prev] - psizes[last]));

           x = ci / denom;

           npd[i] = x;
           speeds_opt[i] = x * line_slope;
       }

       /*
        * If the sum is equal to n, we have a 
        * perfect fit.
        */
       sumd = 0.0;
       for (i = 0; i < p; i++)
       {
           sumd += npd[i];
       }

       if ((floor(sumd) == n) || (ceil(sumd) == n))
       {
          _HMPI_Bump_until_floor_sum_is_n(p, n, npd);

          for (i = 0; i < p; i++)
          {
              np[i] = floor(npd[i]);
          }

          goto done;
       }

       /*
        * Even the flattest line yields fewer than n elements:
        * distribute in proportion to the speeds on that line.
        */
       if (sumd < n)
       {
          s = (double*)malloc(sizeof(double) * p);

          if (s == NULL)
          {
             rc = MPC_ERR_NOMEM;
             goto done;
          }

          for (i = 0; i < p; i++)
          {
              s[i] = line_slope * npd[i];
              speeds_opt[i] = s[i];
          }

          rc = __HMPI_Number_of_elements_proportional_to_speed(
                      p,
                      n,
                      s,
                      np
          );

          goto done;
       }

       slopef = (double)speeds[ind*pn + pn - 1]
                /
                (double)psizes[ind*pn + pn - 1];

       /*
        * Use recursive Bisection to get a perfect fit
        */
       rc = __HMPI_Recursive_bisection_middle_region(
                p,
                pn,
                speeds,
                psizes,
                n,
                slopei, 
                slopef,
                speeds_opt,
                npd
       );

       if (rc != HMPI_OK)
       {
          goto done;
       }

       for (i = 0; i < p; i++)
       {
           np[i] = floor(npd[i]);
       }

   done:
       free(s);
       free(npd);

       return rc;
   }

   /*-----------------------------------------------------*/

