
  #include "cholesky_i.h"

  /*-----------------------------------------------------*/

  /*
   * Input_recon
   *
   * Fills the benchmark matrix used by the recon step: each of the
   * recon_n*recon_n elements of `a` is set to CHOLESKY_CONSTANT_NUMBER.
   *
   * a -- destination buffer; the caller must provide at least
   *      recon_n*recon_n doubles.
   *
   * Always returns HMPI_OK.
   */
  int
  Input_recon
  (
     double *a
  )
  {
     int idx = 0;

     while (idx < recon_n*recon_n)
     {
         a[idx] = CHOLESKY_CONSTANT_NUMBER;
         idx += 1;
     }

     return HMPI_OK;
  }

  /*-----------------------------------------------------*/

   /*
    * Perf_func
    *
    * Benchmark kernel: in-place factorisation sweep over the x*x matrix
    * stored row-major (row stride x) in the file-scope buffer `a`.
    *
    * For every column k the column is scaled by the square root of its
    * diagonal element and the remaining lower-triangular part is updated.
    *
    * x -- order of the matrix (number of rows and of columns).
    *
    * Always returns 0.
    */
   int Perf_func
   (
       int x /* --n-- */
   )
   {
      int i,j,k;

      for (k = 0; k < x; k++)
      {
          double lkk = sqrt(a[k*x+k]);

          /*
           * Scale column k. The bound is the matrix order x: the buffer
           * holds x*x elements, so running up to the global problem size N
           * would index outside it whenever N differs from x.
           */
          for (i = k; i < x; i++)
          {
              a[i*x+k] /= lkk;
          }

          /*
           * Update the part of the lower triangle to the right of column k.
           */
          for (j = k+1; j < x; j++)
          {
              for (i = j; i < x; i++)
              {
                  a[i*x+j] -= a[i*x+k]*a[j*x+k];
              }
          }
      }

      return 0;
   }

  /*-----------------------------------------------------*/

   /*
    * Benchmark_function
    *
    * Adapter handed to HMPI_Recon: reads one int from input_p, runs
    * Perf_func with it and stores the int result through output_p.
    *
    * input_p  -- points to an int holding the matrix order.
    * num_of_p -- not used by this function.
    * output_p -- points to an int that receives Perf_func's return value.
    */
   void Benchmark_function
   (
      const void* input_p,
      int num_of_p,
      void* output_p
   )
   {
      const int* order_arg = (const int*)input_p;
      int* result_slot = (int*)output_p;

      *result_slot = Perf_func(*order_arg);
   }

  /*-----------------------------------------------------*/

   /*
    * Do_recon
    *
    * Allocates the recon_n*recon_n benchmark matrix in the file-scope
    * pointer `a`, fills it with Input_recon, and (for members of
    * HMPI_COMM_WORLD_GROUP) runs HMPI_Recon with Benchmark_function as
    * the benchmark. The matrix is released before returning.
    *
    * Returns HMPI_OK on success. An allocation failure or a failing
    * HMPI_Recon is reported on stdout and followed by HMPI_Abort(-1);
    * -1 is returned if the abort after an allocation failure returns.
    */
   int Do_recon()
   {
       /*
        * Compute the element count in size_t so the product is not
        * evaluated in int before being widened.
        */
       a = (double*)malloc(
                    sizeof(double)
                    *
                    (size_t)recon_n
                    *
                    (size_t)recon_n
       );

       /*
        * Input_recon writes through the pointer unconditionally, so a
        * failed allocation must be caught here.
        */
       if (a == NULL)
       {
           printf("Panic: cannot allocate the recon matrix \n");
           HMPI_Abort(-1);
           return -1;
       }

       Input_recon(
         a
       );

       if (HMPI_Is_member(HMPI_COMM_WORLD_GROUP))
       {
            int rc;
            int output_p;
            int input_p = recon_n;

            rc = HMPI_Recon(
                     &Benchmark_function,
                     &input_p,
                     1,
                     &output_p
            );

            if  (rc != HMPI_OK)
            {
                printf("Panic: HMPI_Recon failed \n");
                HMPI_Abort(-1);
            }
       }

       /*
        * Do not leave the file-scope pointer dangling.
        */
       free(a);
       a = NULL;

       return HMPI_OK;
   }

  /*-----------------------------------------------------*/

  /*
   * LowerCholesky
   *
   * In-place factorisation of the num*num matrix stored row-major in l11.
   * Column by column, the column is divided by the square root of its
   * diagonal entry and the remaining lower triangle is updated; finally
   * every entry above the diagonal is set to 0.0.
   *
   * num -- order of the matrix.
   * l11 -- matrix storage (num*num doubles), overwritten with the result.
   *
   * Always returns HMPI_OK.
   */
  int
  LowerCholesky
  (
    int num,
    double *l11
  )
  {
      int col, row, c;

      for (col = 0; col < num; col++)
      {
          const double pivot = sqrt(l11[col*num + col]);

          /* Scale column `col` from the diagonal downwards. */
          for (row = col; row < num; row++)
          {
              l11[row*num + col] /= pivot;
          }

          /* Update the lower triangle to the right of column `col`. */
          for (c = col+1; c < num; c++)
          {
              for (row = c; row < num; row++)
              {
                  l11[row*num + c] -= l11[row*num + col] * l11[c*num + col];
              }
          }
      }

      /* Clear the strictly upper triangle. */
      for (row = 0; row < num; row++)
      {
          for (c = row+1; c < num; c++)
          {
              l11[row*num + c] = 0.0;
          }
      }

      return HMPI_OK;
  }

  /*-----------------------------------------------------*/

  /*
   * Execute_algorithm
   *
   * Drives one run of the factorisation:
   *   1. creates the HMPI group `gid` (the host passes the grid shape
   *      {p, q}; free processes pass no parameters), after which
   *      processes that are still free call HMPI_Finalize(0);
   *   2. allocates the file-scope tables row_allocations,
   *      column_allocations and Generalised_block plus two local
   *      performance arrays;
   *   3. reads the group performances, truncates them to int and calls
   *      Distribute_load;
   *   4. sets n = N/r, calls Perform_cholesky and HMPI_Barrier;
   *   5. frees the group (members only) and all buffers.
   *
   * Returns HMPI_OK on success, -1 on an allocation failure, or the
   * error code of the HMPI/helper call that failed. Every buffer
   * allocated here is released on all return paths.
   */
  int Execute_algorithm()
  {
     int rc = HMPI_OK;
     int i;

     HMPI_Group gid;
     int *model_params = NULL;
     int model_count = 0;
     double *dperf = NULL;
     int *iperf = NULL;

     /*
      * The grid count of 2 corresponds to
      * p -- No. of processors along the row.
      * q -- No. of processors along the column.
      */
     if (HMPI_Is_host())
     {
        model_count = 2;
        model_params = (int*)malloc(
                            sizeof(int)
                            *
                            model_count
        );

        if (model_params == NULL)
        {
           return -1;
        }

        model_params[0] = p;
        model_params[1] = q;

        rc = HMPI_Group_create(
                 &gid,
                 &HMPI_NetType_simpleGrid,
                 model_params,
                 model_count
        );

        if (rc != HMPI_OK)
        {
           free(model_params);
           return rc;
        }
     }

     if (HMPI_Is_free())
     {
        rc = HMPI_Group_create(
                 &gid,
                 &HMPI_NetType_simpleGrid,
                 NULL,
                 0
        );

        if (rc != HMPI_OK)
        {
           free(model_params);
           return rc;
        }
     }

     /*
      * Re-tested on purpose: this is evaluated after the group creation
      * above, exactly as in the original control flow.
      */
     if (HMPI_Is_free())
     {
        HMPI_Finalize(0);
     }

     /*
      * Reset the file-scope tables first so the common cleanup path can
      * free them unconditionally, whichever allocation fails.
      */
     row_allocations = NULL;
     column_allocations = NULL;
     Generalised_block = NULL;

     row_allocations = (int*)malloc(
                                 sizeof(int)
                                 *
                                 (p*q)
     );

     if (row_allocations == NULL)
     {
        rc = -1;
        goto cleanup;
     }

     column_allocations = (int*)malloc(
                                    sizeof(int)
                                    *
                                    p
     );

     if (column_allocations == NULL)
     {
        rc = -1;
        goto cleanup;
     }

     Generalised_block = (int(*)[2])malloc(
                                    sizeof(int[2])
                                    *
                                    (g)*(g)
     );

     if (Generalised_block == NULL)
     {
        rc = -1;
        goto cleanup;
     }

     dperf = (double*)malloc(
                     sizeof(double)
                     *
                     (p*q)
     );

     if (dperf == NULL)
     {
        rc = -1;
        goto cleanup;
     }

     iperf = (int*)malloc(
                   sizeof(int)
                   *
                   (p*q)
     );

     if (iperf == NULL)
     {
        rc = -1;
        goto cleanup;
     }

     rc = HMPI_Group_performances(
              &gid,
              dperf
     );

     if (rc != HMPI_OK)
     {
        goto cleanup;
     }

     /*
      * Distribute_load takes integer performances; the fractional part
      * is dropped.
      */
     for (i = 0; i < (p*q); i++)
     {
        iperf[i] = (int)dperf[i];
     }

     if ((VERBOSE > 0)
         && (HMPI_Is_host()
        )
     )
     {
        printf("Performances are: ");

        for (i = 0; i < (p*q); i++)
        {
            printf("%d ", iperf[i]);
        }

        printf("\n");
     }

     /*
      * Distribute the load taking into account the
      * relative performances.
      */
     rc = Distribute_load(
            p,
            q,
            iperf,
            g,
            g,
            row_allocations,
            column_allocations
     );

     if (rc != HMPI_OK)
     {
        printf(
           "CHOLESKY:Problems HEHE distributing the load "
           "...Exiting...\n"
        );

        goto cleanup;
     }

     /*
      * Perform the Cholesky Factorization for square matrices for
      * each problem size.
      */
     n = (N/r);

     rc = Perform_cholesky(
                 &gid
     );

     if (rc != HMPI_OK)
     {
        goto cleanup;
     }

     rc = HMPI_Barrier(&gid);

     if (rc != HMPI_OK)
     {
        goto cleanup;
     }

     /*
      * Destroy the network. Only done on the success path, as before;
      * a failure here is still reported to the caller.
      */
     if (HMPI_Is_member(&gid))
     {
        rc = HMPI_Group_free(&gid);
     }

  cleanup:
     /*
      * free(NULL) is a no-op, so every buffer can be released here
      * regardless of how far the function got.
      */
     free(row_allocations);
     row_allocations = NULL;
     free(column_allocations);
     column_allocations = NULL;
     free(Generalised_block);
     Generalised_block = NULL;

     free(dperf);
     free(iperf);
     free(model_params);

     return rc;
  }

  /*-----------------------------------------------------*/

  /*
   * Perform_cholesky
   *
   * Allocates the N*N matrix in the file-scope pointer A, runs
   * cholesky() on it for the group `gid`, and releases the matrix.
   *
   * gid -- group on which the factorisation is carried out.
   *
   * Returns HMPI_OK on success or the error code returned by
   * cholesky(). If A cannot be allocated, the problem is reported,
   * HMPI_Finalize(-1) is called and, should that call return, -1 is
   * returned instead of continuing with a null matrix.
   */
  int
  Perform_cholesky
  (
     const HMPI_Group* gid
  )
  {
     int rc;

     /*
      * Element count is computed in size_t to avoid evaluating N*N in int.
      */
     A = (double*)malloc(
                  sizeof(double)
                  *
                  (size_t)N
                  *
                  (size_t)N
     );

     if (A == NULL)
     {
        printf("PANIC: heap problems, allocation of A\n");
        HMPI_Finalize(-1);
        return -1;
     }

     rc = cholesky(
            gid,
            A
     );

     /*
      * Release the matrix on both the success and the failure path.
      */
     free(A);
     A = NULL;

     if (rc != HMPI_OK)
     {
        printf(
          "CHOLESKY:Problems Factorising the matrix A "
          "...Exiting...\n"
        );

        return rc;
     }

     return HMPI_OK;
  }

  /*-----------------------------------------------------*/

  /*
   * cholesky
   *
   * Prepares and runs the factorisation on the calling process:
   *   1. obtains this process's grid coordinates with HMPI_Group_coordof;
   *   2. derives the file-scope values istart, jstart, myialloc and
   *      myjalloc from row_allocations / column_allocations;
   *   3. initialises the elements of `a` that the loops below visit
   *      (every g*r rows/columns starting at istart*r / jstart*r);
   *   4. optionally prints them (VERBOSE > 1);
   *   5. calls Grid_computations.
   *
   * gid -- group the process belongs to.
   * a   -- N*N matrix storage, row stride N.
   *
   * Returns HMPI_OK on success, -1 if the coordinate holder cannot be
   * allocated, or the error code of the failing call. The coordinate
   * storage is released on every return path.
   */
  int
  cholesky
  (
     const HMPI_Group* gid,
     double *a
  )
  {
     int result;
     int x, y;
     int icoord, jcoord;
     int i, j, k, l;
     int** my_coordinates = (int**)malloc(
                                   sizeof(int*)
     );
     int num_of_coordinates;

     if (my_coordinates == NULL)
     {
        HMPI_Printf("Error while getting the coordinates\n");
        return -1;
     }

     result =  HMPI_Group_coordof(
                   gid,
                   &num_of_coordinates,
                   my_coordinates
     );

     if (result != HMPI_OK)
     {
        HMPI_Printf("Error while getting the coordinates\n");

        /*
         * Only the holder is released here: whether *my_coordinates was
         * set by the failed call cannot be told from this file.
         */
        free(my_coordinates);
        return result;
     }

     /*
      * Initialize the respective array elements A & B at
      * the processors.
      * Each array element is a r*r matrix.
      */
     icoord = (*my_coordinates)[0];
     jcoord = (*my_coordinates)[1];

     istart = 0;
     for (i = 0; i < icoord; i++)
     {
         istart += row_allocations[(i*p) + jcoord];
     }

     jstart = 0;
     for (j = 0; j < jcoord; j++)
     {
         jstart += (column_allocations[j]);
     }

     myialloc = row_allocations[(icoord*p) + jcoord];
     myjalloc = (column_allocations[jcoord]);

     for (x = (istart*r); x < N; x+=(g*r))
     {
         for (y = (jstart*r); y < N; y+=(g*r))
         {
             for (i = 0; i < (myialloc*r); i+=r)
             {
                 for (j = 0; j < (myjalloc*r); j+=r)
                 {
                     for (k = 0; k < r; k++)
                     {
                         for (l = 0; l < r; l++)
                         {
                             a[(x*N) + y + (i*N) + j + (k*N) + l]
                             =
                             (N*N*CHOLESKY_CONSTANT_NUMBER*CHOLESKY_CONSTANT_NUMBER)
                             -
                             ((x*N + y + i*N + j + k*N + l)*CHOLESKY_CONSTANT_NUMBER)
                             ;
                         }
                     }
                 }
             }
         }
     }

     /*
      * Check to see if the matrix elements are properly
      * initialised.
      */
     if (VERBOSE > 1)
     {
        for (x = (istart*r); x < N; x+=(g*r))
        {
            for (y = (jstart*r); y < N; y+=(g*r))
            {
                for (i = 0; i < (myialloc*r); i+=r)
                {
                    for (j = 0; j < (myjalloc*r); j+=r)
                    {
                        for (k = 0; k < r; k++)
                        {
                            for (l = 0; l < r; l++)
                            {
                                printf(
                                    "a[%d][%d] = %0.2f\n",
                                    x + i + k,
                                    y + j + l,
                                    a[(x*N) + y + (i*N) + j + (k*N) + l]
                                );
                            }
                        }
                    }
                }
            }
        }
     }

     /*
      * Computations on network gid_grid
      */
     result = Grid_computations(
                  gid,
                  (*my_coordinates),
                  a
     );

     if (result != HMPI_OK)
     {
        HMPI_Printf("Error while performing grid computations\n");
     }

     /*
      * The coordinates are no longer needed, whether or not the
      * computation succeeded.
      */
     free(my_coordinates[0]);
     free(my_coordinates);

     return result;
  }

  /*-----------------------------------------------------*/

  /*
   * Grid_computations
   *
   * Runs the block-step loop (k = 0 .. n-1) of the factorisation on the
   * calling process's part of the array `a` (indexed with row stride N).
   *
   * Per step, as written below:
   *   1. The process that GetProcessor() reports for block (k%g, k%g)
   *      copies the r x r block at a[k*r*N + k*r], factors it with
   *      LowerCholesky(), writes it back and MPI_Send()s it to the
   *      processes {0..p-1, Root.J}; the other processes whose J
   *      coordinate equals Root.J MPI_Recv() it into l11.
   *   2. Processes with that J coordinate transpose l11, invert the
   *      transpose with invns() and overwrite columns k*r .. k*r+r-1 of
   *      the rows they visit with the product.
   *   3. For every block row i > k one r x r tile is MPI_Bcast() by the
   *      process reported for block (i%g, k%g) and copied into l21;
   *      transpose_l21 is then filled from l21.
   *   4. Sums of l21[..] * transpose_l21[..] products are subtracted
   *      from elements of `a`.
   *
   * On the last step (k+1 == n) every process leaves the loop before
   * any communication takes place.
   *
   * gid            -- group; its communicator is fetched with
   *                   HMPI_Get_comm and used for all MPI calls.
   * my_coordinates -- elements [0] and [1] are read as this process's
   *                   I and J grid coordinates.
   * a              -- matrix storage, updated in place.
   *
   * Reads the file-scope names n, r, g, p, N, istart, jstart, myialloc,
   * myjalloc and VERBOSE.
   *
   * Returns HMPI_OK when the loop completes, -1 on any failure.
   *
   * NOTE(review): the `return -1` statements that follow the allocation
   * of l21 / transpose_l21 inside the loop do not free those buffers.
   */
  int
  Grid_computations
  (
     const HMPI_Group* gid,
     const int* my_coordinates,
     double *a
  )
  {
     int x, y, z, i, j, k, l, m, t, rc;
     /* Variable-length arrays sized by the file-scope block size r. */
     double l11[r*r], transpose_l11[r*r], inverse_l11[r][r];
     double *l21, *transpose_l21;

     Block Ablock;
     Processor Root, Me;

     MPI_Comm* grid_comm = (MPI_Comm*)HMPI_Get_comm(gid);

     if (grid_comm == NULL)
     {
        return -1;
     }

     Me.I = my_coordinates[0];
     Me.J = my_coordinates[1];

     for (k = 0; k < n; k++)
     {
         int istarts, jstarts;
         int currenti, currentj;
	 int myialloc_left, myjalloc_left;
         int pivoti_column = 0;
         int pivotj_column = 0;
	 int I_have_the_pivot = 0;
         int Iam_the_root = 0;

         /* Diagonal block of step k, taken modulo the generalised block size g. */
         int Arow = (k%g);
         int Acolumn = (k%g);

         GetBlock(Arow, Acolumn, &Ablock);
         GetProcessor(&Ablock, &Root);

	 /*
	  * Broadcast l11 to the processors along the column k.
	  */
         /* Processes outside the root's grid column have nothing left to do on the last step. */
         if ((Root.J) != (Me.J))
         {
            if ((k+1) == n)
            {
               break;
            }
         }

         if ((Root.J) == (Me.J))
         {
	    if (((Root.I) == (Me.I))
                && ((Root.J) == (Me.J)
               )
            )
            {
               double temp[r*r];
	       int RootCoords[2] = {Root.I, Root.J};
	       /* NOTE(review): `root` is computed but not used in this branch. */
	       int root = HMPI_Rank(gid, RootCoords);

	       /*
	        * Compute l11, the lower Cholesky factor of a11.
	        */
	       for (i = 0; i < r; i++)
               {
                   for (j = 0; j < r; j++)
                   {
                       l11[i*r+j] = a[k*r*N + k*r + i*N + j];
                   }
               }

	       rc = LowerCholesky(
		         r, 
		         l11
               );

	       if (rc != HMPI_OK)
               {
                  printf("Cannot get the Lower Choleksy Factor of l11\n");
                  return -1;
               }

               /* Store the factored block back into the matrix. */
	       for (i = 0; i < r; i++)
               {
                   for (j = 0; j < r; j++)
                   {
                       a[k*r*N + k*r + i*N + j] = l11[i*r+j];
                   }
               }

               /* Last step: the diagonal block is done, nothing to send. */
               if ((k+1) == n)
               {
                  break;
               }

	       for (i = 0; i < r*r; i++)
               {
                   temp[i] = l11[i];
               }

               /*
                * NOTE(review): the loop covers every row coordinate 0..p-1,
                * which includes i == Root.I, i.e. this process itself; no
                * matching MPI_Recv is posted in this branch -- confirm that
                * the self-send is intended.
                */
               for (i = 0; i < p; i++)
               {
	           int ReceiverCoords[2] = {i, Root.J};
	           int Receiver = HMPI_Rank(gid, ReceiverCoords);

                   rc = MPI_Send(
                           &temp,
                           r*r,
                           MPI_DOUBLE,
                           Receiver,
                           HMPI_MSG_TAG,
                           *grid_comm
                   );

	           if (rc != MPI_SUCCESS)
                   {
                      printf("Problems sending l11 to Receiver:%d\n", Receiver);
	              return -1;
                   }
               }
            }
            else
            {
               MPI_Status status;
	       int RootCoords[2] = {Root.I, Root.J};
	       int root;

               /* The root sends nothing on the last step, so do not wait for it. */
               if ((k+1) == n)
               {
                  break;
               }

	       root = HMPI_Rank(gid, RootCoords);

               rc = MPI_Recv(
                       &l11,
                       r*r,
                       MPI_DOUBLE,
                       root,
                       HMPI_MSG_TAG,
                       *grid_comm,
                       &status
               );

	       if (rc != MPI_SUCCESS)
               {
                  printf("Problems Receiving l11\n");
	          return -1;
               }
            }
         }

	 /*
	  * Calculate L21
	  * 1). First Transpose l11.
	  * 2). Multiply A21 into inverse of transpose of l11
	  */
         if ((Root.J) == (Me.J))
         {
            for (i = 0; i < r; i++)
            {
                for (j = 0; j < r; j++)
                {
                    transpose_l11[i*r+j] = l11[j*r + i];
                }
            }

	    {
                /* invns takes 2-D arrays, so the flat transpose is copied into one. */
                double given[r][r];
                double working[r][r];

                for (i = 0; i < r; i++)
                {
                    for (j = 0; j < r; j++)
                    {
                        given[i][j] = transpose_l11[i*r+j];
                    }
	        }     

                rc = invns(
	                r, 
		        inverse_l11,
		        given,
		        working
	        );

	        if (rc != 0)
                {
                   printf("Inverse failed\n");
	   	   return -1;
                }
            }

            /* Row offset of block k+1 inside one generalised block. */
            currenti = ((k+1)%g)*r;

            /* Exactly one of the three cases below applies and sets istarts. */
	    if ((currenti >= (istart*r))
                && (currenti < (istart*r + myialloc*r)
               )
            )
            {
               istarts = (((k+1)/g)*g*r) + currenti;
               myialloc_left = istart*r
                               +
                               myialloc*r
                               -
                               currenti;
               I_have_the_pivot = 1;
            }

	    if (currenti < (istart*r))
            {
               istarts = (((k+1)/g)*g*r)
	                 +
	                 (istart*r);
            }

	    if (currenti >= (istart*r + myialloc*r))
            {
               istarts = (((k+1)/g+1)*g*r)
                         +
                         (istart*r);
            }

            if (I_have_the_pivot == 1)
            {
               /* Remaining rows of the generalised block that contains block k+1. */
               for (x = 0; x < myialloc_left; x++)      
               {
                   for (y = k*r; y < (k*r+r); y++)
                   {
                       double temp = 0.0;
                       for (z = 0; z < r; z++)
                       {
                           temp
                           += 
                           a[istarts*N + x*N + z]
	                   *
	                   inverse_l11[z][y - k*r];
                       }
                       a[istarts*N + x*N + y] = temp;
                   }
               }

               x = (k/g+1)*g*r;

               /* All following generalised blocks, full local allocation each. */
               for (x += (istart*r); x < N; x+=(g*r))      
               {
                   for (t = 0; t < (myialloc*r); t++)
                   {
                       for (y = k*r; y < (k*r+r); y++)
                       {
                           double temp = 0.0;
                           for (z = 0; z < r; z++)
                           {
                               temp
                               += 
                               a[x*N + t*N + z]
	                       *
	                       inverse_l11[z][y - k*r];
                           }
                           a[x*N + t*N + y] = temp;
                       }
                   }
               }
            }
            else
            {
               x = istarts;
   
               for (; x < N; x+=(g*r))
               {
                   for (t = 0; t < myialloc*r; t++)
                   {
                       for (y = k*r; y < (k*r+r); y++)
                       {
                           double temp = 0.0;
                           for (z = 0; z < r; z++)
                           {
                               temp
                               += 
                               a[x*N + t*N + z]
	                       *
	                       inverse_l11[z][y - k*r];
                           }
                           a[x*N + t*N + y] = temp;
                       }
                   }
               }
            }
         }

         I_have_the_pivot = 0;

	 /*
	  * Broadcast the column of blocks L21
	  */
	 l21 = (double*)malloc(
			sizeof(double)
			*
			(N - k*r - r)*r
         );

	 if (l21 == NULL)
         {
            printf("Cannot allocate l21\n");
	    return -1;
         }

	 transpose_l21 = (double*)malloc(
			          sizeof(double)
			          *
	          		  (N - k*r - r)*r
         );

	 /* NOTE(review): l21 is not freed on this failure path. */
	 if (transpose_l21 == NULL)
         {
            printf("Cannot allocate transpose_l21\n");
	    return -1;
         }

	 for (i = k+1; i < n; i++)
         {
             double temp[r*r];
             Block Ablocks;
             Processor Roots;
             GetBlock(
	        (i%g), 
		(k%g), 
		&Ablocks
	     );
             GetProcessor(&Ablocks, &Roots);

	     if (((Roots.I) == (Me.I))
                 && ((Roots.J) == (Me.J)
                )
             )
             {
	        int RootCoords[2] = {Roots.I, Roots.J};
	        int root = HMPI_Rank(gid, RootCoords);

		/*
		 * NOTE(review): the source index has no k*r column term, so
		 * the tile is read from columns 0..r-1 of block row i --
		 * confirm against the intended block column k.
		 */
		for (x = 0; x < r; x++)
                {
                    for (y = 0; y < r; y++)
                    {
                        temp[x*r+y] = a[i*r*N + x*N + y];
                    }
                }

                rc = MPI_Bcast(
                        &temp,
                        r*r,
                        MPI_DOUBLE,
                        root,
                        *grid_comm
                );
	     
             }
	     else
             {
	        int RootCoords[2] = {Roots.I, Roots.J};
	        int root = HMPI_Rank(gid, RootCoords);

                rc = MPI_Bcast(
                        &temp,
                        r*r,
                        MPI_DOUBLE,
                        root,
                        *grid_comm
                );
             }

	     /* NOTE(review): l21 and transpose_l21 are not freed on this failure path. */
	     if (rc != MPI_SUCCESS)
             {
                printf("Problems broadcasting l21\n");
	        return -1;
             }

	     /*
	      * NOTE(review): consecutive tiles are placed r elements apart
	      * ((i-k-1)*r) while each tile writes r*r elements, so tiles
	      * overlap in l21 -- confirm the intended layout.
	      */
	     for (x = 0; x < r; x++)
             {
                 for (y = 0; y < r; y++)
                 {
                     l21[(i-k-1)*r + x*r + y] = temp[x*r + y];
                 }
             }
	 } 

	 /*
	  * Transpose l21
	  */
         for (i = 0; i < (N - k*r - r); i++)
         {
             for (j = 0; j < r; j++)
             {
                 transpose_l21[i*r+j] = l21[j*r + i];
             }
         }

	 /*
	  * Update local portions of A22
	  */
         {
            /* Find out how this process relates to the next diagonal block (k+1, k+1). */
            Block Ablocks;
            Processor Roots;
            GetBlock(
	       ((k+1)%g), 
	       ((k+1)%g), 
		&Ablocks
	    );
            GetProcessor(&Ablocks, &Roots);

	    if (((Roots.I) == (Me.I))
                && ((Roots.J) == (Me.J)
               )
            )
            {
               Iam_the_root = 1;
            }

            if ((Roots.J) == (Me.J))
            {
               pivoti_column = 1;
            }
         }

         currenti = ((k+1)%g)*r;
         currentj = ((k+1)%g)*r;

	 if ((currenti >= (istart*r))
             && (currenti < (istart*r + myialloc*r)
            )
         )
         {
            pivotj_column = 1;
         }

         /* Column start (jstarts) and leftover counts when block row k+1 is in the local row range. */
         if (pivotj_column == 1)
         {
	    if ((currentj >= (jstart*r))
               && (currentj < (jstart*r + myjalloc*r)
               )
            )
            {
               jstarts = (((k+1)/g)*g*r) + currentj;
               myjalloc_left = jstart*r
                               +
                               myjalloc*r
                               -
                               currentj;
            }

	    if (currentj < (jstart*r))
            {
               jstarts = (((k+1)/g)*g*r)
		         +
		         (jstart*r);
            }

	    if (currentj >= (jstart*r + myjalloc*r))
            {
               jstarts = (((k+1)/g+1)*g*r)
                         +
                         (jstart*r);
            }

	    if ((currenti >= (istart*r))
                && (currenti < (istart*r + myialloc*r)
               )
            )
            {
               myialloc_left = istart*r
                               +
                               myialloc*r
                               -
                               currenti;
            }
         }

         /* Row start (istarts) and leftover counts when this process shares the next root's J coordinate. */
         if (pivoti_column == 1)
         {
	    if ((currenti >= (istart*r))
                && (currenti < (istart*r + myialloc*r)
               )
            )
            {
               istarts = (((k+1)/g)*g*r) + currenti;
               myialloc_left = istart*r
                               +
                               myialloc*r
                               -
                               currenti;
            }

	    if (currenti < (istart*r))
            {
               istarts = (((k+1)/g)*g*r)
	                 +
	                 (istart*r);
            }

	    if (currenti >= (istart*r + myialloc*r))
            {
               istarts = (((k+1)/g+1)*g*r)
                         +
                         (istart*r);
            }

	    if ((currentj >= (jstart*r))
                && (currentj < (jstart*r + myjalloc*r)
               )
            )
            {
               myjalloc_left = jstart*r
                               +
                               myjalloc*r
                               -
                               currentj;
            }
         }

         if (Iam_the_root == 1)
         {
            /* Leftover corner of the current generalised block. */
            for (x = 0; x < myialloc_left; x++)
            {
                for (y = 0; y < myjalloc_left; y++)
                {
                    double temp = 0.0;
                    for (i = 0; i < r; i++)
                    {
                        temp += l21[x*r + i]
                                *
                                transpose_l21[i*(N-k*r-r) + y];
                    }

                    a[istarts*N + x*N + jstarts + y]
                    -=
                    temp;
                }
            }

            x = (k/g+1)*g*r;

            /* Strip below the corner: later generalised blocks, leftover columns. */
            for (x += istart*r; x < N; x+=g*r)
            {
                for (t = 0; t < myialloc*r; t++)
                {
                    for (y = (k+1)*r; y < ((k+1)*r + myjalloc_left); y++)
                    {
                        double temp = 0.0;
                        for (i = 0; i < r; i++)
                        {
                            temp += l21[(x-(k+1)*r)*r + t*r + i]
                                    *
                                    transpose_l21[i*(N-k*r-r) + y - (k+1)*r];
                        }

                        a[x*N + t*N + y]
                        -=
                        temp;
                    }
                }
            }

            x = (k/g+1)*g*r;

            /* Strip right of the corner: leftover rows, later generalised blocks (x is a column here). */
            for (x += jstart*r; x < N; x+=g*r)
            {
                for (t = 0; t < myjalloc*r; t++)
                {
                    for (y = (k+1)*r; y < ((k+1)*r + myialloc_left); y++)
                    {
                        double temp = 0.0;
                        for (i = 0; i < r; i++)
                        {
                            temp += l21[(y-(k+1)*r)*r + i]
                                    *
                                    transpose_l21[i*(N-k*r-r) + x - (k+1)*r + t];
                        }

                        a[y*N + x + t]
                        -=
                        temp;
                    }
                }
            }
      
            /* Base offsets for the general update loop after this if/else. */
            x = (k/g+1)*g*r;
            y = (k/g+1)*g*r;
         }
         else
         {
            if (pivoti_column == 1)
            {
               x = istarts;

               for (; x < N; x+=g*r)
               {
                   for (t = 0; t < myialloc*r; t++)
                   {
                       for (y = (k+1)*r; y < ((k+1)*r + myjalloc_left); y++)
                       {
                           double temp = 0.0;
                           for (i = 0; i < r; i++)
                           {
                               temp += l21[(x-(k+1)*r)*r + t*r + i]
                                       *
                                       transpose_l21[i*(N-k*r-r) + y - (k+1)*r];
                           }

                           a[x*N + t*N + y]
                           -=
                           temp;
                       }
                   }
               }

               x = (k/g+1)*g*r;
               y = (k/g+1)*g*r;
            }

            if (pivotj_column == 1)
            {
               x = jstarts;

               for (; x < N; x+=g*r)
               {
                   for (t = 0; t < myjalloc*r; t++)
                   {
                       for (y = (k+1)*r; y < ((k+1)*r + myialloc_left); y++)
                       {
                           double temp = 0.0;
                           for (i = 0; i < r; i++)
                           {
                               temp += l21[(y-(k+1)*r)*r + i]
                                       *
                                       transpose_l21[i*(N-k*r-r) + x - (k+1)*r + t];
                           }

                           a[y*N + x + t]
                           -=
                           temp;
                       }
                   }
               }

               x = (k/g+1)*g*r;
               y = (k/g+1)*g*r;
            }

            if ((pivoti_column != 1)
                && (pivotj_column != 1
               )
            )
            {
	       if ((currenti < istart*r)
                   && (currentj < jstart*r
                  )
               )
               {
                  x = ((k+1)/g)*g*r;
                  y = ((k+1)/g)*g*r;
               }
               else
               {
                  x = (k/g+1)*g*r;
                  y = (k/g+1)*g*r;
               }
            }
         }

         /*
          * General update over the generalised blocks starting at the
          * base offsets chosen above.
          * NOTE(review): y is advanced with `y += jstart*r` in the inner
          * loop's initialiser and is not reset for later x iterations, so
          * the inner loop only runs for the first x -- confirm intent.
          */
         for (x += istart*r; x < N; x+=g*r)
         {
             for (y += jstart*r; y < N; y+=g*r)
             {            
                 for (l = 0; l < myialloc*r; l++)
                 {
                    for (m = 0; m < myjalloc*r; m++)
                    {
                        double temp = 0.0;
                        for (i = 0; i < r; i++)
                        {
                            temp += l21[(x-(k+1)*r)*r + l*r + i]
                                    *
                                    transpose_l21[i*(N-k*r-r) + y - (k+1)*r + m];
                        }

                        a[x*N + y + l*N + m]
                        -=
                        temp;
                    }
                }
            }
         }

	 free(l21);
	 free(transpose_l21);
	 pivoti_column = 0;
	 pivotj_column = 0;
         Iam_the_root = 0;
      }

      /*
       * The result of the computations.
       */
      if (VERBOSE > 1)
      {
         for (x = (istart*r); x < N; x+=(g*r))
         {
             for (y = (jstart*r); y < N; y+=(g*r))
             {
                 for (i = 0; i < (myialloc*r); i+=r)
                 {
                     for (j = 0; j < (myjalloc*r); j+=r)
                     {
                          for (l = 0; l < r; l++)
                          {
                              for (m = 0; m < r; m++)
                              {
                                  printf(
                                      "a[%d][%d] = %0.2f\n",
                                      x + i + l,
                                      y + j + m,
                                      a[x*N + y + i*N + j + l*N + m]
                                  );
                              }
                          }
                     }
                 }
             }
         }
      }

      return HMPI_OK;
  }

  /*-----------------------------------------------------*/

  /*
   * GetBlock
   *
   * Stores the block coordinates (x, y) in *b: x goes to b->i and
   * y goes to b->j. No other member of *b is touched.
   */
  void
  GetBlock(int x, int y, Block *b)
  {
     Block *const target = b;

     target->i = x;
     target->j = y;
  }

  /*-----------------------------------------------------*/

  /*
   * GetProcessor
   *
   * Looks up entry (b->i)*g + (b->j) of the file-scope table
   * Generalised_block and copies its two ints into *p:
   * element [0] becomes p->I and element [1] becomes p->J.
   */
  void
  GetProcessor(Block *b, Processor* p)
  {
     const int slot = ((b->i)*g) + (b->j);

     p->I = Generalised_block[slot][0];
     p->J = Generalised_block[slot][1];
  }

  /*-----------------------------------------------------*/

