#include <ctype.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/times.h>
#include <sys/utsname.h>
#include <unistd.h>
#include <mpi.h>
extern long sysconf(int);

#define MAX_NNL 256 /* Maximal length of a node name */
#define BUFSIZE 1024 /* Maximum length of a line in the machine file */
#define HOST 0 /* Rank in MPI_COMM_WORLD of the process that reads the machine file */
#define Kb 1024
#define REPS 3 /* Number of repeated tests of each communication link */

#define MPC_POWER_COEFF 1000.0     /* Factor applied to the measured processor speed */
#define MPC_SIZE_OF_DATA_PACK 64   /* Unit while measuring link speeds */
#define MPC_TRANSFER_DATA_RANGE 3  /* MPC_SIZE_OF_DATA_PACK**i, */
                                   /* i=1,...,MPC_TRANSFER_DATA_RANGE, */
                                   /* sizes of test data packages when measuring */
                                   /* link speeds */

/*
 * myrank   - rank of this process in MPI_COMM_WORLD
 * N        - number of processes in MPI_COMM_WORLD
 * actual_N - number of distinct node names among those processes
 * myAPrank - rank of this process in the communicator built from one
 *            process per distinct node
 * error    - set to 1 locally when the output file cannot be created
 * sumerr   - sum of 'error' over that communicator
 * numps    - for each distinct node, the number of processes running on it
 */
static int myrank, N, actual_N, myAPrank, error, sumerr, *numps;

/* Prefix for a statement that only the HOST process executes */
#define IF_HOST if(myrank==HOST)
/* Markers stored in the cluster table for '{' and '}' lines of the machine file */
#define LEFT_BRACE (-2)
#define RIGHT_BRACE (-3)

/*
 * Measured characteristics of a process or node: speed, memory size and
 * scalability (all three are transferred as doubles).
 */
typedef struct { double speed; double memory; double scalability;} tNodeAttribute;

/* procs: one entry per process (HOST only); processors: one entry per distinct node */
static tNodeAttribute *procs, *processors;
/* nodenames: node name of every process, indexed by rank; myname: name of the local node */
static char (*nodenames)[MAX_NNL], myname[MAX_NNL];

/* Scratch buffer used both for building file paths and for reading lines */
static char buf[BUFSIZE];
/* Node name extracted from the current line of the machine file */
static char name[MAX_NNL];


/*
 * Tell whether a line of the machine file carries no data.
 *
 * buffer - NUL-terminated line to examine.
 *
 * Returns 1 when the line is empty, consists only of white space, or its
 * first non-blank character is '#'; returns 0 otherwise.
 */
int MPC_Is_comment(const char* buffer)
{
  /* <ctype.h> classifiers are only defined for unsigned char values (or
   * EOF), so a plain char that may be negative must be converted first. */
  while(isspace((unsigned char)*buffer))
    buffer++;
  return (*buffer=='\0' || *buffer=='#');
}  

/*
 * Tell whether a line of the machine file opens a cluster.
 *
 * buffer - NUL-terminated line to examine.
 *
 * Returns 1 when the first non-blank character of the line is '{',
 * 0 otherwise (including for an empty or all-blank line).
 */
int MPC_Is_left_brace(char *buffer)
{
  /* <ctype.h> classifiers are only defined for unsigned char values (or
   * EOF), so a plain char that may be negative must be converted first. */
  while(isspace((unsigned char)*buffer))
    buffer++;
  return (*buffer=='{');
}

/*
 * Tell whether a line of the machine file closes a cluster.
 *
 * buffer - NUL-terminated line to examine.
 *
 * Returns 1 when the first non-blank character of the line is '}',
 * 0 otherwise (including for an empty or all-blank line).
 */
int MPC_Is_right_brace(char *buffer)
{
  /* <ctype.h> classifiers are only defined for unsigned char values (or
   * EOF), so a plain char that may be negative must be converted first. */
  while(isspace((unsigned char)*buffer))
    buffer++;
  return (*buffer=='}');
}

/*
 * Collapse the table of N node names into the set of distinct names.
 *
 * names        - N node names, one per process rank.
 * actual_ranks - receives a newly allocated array of N ints; its leading
 *                entries are the ranks at which each distinct name first
 *                appears, in increasing order.
 * v2a          - receives a newly allocated array of N ints; entry j is -1
 *                when rank j is the first holder of its name, otherwise the
 *                rank of that first holder.
 *
 * Returns the number of distinct names.  The caller owns both arrays.
 */
int actual_num(char (*names)[MAX_NNL], int **actual_ranks, int **v2a)
{
  char **alive;   /* alive[r] is NULL once rank r is known to be a duplicate */
  int distinct, first, later, slot;

  distinct=N;
  alive=calloc(N, sizeof(*alive));
  *v2a=calloc(N,sizeof(int));
  *actual_ranks=calloc(N, sizeof(int));

  for(first=0; first<N; first++) {
    alive[first]=names[first];
    (*v2a)[first]=-1;
  }

  slot=0;
  for(first=0; first<N; first++) {
    if(alive[first]==NULL)
      continue;
    (*actual_ranks)[slot]=first;
    slot++;
    /* Strike out every later rank that carries the same name. */
    for(later=first+1; later<N; later++) {
      if(alive[later]!=NULL && strcmp(alive[later],alive[first])==0) {
        distinct--;
        (*v2a)[later]=first;
        alive[later]=NULL;
      }
    }
  }

  free(alive);
  return distinct;
}

/*
 * Sequential dense matrix product z = x * y.
 *
 * x, y - input n-by-n matrices stored row by row.
 * z    - output n-by-n matrix stored row by row; every element is overwritten.
 * n    - matrix order.
 */
void SeqMult(double *x, double *y, double *z, int n)
{
  int row, col, inner;

  for(row=0; row<n; row++)
    {
      double *xrow=x+row*n;
      double *zrow=z+row*n;

      for(col=0; col<n; col++)
        {
          double acc=0.0;

          for(inner=0; inner<n; inner++)
            acc += xrow[inner]*y[inner*n+col];
          zrow[col]=acc;
        }
    }
}

/*
 * Allocate and fill the two n-by-n test matrices used by the benchmark.
 *
 * x, y - receive newly allocated row-major matrices that the caller must
 *        free().  *x holds 2.0 on the diagonal and *y holds 3.0 on the
 *        diagonal; every off-diagonal element of both is 0.001.
 * n    - matrix order; a value <= 0 yields empty matrices.
 *
 * Returns 0 on success.  Returns -1, with *x and *y set to NULL, when the
 * element count overflows or memory cannot be allocated.
 */
int Input(double **x, double **y, int n)
{
  size_t dim, count, i, j;
  double *a, *b;

  *x=NULL;
  *y=NULL;

  /* Do the size arithmetic in size_t so n*n cannot overflow an int. */
  dim=(n>0) ? (size_t)n : 0;
  count=dim*dim;
  if(dim!=0 && count/dim!=dim)
    return -1;

  /* calloc(0, ...) may legitimately return NULL; asking for at least one
   * element makes NULL an unambiguous failure indication. */
  a=calloc(count ? count : 1, sizeof(double));
  b=calloc(count ? count : 1, sizeof(double));
  if(a==NULL || b==NULL)
    {
      free(a);
      free(b);
      return -1;
    }

  for(i=0; i<dim; i++)
    for(j=0; j<dim; j++)
      {
        a[i*dim+j]=(i==j) ? 2.0 : 0.001;
        b[i*dim+j]=(i==j) ? 3.0 : 0.001;
      }

  *x=a;
  *y=b;
  return 0;
}

main(int argc, char **argv)
{
  FILE *finput, *foutput/*, *foutput1*/;
  char *mpctopo;
  int nprocs;
  static tNodeAttribute my_attribute;
  int ITERS;
  /* Group data */
  MPI_Group MPI_GROUP_WORLD, APgroup;
  MPI_Comm APcomm;
  /* Data for derived type */
  static int blocklengths[3]={1,1,1};
  static MPI_Aint displacements[3];
  static MPI_Datatype types[3], attrtype;
  struct utsname node_info;
  static int *basic_ranks, *virtual2basic;
  int (*clu_stack)[2], clu_pointer, clu_num, cur_proc_num, proc_num;
  int *num_of_processors;
        
  mpctopo=getenv("MPCTOPO");
  if(mpctopo==NULL)
    {
      printf("mpccreate: cannot find environmental variable MPCTOPO.\n");
      exit(-1);
    }  
  ITERS=200;
  displacements[1]=
    (char *)&my_attribute.memory-(char *)&my_attribute.speed;
  displacements[2]=2*displacements[1];
  uname(&node_info);
  strcpy(myname, node_info.nodename);
  /*
   * Initialize the World.
   */
  /*
   * The i-th element of the table 'procs' will contain (for i-th virtual
   * processor (i=0,...,N-1):
   * - speed measured in floating muliplicaitions per second,
   * - memory size measured in Kbyte,
   * - scalability mesured in processes running without loss of speed
   */
  MPI_Init( &argc, &argv);
  MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
  N=nprocs;
  types[0]=MPI_DOUBLE;
  types[1]=MPI_DOUBLE;
  types[2]=MPI_DOUBLE;
  MPI_Type_struct(3, blocklengths, displacements, types, &attrtype);
  MPI_Type_commit(&attrtype);
  
  
  IF_HOST
    {
      FILE *f;

      procs=calloc(N, sizeof(tNodeAttribute));
      nodenames=calloc(N, MAX_NNL*sizeof(char));
      strcpy(buf, mpctopo);
      strcat(buf, "/");
      strcat(buf, argv[1]);
      f=fopen(buf, "r");
      clu_num=1;
      while(!feof(f))
        {
         buf[0] = (char)0;
          fgets(buf,BUFSIZE,f);
          if (MPC_Is_comment(buf))
            ;
          else if(MPC_Is_left_brace(buf))
            clu_num+=2;
          else
            clu_num++;
        }
      fclose(f);
      buf[0] = (char)0;
      strcpy(buf, mpctopo);
      strcat(buf, "/");
      strcat(buf, argv[1]);
      f=fopen(buf, "r");
      clu_stack=calloc(clu_num, sizeof(int[2]));
      clu_pointer=0;
      cur_proc_num=0;
      clu_stack[0][0]=0;
      while (!feof(f))
        {
          buf[0] = (char)0;
          fgets(buf,BUFSIZE,f);
          if (MPC_Is_comment(buf))
            ;
          else if(MPC_Is_left_brace(buf))
            {
              clu_pointer++;
              clu_stack[clu_pointer][0]=LEFT_BRACE;
              clu_pointer++;
              clu_stack[clu_pointer][0]=cur_proc_num;
              clu_stack[clu_pointer][1]=-1;
            }
          else if(MPC_Is_right_brace(buf))
            {
              int i=clu_pointer;
              while(clu_stack[i][1]>=0)
                i--;
              clu_stack[i][1]=cur_proc_num-1;
              clu_pointer++;
              clu_stack[clu_pointer][0]=RIGHT_BRACE;
            }
          else
            {
              sscanf(buf, "%[^ \t\b]", name);
              sscanf(buf+strlen(name), "%d", &proc_num);
              clu_pointer++;
              clu_stack[clu_pointer][0]=cur_proc_num;
              cur_proc_num+=proc_num;
              clu_stack[clu_pointer][1]=cur_proc_num-1;
            }
        } /* while */
      clu_stack[0][1]=cur_proc_num-1;
      fclose(f);
    }
    MPI_Gather(&myname, MAX_NNL, MPI_CHAR, nodenames, MAX_NNL,
               MPI_CHAR, HOST, MPI_COMM_WORLD);
    if(myrank!=HOST)
      nodenames=calloc(N, MAX_NNL*sizeof(char));
    MPI_Bcast(nodenames, N*MAX_NNL, MPI_CHAR, HOST, MPI_COMM_WORLD);
    
    IF_HOST {
      FILE *f;
      int i, j;
      static char a1[]="   ", a2[]="(*)";
      char *pleftbracket, *pfirstdigit;
      
      actual_N=actual_num(nodenames, &basic_ranks, &virtual2basic);
      numps=calloc(actual_N, sizeof(int));
      num_of_processors=calloc(actual_N, sizeof(int));
      buf[0] = (char)0;
      strcpy(buf, mpctopo);
      strcat(buf, "/");
      strcat(buf, argv[1]);
      f=fopen(buf, "r");
      i=0;
      while(!feof(f))
        {
         buf[0] = (char)0;
          fgets(buf,BUFSIZE,f);
          if (MPC_Is_comment(buf))
            ;
          else if(MPC_Is_left_brace(buf))
            ;
          else if(MPC_Is_right_brace(buf))
            ;
          else
            {
              sscanf(buf, "%[^ \t\b]", name);
              pleftbracket=strchr(buf+strlen(name), '[');
              if(pleftbracket!=NULL)
                {
                  if((pfirstdigit=strpbrk(pleftbracket+1, "0123456789"))!=NULL)
                    num_of_processors[i]=atoi(pfirstdigit);
                }
              i++;
            }
        }
      fclose(f);
    }
    MPI_Bcast(&actual_N, 1, MPI_INT, HOST, MPI_COMM_WORLD);
    if(myrank!=HOST)
      basic_ranks=calloc(actual_N, sizeof(int));
    MPI_Bcast(basic_ranks, actual_N, MPI_INT, HOST, MPI_COMM_WORLD);
    MPI_Group_incl(MPI_GROUP_WORLD, actual_N, basic_ranks,
                   &APgroup);
    MPI_Comm_create(MPI_COMM_WORLD, APgroup, &APcomm);
    if(APcomm!=MPI_COMM_NULL)
      MPI_Comm_rank(APcomm, &myAPrank);
    
    if(APcomm!=MPI_COMM_NULL)
      {
        int hm=-1, *ranks;
        strcpy(buf, mpctopo);
        strcat(buf, "/");
        strcat(buf, argv[1]);
        strcat(buf, ".topo");
        if((foutput=fopen(buf, "w"))==NULL)
          {
            error=1;
            hm=myrank;
          }
        MPI_Reduce(&error, &sumerr, 1, MPI_INT, MPI_SUM, HOST, APcomm);
        IF_HOST {
          int i;
          if(sumerr)
            printf("mpccreate: can not create file %s%s on %d processors:\n",
                   argv[1], ".topo", sumerr);
          else
            printf("mpccreate: wait for creation \'%s\'\n", argv[1]);
          ranks=calloc(actual_N, sizeof(int));
          for(i=0; i<actual_N; i++)
            ranks[i]=-1;
        }
        MPI_Gather(&hm, 1, MPI_INT, ranks, 1, MPI_INT, HOST, APcomm);
        IF_HOST {
          int i;
          if(sumerr)
            for(i=0; i<actual_N; i++)
              if(ranks[i]>=0)
                printf("           %s\n", nodenames[ranks[i]]);
          free(ranks);
        }
      }
    MPI_Bcast(&sumerr, 1, MPI_INT, HOST, MPI_COMM_WORLD);
    if(sumerr)
      {
        MPI_Finalize();
        exit(-1);
      }
    
    {
      double *a, *b, *c;
      double  diff;
      double start1, start2, end1, end2;
      struct tms s, e;
      int i, j;
      
      Input(&a, &b, ITERS);
      c=calloc(ITERS*ITERS, sizeof(double));
      
      MPI_Barrier(MPI_COMM_WORLD);
      if(APcomm!=MPI_COMM_NULL) start1=MPI_Wtime();
      times(&s);
      SeqMult(a, b, c, ITERS);
      times(&e);
      if(APcomm!=MPI_COMM_NULL) end1=MPI_Wtime();
      MPI_Barrier(MPI_COMM_WORLD);
      
      diff=e.tms_utime-s.tms_utime;
      
      MPI_Barrier(MPI_COMM_WORLD);
      if(APcomm!=MPI_COMM_NULL)
        {
          start2=MPI_Wtime();
          SeqMult(a, b, c, ITERS);
          end2=MPI_Wtime();
        }
      MPI_Barrier(MPI_COMM_WORLD);
      free(a);
      free(b);
      free(c);
      if(APcomm!=MPI_COMM_NULL)
        my_attribute.scalability=(end2-start2)/(end1-start1);
      
      my_attribute.speed=sysconf(_SC_CLK_TCK)/diff*MPC_POWER_COEFF;
      
      my_attribute.memory=0
        /*sysconf(_SC_PHYS_PAGES)*(sysconf(_SC_PAGESIZE)/Kb)*/;
      
      MPI_Gather(&my_attribute, 1, attrtype, procs, 1,
                 attrtype, HOST, MPI_COMM_WORLD);
      
      IF_HOST {
        int i, j, k, probes;
        processors=calloc(actual_N, sizeof(tNodeAttribute));
        for(i=0; i<actual_N; i++) {
          k=basic_ranks[i];
          for(j=k+1, probes=1; j<N; j++)
            if(virtual2basic[j]==k) {
              procs[k].speed+=procs[j].speed;
              probes++;
            }
          procs[k].speed/=probes;
          processors[i]=procs[k];
          numps[i]=probes;
        }
        free(virtual2basic);
      }
      {
        int i,j,k;
        double start, end;
        double speeds[REPS][MPC_TRANSFER_DATA_RANGE];
        int rep_count;
        int source, dest;
        int block_size;
        char *buff;
        MPI_Status status;
        
        if(APcomm!=MPI_COMM_NULL&&myrank!=HOST)
          processors=calloc(actual_N, sizeof(tNodeAttribute));
        if(APcomm!=MPI_COMM_NULL&&myrank!=HOST)
          numps=calloc(actual_N, sizeof(int));
        
        if(APcomm!=MPI_COMM_NULL&&myrank!=HOST)
          num_of_processors=calloc(actual_N, sizeof(int));
        
        if(APcomm!=MPI_COMM_NULL)
          MPI_Bcast(processors, actual_N, attrtype, HOST, APcomm);
        if(APcomm!=MPI_COMM_NULL)
          MPI_Bcast(numps, actual_N, MPI_INT, HOST, APcomm);
        
        if(APcomm!=MPI_COMM_NULL)
          MPI_Bcast(num_of_processors, actual_N, MPI_INT, HOST, APcomm);
        
        MPI_Bcast(&cur_proc_num, 1, MPI_INT, HOST, MPI_COMM_WORLD);
        MPI_Bcast(&clu_num, 1, MPI_INT, HOST, MPI_COMM_WORLD);
        if(myrank!=HOST)
          clu_stack=calloc(clu_num, sizeof(int[2]));
        MPI_Bcast(clu_stack, clu_num*2, MPI_INT, HOST, MPI_COMM_WORLD);

        source=clu_stack[0][0];
        dest=clu_stack[0][1];
        for(rep_count=0; rep_count<REPS; rep_count++) {
          for(k=0, block_size=MPC_SIZE_OF_DATA_PACK;
              k<MPC_TRANSFER_DATA_RANGE; k++, block_size*=MPC_SIZE_OF_DATA_PACK)
            {
              buff=malloc(block_size);
              MPI_Barrier(MPI_COMM_WORLD);
              if(source!=dest) {
                if(myrank==source)
                  {
                    start=MPI_Wtime();
                    MPI_Send(buff,block_size,MPI_CHAR,dest,0,MPI_COMM_WORLD);
                    MPI_Recv(buff,block_size,MPI_CHAR,dest,0,MPI_COMM_WORLD, &status);
                    end=MPI_Wtime();
                    speeds[rep_count][k]=2.0*block_size/(end-start);
                  }
                if(myrank==dest)
                  {
                    MPI_Recv(buff,block_size,MPI_CHAR,source,0,MPI_COMM_WORLD, &status);
                    MPI_Send(buff,block_size,MPI_CHAR,source,0,MPI_COMM_WORLD);
                  }
              }
              else {
                char *bufff;
                bufff=malloc(block_size);
                start=MPI_Wtime();
                memcpy(bufff,buff,block_size);
                end=MPI_Wtime();
                speeds[rep_count][k]=block_size/(end-start);
                free(bufff);              
              }
              free(buff);
            }
          MPI_Bcast(speeds[rep_count], MPC_TRANSFER_DATA_RANGE, MPI_DOUBLE,
                    source, MPI_COMM_WORLD);
        }
        if(APcomm!=MPI_COMM_NULL) {
          for(rep_count=1; rep_count<REPS; rep_count++)
            for(k=0; k<MPC_TRANSFER_DATA_RANGE; k++)
              speeds[0][k]+=speeds[rep_count][k];
          for(k=0; k<MPC_TRANSFER_DATA_RANGE; k++)
            speeds[0][k]/=REPS;
          
          for(k=0, block_size=MPC_SIZE_OF_DATA_PACK; k<MPC_TRANSFER_DATA_RANGE;
              k++, block_size*=MPC_SIZE_OF_DATA_PACK)
            fprintf(foutput, "c%d%c", (int)(speeds[0][k]),
                    (k!=MPC_TRANSFER_DATA_RANGE-1)?' ':'\n');
        }
        for(j=1, i=0; j<clu_num; j++)
          if(clu_stack[j][0]==LEFT_BRACE) {
            if(APcomm!=MPI_COMM_NULL)
              fprintf(foutput, "{\n");
            j++;
            source=clu_stack[j][0];
            dest=clu_stack[j][1];
            for(rep_count=0; rep_count<REPS; rep_count++) {
              for(k=0, block_size=MPC_SIZE_OF_DATA_PACK;
                  k<MPC_TRANSFER_DATA_RANGE; k++, block_size*=MPC_SIZE_OF_DATA_PACK)
                {
                  buff=malloc(block_size);
                  MPI_Barrier(MPI_COMM_WORLD);
                  if(source!=dest) {
                    if(myrank==source)
                      {
                        start=MPI_Wtime();
                        MPI_Send(buff,block_size,MPI_CHAR,dest,0,MPI_COMM_WORLD);
                        MPI_Recv(buff,block_size,MPI_CHAR,dest,0,MPI_COMM_WORLD,
                                 &status);
                        end=MPI_Wtime();
                        speeds[rep_count][k]=2.0*block_size/(end-start);
                      }
                    if(myrank==dest)
                      {
                        MPI_Recv(buff,block_size,MPI_CHAR,source,0,
                                 MPI_COMM_WORLD, &status);
                        MPI_Send(buff,block_size,MPI_CHAR,source,0,MPI_COMM_WORLD);
                      }
                  }
                  else {
                    char *bufff;
                    bufff=malloc(block_size);
                    start=MPI_Wtime();
                    memcpy(bufff,buff,block_size);
                    end=MPI_Wtime();
                    speeds[rep_count][k]=block_size/(end-start);
                    free(bufff);              
                  }
                  free(buff);
                }
              MPI_Bcast(speeds[rep_count], MPC_TRANSFER_DATA_RANGE, MPI_DOUBLE,
                        source, MPI_COMM_WORLD);
            }
            if(APcomm!=MPI_COMM_NULL) {
              for(rep_count=1; rep_count<REPS; rep_count++)
                for(k=0; k<MPC_TRANSFER_DATA_RANGE; k++)
                  speeds[0][k]+=speeds[rep_count][k];
              for(k=0; k<MPC_TRANSFER_DATA_RANGE; k++)
                speeds[0][k]/=REPS;
              
              for(k=0, block_size=MPC_SIZE_OF_DATA_PACK; k<MPC_TRANSFER_DATA_RANGE;
                  k++, block_size*=MPC_SIZE_OF_DATA_PACK)
                fprintf(foutput, "c%d%c", (int)(speeds[0][k]),
                        (k!=MPC_TRANSFER_DATA_RANGE-1)?' ':'\n');
            }
          }
          else if(clu_stack[j][0]==RIGHT_BRACE) {
            if(APcomm!=MPI_COMM_NULL)
              fprintf(foutput, "}\n");
          }
          else {
            if(APcomm!=MPI_COMM_NULL)
              {
                fprintf(foutput, "#%s\ns%d p%d n%d ", nodenames[basic_ranks[i]],
                        num_of_processors[i]?num_of_processors[i]:nearest(numps[i]*processors[i].scalability),
                        (int)(processors[i].speed)+1,
                        numps[i]);
              }
            i++;
            source=clu_stack[j][0];
            dest=clu_stack[j][1];
            for(rep_count=0; rep_count<REPS; rep_count++) {
              for(k=0, block_size=MPC_SIZE_OF_DATA_PACK;
                  k<MPC_TRANSFER_DATA_RANGE; k++, block_size*=MPC_SIZE_OF_DATA_PACK)
                {
                  buff=malloc(block_size);
                  MPI_Barrier(MPI_COMM_WORLD);
                  if(source!=dest) {
                    if(myrank==source)
                      {
                        start=MPI_Wtime();
                        MPI_Send(buff,block_size,MPI_CHAR,dest,0,MPI_COMM_WORLD);
                        MPI_Recv(buff,block_size,MPI_CHAR,dest,0,MPI_COMM_WORLD,
                                 &status);
                        end=MPI_Wtime();
                        speeds[rep_count][k]=2.0*block_size/(end-start);
                      }
                    if(myrank==dest)
                      {
                        MPI_Recv(buff,block_size,MPI_CHAR,source,0,
                                 MPI_COMM_WORLD, &status);
                        MPI_Send(buff,block_size,MPI_CHAR,source,0,MPI_COMM_WORLD);
                      }
                  }
                  else {
                    char *bufff;
                    bufff=malloc(block_size);
                    start=MPI_Wtime();
                    memcpy(bufff,buff,block_size);
                    end=MPI_Wtime();
                    speeds[rep_count][k]=block_size/(end-start);
                    free(bufff);              
                  }
                  free(buff);
                }
              MPI_Bcast(speeds[rep_count], MPC_TRANSFER_DATA_RANGE, MPI_DOUBLE,
                        source, MPI_COMM_WORLD);
            }
            if(APcomm!=MPI_COMM_NULL) {
              for(rep_count=1; rep_count<REPS; rep_count++)
                for(k=0; k<MPC_TRANSFER_DATA_RANGE; k++)
                  speeds[0][k]+=speeds[rep_count][k];
              for(k=0; k<MPC_TRANSFER_DATA_RANGE; k++)
                speeds[0][k]/=REPS;
              
              for(k=0, block_size=MPC_SIZE_OF_DATA_PACK; k<MPC_TRANSFER_DATA_RANGE;
                  k++, block_size*=MPC_SIZE_OF_DATA_PACK)
                fprintf(foutput, "c%d%c", (int)(speeds[0][k]),
                        (k!=MPC_TRANSFER_DATA_RANGE-1)?' ':'\n');
            }
          }
        if(APcomm!=MPI_COMM_NULL) {
          free(numps);
          free(processors);
          free(nodenames);
          fclose(foutput);
        }
      }
    }
    free(basic_ranks);
    free(clu_stack);
    IF_HOST printf("mpccreate: parallel machine \'%s\' is created.\n", argv[1]);
    MPI_Finalize();
    exit(0);
}
      
/*
 * Round x to the nearest integer; a value exactly halfway between two
 * integers is rounded up.
 *
 * Values at or beyond the range of int are clamped to INT_MAX / INT_MIN and
 * NaN yields 0, so the result is defined for every input (the callers can
 * pass the quotient of two measured time intervals).
 */
int nearest(double x)
{
  int low, hi;

  if(x!=x)                      /* NaN compares unequal to itself */
    return 0;
  if(x>=(double)INT_MAX)
    return INT_MAX;
  if(x<=(double)INT_MIN)
    return INT_MIN;

  /* The cast truncates toward zero; step down for negative non-integers
   * so that low <= x < hi always holds. */
  low=(int)x;
  if(x<low)
    low--;
  hi=low+1;
  if(x-low<hi-x)
    return low;
  else
    return hi;
}
