#include "config.h"
#include "fupermod/fupermod.h"
#include "libjacobi.h"
#include <unistd.h>
#include <time.h>
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
/* Near-zero constant (1e-13); presumably a floating-point tolerance.
 * NOTE(review): not referenced anywhere in the visible part of this file. */
#define ZERO 1e-13
/*
 * Identifier of the load-balancing strategy chosen for a run.
 *
 * The numeric values are the ones accepted by the "-a" command-line option
 * (see the help text printed by main) and are also embedded in the names of
 * the output files, so they must stay stable.
 */
typedef enum fupermod_algorithm {
    partial     = 0,    /* -a 0: partial functional performance model */
    full        = 1,    /* -a 1: full functional performance model */
    constant1   = 2,    /* -a 2: constant performance model 1 (small benchmark) */
    constant2   = 3,    /* -a 3: constant performance model 2 (homogeneous benchmark) */
    homogeneous = 4,    /* -a 4: homogeneous distribution */
    manual      = 5     /* -a 5: manual distribution */
} fupermod_algorithm;
/* Forward declarations of the logging helpers defined later in this file. */
void fupermod_balance_init(MPI_Comm comm, int root, fupermod_algorithm algorithm, int D, char* info);
void fupermod_balance_finalise(int size, fupermod_model** models);
/* Log stream for per-iteration balance times; opened by fupermod_balance_init
 * on the root process and closed by fupermod_balance_finalise. */
FILE* balance_times = NULL;
/* Reset to 0 by fupermod_balance_init on the root process; not otherwise
 * touched in the visible part of this file. */
int iter;
/* Reset to 0 by fupermod_balance_init on the root process; not otherwise
 * touched in the visible part of this file. */
int balance;
/* Starts at 1. NOTE(review): never read or written in the visible code. */
int continue_balancing = 1;
/* NOTE(review): never read or written in the visible code; main keeps its own
 * local variable of the same name. */
double threshold;
/* Algorithm identifier recorded by fupermod_balance_init; used in the names
 * and headers of the output files. Starts at -1 until init is called. */
fupermod_algorithm algorithm = -1;
/* Problem size recorded by fupermod_balance_init; used in the names and
 * headers of the output files. */
int D;
/* Wall-clock time captured at the end of fupermod_balance_init (root only);
 * fupermod_balance_finalise reports the time elapsed since then. */
struct timeval total_start;
/*
 * Records the run parameters and, on the root process, opens the
 * balance-times log and writes its header.
 *
 * Collective over comm: every rank contributes its processor name to an
 * MPI_Gather rooted at `root`.
 *
 * comm        communicator whose ranks take part in the run
 * root        rank that owns the log file
 * _algorithm  algorithm identifier, stored in the global `algorithm`
 * _D          problem size, stored in the global `D`
 * info        free-form description written as the second header line;
 *             NULL is written as an empty string
 *
 * On the root the function also resets `iter` and `balance`, opens
 * "balance_times.<algorithm>.<D>.<size>.dat" (size zero-padded to two digits)
 * and starts the `total_start` timer. If the host-name buffer cannot be
 * allocated or the log cannot be opened, the failure is reported on stderr
 * and the whole job is aborted with MPI_Abort.
 */
void fupermod_balance_init(MPI_Comm comm, int root, fupermod_algorithm _algorithm, int _D, char* info) {
    algorithm = _algorithm;
    D = _D;

    int rank;
    MPI_Comm_rank(comm, &rank);
    int size;
    MPI_Comm_size(comm, &size);

    /* Zero the buffer first: the whole fixed-size array is sent below, not
     * only the characters MPI_Get_processor_name fills in. */
    char hostname[MPI_MAX_PROCESSOR_NAME];
    memset(hostname, 0, sizeof hostname);
    int len;
    MPI_Get_processor_name(hostname, &len);

    /* Only the root needs a receive buffer for the gathered names. */
    char *hostnames = NULL;
    if (rank == root) {
        hostnames = malloc((size_t)size * MPI_MAX_PROCESSOR_NAME);
        if (hostnames == NULL) {
            fprintf(stderr, "fupermod_balance_init: out of memory for host names\n");
            MPI_Abort(comm, EXIT_FAILURE);
            return;
        }
    }
    MPI_Gather(hostname, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, hostnames, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, root, comm);

    if (rank != root) {
        return;
    }

    /* "%02d" yields the same text as the former "0%d" / "%d" split. */
    char filename[1024];
    snprintf(filename, sizeof filename, "balance_times.%d.%d.%02d.dat", (int)algorithm, D, size);
    balance_times = fopen(filename, "w");
    if (balance_times == NULL) {
        perror(filename);
        free(hostnames);
        MPI_Abort(comm, EXIT_FAILURE);
        return;
    }

    /* Timestamp for the header; fall back to a placeholder if the local time
     * cannot be obtained or formatted. */
    char datetime[25];
    time_t now = time(NULL);
    struct tm *tm_now = localtime(&now);
    if (tm_now == NULL || strftime(datetime, sizeof datetime, "%d-%m-%y %H:%M", tm_now) == 0) {
        snprintf(datetime, sizeof datetime, "unknown");
    }

    iter = 0;
    balance = 0;

    fprintf(balance_times, "#Fupermod Balance iteration times. Num machines:%d\tAt:%s\n", size, datetime);
    fprintf(balance_times, "# %s \n", info != NULL ? info : "");

    /* One column pair per rank: first the host names ... */
    fprintf(balance_times, "#host\t");
    for (int i = 0; i < size; i++) {
        fprintf(balance_times, " \t%12s\t", hostnames + (size_t)i * MPI_MAX_PROCESSOR_NAME);
    }
    fprintf(balance_times, "\n");

    /* ... then the dist/time column titles. */
    fprintf(balance_times, "#itt \t");
    for (int i = 0; i < size; i++) {
        fprintf(balance_times, "dist%4d\ttime%3d \t", i, i);
    }
    fprintf(balance_times, "\n");

    free(hostnames);
    hostnames = NULL;

    /* Start of the interval reported by fupermod_balance_finalise. */
    gettimeofday(&total_start, NULL);
}
/*
 * Closes the balance-times log and dumps every model's measured points.
 *
 * size    number of entries in `models`
 * models  one model per rank; for each, the points in models[i]->data are
 *         written to "datapoints.<algorithm>.<D>.<i>.dat" as
 *         d, t and complexity(d) / t
 *
 * The total time is measured from `total_start`, which
 * fupermod_balance_init sets on the root process. If the log was never
 * opened the total-time line is skipped; a datapoints file that cannot be
 * created is reported on stderr and skipped, and the remaining ranks are
 * still written.
 */
void fupermod_balance_finalise(int size, fupermod_model** models) {
    struct timeval end;
    gettimeofday(&end, NULL);
    double elapsed = ((end.tv_sec + end.tv_usec / 1000000.) - (total_start.tv_sec + total_start.tv_usec / 1000000.));

    if (balance_times != NULL) {
        fprintf(balance_times, "#total time: %le\n", elapsed);
        fclose(balance_times);
        /* Do not leave a dangling stream behind in the global. */
        balance_times = NULL;
    }

    char filename[1024];
    for (int i = 0; i < size; i++) {
        snprintf(filename, sizeof filename, "datapoints.%d.%d.%d.dat", (int)algorithm, D, i);
        FILE *datapoints = fopen(filename, "w");
        if (datapoints == NULL) {
            perror(filename);
            continue;
        }

        fprintf(datapoints, "# Datapoints for rank:%d, algorithm:%d problem size:%d\n", i, (int)algorithm, D);
        fprintf(datapoints, "#d\ttime\tspeed\n");

        for (int j = 0; j < models[i]->data->count; j++) {
            int d = models[i]->data->points[j].d;
            double t = models[i]->data->points[j].t;
            /* Third column is complexity(d) divided by the measured time. */
            fprintf(datapoints, "%d\t%le\t%le\n", d, t, models[i]->complexity(d) / t);
        }
        fclose(datapoints);
    }
}
int main(int argc, char **argv){
MPI_Init(&argc,&argv);
MPI_Comm comm = MPI_COMM_WORLD;
int rank;
MPI_Comm_rank(comm, &rank);
int size;
MPI_Comm_size(comm, &size);
int root = 0;
int seed = 2;
int D = 100;
int max_itt = 20;
fupermod_algorithm algorithm = 0;
double threshold = 0.01;
int verbose = 0;
int exit = 0;
if(rank == root){
int ret;
while ((ret = getopt(argc, argv, "hD:i:a:vt:")) >=0) {
switch(ret) {
case 'h':
printf("jacobi.c help\n"
""
"Description: Jacobi method for solving a system of liner "
"equations on a heterogeneous cluster. \n"
"\n"
"Usage:\n"
"-D I Size of matrix\n"
"-i I Number of Iterations\n"
"-a I Balancing algorithm. (default: %d)\n"
"\t\t0: Partial functional performance model\n"
"\t\t1: Full functional performance model\n"
"\t\t2: Constant performance model 1 (small benchmark)\n"
"\t\t3: Constant performance model 2 (homogeneous benchmark)\n"
"\t\t4: Homogeneous distribution\n"
"\t\t5: Manual distribution\n"
"-t D Relative threshold between 0 and 1, above which balancing is done\n"
"-v I Verbose mode\n"
, algorithm);
exit = 1;
break;
case 'D':
D = atoi(optarg);
break;
case 'i':
max_itt = atoi(optarg);
break;
case 'a':
algorithm = atoi(optarg);
break;
case 'v':
verbose=1;
break;
case 't':
threshold = atof(optarg);
break;
}
}
}
MPI_Bcast(&exit, 1, MPI_INT, root, comm);
if (exit) {
MPI_Finalize();
return 0;
}
MPI_Bcast(&D, 1, MPI_INT, root, comm);
MPI_Bcast(&max_itt, 1, MPI_INT, root, comm);
MPI_Bcast(&algorithm, 1, MPI_INT, root, comm);
MPI_Bcast(&threshold, 1, MPI_DOUBLE, root, comm);
if (D < size){
fprintf(stderr,"Error!\nMatrix smaller then number of processors\n");
MPI_Finalize();
return FUPERMOD_FAIL;
}
fupermod_dist* distr = ((rank == root) ? fupermod_dist_alloc(size, D) : NULL);
int* d = NULL;
int* old_d = (int *)malloc(size * sizeof(int));
int* offset = (int *)malloc(size * sizeof(int));
jacobi_set_D(D);
fupermod_data** data = (fupermod_data**)malloc(sizeof(fupermod_data*) * size);
fupermod_model** models = (fupermod_model**)malloc(sizeof(fupermod_model*) * size);
int i;
for (i = 0; i < size; i++) {
data[i] = fupermod_data_alloc();
models[i] = fupermod_model_interp_alloc(data[i], jacobi_complexity, D, 0);
}
double *a, *b, *x;
char info[1024];
sprintf(info, "Jacobi. D:%d threshold:%le iterations:%d algorithm:%d", D, threshold, max_itt, algorithm);
fupermod_balance_init(comm, root, algorithm, D, info);
int itt;
for (itt = 0; itt < max_itt; itt++){
if (rank == root)
fprintf(stderr, "P%d starting itt: %d\n", rank, itt);
double diff = 0.0;
MPI_Bcast(d, size, MPI_INT, root, comm);
int i;
for (i=0; i<size; i++) {
offset[i] = i == 0 ? 0 : (offset[i-1] + d[i-1]);
}
if (itt == 0) {
jacobi_fill_matrix(&a, &b, &x, D, d[rank], offset[rank], seed, rank);
MPI_Bcast(b, D, MPI_DOUBLE, root, comm);
} else {
a = jacobi_redistribute(comm, a, D, old_d, d);
}
memcpy(old_d, d, sizeof(int) * size);
fupermod_dynamic balancer = {
fupermod_partition_multiroot,
size,
models,
distr
};
struct timeval start;
gettimeofday(&start, NULL);
double* new_x = jacobi_compute(a, b, x, D, d[rank], offset[rank], &diff);
fupermod_balancer_iterate(&balancer, comm, root, start);
if(verbose) printf("P%d x before: %f %f %f %f\n",rank, x[0], x[1], x[2], x[3]);
MPI_Allgatherv(new_x, old_d[rank], MPI_DOUBLE, x, old_d, offset, MPI_DOUBLE, comm);
if(verbose) printf("P%d x after: %f %f %f %f\n",rank, x[0], x[1], x[2], x[3]);
free(new_x);
if (rank == root && verbose > 0) {
fprintf(stderr, "dist: ");
for (i=0; i<size; i++) {
fprintf(stderr, "%d ", d[i]);
}
fprintf(stderr, "\n");
}
}
if (rank == root)
fupermod_balance_finalise(size, models);
jacobi_final_test(comm, a, x, b, D, old_d, offset);
fupermod_dist_free(distr);
if (rank != root) {
free(d); d = NULL;
}
free(x);
free(b);
free(a);
MPI_Finalize();
return FUPERMOD_SUCCESS;
}