MPIBlib: MPI Benchmark library

Function templates for tree-based algorithms of MPI collective operations

Functions

template<typename Builder >
int MPIB_Bcast_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm)
template<typename Builder >
int MPIB_Reduce_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm)
template<typename Builder >
int MPIB_Scatter_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
template<typename Builder >
int MPIB_Gather_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm)
template<typename Builder >
int MPIB_Scatterv_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm _comm)
template<typename Builder >
int MPIB_Gatherv_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm _comm)

Detailed Description

In addition to the standard arguments of an MPI collective operation, a function template has the communication tree builder and the order arguments:

template <typename Builder>
MPIB_X_tree_algorithm(Builder builder, order args, standard args);

For example, MPIB_Scatter_tree_algorithm is the base tree algorithm of scatter. In a base tree-based algorithm, all point-to-point communications are performed over the communication tree built by the builder, in the order given by the order argument.

Usually, the communication tree is built at all processors independently. If the communication tree can be built only at a designated processor, it must then be sent to the other processes along with the data. The Boost Serialization C++ library is used for serialization/deserialization of the communication tree/subtrees in such tree-based algorithms:

#include <boost/graph/adj_list_serialize.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <sstream>

Graph graph;

if (rank == root) {
    ostringstream oss;
    archive::binary_oarchive ar(oss);
    ar << graph;
    int length = oss.str().length();
    MPI_Send((void*)oss.str().c_str(), length, MPI_CHAR, dest, 1, comm);
}

if (rank == dest) {
    MPI_Status status;
    MPI_Probe(root, 1, comm, &status);
    int length;
    MPI_Get_count(&status, MPI_CHAR, &length);
    buffer = (char*)malloc(sizeof(char) * length);
    MPI_Recv(buffer, length, MPI_CHAR, root, 1, comm, MPI_STATUS_IGNORE);
    istringstream iss(string(buffer, length));
    archive::binary_iarchive ar(iss);
    ar >> graph;
    free(buffer);
}

The internal part of the tree-based implementation includes the following auxiliaries, which are grouped into namespaces in order to avoid duplicates:

  • Tree visitors traverse the communication tree, for example, in order to assemble the data buffer to send or receive:
    class Visitor {
    public:
        Visitor(args) {...}
        void preorder(Vertex vertex, Tree& tree) {...}
        void inorder(Vertex vertex, Tree& tree) {...}
        void postorder(Vertex vertex, Tree& tree) {...}
    };
    
    Note:
    The visitor's constructor may take many pointer or reference arguments, because visitors are copied by value.
  • Property writers print vertex, edge and graph properties during the output of the communication tree:
    class Vertex_writer {
    public:
        void operator()(std::ostream& out, const Vertex& v) const {
            out << "[label=\"" << ... << "\"]";
        }
    };
    
    class Edge_writer {
    public:
        void operator()(std::ostream& out, const Edge& e) const {
            out << "[label=\"" << ... << "\"]";
        }
    };
    
    class Graph_writer {
    public:
        void operator()(std::ostream& out) const {
            out << "graph [...]\n";
            out << "node [...]\n";
            out << "edge [...]\n";
        }
    };
    
    write_graphviz(cout, graph, Vertex_writer(), Edge_writer(), Graph_writer());
    
    Note:
    Default writers are called when the last three arguments are omitted.

Function Documentation

template<typename Builder >
int MPIB_Bcast_tree_algorithm ( Builder  builder,
MPIB_child_traverse_order  order,
void *  buffer,
int  count,
MPI_Datatype  datatype,
int  root,
MPI_Comm  comm 
)

Base tree algorithm of bcast

template<typename Builder >
int MPIB_Reduce_tree_algorithm ( Builder  builder,
MPIB_child_traverse_order  order,
void *  sendbuf,
void *  recvbuf,
int  count,
MPI_Datatype  datatype,
MPI_Op  op,
int  root,
MPI_Comm  comm 
)

Base tree algorithm of reduce.

Note:
Does not perform the MPI reduction operation itself; it only allocates memory for the subproduct (the intermediate result). To apply the operation, MPI internals should be used. For example, with Open MPI:
 #ifdef HAVE_OPENMPI_OMPI_OP_OP_H
 #include <openmpi/ompi/op/op.h>
 #endif
 ...
 int MPIB_Reduce_tree_algorithm(...) {
    ...
 #ifdef HAVE_OPENMPI_OMPI_OP_OP_H
    ompi_op_reduce(op, buffer, sendbuf, count, datatype);
 #endif
    ...
 }
TODO: implement MPI operation in the reduce tree algorithm
template<typename Builder >
int MPIB_Scatter_tree_algorithm ( Builder  builder,
MPIB_child_traverse_order  order,
void *  sendbuf,
int  sendcount,
MPI_Datatype  sendtype,
void *  recvbuf,
int  recvcount,
MPI_Datatype  recvtype,
int  root,
MPI_Comm  comm 
)

Base tree algorithm of scatter

template<typename Builder >
int MPIB_Gather_tree_algorithm ( Builder  builder,
MPIB_child_traverse_order  order,
void *  sendbuf,
int  sendcount,
MPI_Datatype  sendtype,
void *  recvbuf,
int  recvcount,
MPI_Datatype  recvtype,
int  root,
MPI_Comm  comm 
)

Base tree algorithm of gather

template<typename Builder >
int MPIB_Scatterv_tree_algorithm ( Builder  builder,
MPIB_child_traverse_order  order,
void *  sendbuf,
int *  sendcounts,
int *  displs,
MPI_Datatype  sendtype,
void *  recvbuf,
int  recvcount,
MPI_Datatype  recvtype,
int  root,
MPI_Comm  _comm 
)

Base tree algorithm of scatterv

template<typename Builder >
int MPIB_Gatherv_tree_algorithm ( Builder  builder,
MPIB_child_traverse_order  order,
void *  sendbuf,
int  sendcount,
MPI_Datatype  sendtype,
void *  recvbuf,
int *  recvcounts,
int *  displs,
MPI_Datatype  recvtype,
int  root,
MPI_Comm  _comm 
)

Base tree algorithm of gatherv