MPIBlib: MPI Benchmark library
Function templates for tree-based algorithms of MPI collective operations
Functions | |
template<typename Builder > | |
int | MPIB_Bcast_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *buffer, int count, MPI_Datatype datatype, int root, MPI_Comm comm) |
template<typename Builder > | |
int | MPIB_Reduce_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm) |
template<typename Builder > | |
int | MPIB_Scatter_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) |
template<typename Builder > | |
int | MPIB_Gather_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) |
template<typename Builder > | |
int | MPIB_Scatterv_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int *sendcounts, int *displs, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm _comm) |
template<typename Builder > | |
int | MPIB_Gatherv_tree_algorithm (Builder builder, MPIB_child_traverse_order order, void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int *recvcounts, int *displs, MPI_Datatype recvtype, int root, MPI_Comm _comm) |
Detailed Description
In addition to the standard arguments of the corresponding MPI collective operation, each function template takes two extra arguments: the communication tree builder and the child traversal order:
template <typename Builder> MPIB_X_tree_algorithm(Builder builder, order args, standard args);
For example, MPIB_Scatter_tree_algorithm is the base tree algorithm of scatter. In a base tree-based algorithm, all point-to-point communications are performed over the communication tree built by the builder, in the order given by the order argument.
Usually, the communication tree is built at all processes independently. If the communication tree can be built only at a designated process, it must then be sent to the other processes along with the data. The Boost Serialization C++ library is used for serialization/deserialization of the communication tree/subtrees in such tree-based algorithms:
#include <boost/graph/adj_list_serialize.hpp> #include <boost/archive/binary_oarchive.hpp> #include <boost/archive/binary_iarchive.hpp> #include <sstream> Graph graph; if (rank == root) { ostringstream oss; archive::binary_oarchive ar(oss); ar << graph; int length = oss.str().length(); MPI_Send((void*)oss.str().c_str(), length, MPI_CHAR, dest, 1, comm); } if (rank == dest) { MPI_Status status; MPI_Probe(root, 1, comm, &status); int length; MPI_Get_count(&status, MPI_CHAR, &length); buffer = (char*)malloc(sizeof(char) * length); MPI_Recv(buffer, length, MPI_CHAR, root, 1, comm, MPI_STATUS_IGNORE); istringstream iss(string(buffer, length)); archive::binary_iarchive ar(iss); ar >> graph; free(buffer); }
The internal part of the tree-based implementation includes the following auxiliary classes, which are grouped into namespaces in order to avoid duplicate names:
- Tree visitors traverse the communication tree, for example, in order to assemble the data buffer to send or receive:
class Visitor { public: Visitor(args) {...} void preorder(Vertex vertex, Tree& tree) {...} void inorder(Vertex vertex, Tree& tree) {...} void postorder(Vertex vertex, Tree& tree) {...} };
- Note:
- There may be many pointer or reference arguments in the visitor's constructor because visitors are copied by value.
- Property writers print vertex, edge and graph properties during the output of the communication tree:
class Vertex_writer { public: void operator()(std::ostream& out, const Vertex& v) const { out << "[label=\"" << ... << "\"]"; } }; class Edge_writer { public: void operator()(std::ostream& out, const Edge& e) const { out << "[label=\"" << ... << "\"]"; } }; class Graph_writer { public: void operator()(std::ostream& out) const { out << "graph [...]\n"; out << "node [...]\n"; out << "edge [...]\n"; } }; write_graphviz(cout, graph, Vertex_writer(), Edge_writer(), Graph_writer());
- Note:
- Default writers are called when the last three arguments are omitted.
Function Documentation
int MPIB_Bcast_tree_algorithm | ( | Builder | builder, | |
MPIB_child_traverse_order | order, | |||
void * | buffer, | |||
int | count, | |||
MPI_Datatype | datatype, | |||
int | root, | |||
MPI_Comm | comm | |||
) |
Base tree algorithm of bcast
int MPIB_Reduce_tree_algorithm | ( | Builder | builder, | |
MPIB_child_traverse_order | order, | |||
void * | sendbuf, | |||
void * | recvbuf, | |||
int | count, | |||
MPI_Datatype | datatype, | |||
MPI_Op | op, | |||
int | root, | |||
MPI_Comm | comm | |||
) |
Base tree algorithm of reduce.
- Note:
- Does not perform the MPI operation itself; it only allocates memory for the subproduct. To apply the operation, MPI internals should be used, for example, those of Open MPI (see the snippet below). TODO: implement the MPI operation in the reduce tree algorithm.
#ifdef HAVE_OPENMPI_OMPI_OP_OP_H #include <openmpi/ompi/op/op.h> #endif ... int MPIB_Reduce_tree_algorithm(...) { ... #ifdef HAVE_OPENMPI_OMPI_OP_OP_H ompi_op_reduce(op, buffer, sendbuf, count, datatype); #endif ... }
int MPIB_Scatter_tree_algorithm | ( | Builder | builder, | |
MPIB_child_traverse_order | order, | |||
void * | sendbuf, | |||
int | sendcount, | |||
MPI_Datatype | sendtype, | |||
void * | recvbuf, | |||
int | recvcount, | |||
MPI_Datatype | recvtype, | |||
int | root, | |||
MPI_Comm | comm | |||
) |
Base tree algorithm of scatter
int MPIB_Gather_tree_algorithm | ( | Builder | builder, | |
MPIB_child_traverse_order | order, | |||
void * | sendbuf, | |||
int | sendcount, | |||
MPI_Datatype | sendtype, | |||
void * | recvbuf, | |||
int | recvcount, | |||
MPI_Datatype | recvtype, | |||
int | root, | |||
MPI_Comm | comm | |||
) |
Base tree algorithm of gather
int MPIB_Scatterv_tree_algorithm | ( | Builder | builder, | |
MPIB_child_traverse_order | order, | |||
void * | sendbuf, | |||
int * | sendcounts, | |||
int * | displs, | |||
MPI_Datatype | sendtype, | |||
void * | recvbuf, | |||
int | recvcount, | |||
MPI_Datatype | recvtype, | |||
int | root, | |||
MPI_Comm | _comm | |||
) |
Base tree algorithm of scatterv
int MPIB_Gatherv_tree_algorithm | ( | Builder | builder, | |
MPIB_child_traverse_order | order, | |||
void * | sendbuf, | |||
int | sendcount, | |||
MPI_Datatype | sendtype, | |||
void * | recvbuf, | |||
int * | recvcounts, | |||
int * | displs, | |||
MPI_Datatype | recvtype, | |||
int | root, | |||
MPI_Comm | _comm | |||
) |
Base tree algorithm of gatherv