NimbleSM
NimbleSM is a solid mechanics simulation code for dynamic systems
nimble.mpi.rank_clique_reducer.h
/*
//@HEADER
// ************************************************************************
//
// NimbleSM
// Copyright 2018
// National Technology & Engineering Solutions of Sandia, LLC (NTESS)
//
// Under the terms of Contract DE-NA0003525 with NTESS, the U.S. Government
// retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY EXPRESS OR IMPLIED
// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL NTESS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact David Littlewood (djlittl@sandia.gov)
//
// ************************************************************************
//@HEADER
*/

#ifndef NIMBLE_MPI_RANK_CLIQUE_REDUCER
#define NIMBLE_MPI_RANK_CLIQUE_REDUCER

#include <mpi.h>

#include <algorithm>
#include <memory>
#include <stdexcept>
#include <vector>

#include "nimble_macros.h"

#include "nimble.mpi.mpicontext.h"
#include "nimble.quanta.arrayview.h"

namespace nimble {
struct ReductionClique_t
{
  std::unique_ptr<int[]>    indices;
  std::unique_ptr<double[]> sendbuffer;
  std::unique_ptr<double[]> recvbuffer;
  int                       buffersize;
  int                       n_indices;
  MPI_Comm                  clique_comm;
  MPI_Request               Iallreduce_request;
  bool                      exists_active_asyncreduce_request = false;

  ReductionClique_t() {}
  /* README
  All processors calling MPI_Comm_split with the same comm_color are grouped
  into the same communicator. This means that this ReductionClique_t will
  share a clique_comm with all the other ReductionClique_ts created with that
  comm_color. Concretely, if a set of ranks R all share a set of nodes N (so
  that every rank in R has every node in N), all ranks in R are grouped into
  a single MPI_Comm (identified by the comm_color). The bookkeeping for this
  new MPI_Comm is stored in a ReductionClique_t struct, along with the
  bookkeeping necessary to call Allreduce to reduce the data for all the
  nodes in N. See
  http://mpitutorial.com/tutorials/introduction-to-groups-and-communicators/
  for a reference.
  */
  ReductionClique_t(const std::vector<int>& index_list, int buffersize, int comm_color, const mpicontext& context)
      : indices{new int[index_list.size()]},
        sendbuffer{new double[buffersize]},
        recvbuffer{new double[buffersize]},
        buffersize{buffersize},
        n_indices{(int)index_list.size()},
        // Actual instantiation of clique_comm is performed by MPI_Comm_split
        clique_comm{}
  {
    std::copy_n(index_list.data(), index_list.size(), indices.get());
    // printf("rank %d: Splitting %d with comm color %d\n", context.get_rank(),
    // context.get_comm(), comm_color);
    MPI_Comm_split(context.get_comm(), comm_color, context.get_rank(), &clique_comm);
  }
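  // Usage sketch (illustrative, not part of the original header): suppose
  // ranks 0 and 1 both own shared boundary nodes {4, 7, 9}. Both ranks pass
  // the same comm_color for that node set, so MPI_Comm_split places them in
  // a common clique_comm. The names shared_nodes, comm_color, and context
  // below are hypothetical:
  //
  //   std::vector<int> shared_nodes{4, 7, 9};
  //   int comm_color = 3;
  //   ReductionClique_t clique(shared_nodes, 9, comm_color, context);  // 3 nodes * 3 components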
  ReductionClique_t(const std::vector<int>& index_list, int buffersize, MPI_Comm clique_comm)
      : indices{new int[index_list.size()]},
        sendbuffer{new double[buffersize]},
        recvbuffer{new double[buffersize]},
        buffersize{buffersize},
        n_indices{(int)index_list.size()},
        clique_comm{clique_comm}
  {
    std::copy_n(index_list.data(), index_list.size(), indices.get());
  }
  ReductionClique_t(ReductionClique_t&& source) = default;
  ReductionClique_t&
  operator=(ReductionClique_t&& source) = default;
  bool
  okayfieldsizeQ(int field_size)
  {
    return field_size * n_indices <= buffersize;
  }
  void
  fitnewfieldsize(int new_field_size)
  {
    resizebuffer(n_indices * new_field_size);
  }
  void
  resizebuffer(int newbuffersize)
  {
    sendbuffer.reset(new double[newbuffersize]);
    recvbuffer.reset(new double[newbuffersize]);
    buffersize = newbuffersize;
  }
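  // Sizing sketch (illustrative): with n_indices == 100 shared nodes and a
  // 3-component field, okayfieldsizeQ(3) requires buffersize >= 300 doubles.
  // A caller can grow the buffers before reducing a wider field:
  //
  //   if (!clique.okayfieldsizeQ(3)) { clique.fitnewfieldsize(3); }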

  template <int field_size>
  void
  pack(double* source)
  {
    double* destscan  = sendbuffer.get();
    int *   index_ptr = indices.get(), *index_ptr_end = index_ptr + n_indices;
    for (; index_ptr < index_ptr_end; ++index_ptr) {
      double* sourcescan = source + (*index_ptr) * field_size;
      for (int j = 0; j < field_size; ++j) {
        *destscan = *sourcescan;
        ++destscan;
        ++sourcescan;
      }
    }
  }
  template <int field_size>
  void
  unpack(double* dest)
  {
    double* sourcescan = recvbuffer.get();
    int *   index_ptr  = indices.get(), *index_ptr_end = index_ptr + n_indices;
    for (; index_ptr < index_ptr_end; ++index_ptr) {
      double* destscan = dest + (*index_ptr) * field_size;
      for (int j = 0; j < field_size; ++j) {
        *destscan = *sourcescan;
        ++destscan;
        ++sourcescan;
      }
    }
  }
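  // Layout sketch (illustrative): with indices == {4, 7} and field_size == 2,
  // pack<2>(source) gathers the node-major entries
  //   sendbuffer = { source[8], source[9], source[14], source[15] }
  // (both components of node 4, then node 7), and unpack<2>(dest) scatters
  // recvbuffer back through the same index map.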
  template <int field_size, class Lookup>
  void
  pack(Lookup& source)
  {
    double* destscan = sendbuffer.get();

    for (int index : quanta::arrayview_t<int>(indices.get(), n_indices)) {
      for (int j = 0; j < field_size; ++j) { *destscan++ = source(index, j); }
    }
  }
  template <int field_size, class Lookup>
  void
  unpack(Lookup& dest)
  {
    double* sourcescan = recvbuffer.get();
    for (int index : quanta::arrayview_t<int>(indices.get(), n_indices)) {
      for (int j = 0; j < field_size; ++j) { dest(index, j) = *sourcescan++; }
    }
  }
  void
  EnsureDataSafety(int field_size)
  {
    if (!okayfieldsizeQ(field_size)) { throw std::invalid_argument("Field size too big"); }
  }
  // Performs a blocking Allreduce
  template <int field_size, class Lookup>
  void
  reduce(Lookup&& data)
  {
    EnsureDataSafety(field_size);
    pack<field_size>(data);
    MPI_Allreduce(sendbuffer.get(), recvbuffer.get(), n_indices * field_size, MPI_DOUBLE, MPI_SUM, clique_comm);
    unpack<field_size>(data);
  }
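  // Usage sketch (illustrative): summing a 3-component force field over the
  // clique. Lookup only needs an operator()(int node, int component) that
  // yields a double&; force_view is a hypothetical such accessor:
  //
  //   clique.reduce<3>(force_view);
  //
  // On return, every rank in the clique holds the summed values at the
  // shared nodes.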
  // Copies data to sendbuffer and starts an asynchronous allreduce operation.
  // asyncreduce_finalize must be called for the data to be copied from the
  // recvbuffer to the databuffer.
  template <int field_size, class Lookup>
  void
  asyncreduce_initialize(Lookup&& databuffer)
  {
    if (exists_active_asyncreduce_request) {
      NIMBLE_ABORT(
          "asyncreduce_initialize(data) was called "
          "when an active asynchronous reduce already exists");
    }

    EnsureDataSafety(field_size);
    pack<field_size>(databuffer);
    MPI_Iallreduce(
        sendbuffer.get(),
        recvbuffer.get(),
        n_indices * field_size,
        MPI_DOUBLE,
        MPI_SUM,
        clique_comm,
        &Iallreduce_request);
    exists_active_asyncreduce_request = true;
  }
  // Returns true if the currently active asynchronous reduce request has
  // completed. Returns false if the currently active asynchronous reduce
  // request hasn't completed. Throws an exception if there's no currently
  // active asynchronous reduce request.
  bool
  Iallreduce_completedQ()
  {
    if (exists_active_asyncreduce_request) {
      int flag = 0;
      MPI_Test(&Iallreduce_request, &flag, MPI_STATUS_IGNORE);
      // flag is set to true if the allreduce has completed
      return flag != 0;
    } else {
      NIMBLE_ABORT(
          "Iallreduce_completedQ() was called "
          "without an active asynchronous reduce request.");
    }
  }
  // Returns true and unpacks data if the currently active asynchronous
  // reduce request has completed. Returns false if the currently active
  // asynchronous reduce request hasn't completed. Throws an exception if
  // there's no currently active asynchronous reduce request.
  template <int field_size, class Lookup>
  bool
  asyncreduce_finalize(Lookup&& databuffer)
  {
    if (exists_active_asyncreduce_request) {
      if (Iallreduce_completedQ()) {
        unpack<field_size>(databuffer);
        exists_active_asyncreduce_request = false;
        return true;
      } else {
        return false;
      }
    } else {
      NIMBLE_ABORT(
          "asyncreduce_finalize(data) was called "
          "without an active asynchronous reduce request.");
    }
  }
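  // Usage sketch (illustrative): overlapping the reduction with local work.
  // force_view and the work functions are hypothetical:
  //
  //   clique.asyncreduce_initialize<3>(force_view);
  //   do_local_work();  // compute that doesn't touch the shared nodes
  //   while (!clique.asyncreduce_finalize<3>(force_view)) {
  //     do_more_local_work();  // poll until the MPI_Iallreduce completes
  //   }
  //
  // asyncreduce_finalize returns false while the reduction is in flight and
  // unpacks the result (returning true) once MPI_Test reports completion.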
  int
  GetNumIndices()
  {
    return n_indices;
  }
  int const*
  GetIndices()
  {
    return indices.get();
  }
  std::vector<int>
  GetMPICommWorldRanks()
  {
    int my_mpi_comm_world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_mpi_comm_world_rank);
    int clique_comm_size;
    MPI_Comm_size(clique_comm, &clique_comm_size);
    std::vector<int> mpi_comm_world_ranks(clique_comm_size);
    MPI_Allgather(&my_mpi_comm_world_rank, 1, MPI_INT, mpi_comm_world_ranks.data(), 1, MPI_INT, clique_comm);
    std::sort(mpi_comm_world_ranks.begin(), mpi_comm_world_ranks.end());
    return mpi_comm_world_ranks;
  }
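  // Example (illustrative): if a clique groups MPI_COMM_WORLD ranks 5, 0,
  // and 2, every member receives the same sorted vector {0, 2, 5}.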
};
}  // namespace nimble

#endif  // NIMBLE_MPI_RANK_CLIQUE_REDUCER