StarPU Internal Handbook
starpu_mpi_private.h
Go to the documentation of this file.
/* StarPU --- Runtime system for heterogeneous multicore architectures.
 *
 * Copyright (C) 2010-2025 University of Bordeaux, CNRS (LaBRI UMR 5800), Inria
 *
 * StarPU is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version.
 *
 * StarPU is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 * See the GNU Lesser General Public License in COPYING.LGPL for more details.
 */
16
17#ifndef __STARPU_MPI_PRIVATE_H__
18#define __STARPU_MPI_PRIVATE_H__
19
20#include <starpu.h>
21#include <common/config.h>
22#include <common/uthash.h>
23#include <starpu_mpi.h>
24#include <starpu_mpi_fxt.h>
25#include <common/list.h>
26#include <common/prio_list.h>
28#include <core/simgrid.h>
29
32#ifdef __cplusplus
33extern "C"
34{
35#endif
36
/* SimGrid-only declarations: everything up to the matching #endif is compiled
 * only when STARPU_SIMGRID is defined. */
37#ifdef STARPU_SIMGRID
38extern starpu_pthread_wait_t _starpu_mpi_thread_wait;
39extern starpu_pthread_queue_t _starpu_mpi_thread_dontsleep;
40
/* NOTE(review): the listing jumps from line 40 to line 42, so the
 * `struct <tag>` line that opens this definition is not visible here and its
 * name cannot be recovered from this page -- restore it from the real header.
 * The four members are pointers with the same types and names as the
 * parameters of _starpu_mpi_simgrid_wait_req() declared below. */
42{
43 MPI_Request *request;
44 MPI_Status *status;
45 starpu_pthread_queue_t *queue;
46 unsigned *done;
47};
48
/* Both functions take `done` by pointer; _starpu_mpi_simgrid_mpi_test() also
 * takes a `flag` pointer.  NOTE(review): presumably both are written by the
 * callee -- confirm against the implementation, which is not part of this
 * header. */
49int _starpu_mpi_simgrid_mpi_test(unsigned *done, int *flag);
50void _starpu_mpi_simgrid_wait_req(MPI_Request *request, MPI_Status *status, starpu_pthread_queue_t *queue, unsigned *done);
51#endif
52
53struct _starpu_mpi_req* _starpu_mpi_isend_cache_aware(starpu_data_handle_t data_handle, int dest, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *_arg, int sequential_consistency, int* cache_flag);
54struct _starpu_mpi_req* _starpu_mpi_irecv_cache_aware(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *_arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int* cache_flag);
55
/* Rank printed as a prefix by the logging and assertion macros of this header;
 * they fetch it with starpu_mpi_comm_rank() while it is still -1. */
extern int _starpu_debug_rank;
/* NOTE(review): presumably maps an MPI error code to a printable string --
 * ownership of the returned buffer is not visible from here; confirm. */
char *_starpu_mpi_get_mpi_error_code(int code);
/* When non-zero, _STARPU_MPI_COMM_DEBUG() prints its trace line (verbose
 * builds only). */
extern int _starpu_mpi_comm_debug;

#ifdef STARPU_MPI_VERBOSE
/* Inclusive bounds of the levels that _STARPU_MPI_DEBUG() prints. */
extern int _starpu_debug_level_min;
extern int _starpu_debug_level_max;
void _starpu_mpi_set_debug_level_min(int level);
void _starpu_mpi_set_debug_level_max(int level);
#endif

/* Global settings of the MPI layer.
 * NOTE(review): presumably initialised by _starpu_mpi_env_init() -- the
 * definitions and their defaults are not part of this header; confirm in the
 * corresponding .c file. */
extern int _starpu_mpi_fake_world_size;
extern int _starpu_mpi_fake_world_rank;
extern int _starpu_mpi_use_prio;
extern int _starpu_mpi_nobind;
extern int _starpu_mpi_thread_cpuid;
extern int _starpu_mpi_thread_multiple_send;
extern int _starpu_mpi_use_coop_sends;
extern int _starpu_mpi_mem_throttle;
extern int _starpu_mpi_recv_wait_finalize;
extern int _starpu_mpi_has_cuda;
extern int _starpu_mpi_cuda_devid;
void _starpu_mpi_env_init(void);
78
/*
 * STARPU_MPI_ASSERT_MSG(x, msg, ...): check that x holds; when it does not,
 * print msg (a printf-style format followed by its arguments) on stderr,
 * prefixed with the MPI rank and the name of the calling function.
 *
 *  - STARPU_NO_ASSERT defined: nothing is checked; x is still compiled but
 *    never evaluated.
 *  - CUDA compiler on Windows: after the message, the failure path writes
 *    through a NULL pointer.
 *  - any other build: assert(x) runs after the check, so x is evaluated twice
 *    (keep it free of side effects) and nothing stops the program when NDEBUG
 *    is defined.
 *
 * msg has to be a string literal: it is concatenated with the prefix and the
 * trailing newlines.  The rank is fetched with starpu_mpi_comm_rank() while
 * _starpu_debug_rank is still -1.
 */
#ifdef STARPU_NO_ASSERT
#  define STARPU_MPI_ASSERT_MSG(x, msg, ...) do { if (0) { (void) (x); }} while(0)
#else
#  if defined(__CUDACC__) && defined(STARPU_HAVE_WINDOWS)
/* NOTE(review): unlike the extern declaration earlier in this header, this
 * line is not `extern`; presumably a workaround specific to that toolchain --
 * confirm before touching it. */
int _starpu_debug_rank;
#    define STARPU_MPI_ASSERT_MSG(x, msg, ...) \
	do \
	{ \
		if (STARPU_UNLIKELY(!(x))) \
		{ \
			if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
			fprintf(stderr, "\n[%d][starpu_mpi][%s][assert failure] " msg "\n\n", _starpu_debug_rank, __starpu_func__, ## __VA_ARGS__); \
			*(int*)NULL = 0; \
		} \
	} while(0)
#  else
#    define STARPU_MPI_ASSERT_MSG(x, msg, ...) \
	do \
	{ \
		if (STARPU_UNLIKELY(!(x))) \
		{ \
			if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
			fprintf(stderr, "\n[%d][starpu_mpi][%s][assert failure] " msg "\n\n", _starpu_debug_rank, __starpu_func__, ## __VA_ARGS__); \
		} \
		assert(x); \
	} while(0)

#  endif
#endif
107
/*
 * Allocation helpers that report a failed allocation through
 * STARPU_MPI_ASSERT_MSG().
 *
 *  _STARPU_MPI_MALLOC(ptr, size)         ptr = malloc(size)
 *  _STARPU_MPI_CALLOC(ptr, nmemb, size)  ptr = calloc(nmemb, size)
 *  _STARPU_MPI_REALLOC(ptr, size)        ptr = realloc(ptr, size)
 *
 * ptr must be an lvalue.  Every macro argument is parenthesised in the
 * expansion, so expressions such as `n + 1` are safe to pass; in particular
 * the byte count reported by _STARPU_MPI_CALLOC is (nmemb) * (size).
 * Arguments are expanded more than once: do not pass expressions with side
 * effects.
 *
 * _STARPU_MPI_REALLOC stores the result of realloc() in ptr even when it is
 * NULL, so when assertions are compiled out the caller sees a NULL ptr on
 * failure.
 */
#define _STARPU_MPI_MALLOC(ptr, size) do { (ptr) = malloc(size); STARPU_MPI_ASSERT_MSG((ptr) != NULL, "Cannot allocate %ld bytes\n", (long) (size)); } while (0)
#define _STARPU_MPI_CALLOC(ptr, nmemb, size) do { (ptr) = calloc((nmemb), (size)); STARPU_MPI_ASSERT_MSG((ptr) != NULL, "Cannot allocate %ld bytes\n", (long) ((nmemb) * (size))); } while (0)
#define _STARPU_MPI_REALLOC(ptr, size) do { void *_new_ptr = realloc((ptr), (size)); STARPU_MPI_ASSERT_MSG(_new_ptr != NULL, "Cannot reallocate %ld bytes\n", (long) (size)); (ptr) = _new_ptr; } while (0)
111
/*
 * Debug traces, active only when STARPU_MPI_VERBOSE is defined; otherwise all
 * four macros expand to an empty statement and none of their arguments is
 * evaluated.
 *
 * _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way)
 *   When _starpu_mpi_comm_debug is non-zero, prints one colon-separated line
 *   on stderr: rank (twice), way, node, tag, utag, communicator name, ptr,
 *   count, size of datatype, calling function, line.
 *   The name buffer is sized with MPI_MAX_OBJECT_NAME, the minimum length
 *   MPI_Comm_get_name() requires.
 * _STARPU_MPI_COMM_TO_DEBUG / _STARPU_MPI_COMM_FROM_DEBUG
 *   Same trace with way set to "-->" / "<--".
 * _STARPU_MPI_DEBUG(level, fmt, ...)
 *   printf-style message on stderr, printed when output is not silenced and
 *   _starpu_debug_level_min <= level <= _starpu_debug_level_max.  The line is
 *   indented by 4 columns per rank.
 */
#ifdef STARPU_MPI_VERBOSE
#  define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way) \
	do \
	{ \
		if (_starpu_mpi_comm_debug) \
		{ \
			int __size; \
			char _comm_name[MPI_MAX_OBJECT_NAME]; \
			int _comm_name_len; \
			int _rank; \
			starpu_mpi_comm_rank(comm, &_rank); \
			MPI_Type_size(datatype, &__size); \
			MPI_Comm_get_name(comm, _comm_name, &_comm_name_len); \
			fprintf(stderr, "[%d][starpu_mpi] :%d:%s:%d:%d:%ld:%s:%p:%ld:%d:%s:%d\n", _rank, _rank, way, node, tag, utag, _comm_name, ptr, count, __size, __starpu_func__ , __LINE__); \
			fflush(stderr); \
		} \
	} while(0)
#  define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, dest, tag, utag, comm, "-->")
#  define _STARPU_MPI_COMM_FROM_DEBUG(ptr, count, datatype, source, tag, utag, comm) _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, source, tag, utag, comm, "<--")
#  define _STARPU_MPI_DEBUG(level, fmt, ...) \
	do \
	{ \
		if (!_starpu_silent && _starpu_debug_level_min <= (level) && (level) <= _starpu_debug_level_max) \
		{ \
			if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
			fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] " fmt , (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__,## __VA_ARGS__); \
			fflush(stderr); \
		} \
	} while(0)
#else
#  define _STARPU_MPI_COMM_DEBUG(ptr, count, datatype, node, tag, utag, comm, way) do { } while(0)
#  define _STARPU_MPI_COMM_TO_DEBUG(ptr, count, datatype, dest, tag, utag, comm) do { } while(0)
#  define _STARPU_MPI_COMM_FROM_DEBUG(ptr, count, datatype, source, tag, utag, comm) do { } while(0)
#  define _STARPU_MPI_DEBUG(level, fmt, ...) do { } while(0)
#endif
147
/*
 * _STARPU_MPI_DISP(fmt, ...): printf-style message on stderr, prefixed with
 * the rank, the calling function and the line, and indented by 4 columns per
 * rank.  Nothing is printed (and the rank is not looked up) while
 * _starpu_silent is non-zero.
 *
 * _STARPU_MPI_MSG(fmt, ...): same prefix without the indentation, and printed
 * regardless of _starpu_silent.
 *
 * Both fetch the rank with starpu_mpi_comm_rank() while _starpu_debug_rank is
 * still -1, and flush stderr after printing.  fmt must be a string literal.
 */
#define _STARPU_MPI_DISP(fmt, ...) \
	do \
	{ \
		if (!_starpu_silent) \
		{ \
			if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
			fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] " fmt , (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__ ,## __VA_ARGS__); \
			fflush(stderr); \
		} \
	} while(0)
#define _STARPU_MPI_MSG(fmt, ...) \
	do \
	{ \
		if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
		fprintf(stderr, "[%d][starpu_mpi][%s:%d] " fmt , _starpu_debug_rank, __starpu_func__ , __LINE__ ,## __VA_ARGS__); \
		fflush(stderr); \
	} while(0)
155
/*
 * _STARPU_MPI_LOG_IN() / _STARPU_MPI_LOG_OUT(): trace markers.
 *
 * With STARPU_MPI_EXTRA_VERBOSE they print "-->" / "<--" on stderr, prefixed
 * with the rank, the calling function and the line (unless _starpu_silent is
 * non-zero).  Without it they expand to an empty `do { } while(0)` statement,
 * like the other disabled trace macros of this header, so that a call followed
 * by `;` is a single statement in both configurations.
 */
#ifdef STARPU_MPI_EXTRA_VERBOSE
#  define _STARPU_MPI_LOG_IN() \
	do \
	{ \
		if (!_starpu_silent) \
		{ \
			if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
			fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] -->\n", (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__ , __LINE__); \
			fflush(stderr); \
		} \
	} while(0)
#  define _STARPU_MPI_LOG_OUT() \
	do \
	{ \
		if (!_starpu_silent) \
		{ \
			if (_starpu_debug_rank == -1) starpu_mpi_comm_rank(MPI_COMM_WORLD, &_starpu_debug_rank); \
			fprintf(stderr, "%*s[%d][starpu_mpi][%s:%d] <--\n", (_starpu_debug_rank+1)*4, "", _starpu_debug_rank, __starpu_func__, __LINE__); \
			fflush(stderr); \
		} \
	} while(0)
#else
#  define _STARPU_MPI_LOG_IN() do { } while(0)
#  define _STARPU_MPI_LOG_OUT() do { } while(0)
#endif
169
/*
 * Kind of a request, stored in the request_type field of a request and
 * accepted by _starpu_mpi_request_type().
 * The numeric values are explicit and contiguous from 0; UNKNOWN_REQ is the
 * last one.
 */
enum _starpu_mpi_request_type
{
	SEND_REQ    = 0,
	RECV_REQ    = 1,
	WAIT_REQ    = 2,
	TEST_REQ    = 3,
	BARRIER_REQ = 4,
	PROBE_REQ   = 5,
	UNKNOWN_REQ = 6,
};
180
/* Pairs a communicator with a rank.
 * NOTE(review): the listing skips line 181, so the `struct <tag>` line that
 * opens this definition is not visible.  It is presumably
 * `struct _starpu_mpi_node`, the type embedded by value in the next
 * structure -- confirm against the real header. */
182{
183 MPI_Comm comm;
184 int rank;
185};
186
/* Pairs a node (communicator + rank) with a data tag.
 * NOTE(review): the opening line (listing line 187) is missing as well;
 * presumably `struct _starpu_mpi_node_tag`, which later structures of this
 * header embed as `node_tag` -- confirm against the real header. */
188{
189 struct _starpu_mpi_node node;
190 starpu_mpi_tag_t data_tag;
191};
192
/* NOTE(review): presumably what provides
 * `struct _starpu_mpi_req_multilist_coop_sends`, used just below and in the
 * request structure -- the macro itself is defined outside this header. */
193MULTILIST_CREATE_TYPE(_starpu_mpi_req, coop_sends)
/* NOTE(review): the listing skips lines 194-195, so the `struct <tag>` line is
 * not visible; presumably `struct _starpu_mpi_coop_sends`, the type pointed to
 * by the `coop_sends_head` member of the request structure -- confirm against
 * the real header.  The per-member documentation of the original file was
 * dropped from this listing too (lines 199 and 203). */
196{
197 starpu_data_handle_t data_handle;
198
200 struct _starpu_mpi_req_multilist_coop_sends reqs;
201 struct _starpu_mpi_data *mpi_data;
202
204 struct _starpu_spinlock lock;
/* NOTE(review): presumably an array of `n` request pointers -- confirm. */
205 struct _starpu_mpi_req **reqs_array;
206 unsigned n;
207 unsigned redirects_sent;
208
209 /* Used to trace dependencies */
210 long pre_sync_jobid;
211};
212
/* NOTE(review): this definition is incomplete in the listing.
 *  - The opening `struct <tag>` line (listing lines 213-214) is missing;
 *    presumably `struct _starpu_mpi_data`, the type returned by
 *    _starpu_mpi_data_get() below -- confirm against the real header.
 *  - Listing lines 224-226, 228-232 and 234-235 are missing.  The symbol index
 *    at the end of this page places `char * redux_map` at line 226,
 *    `struct _starpu_spinlock coop_lock` at line 230,
 *    `struct _starpu_mpi_coop_sends * coop_sends` at line 232 and
 *    `unsigned nb_future_sends` at line 235, i.e. inside this structure.
 * Do not rely on the member list below being exhaustive. */
215{
216 int magic;
217 struct _starpu_mpi_node_tag node_tag;
218 char *cache_sent;
219 unsigned int cache_received;
/* The next three members are 1-bit bit-fields. */
220 unsigned int ft_induced_cache_received:1;
221 unsigned int ft_induced_cache_received_count:1;
222 unsigned int modified:1; // Whether the data has been modified since the registration.
223
227
233
236};
237
/* Takes a data handle and returns a pointer to a struct _starpu_mpi_data.
 * NOTE(review): whether the result can be NULL is not visible from this
 * header -- confirm in the implementation. */
238struct _starpu_mpi_data *_starpu_mpi_data_get(starpu_data_handle_t data_handle);
239
/* Forward declaration, so that the members below can refer to the type. */
241struct _starpu_mpi_req;
/* NOTE(review): this definition is incomplete in the listing.
 *  - Listing lines 242-243, which open the definition, are missing.  The body
 *    is closed by `);` rather than `};`, so the members are presumably the
 *    argument of a list-generating macro from <common/list.h> -- restore the
 *    opening line from the real header.
 *  - Listing lines 269-270 and 281-282 are missing.  The symbol index at the
 *    end of this page places `size_t reserved_size` at line 270 and
 *    `int detached` at line 282, i.e. inside this body.
 * Do not rely on the member list below being exhaustive. */
244 starpu_data_handle_t data_handle;
245
246 int prio;
247 unsigned node; /* Which StarPU memory node this will read from / write to */
248
250 MPI_Datatype datatype;
251 char *datatype_name;
252 void *ptr;
253 starpu_ssize_t count;
254 int registered_datatype; // = 0: datatype is not predefined by StarPU; = 1: otherwise; initialized with -1
255
/* Opaque here: struct _starpu_mpi_req_backend is not defined in this header. */
256 struct _starpu_mpi_req_backend *backend;
257
259 struct _starpu_mpi_node_tag node_tag;
260 void (*func)(struct _starpu_mpi_req *);
261
262 MPI_Status *status;
263 struct _starpu_mpi_req_multilist_coop_sends coop_sends;
264 struct _starpu_mpi_coop_sends *coop_sends_head;
265
266 int *flag;
267 unsigned sync;
268
271
272 int ret;
273
275 enum _starpu_mpi_request_type request_type;
276
277 unsigned submitted;
278 unsigned completed;
279 unsigned posted;
280
/* User callback and the argument stored alongside it. */
283 void *callback_arg;
284 void (*callback)(void *);
285
286 int sequential_consistency;
287
288 long pre_sync_jobid;
289 long post_sync_jobid;
290
/* The next three members exist only in SimGrid builds. */
291#ifdef STARPU_SIMGRID
292 MPI_Status status_store;
293 starpu_pthread_queue_t queue;
294 unsigned done;
295#endif
296);
/* NOTE(review): the two invocations below presumably generate the priority
 * list type and the inline helpers of the `coop_sends` multilist for requests;
 * both macros are defined outside this header -- confirm there. */
297PRIO_LIST_TYPE(_starpu_mpi_req, prio)
298
299MULTILIST_CREATE_INLINES(struct _starpu_mpi_req, _starpu_mpi_req, coop_sends)
300
301
302void _starpu_mpi_req_willpost(struct _starpu_mpi_req *req);
307
308void _starpu_mpi_isend_irecv_common(struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency);
309
310#if 0
312void _starpu_mpi_coop_sends_build_tree(struct _starpu_mpi_coop_sends *coop_sends);
313#endif
315void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency);
316
325void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control, int submit_data);
326
327/*
328 * Fills post_sync_jobid with the reduction synchronization task jobid
329 */
330void _starpu_mpi_redux_fill_post_sync_jobid(const void * const redux_data_args, long * const post_sync_jobid);
331
332void _starpu_mpi_request_init(struct _starpu_mpi_req **req);
333struct _starpu_mpi_req * _starpu_mpi_request_fill(starpu_data_handle_t data_handle,
334 int srcdst, starpu_mpi_tag_t data_tag, MPI_Comm comm,
335 unsigned detached, unsigned sync, int prio, void (*callback)(void *), void *arg,
336 enum _starpu_mpi_request_type request_type, void (*func)(struct _starpu_mpi_req *),
337 int sequential_consistency,
338 int is_internal_req,
339 starpu_ssize_t count);
340
341void _starpu_mpi_request_destroy(struct _starpu_mpi_req *req);
342
343char *_starpu_mpi_request_type(enum _starpu_mpi_request_type request_type);
344
345struct _starpu_mpi_req *_starpu_mpi_irecv_common(starpu_data_handle_t data_handle, int source, starpu_mpi_tag_t data_tag, MPI_Comm comm, unsigned detached, unsigned sync, void (*callback)(void *), void *arg, int sequential_consistency, int is_internal_req, starpu_ssize_t count, int prio);
346
347int _starpu_mpi_choose_node(starpu_data_handle_t data_handle, enum starpu_data_access_mode mode);
348
349void _starpu_mpi_data_flush(starpu_data_handle_t data_handle);
350
353
/* NOTE(review): the listing skips line 354, so the `struct <tag>` line is not
 * visible; presumably `struct _starpu_mpi_argc_argv`, the type taken by the
 * `_starpu_mpi_backend_progress_init` hook of the backend structure -- confirm
 * against the real header.  The documentation of `fargc` and `fargv` (listing
 * lines 360 and 362) was dropped from this listing as well. */
355{
356 int initialize_mpi;
/* argc and argv are held by address (int *, char ***), not by value. */
357 int *argc;
358 char ***argv;
359 MPI_Comm comm;
361 int fargc;
363 char **fargv;
364 int rank;
365 int world_size;
366};
367
/* Table of function pointers implemented by a communication backend.
 * NOTE(review): the listing skips lines 367-371, so the `struct <tag>` line is
 * not visible; presumably `struct _starpu_mpi_backend`, the type of
 * `_mpi_backend` declared right after the body -- confirm against the real
 * header. */
372{
373 void (*_starpu_mpi_backend_init)(struct starpu_conf *conf);
374 void (*_starpu_mpi_backend_shutdown)(void);
375 int (*_starpu_mpi_backend_reserve_core)(void);
376 void (*_starpu_mpi_backend_request_init)(struct _starpu_mpi_req *req);
377 void (*_starpu_mpi_backend_request_fill)(struct _starpu_mpi_req *req, int is_internal_req);
378 void (*_starpu_mpi_backend_request_destroy)(struct _starpu_mpi_req *req);
379 void (*_starpu_mpi_backend_data_clear)(starpu_data_handle_t data_handle);
380 void (*_starpu_mpi_backend_data_register)(starpu_data_handle_t data_handle, starpu_mpi_tag_t data_tag);
381 void (*_starpu_mpi_backend_comm_register)(MPI_Comm comm);
382
383 int (*_starpu_mpi_backend_progress_init)(struct _starpu_mpi_argc_argv *argc_argv);
384 void (*_starpu_mpi_backend_progress_shutdown)(void **value);
/* This hook exists only in SimGrid builds.
 * NOTE(review): it is declared with an empty parameter list `()`, which in C
 * (before C23) leaves the parameters unspecified; every other hook that takes
 * no argument is declared with `(void)` -- consider aligning it once all
 * assignments and call sites have been checked. */
385#ifdef STARPU_SIMGRID
386 void (*_starpu_mpi_backend_wait_for_initialization)();
387#endif
388
389 int (*_starpu_mpi_backend_barrier)(MPI_Comm comm);
390 int (*_starpu_mpi_backend_wait_for_all)(MPI_Comm comm);
/* wait and test take the public request type (starpu_mpi_req *), unlike the
 * hooks above which take struct _starpu_mpi_req *. */
391 int (*_starpu_mpi_backend_wait)(starpu_mpi_req *public_req, MPI_Status *status);
392 int (*_starpu_mpi_backend_test)(starpu_mpi_req *public_req, int *flag, MPI_Status *status);
393
394 void (*_starpu_mpi_backend_isend_size_func)(struct _starpu_mpi_req *req);
395 void (*_starpu_mpi_backend_irecv_size_func)(struct _starpu_mpi_req *req);
396};
397
/* The backend table instance; defined outside this header. */
398extern struct _starpu_mpi_backend _mpi_backend;
399#ifdef __cplusplus
400}
401#endif
402
403#endif // __STARPU_MPI_PRIVATE_H__
#define struct
Definition: list.h:175
Definition: starpu_mpi_mpi_backend.h:59
void _starpu_mpi_coop_send(starpu_data_handle_t data_handle, struct _starpu_mpi_req *req, enum starpu_data_access_mode mode, int sequential_consistency)
void _starpu_mpi_tags_init(void)
struct _starpu_spinlock coop_lock
Definition: starpu_mpi_private.h:230
char * redux_map
Definition: starpu_mpi_private.h:226
struct _starpu_mpi_coop_sends * coop_sends
Definition: starpu_mpi_private.h:232
char ** fargv
Definition: starpu_mpi_private.h:363
int fargc
Definition: starpu_mpi_private.h:361
unsigned nb_future_sends
Definition: starpu_mpi_private.h:235
void _starpu_mpi_submit_ready_request(void *arg)
void _starpu_mpi_release_req_data(struct _starpu_mpi_req *req)
void _starpu_mpi_submit_coop_sends(struct _starpu_mpi_coop_sends *coop_sends, int submit_control, int submit_data)
Definition: starpu_mpi_private.h:355
Definition: starpu_mpi_private.h:196
Definition: starpu_mpi_private.h:215
Definition: starpu_mpi_private.h:182
Definition: starpu_mpi_private.h:188
Definition: starpu_mpi_private.h:42
Definition: starpu_spinlock.h:82
Definition: starpu_mpi_private.h:372
Definition: starpu_mpi_private.h:242
enum _starpu_mpi_request_type request_type
Definition: starpu_mpi_private.h:275
MPI_Datatype datatype
Definition: starpu_mpi_private.h:250
int detached
Definition: starpu_mpi_private.h:282
size_t reserved_size
Definition: starpu_mpi_private.h:270
starpu_data_handle_t data_handle
Definition: starpu_mpi_private.h:244