(git:374b731)
Loading...
Searching...
No Matches
swarm_mpi.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Handles the MPI communication of the swarm framework.
10!> \author Ole Schuett
11! **************************************************************************************************
13 USE cp_files, ONLY: close_file,&
26 USE kinds, ONLY: default_path_length,&
38#include "../base/base_uses.f90"
39
40 IMPLICIT NONE
41 PRIVATE
42
43 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'swarm_mpi'
44
48
50 TYPE(mp_para_env_type), POINTER :: world => null()
51 TYPE(mp_para_env_type), POINTER :: worker => null()
52 TYPE(mp_para_env_type), POINTER :: master => null()
53 INTEGER, DIMENSION(:), ALLOCATABLE :: wid2group
54 CHARACTER(LEN=default_path_length) :: master_output_path = ""
55 END TYPE swarm_mpi_type
56
57CONTAINS
58
59! **************************************************************************************************
60!> \brief Initialize MPI communicators for a swarm run.
61!> \param swarm_mpi ...
62!> \param world_para_env ...
63!> \param root_section ...
64!> \param n_workers ...
65!> \param worker_id ...
66!> \param iw ...
67!> \author Ole Schuett
68! **************************************************************************************************
69 SUBROUTINE swarm_mpi_init(swarm_mpi, world_para_env, root_section, n_workers, worker_id, iw)
71 TYPE(mp_para_env_type), POINTER :: world_para_env
72 TYPE(section_vals_type), POINTER :: root_section
73 INTEGER, INTENT(IN) :: n_workers
74 INTEGER, INTENT(OUT) :: worker_id
75 INTEGER, INTENT(IN) :: iw
76
77 INTEGER :: n_groups_created, pe_per_worker, &
78 subgroup_rank, subgroup_size
79 TYPE(mp_comm_type) :: subgroup
80 LOGICAL :: im_the_master
81 INTEGER, DIMENSION(:), POINTER :: group_distribution_p
82 INTEGER, DIMENSION(0:world_para_env%num_pe-2), &
83 TARGET :: group_distribution
84
85! ====== Setup of MPI-Groups ======
86
87 worker_id = -1
88 swarm_mpi%world => world_para_env
89
90 IF (mod(swarm_mpi%world%num_pe - 1, n_workers) /= 0) THEN
91 cpabort("number of processors-1 is not divisible by n_workers.")
92 END IF
93 IF (swarm_mpi%world%num_pe < n_workers + 1) THEN
94 cpabort("There are not enough processes for n_workers + 1. Aborting.")
95 END IF
96
97 pe_per_worker = (swarm_mpi%world%num_pe - 1)/n_workers
98
99 IF (iw > 0) THEN
100 WRITE (iw, '(A,45X,I8)') " SWARM| Number of mpi ranks", swarm_mpi%world%num_pe
101 WRITE (iw, '(A,47X,I8)') " SWARM| Number of workers", n_workers
102 END IF
103
104 ! the last task becomes the master. Preseves node-alignment of other tasks.
105 im_the_master = (swarm_mpi%world%mepos == swarm_mpi%world%num_pe - 1)
106
107 ! First split split para_env into a master- and a workers-groups...
108 IF (im_the_master) THEN
109 ALLOCATE (swarm_mpi%master)
110 CALL swarm_mpi%master%from_split(swarm_mpi%world, 1)
111 subgroup_size = swarm_mpi%master%num_pe
112 subgroup_rank = swarm_mpi%master%mepos
113 IF (swarm_mpi%master%num_pe /= 1) cpabort("mp_comm_split_direct failed (master)")
114 ELSE
115 CALL subgroup%from_split(swarm_mpi%world, 2)
116 subgroup_size = subgroup%num_pe
117 subgroup_rank = subgroup%mepos
118 IF (subgroup_size /= swarm_mpi%world%num_pe - 1) cpabort("mp_comm_split_direct failed (worker)")
119 END IF
120
121 ALLOCATE (swarm_mpi%wid2group(n_workers))
122 swarm_mpi%wid2group = 0
123
124 IF (.NOT. im_the_master) THEN
125 ! ...then split workers-group into n_workers groups - one for each worker.
126 group_distribution_p => group_distribution
127 ALLOCATE (swarm_mpi%worker)
128 CALL swarm_mpi%worker%from_split(subgroup, n_groups_created, group_distribution_p, n_subgroups=n_workers)
129 worker_id = group_distribution(subgroup_rank) + 1 ! shall start by 1
130 IF (n_groups_created /= n_workers) cpabort("mp_comm_split failed.")
131 CALL subgroup%free()
132
133 !WRITE (*,*) "this is worker ", worker_id, swarm_mpi%worker%mepos, swarm_mpi%worker%num_pe
134
135 ! collect world-ranks of each worker groups rank-0 node
136 IF (swarm_mpi%worker%mepos == 0) &
137 swarm_mpi%wid2group(worker_id) = swarm_mpi%world%mepos
138
139 END IF
140
141 CALL swarm_mpi%world%sum(swarm_mpi%wid2group)
142 !WRITE (*,*), "wid2group table: ",swarm_mpi%wid2group
143
144 CALL logger_init_master(swarm_mpi)
145 CALL logger_init_worker(swarm_mpi, root_section, worker_id)
146 END SUBROUTINE swarm_mpi_init
147
148! **************************************************************************************************
149!> \brief Helper routine for swarm_mpi_init, configures the master's logger.
150!> \param swarm_mpi ...
151!> \author Ole Schuett
152! **************************************************************************************************
153 SUBROUTINE logger_init_master(swarm_mpi)
155
156 INTEGER :: output_unit
157 TYPE(cp_logger_type), POINTER :: logger
158
159! broadcast master_output_path to all ranks
160
161 IF (swarm_mpi%world%is_source()) THEN
162 logger => cp_get_default_logger()
163 output_unit = logger%default_local_unit_nr
164 swarm_mpi%master_output_path = output_unit2path(output_unit)
165 IF (output_unit /= default_output_unit) &
166 CLOSE (output_unit)
167 END IF
168
169 CALL swarm_mpi%world%bcast(swarm_mpi%master_output_path)
170
171 IF (ASSOCIATED(swarm_mpi%master)) &
172 CALL error_add_new_logger(swarm_mpi%master, swarm_mpi%master_output_path)
173 END SUBROUTINE logger_init_master
174
175! **************************************************************************************************
176!> \brief Helper routine for logger_init_master, inquires filename for given unit.
177!> \param output_unit ...
178!> \return ...
179!> \author Ole Schuett
180! **************************************************************************************************
181 FUNCTION output_unit2path(output_unit) RESULT(output_path)
182 INTEGER, INTENT(IN) :: output_unit
183 CHARACTER(LEN=default_path_length) :: output_path
184
185 output_path = "__STD_OUT__"
186 IF (output_unit /= default_output_unit) &
187 INQUIRE (unit=output_unit, name=output_path)
188 END FUNCTION output_unit2path
189
190! **************************************************************************************************
191!> \brief Helper routine for swarm_mpi_init, configures the workers's logger.
192!> \param swarm_mpi ...
193!> \param root_section ...
194!> \param worker_id ...
195!> \author Ole Schuett
196! **************************************************************************************************
197 SUBROUTINE logger_init_worker(swarm_mpi, root_section, worker_id)
199 TYPE(section_vals_type), POINTER :: root_section
200 INTEGER :: worker_id
201
202 CHARACTER(LEN=default_path_length) :: output_path
203 CHARACTER(len=default_string_length) :: new_project_name, project_name, &
204 worker_name
205 TYPE(cp_iteration_info_type), POINTER :: new_iter_info
206 TYPE(cp_logger_type), POINTER :: old_logger
207
208 NULLIFY (old_logger, new_iter_info)
209 IF (ASSOCIATED(swarm_mpi%worker)) THEN
210 old_logger => cp_get_default_logger()
211 project_name = old_logger%iter_info%project_name
212 IF (worker_id > 99999) THEN
213 cpabort("Did not expect so many workers.")
214 END IF
215 WRITE (worker_name, "(A,I5.5)") 'WORKER', worker_id
216 IF (len_trim(project_name) + 1 + len_trim(worker_name) > default_string_length) THEN
217 cpabort("project name too long")
218 END IF
219 output_path = trim(project_name)//"-"//trim(worker_name)//".out"
220 new_project_name = trim(project_name)//"-"//trim(worker_name)
221 CALL section_vals_val_set(root_section, "GLOBAL%PROJECT_NAME", c_val=new_project_name)
222 CALL cp_iteration_info_create(new_iter_info, new_project_name)
223 CALL error_add_new_logger(swarm_mpi%worker, output_path, new_iter_info)
224 CALL cp_iteration_info_release(new_iter_info)
225 END IF
226 END SUBROUTINE logger_init_worker
227
228! **************************************************************************************************
229!> \brief Helper routine for logger_init_master and logger_init_worker
230!> \param para_env ...
231!> \param output_path ...
232!> \param iter_info ...
233!> \author Ole Schuett
234! **************************************************************************************************
235 SUBROUTINE error_add_new_logger(para_env, output_path, iter_info)
236 TYPE(mp_para_env_type), POINTER :: para_env
237 CHARACTER(LEN=default_path_length) :: output_path
238 TYPE(cp_iteration_info_type), OPTIONAL, POINTER :: iter_info
239
240 INTEGER :: output_unit
241 TYPE(cp_logger_type), POINTER :: new_logger, old_logger
242
243 NULLIFY (new_logger, old_logger)
244 output_unit = -1
245 IF (para_env%is_source()) THEN
246 ! open output_unit according to output_path
247 output_unit = default_output_unit
248 IF (output_path /= "__STD_OUT__") &
249 CALL open_file(file_name=output_path, file_status="UNKNOWN", &
250 file_action="WRITE", file_position="APPEND", unit_number=output_unit)
251 END IF
252
253 old_logger => cp_get_default_logger()
254 CALL cp_logger_create(new_logger, para_env=para_env, &
255 default_global_unit_nr=output_unit, close_global_unit_on_dealloc=.false., &
256 template_logger=old_logger, iter_info=iter_info)
257
258 CALL cp_add_default_logger(new_logger)
259 CALL cp_logger_release(new_logger)
260 END SUBROUTINE error_add_new_logger
261
262! **************************************************************************************************
263!> \brief Finalizes the MPI communicators of a swarm run.
264!> \param swarm_mpi ...
265!> \param root_section ...
266!> \author Ole Schuett
267! **************************************************************************************************
268 SUBROUTINE swarm_mpi_finalize(swarm_mpi, root_section)
270 TYPE(section_vals_type), POINTER :: root_section
271
272 CALL swarm_mpi%world%sync()
273 CALL logger_finalize(swarm_mpi, root_section)
274
275 IF (ASSOCIATED(swarm_mpi%worker)) CALL mp_para_env_release(swarm_mpi%worker)
276 IF (ASSOCIATED(swarm_mpi%master)) CALL mp_para_env_release(swarm_mpi%master)
277 NULLIFY (swarm_mpi%worker, swarm_mpi%master)
278 DEALLOCATE (swarm_mpi%wid2group)
279 END SUBROUTINE swarm_mpi_finalize
280
281! **************************************************************************************************
282!> \brief Helper routine for swarm_mpi_finalize, restores the original loggers
283!> \param swarm_mpi ...
284!> \param root_section ...
285!> \author Ole Schuett
286! **************************************************************************************************
287 SUBROUTINE logger_finalize(swarm_mpi, root_section)
289 TYPE(section_vals_type), POINTER :: root_section
290
291 INTEGER :: output_unit
292 TYPE(cp_logger_type), POINTER :: logger, old_logger
293
294 NULLIFY (logger, old_logger)
295 logger => cp_get_default_logger()
296 output_unit = logger%default_local_unit_nr
297 IF (output_unit > 0 .AND. output_unit /= default_output_unit) &
298 CALL close_file(output_unit)
299
300 CALL cp_rm_default_logger() !pops the top-most logger
301 old_logger => cp_get_default_logger()
302
303 ! restore GLOBAL%PROJECT_NAME
304 CALL section_vals_val_set(root_section, "GLOBAL%PROJECT_NAME", &
305 c_val=old_logger%iter_info%project_name)
306
307 CALL swarm_mpi%world%sync()
308
309 ! do this only on master's rank 0
310 IF (swarm_mpi%world%is_source() .AND. output_unit /= default_output_unit) THEN
311 output_unit = old_logger%default_local_unit_nr
312 OPEN (unit=output_unit, file=swarm_mpi%master_output_path, &
313 status="UNKNOWN", action="WRITE", position="APPEND")
314 END IF
315 END SUBROUTINE logger_finalize
316
317! **************************************************************************************************
318!> \brief Sends a report via MPI
319!> \param swarm_mpi ...
320!> \param report ...
321!> \author Ole Schuett
322! **************************************************************************************************
323 SUBROUTINE swarm_mpi_send_report(swarm_mpi, report)
325 TYPE(swarm_message_type) :: report
326
327 INTEGER :: dest, tag
328
329! Only rank-0 of worker group sends its report
330
331 IF (swarm_mpi%worker%is_source()) THEN
332 dest = swarm_mpi%world%num_pe - 1
333 tag = 42
334 CALL swarm_message_mpi_send(report, group=swarm_mpi%world, dest=dest, tag=tag)
335 END IF
336
337 END SUBROUTINE swarm_mpi_send_report
338
339! **************************************************************************************************
340!> \brief Receives a report via MPI
341!> \param swarm_mpi ...
342!> \param report ...
343!> \author Ole Schuett
344! **************************************************************************************************
345 SUBROUTINE swarm_mpi_recv_report(swarm_mpi, report)
347 TYPE(swarm_message_type), INTENT(OUT) :: report
348
349 INTEGER :: src, tag
350
351 tag = 42
352 src = mp_any_source
353
354 CALL swarm_message_mpi_recv(report, group=swarm_mpi%world, src=src, tag=tag)
355
356 END SUBROUTINE swarm_mpi_recv_report
357
358! **************************************************************************************************
359!> \brief Sends a command via MPI
360!> \param swarm_mpi ...
361!> \param cmd ...
362!> \author Ole Schuett
363! **************************************************************************************************
364 SUBROUTINE swarm_mpi_send_command(swarm_mpi, cmd)
366 TYPE(swarm_message_type) :: cmd
367
368 INTEGER :: dest, tag, worker_id
369
370 CALL swarm_message_get(cmd, "worker_id", worker_id)
371 tag = 42
372 dest = swarm_mpi%wid2group(worker_id)
373
374 CALL swarm_message_mpi_send(cmd, group=swarm_mpi%world, dest=dest, tag=tag)
375
376 END SUBROUTINE swarm_mpi_send_command
377
378! **************************************************************************************************
379!> \brief Receives a command via MPI and broadcasts it within a worker.
380!> \param swarm_mpi ...
381!> \param cmd ...
382!> \author Ole Schuett
383! **************************************************************************************************
384 SUBROUTINE swarm_mpi_recv_command(swarm_mpi, cmd)
386 TYPE(swarm_message_type), INTENT(OUT) :: cmd
387
388 INTEGER :: src, tag
389
390! This is a two step communication schema.
391! First: The rank-0 of the worker groups receives the command from the master.
392
393 IF (swarm_mpi%worker%is_source()) THEN
394 src = swarm_mpi%world%num_pe - 1 !
395 tag = 42
396 CALL swarm_message_mpi_recv(cmd, group=swarm_mpi%world, src=src, tag=tag)
397
398 END IF
399
400! ! Second: The command is broadcasted within the worker group.
401 CALL swarm_message_mpi_bcast(cmd, src=swarm_mpi%worker%source, group=swarm_mpi%worker)
402
403 END SUBROUTINE swarm_mpi_recv_command
404
405END MODULE swarm_mpi
406
Returns an entry from a swarm-message.
Utility routines to open and close files. Tracking of preconnections.
Definition cp_files.F:16
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
Definition cp_files.F:308
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
Definition cp_files.F:119
Collection of routines to handle the iteration info.
pure subroutine, public cp_iteration_info_create(iteration_info, project_name)
creates an output info object
subroutine, public cp_iteration_info_release(iteration_info)
releases the iteration_info (see doc/ReferenceCounting.html)
various routines to log and control the output. The idea is that decisions about where to log should ...
subroutine, public cp_rm_default_logger()
the cousin of cp_add_default_logger, decrements the stack, so that the default logger is what it has ...
subroutine, public cp_logger_release(logger)
releases this logger
subroutine, public cp_logger_create(logger, para_env, print_level, default_global_unit_nr, default_local_unit_nr, global_filename, local_filename, close_global_unit_on_dealloc, iter_info, close_local_unit_on_dealloc, suffix, template_logger)
initializes a logger
subroutine, public cp_add_default_logger(logger)
adds a default logger. MUST be called before logging occours
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
objects that represent the structure of input sections and the data contained in an input section
subroutine, public section_vals_val_set(section_vals, keyword_name, i_rep_section, i_rep_val, val, l_val, i_val, r_val, c_val, l_vals_ptr, i_vals_ptr, r_vals_ptr, c_vals_ptr)
sets the requested value
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public default_string_length
Definition kinds.F:57
integer, parameter, public default_path_length
Definition kinds.F:58
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
integer, parameter, public default_output_unit
Definition machine.F:45
Interface to the message passing library MPI.
subroutine, public mp_para_env_release(para_env)
releases the para object (to be called when you don't want anymore the shared copy of this object)
integer, parameter, public mp_any_source
Swarm-message, a convenient data-container for with build-in serialization.
subroutine, public swarm_message_mpi_send(msg, group, dest, tag)
Sends a swarm message via MPI.
subroutine, public swarm_message_mpi_bcast(msg, src, group)
Broadcasts a swarm message via MPI.
subroutine, public swarm_message_mpi_recv(msg, group, src, tag)
Receives a swarm message via MPI.
Handles the MPI communication of the swarm framework.
Definition swarm_mpi.F:12
subroutine, public swarm_mpi_init(swarm_mpi, world_para_env, root_section, n_workers, worker_id, iw)
Initialize MPI communicators for a swarm run.
Definition swarm_mpi.F:70
subroutine, public swarm_mpi_send_command(swarm_mpi, cmd)
Sends a command via MPI.
Definition swarm_mpi.F:365
subroutine, public swarm_mpi_recv_report(swarm_mpi, report)
Receives a report via MPI.
Definition swarm_mpi.F:346
subroutine, public swarm_mpi_recv_command(swarm_mpi, cmd)
Receives a command via MPI and broadcasts it within a worker.
Definition swarm_mpi.F:385
subroutine, public swarm_mpi_send_report(swarm_mpi, report)
Sends a report via MPI.
Definition swarm_mpi.F:324
subroutine, public swarm_mpi_finalize(swarm_mpi, root_section)
Finalizes the MPI communicators of a swarm run.
Definition swarm_mpi.F:269
contains the information about the current state of the program to be able to decide if output is nec...
type of a logger, at the moment it contains just a print level starting at which level it should be l...
stores all the informations relevant to an mpi environment