(git:b279b6b)
swarm_master.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Master's routines for the swarm-framework
10 !> \author Ole Schuett
11 ! **************************************************************************************************
15  cp_logger_type
18  USE cp_parser_types, ONLY: cp_parser_type,&
24  glbopt_master_type
25  USE global_types, ONLY: global_environment_type
28  section_vals_type,&
30  USE kinds, ONLY: default_path_length,&
32  USE message_passing, ONLY: mp_para_env_type
33  USE swarm_message, ONLY: swarm_message_add,&
38  swarm_message_get,&
39  swarm_message_type
40 #include "../base/base_uses.f90"
41 
42  IMPLICIT NONE
43  PRIVATE
44 
45  CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'swarm_master'
46 
47  PUBLIC :: swarm_master_type
49  PUBLIC :: swarm_master_steer
50 
51  TYPE swarm_message_p_type
52  TYPE(swarm_message_type), POINTER :: p => null()
53  END TYPE swarm_message_p_type
54 
55  TYPE swarm_master_type
56  PRIVATE
57  INTEGER :: behavior = -1
58  TYPE(glbopt_master_type), POINTER :: glbopt => null()
59  !possibly more behaviors ...
60  INTEGER :: iw = 0
61  INTEGER :: i_iteration = 0
62  INTEGER :: max_iter = 0
63  LOGICAL :: should_stop = .false.
64  INTEGER :: n_workers = -1
65  INTEGER :: comlog_unit = -1
66  TYPE(section_vals_type), POINTER :: swarm_section => null()
67  TYPE(mp_para_env_type), POINTER :: para_env => null()
68  TYPE(swarm_message_p_type), DIMENSION(:), POINTER :: queued_commands => null()
69  TYPE(global_environment_type), POINTER :: globenv => null()
70  LOGICAL :: ignore_last_iteration = .false.
71  INTEGER :: n_waiting = 0
72  END TYPE swarm_master_type
73 
74 CONTAINS
75 
76 ! **************************************************************************************************
77 !> \brief Initializes the swarm master
78 !> \param master ...
79 !> \param para_env ...
80 !> \param globenv ...
81 !> \param root_section ...
82 !> \param n_workers ...
83 !> \author Ole Schuett
84 ! **************************************************************************************************
85  SUBROUTINE swarm_master_init(master, para_env, globenv, root_section, n_workers)
86  TYPE(swarm_master_type) :: master
87  TYPE(mp_para_env_type), POINTER :: para_env
88  TYPE(global_environment_type), POINTER :: globenv
89  TYPE(section_vals_type), POINTER :: root_section
90  INTEGER, INTENT(IN) :: n_workers
91 
92  TYPE(cp_logger_type), POINTER :: logger
93 
94  master%swarm_section => section_vals_get_subs_vals(root_section, "SWARM")
95 
96  logger => cp_get_default_logger()
97  master%n_workers = n_workers
98  master%para_env => para_env
99  master%globenv => globenv
100  ALLOCATE (master%queued_commands(master%n_workers))
101  master%iw = cp_print_key_unit_nr(logger, master%swarm_section, &
102  "PRINT%MASTER_RUN_INFO", extension=".masterLog")
103 
104  CALL section_vals_val_get(master%swarm_section, "BEHAVIOR", i_val=master%behavior)
105 
106  ! uses logger%iter_info%project_name to construct filename
107  master%comlog_unit = cp_print_key_unit_nr(logger, master%swarm_section, "PRINT%COMMUNICATION_LOG", &
108  !middle_name="comlog", extension=".xyz", &
109  extension=".comlog", &
110  file_action="WRITE", file_position="REWIND")
111 
112  CALL section_vals_val_get(master%swarm_section, "MAX_ITER", i_val=master%max_iter)
113 
114  SELECT CASE (master%behavior)
115  CASE (swarm_do_glbopt)
116  ALLOCATE (master%glbopt)
117  CALL glbopt_master_init(master%glbopt, para_env, root_section, n_workers, master%iw)
118  CASE DEFAULT
119  cpabort("got unknown behavior")
120  END SELECT
121 
122  CALL replay_comlog(master)
123  END SUBROUTINE swarm_master_init
124 
125 ! **************************************************************************************************
126 !> \brief Helper routine for swarm_master_init, restarts a calculation
127 !> \param master ...
128 !> \author Ole Schuett
129 ! **************************************************************************************************
130  SUBROUTINE replay_comlog(master)
131  TYPE(swarm_master_type) :: master
132 
133  CHARACTER(LEN=default_path_length) :: filename
134  CHARACTER(LEN=default_string_length) :: command_log
135  INTEGER :: handle, i, worker_id
136  LOGICAL :: at_end, explicit
137  TYPE(cp_parser_type) :: parser
138  TYPE(swarm_message_type) :: cmd_log, report_log
139  TYPE(swarm_message_type), &
140  DIMENSION(master%n_workers) :: last_commands
141  TYPE(swarm_message_type), POINTER :: cmd_now
142 
143  ! Initialize parser for trajectory
144  CALL section_vals_val_get(master%swarm_section, "REPLAY_COMMUNICATION_LOG", &
145  c_val=filename, explicit=explicit)
146 
147  IF (.NOT. explicit) RETURN
148  IF (master%iw > 0) WRITE (master%iw, '(A,A)') &
149  " SWARM| Starting replay of communication-log: ", trim(filename)
150 
151  CALL timeset("swarm_master_replay_comlog", handle)
152  CALL parser_create(parser, filename, para_env=master%para_env)
153 
154  at_end = .false.
155  DO
156  CALL swarm_message_file_read(report_log, parser, at_end)
157  IF (at_end) EXIT
158 
159  CALL swarm_message_file_read(cmd_log, parser, at_end)
160  IF (at_end) EXIT
161 
162  ALLOCATE (cmd_now)
163  CALL swarm_master_steer(master, report_log, cmd_now)
164 
165  !TODO: maybe we should just exit the loop instead of stopping?
166  CALL swarm_message_get(cmd_log, "command", command_log)
167  IF (trim(command_log) /= "shutdown") THEN
168  IF (.NOT. commands_equal(cmd_now, cmd_log, master%iw)) cpabort("wrong behaviour")
169  END IF
170 
171  CALL swarm_message_free(cmd_log)
172  CALL swarm_message_free(report_log)
173  CALL swarm_message_get(cmd_now, "worker_id", worker_id)
174  CALL swarm_message_free(last_commands(worker_id))
175  last_commands(worker_id) = cmd_now
176  DEALLOCATE (cmd_now)
177  END DO
178 
179  CALL swarm_message_free(report_log) !don't worry about double-frees
180  CALL swarm_message_free(cmd_log)
181 
182  IF (master%iw > 0) WRITE (master%iw, '(A,A)') &
183  " SWARM| Reached end of communication log. Queueing last commands."
184 
185  DO i = 1, master%n_workers
186  ALLOCATE (master%queued_commands(i)%p)
187  master%queued_commands(i)%p = last_commands(i)
188  END DO
189 
190  CALL parser_release(parser)
191  CALL timestop(handle)
192  END SUBROUTINE replay_comlog
193 
194 ! **************************************************************************************************
195 !> \brief Helper routine for replay_comlog, compares two commands
196 !> \param cmd1 ...
197 !> \param cmd2 ...
198 !> \param iw ...
199 !> \return ...
200 !> \author Ole Schuett
201 ! **************************************************************************************************
202  FUNCTION commands_equal(cmd1, cmd2, iw) RESULT(res)
203  TYPE(swarm_message_type) :: cmd1, cmd2
204  INTEGER :: iw
205  LOGICAL :: res
206 
207  res = swarm_message_equal(cmd1, cmd2)
208  IF (.NOT. res .AND. iw > 0) THEN
209  WRITE (iw, *) "Command 1:"
210  CALL swarm_message_file_write(cmd1, iw)
211  WRITE (iw, *) "Command 2:"
212  CALL swarm_message_file_write(cmd2, iw)
213  END IF
214  END FUNCTION commands_equal
215 
216 ! **************************************************************************************************
217 !> \brief Central steering routine of the swarm master
218 !> \param master ...
219 !> \param report ...
220 !> \param cmd ...
221 !> \author Ole Schuett
222 ! **************************************************************************************************
223  SUBROUTINE swarm_master_steer(master, report, cmd)
224  TYPE(swarm_master_type), INTENT(INOUT) :: master
225  TYPE(swarm_message_type), INTENT(IN) :: report
226  TYPE(swarm_message_type), INTENT(OUT) :: cmd
227 
228  CHARACTER(len=default_string_length) :: command, status
229  INTEGER :: handle, worker_id
230  LOGICAL :: should_stop
231 
232  should_stop = .false.
233 
234  CALL timeset("swarm_master_steer", handle)
235 
236  ! First check if there are queued commands for this worker
237  CALL swarm_message_get(report, "worker_id", worker_id)
238 
239  IF (ASSOCIATED(master%queued_commands(worker_id)%p)) THEN
240  cmd = master%queued_commands(worker_id)%p
241  DEALLOCATE (master%queued_commands(worker_id)%p)
242  IF (master%iw > 0) WRITE (master%iw, '(A,A,A,I9,1X,A)') ' SWARM| ', &
243  repeat("*", 9), " Sending out queued command to worker: ", &
244  worker_id, repeat("*", 9)
245  CALL timestop(handle)
246  RETURN
247  END IF
248 
249  IF (.NOT. master%ignore_last_iteration) THEN
250  ! There are no queued commands. Do the normal processing.
251  master%i_iteration = master%i_iteration + 1
252 
253  IF (master%iw > 0) WRITE (master%iw, '(A,A,1X,I8,A,A)') ' SWARM| ', repeat("*", 15), &
254  master%i_iteration, ' Master / Worker Communication ', repeat("*", 15)
255  END IF
256 
257  IF (master%i_iteration >= master%max_iter .AND. .NOT. master%should_stop) THEN
258  IF (master%iw > 0) WRITE (master%iw, '(A)') " SWARM| Reached MAX_ITER. Quitting."
259  master%should_stop = .true.
260  END IF
261 
262  IF (.NOT. master%should_stop) THEN
263  CALL external_control(master%should_stop, "SWARM", master%globenv)
264  IF (master%should_stop .AND. master%iw > 0) &
265  WRITE (master%iw, *) " SWARM| Received stop from external_control. Quitting."
266  END IF
267 
268  !IF(unit > 0) &
269 
270  IF (master%should_stop) THEN
271  CALL swarm_message_add(cmd, "command", "shutdown")
272  IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
273  "SWARM| Sending shutdown command to worker", worker_id
274  ELSE
275  SELECT CASE (master%behavior)
276  CASE (swarm_do_glbopt)
277  CALL glbopt_master_steer(master%glbopt, report, cmd, should_stop)
278  CASE DEFAULT
279  cpabort("got unknown behavior")
280  END SELECT
281 
282  IF (should_stop) THEN
283  CALL swarm_message_free(cmd)
284  CALL swarm_message_add(cmd, "command", "shutdown") !overwrite command
285  IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
286  "SWARM| Sending shutdown command to worker", worker_id
287  master%should_stop = .true.
288  END IF
289  END IF
290 
291  CALL swarm_message_add(cmd, "worker_id", worker_id)
292 
293  ! Don't pollute comlog with "continue waiting"-commands.
294  CALL swarm_message_get(report, "status", status)
295  CALL swarm_message_get(cmd, "command", command)
296  IF (trim(status) == "wait_done") master%n_waiting = master%n_waiting - 1
297  IF (trim(command) == "wait") master%n_waiting = master%n_waiting + 1
298  IF (master%n_waiting < 0) cpabort("master%n_waiting < 0")
299  IF (trim(status) /= "wait_done" .OR. trim(command) /= "wait") THEN
300  CALL swarm_message_file_write(report, master%comlog_unit)
301  CALL swarm_message_file_write(cmd, master%comlog_unit)
302  IF (master%n_waiting > 0 .AND. master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
303  "SWARM| Number of waiting workers:", master%n_waiting
304  master%ignore_last_iteration = .false.
305  ELSE
306  master%ignore_last_iteration = .true.
307  END IF
308  CALL timestop(handle)
309  END SUBROUTINE swarm_master_steer
310 
311 ! **************************************************************************************************
312 !> \brief Finalizes the swarm master
313 !> \param master ...
314 !> \author Ole Schuett
315 ! **************************************************************************************************
316  SUBROUTINE swarm_master_finalize(master)
317  TYPE(swarm_master_type) :: master
318 
319  TYPE(cp_logger_type), POINTER :: logger
320 
321  IF (master%iw > 0) THEN
322  WRITE (master%iw, "(1X,A,T71,I10)") "SWARM| Total number of iterations ", master%i_iteration
323  WRITE (master%iw, "(A)") " SWARM| Shutting down the master."
324  END IF
325 
326  SELECT CASE (master%behavior)
327  CASE (swarm_do_glbopt)
328  CALL glbopt_master_finalize(master%glbopt)
329  DEALLOCATE (master%glbopt)
330  CASE DEFAULT
331  cpabort("got unknown behavior")
332  END SELECT
333 
334  DEALLOCATE (master%queued_commands)
335 
336  logger => cp_get_default_logger()
337  CALL cp_print_key_finished_output(master%iw, logger, &
338  master%swarm_section, "PRINT%MASTER_RUN_INFO")
339  CALL cp_print_key_finished_output(master%comlog_unit, logger, &
340  master%swarm_section, "PRINT%COMMUNICATION_LOG")
341 
342  !CALL rm_timer_env() !pops the top-most timer
343  END SUBROUTINE swarm_master_finalize
344 
345 END MODULE swarm_master
346 
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
Utility routines to read data from files. Kept as close as possible to the old parser because.
subroutine, public parser_release(parser)
releases the parser
subroutine, public parser_create(parser, file_name, unit_nr, para_env, end_section_label, separator_chars, comment_char, continuation_char, quote_char, section_char, parse_white_lines, initial_variables, apply_preprocessing)
Start a parser run. Initial variables allow to @SET stuff before opening the file.
Master's routines for global optimization.
Definition: glbopt_master.F:12
subroutine, public glbopt_master_steer(this, report, cmd, should_stop)
Central steering routine of global optimizer.
subroutine, public glbopt_master_init(this, para_env, root_section, n_walkers, iw)
Initializes the master of the global optimizer.
Definition: glbopt_master.F:97
subroutine, public glbopt_master_finalize(this)
Finalized the master of the global optimizer.
Define type storing the global information of a run. Keep the amount of stored data small....
Definition: global_types.F:21
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public swarm_do_glbopt
objects that represent the structure of input sections and the data contained in an input section
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public default_string_length
Definition: kinds.F:57
integer, parameter, public default_path_length
Definition: kinds.F:58
Interface to the message passing library MPI.
Master's routines for the swarm-framework.
Definition: swarm_master.F:12
subroutine, public swarm_master_finalize(master)
Finalizes the swarm master.
Definition: swarm_master.F:317
subroutine, public swarm_master_steer(master, report, cmd)
Central steering routine of the swarm master.
Definition: swarm_master.F:224
subroutine, public swarm_master_init(master, para_env, globenv, root_section, n_workers)
Initializes the swarm master.
Definition: swarm_master.F:86
Swarm-message, a convenient data-container for with build-in serialization.
Definition: swarm_message.F:12
subroutine, public swarm_message_file_write(msg, unit)
Write a swarm-message to a given file / unit.
logical function, public swarm_message_equal(msg1, msg2)
Checks if two swarm-messages are equal.
subroutine, public swarm_message_free(msg)
Deallocates all entries contained in a swarm-message.
subroutine, public swarm_message_file_read(msg, parser, at_end)
Reads a swarm-message from a given file / unit.