(git:374b731)
Loading...
Searching...
No Matches
swarm_master.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief Master's routines for the swarm-framework
10!> \author Ole Schuett
11! **************************************************************************************************
30 USE kinds, ONLY: default_path_length,&
40#include "../base/base_uses.f90"
41
42 IMPLICIT NONE
43 PRIVATE
44
45 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'swarm_master'
46
47 PUBLIC :: swarm_master_type
49 PUBLIC :: swarm_master_steer
50
51 TYPE swarm_message_p_type
52 TYPE(swarm_message_type), POINTER :: p => null()
53 END TYPE swarm_message_p_type
54
56 PRIVATE
57 INTEGER :: behavior = -1
58 TYPE(glbopt_master_type), POINTER :: glbopt => null()
59 !possibly more behaviors ...
60 INTEGER :: iw = 0
61 INTEGER :: i_iteration = 0
62 INTEGER :: max_iter = 0
63 LOGICAL :: should_stop = .false.
64 INTEGER :: n_workers = -1
65 INTEGER :: comlog_unit = -1
66 TYPE(section_vals_type), POINTER :: swarm_section => null()
67 TYPE(mp_para_env_type), POINTER :: para_env => null()
68 TYPE(swarm_message_p_type), DIMENSION(:), POINTER :: queued_commands => null()
69 TYPE(global_environment_type), POINTER :: globenv => null()
70 LOGICAL :: ignore_last_iteration = .false.
71 INTEGER :: n_waiting = 0
72 END TYPE swarm_master_type
73
74CONTAINS
75
76! **************************************************************************************************
77!> \brief Initializes the swarm master
78!> \param master ...
79!> \param para_env ...
80!> \param globenv ...
81!> \param root_section ...
82!> \param n_workers ...
83!> \author Ole Schuett
84! **************************************************************************************************
85 SUBROUTINE swarm_master_init(master, para_env, globenv, root_section, n_workers)
86 TYPE(swarm_master_type) :: master
87 TYPE(mp_para_env_type), POINTER :: para_env
88 TYPE(global_environment_type), POINTER :: globenv
89 TYPE(section_vals_type), POINTER :: root_section
90 INTEGER, INTENT(IN) :: n_workers
91
92 TYPE(cp_logger_type), POINTER :: logger
93
94 master%swarm_section => section_vals_get_subs_vals(root_section, "SWARM")
95
96 logger => cp_get_default_logger()
97 master%n_workers = n_workers
98 master%para_env => para_env
99 master%globenv => globenv
100 ALLOCATE (master%queued_commands(master%n_workers))
101 master%iw = cp_print_key_unit_nr(logger, master%swarm_section, &
102 "PRINT%MASTER_RUN_INFO", extension=".masterLog")
103
104 CALL section_vals_val_get(master%swarm_section, "BEHAVIOR", i_val=master%behavior)
105
106 ! uses logger%iter_info%project_name to construct filename
107 master%comlog_unit = cp_print_key_unit_nr(logger, master%swarm_section, "PRINT%COMMUNICATION_LOG", &
108 !middle_name="comlog", extension=".xyz", &
109 extension=".comlog", &
110 file_action="WRITE", file_position="REWIND")
111
112 CALL section_vals_val_get(master%swarm_section, "MAX_ITER", i_val=master%max_iter)
113
114 SELECT CASE (master%behavior)
115 CASE (swarm_do_glbopt)
116 ALLOCATE (master%glbopt)
117 CALL glbopt_master_init(master%glbopt, para_env, root_section, n_workers, master%iw)
118 CASE DEFAULT
119 cpabort("got unknown behavior")
120 END SELECT
121
122 CALL replay_comlog(master)
123 END SUBROUTINE swarm_master_init
124
125! **************************************************************************************************
126!> \brief Helper routine for swarm_master_init, restarts a calculation
127!> \param master ...
128!> \author Ole Schuett
129! **************************************************************************************************
130 SUBROUTINE replay_comlog(master)
131 TYPE(swarm_master_type) :: master
132
133 CHARACTER(LEN=default_path_length) :: filename
134 CHARACTER(LEN=default_string_length) :: command_log
135 INTEGER :: handle, i, worker_id
136 LOGICAL :: at_end, explicit
137 TYPE(cp_parser_type) :: parser
138 TYPE(swarm_message_type) :: cmd_log, report_log
139 TYPE(swarm_message_type), &
140 DIMENSION(master%n_workers) :: last_commands
141 TYPE(swarm_message_type), POINTER :: cmd_now
142
143 ! Initialize parser for trajectory
144 CALL section_vals_val_get(master%swarm_section, "REPLAY_COMMUNICATION_LOG", &
145 c_val=filename, explicit=explicit)
146
147 IF (.NOT. explicit) RETURN
148 IF (master%iw > 0) WRITE (master%iw, '(A,A)') &
149 " SWARM| Starting replay of communication-log: ", trim(filename)
150
151 CALL timeset("swarm_master_replay_comlog", handle)
152 CALL parser_create(parser, filename, para_env=master%para_env)
153
154 at_end = .false.
155 DO
156 CALL swarm_message_file_read(report_log, parser, at_end)
157 IF (at_end) EXIT
158
159 CALL swarm_message_file_read(cmd_log, parser, at_end)
160 IF (at_end) EXIT
161
162 ALLOCATE (cmd_now)
163 CALL swarm_master_steer(master, report_log, cmd_now)
164
165 !TODO: maybe we should just exit the loop instead of stopping?
166 CALL swarm_message_get(cmd_log, "command", command_log)
167 IF (trim(command_log) /= "shutdown") THEN
168 IF (.NOT. commands_equal(cmd_now, cmd_log, master%iw)) cpabort("wrong behaviour")
169 END IF
170
171 CALL swarm_message_free(cmd_log)
172 CALL swarm_message_free(report_log)
173 CALL swarm_message_get(cmd_now, "worker_id", worker_id)
174 CALL swarm_message_free(last_commands(worker_id))
175 last_commands(worker_id) = cmd_now
176 DEALLOCATE (cmd_now)
177 END DO
178
179 CALL swarm_message_free(report_log) !don't worry about double-frees
180 CALL swarm_message_free(cmd_log)
181
182 IF (master%iw > 0) WRITE (master%iw, '(A,A)') &
183 " SWARM| Reached end of communication log. Queueing last commands."
184
185 DO i = 1, master%n_workers
186 ALLOCATE (master%queued_commands(i)%p)
187 master%queued_commands(i)%p = last_commands(i)
188 END DO
189
190 CALL parser_release(parser)
191 CALL timestop(handle)
192 END SUBROUTINE replay_comlog
193
194! **************************************************************************************************
195!> \brief Helper routine for replay_comlog, compares two commands
196!> \param cmd1 ...
197!> \param cmd2 ...
198!> \param iw ...
199!> \return ...
200!> \author Ole Schuett
201! **************************************************************************************************
202 FUNCTION commands_equal(cmd1, cmd2, iw) RESULT(res)
203 TYPE(swarm_message_type) :: cmd1, cmd2
204 INTEGER :: iw
205 LOGICAL :: res
206
207 res = swarm_message_equal(cmd1, cmd2)
208 IF (.NOT. res .AND. iw > 0) THEN
209 WRITE (iw, *) "Command 1:"
210 CALL swarm_message_file_write(cmd1, iw)
211 WRITE (iw, *) "Command 2:"
212 CALL swarm_message_file_write(cmd2, iw)
213 END IF
214 END FUNCTION commands_equal
215
216! **************************************************************************************************
217!> \brief Central steering routine of the swarm master
218!> \param master ...
219!> \param report ...
220!> \param cmd ...
221!> \author Ole Schuett
222! **************************************************************************************************
223 SUBROUTINE swarm_master_steer(master, report, cmd)
224 TYPE(swarm_master_type), INTENT(INOUT) :: master
225 TYPE(swarm_message_type), INTENT(IN) :: report
226 TYPE(swarm_message_type), INTENT(OUT) :: cmd
227
228 CHARACTER(len=default_string_length) :: command, status
229 INTEGER :: handle, worker_id
230 LOGICAL :: should_stop
231
232 should_stop = .false.
233
234 CALL timeset("swarm_master_steer", handle)
235
236 ! First check if there are queued commands for this worker
237 CALL swarm_message_get(report, "worker_id", worker_id)
238
239 IF (ASSOCIATED(master%queued_commands(worker_id)%p)) THEN
240 cmd = master%queued_commands(worker_id)%p
241 DEALLOCATE (master%queued_commands(worker_id)%p)
242 IF (master%iw > 0) WRITE (master%iw, '(A,A,A,I9,1X,A)') ' SWARM| ', &
243 repeat("*", 9), " Sending out queued command to worker: ", &
244 worker_id, repeat("*", 9)
245 CALL timestop(handle)
246 RETURN
247 END IF
248
249 IF (.NOT. master%ignore_last_iteration) THEN
250 ! There are no queued commands. Do the normal processing.
251 master%i_iteration = master%i_iteration + 1
252
253 IF (master%iw > 0) WRITE (master%iw, '(A,A,1X,I8,A,A)') ' SWARM| ', repeat("*", 15), &
254 master%i_iteration, ' Master / Worker Communication ', repeat("*", 15)
255 END IF
256
257 IF (master%i_iteration >= master%max_iter .AND. .NOT. master%should_stop) THEN
258 IF (master%iw > 0) WRITE (master%iw, '(A)') " SWARM| Reached MAX_ITER. Quitting."
259 master%should_stop = .true.
260 END IF
261
262 IF (.NOT. master%should_stop) THEN
263 CALL external_control(master%should_stop, "SWARM", master%globenv)
264 IF (master%should_stop .AND. master%iw > 0) &
265 WRITE (master%iw, *) " SWARM| Received stop from external_control. Quitting."
266 END IF
267
268 !IF(unit > 0) &
269
270 IF (master%should_stop) THEN
271 CALL swarm_message_add(cmd, "command", "shutdown")
272 IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
273 "SWARM| Sending shutdown command to worker", worker_id
274 ELSE
275 SELECT CASE (master%behavior)
276 CASE (swarm_do_glbopt)
277 CALL glbopt_master_steer(master%glbopt, report, cmd, should_stop)
278 CASE DEFAULT
279 cpabort("got unknown behavior")
280 END SELECT
281
282 IF (should_stop) THEN
283 CALL swarm_message_free(cmd)
284 CALL swarm_message_add(cmd, "command", "shutdown") !overwrite command
285 IF (master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
286 "SWARM| Sending shutdown command to worker", worker_id
287 master%should_stop = .true.
288 END IF
289 END IF
290
291 CALL swarm_message_add(cmd, "worker_id", worker_id)
292
293 ! Don't pollute comlog with "continue waiting"-commands.
294 CALL swarm_message_get(report, "status", status)
295 CALL swarm_message_get(cmd, "command", command)
296 IF (trim(status) == "wait_done") master%n_waiting = master%n_waiting - 1
297 IF (trim(command) == "wait") master%n_waiting = master%n_waiting + 1
298 IF (master%n_waiting < 0) cpabort("master%n_waiting < 0")
299 IF (trim(status) /= "wait_done" .OR. trim(command) /= "wait") THEN
300 CALL swarm_message_file_write(report, master%comlog_unit)
301 CALL swarm_message_file_write(cmd, master%comlog_unit)
302 IF (master%n_waiting > 0 .AND. master%iw > 0) WRITE (master%iw, '(1X,A,T71,I10)') &
303 "SWARM| Number of waiting workers:", master%n_waiting
304 master%ignore_last_iteration = .false.
305 ELSE
306 master%ignore_last_iteration = .true.
307 END IF
308 CALL timestop(handle)
309 END SUBROUTINE swarm_master_steer
310
311! **************************************************************************************************
312!> \brief Finalizes the swarm master
313!> \param master ...
314!> \author Ole Schuett
315! **************************************************************************************************
316 SUBROUTINE swarm_master_finalize(master)
317 TYPE(swarm_master_type) :: master
318
319 TYPE(cp_logger_type), POINTER :: logger
320
321 IF (master%iw > 0) THEN
322 WRITE (master%iw, "(1X,A,T71,I10)") "SWARM| Total number of iterations ", master%i_iteration
323 WRITE (master%iw, "(A)") " SWARM| Shutting down the master."
324 END IF
325
326 SELECT CASE (master%behavior)
327 CASE (swarm_do_glbopt)
328 CALL glbopt_master_finalize(master%glbopt)
329 DEALLOCATE (master%glbopt)
330 CASE DEFAULT
331 cpabort("got unknown behavior")
332 END SELECT
333
334 DEALLOCATE (master%queued_commands)
335
336 logger => cp_get_default_logger()
337 CALL cp_print_key_finished_output(master%iw, logger, &
338 master%swarm_section, "PRINT%MASTER_RUN_INFO")
339 CALL cp_print_key_finished_output(master%comlog_unit, logger, &
340 master%swarm_section, "PRINT%COMMUNICATION_LOG")
341
342 !CALL rm_timer_env() !pops the top-most timer
343 END SUBROUTINE swarm_master_finalize
344
345END MODULE swarm_master
346
Adds an entry from a swarm-message.
Returns an entry from a swarm-message.
Routines to handle the external control of CP2K.
subroutine, public external_control(should_stop, flag, globenv, target_time, start_time, force_check)
External manipulations during a run : when the <PROJECT_NAME>.EXIT_$runtype command is sent the progr...
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
Utility routines to read data from files. Kept as close as possible to the old parser because.
subroutine, public parser_release(parser)
releases the parser
subroutine, public parser_create(parser, file_name, unit_nr, para_env, end_section_label, separator_chars, comment_char, continuation_char, quote_char, section_char, parse_white_lines, initial_variables, apply_preprocessing)
Start a parser run. Initial variables allow to @SET stuff before opening the file.
Master's routines for global optimization.
subroutine, public glbopt_master_steer(this, report, cmd, should_stop)
Central steering routine of global optimizer.
subroutine, public glbopt_master_init(this, para_env, root_section, n_walkers, iw)
Initializes the master of the global optimizer.
subroutine, public glbopt_master_finalize(this)
Finalized the master of the global optimizer.
Define type storing the global information of a run. Keep the amount of stored data small....
collects all constants needed in input so that they can be used without circular dependencies
integer, parameter, public swarm_do_glbopt
objects that represent the structure of input sections and the data contained in an input section
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public default_string_length
Definition kinds.F:57
integer, parameter, public default_path_length
Definition kinds.F:58
Interface to the message passing library MPI.
Master's routines for the swarm-framework.
subroutine, public swarm_master_finalize(master)
Finalizes the swarm master.
subroutine, public swarm_master_steer(master, report, cmd)
Central steering routine of the swarm master.
subroutine, public swarm_master_init(master, para_env, globenv, root_section, n_workers)
Initializes the swarm master.
Swarm-message, a convenient data-container for with build-in serialization.
subroutine, public swarm_message_file_write(msg, unit)
Write a swarm-message to a given file / unit.
logical function, public swarm_message_equal(msg1, msg2)
Checks if two swarm-messages are equal.
subroutine, public swarm_message_free(msg)
Deallocates all entries contained in a swarm-message.
subroutine, public swarm_message_file_read(msg, parser, at_end)
Reads a swarm-message from a given file / unit.
type of a logger, at the moment it contains just a print level starting at which level it should be l...
contains the initially parsed file and the initial parallel environment
stores all the informations relevant to an mpi environment