(git:0de0cc2)
farming_methods.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
10  USE cp_files, ONLY: get_unit_number
12  cp_logger_type
16  USE farming_types, ONLY: farming_env_type,&
18  job_finished,&
19  job_pending,&
23  section_vals_type,&
25  USE message_passing, ONLY: mp_para_env_type
26 #include "./base/base_uses.f90"
27 
28  IMPLICIT NONE
29  PRIVATE
31 
32  ! must be negative in order to avoid confusion with job numbers
33  INTEGER, PARAMETER, PUBLIC :: do_nothing = -1, &
34  do_wait = -2, &
35  do_deadlock = -3
36 
37  CHARACTER(len=*), PARAMETER, PRIVATE :: modulen = 'farming_methods'
38 
39 CONTAINS
40 
41 ! **************************************************************************************************
42 !> \brief ...
43 !> \param farming_env ...
44 !> \param start ...
45 !> \param END ...
46 !> \param current ...
47 !> \param todo ...
48 ! **************************************************************************************************
49  SUBROUTINE get_next_job(farming_env, start, END, current, todo)
50  TYPE(farming_env_type), POINTER :: farming_env
51  INTEGER, INTENT(IN) :: start, end
52  INTEGER, INTENT(INOUT) :: current
53  INTEGER, INTENT(OUT) :: todo
54 
55  INTEGER :: icheck, idep, itry, ndep
56  LOGICAL :: dep_ok
57 
58  IF (farming_env%cycle) THEN
59  IF (current < start) THEN
60  current = start
61  ELSE
62  current = current + 1
63  END IF
64  IF (current > END) then
65  todo = do_nothing
66  ELSE
67  todo = modulo(current - 1, farming_env%njobs) + 1
68  END IF
69  ELSE
70  ! find a pending job
71  itry = start
72  todo = do_nothing
73  DO itry = start, END
74  IF (farming_env%job(itry)%status == job_pending) THEN
75 
76  ! see if all dependencies are OK
77  ndep = SIZE(farming_env%job(itry)%dependencies)
78  dep_ok = .true.
79  dep: DO idep = 1, ndep
80  DO icheck = start, END
81  IF (farming_env%job(icheck)%status .NE. job_finished) THEN
82  IF (farming_env%job(icheck)%id == farming_env%job(itry)%dependencies(idep)) THEN
83  dep_ok = .false.
84  EXIT dep
85  END IF
86  END IF
87  END DO
88  END DO dep
89 
90  ! if there are pending jobs, the minion can not be told to stop
91  ! at least wait if there are unresolved dependencies
92  IF (dep_ok) THEN
93  todo = itry
94  EXIT
95  ELSE
96  todo = do_wait
97  END IF
98  END IF
99  END DO
100  ! If we have to wait, but there are no running jobs we are deadlocked
101  ! which we signal
102  IF (todo == do_wait) THEN
103  dep_ok = .false.
104  DO itry = start, END
105  IF (farming_env%job(itry)%status .EQ. job_running) dep_ok = .true.
106  END DO
107  IF (.NOT. dep_ok) todo = do_deadlock
108  END IF
109  END IF
110  END SUBROUTINE get_next_job
111 
112 ! **************************************************************************************************
113 !> \brief ...
114 !> \param farming_env ...
115 !> \param root_section ...
116 !> \param para_env ...
117 ! **************************************************************************************************
118  SUBROUTINE farming_parse_input(farming_env, root_section, para_env)
119  TYPE(farming_env_type), POINTER :: farming_env
120  TYPE(section_vals_type), POINTER :: root_section
121  TYPE(mp_para_env_type), POINTER :: para_env
122 
123  CHARACTER(LEN=3) :: text
124  INTEGER :: i, iunit, n_rep_val, num_minions, &
125  output_unit, stat
126  INTEGER, DIMENSION(:), POINTER :: dependencies, i_vals
127  LOGICAL :: explicit, has_dep
128  TYPE(cp_logger_type), POINTER :: logger
129  TYPE(section_vals_type), POINTER :: farming_section, jobs_section, print_key
130 
131  NULLIFY (farming_section, jobs_section, print_key, logger, dependencies, i_vals)
132  logger => cp_get_default_logger()
133  farming_env%group_size_wish_set = .false.
134  farming_env%ngroup_wish_set = .false.
135  farming_section => section_vals_get_subs_vals(root_section, "FARMING")
136 
137  IF (ASSOCIATED(farming_env%group_partition)) THEN
138  DEALLOCATE (farming_env%group_partition)
139  END IF
140 
141  ! The following input order is used
142  ! 1) GROUP_PARTITION
143  ! 2) NGROUP
144  ! 3) GROUP_SIZE (default 8)
145  CALL section_vals_val_get(farming_section, "GROUP_PARTITION", &
146  n_rep_val=n_rep_val)
147  IF (n_rep_val > 0) THEN
148  CALL section_vals_val_get(farming_section, "GROUP_PARTITION", &
149  i_vals=i_vals)
150  ALLOCATE (farming_env%group_partition(0:SIZE(i_vals) - 1))
151  farming_env%group_partition(:) = i_vals
152  farming_env%ngroup_wish_set = .true.
153  farming_env%ngroup_wish = SIZE(i_vals)
154  ELSE
155  CALL section_vals_val_get(farming_section, "NGROUP", &
156  n_rep_val=n_rep_val)
157  IF (n_rep_val > 0) THEN
158  CALL section_vals_val_get(farming_section, "NGROUP", &
159  i_val=farming_env%ngroup_wish)
160  farming_env%ngroup_wish_set = .true.
161  ELSE
162  CALL section_vals_val_get(farming_section, "GROUP_SIZE", &
163  i_val=farming_env%group_size_wish)
164  farming_env%group_size_wish_set = .true.
165  END IF
166  END IF
167  CALL section_vals_val_get(farming_section, "STRIDE", &
168  i_val=farming_env%stride)
169 
170  CALL section_vals_val_get(farming_section, "RESTART_FILE_NAME", &
171  explicit=explicit)
172  IF (explicit) THEN
173  CALL section_vals_val_get(farming_section, "RESTART_FILE_NAME", &
174  c_val=farming_env%restart_file_name)
175  ELSE
176  print_key => section_vals_get_subs_vals(farming_section, "RESTART")
177  farming_env%restart_file_name = cp_print_key_generate_filename(logger, print_key, extension=".restart", &
178  my_local=.false.)
179  END IF
180 
181  CALL section_vals_val_get(farming_section, "DO_RESTART", &
182  l_val=farming_env%restart)
183  CALL section_vals_val_get(farming_section, "MAX_JOBS_PER_GROUP", &
184  i_val=farming_env%max_steps)
185  CALL section_vals_val_get(farming_section, "CYCLE", &
186  l_val=farming_env%cycle)
187  CALL section_vals_val_get(farming_section, "WAIT_TIME", &
188  r_val=farming_env%wait_time)
189 
190  CALL section_vals_val_get(farming_section, "CAPTAIN_MINION", &
191  l_val=farming_env%captain_minion)
192 
193  jobs_section => section_vals_get_subs_vals(farming_section, "JOB")
194  CALL section_vals_get(jobs_section, n_repetition=farming_env%njobs)
195 
196  ALLOCATE (farming_env%Job(farming_env%njobs))
197  CALL init_job_type(farming_env%job)
198 
199  has_dep = .false.
200  DO i = 1, farming_env%njobs
201  CALL section_vals_val_get(jobs_section, i_rep_section=i, &
202  keyword_name="DIRECTORY", c_val=farming_env%Job(i)%cwd)
203  CALL section_vals_val_get(jobs_section, i_rep_section=i, &
204  keyword_name="INPUT_FILE_NAME", c_val=farming_env%Job(i)%input)
205  CALL section_vals_val_get(jobs_section, i_rep_section=i, &
206  keyword_name="OUTPUT_FILE_NAME", c_val=farming_env%Job(i)%output)
207 
208  ! if job id is not specified the job id is the index
209  CALL section_vals_val_get(jobs_section, i_rep_section=i, &
210  keyword_name="JOB_ID", n_rep_val=n_rep_val)
211  IF (n_rep_val == 0) THEN
212  farming_env%Job(i)%id = i
213  ELSE
214  CALL section_vals_val_get(jobs_section, i_rep_section=i, &
215  keyword_name="JOB_ID", i_val=farming_env%Job(i)%id)
216  END IF
217 
218  ! get dependencies
219  CALL section_vals_val_get(jobs_section, i_rep_section=i, &
220  keyword_name="DEPENDENCIES", n_rep_val=n_rep_val)
221  IF (n_rep_val == 0) THEN
222  ALLOCATE (farming_env%Job(i)%dependencies(0))
223  ELSE
224  CALL section_vals_val_get(jobs_section, i_rep_section=i, &
225  keyword_name="DEPENDENCIES", i_vals=dependencies)
226  ALLOCATE (farming_env%Job(i)%dependencies(SIZE(dependencies, 1)))
227  farming_env%Job(i)%dependencies = dependencies
228  IF (SIZE(dependencies, 1) .NE. 0) has_dep = .true.
229  END IF
230  END DO
231 
232  IF (has_dep) THEN
233  cpassert(farming_env%captain_minion)
234  cpassert(.NOT. farming_env%cycle)
235  END IF
236 
237  output_unit = cp_print_key_unit_nr(logger, farming_section, "PROGRAM_RUN_INFO", &
238  extension=".log")
239 
240  ! Captain/Minion not supported
241  IF (para_env%num_pe == 1) THEN
242  farming_env%captain_minion = .false.
243  WRITE (output_unit, fmt="(T2,A)") "FARMING| Captain-Minion setup not supported for serial runs"
244  END IF
245  IF (farming_env%captain_minion) THEN
246  num_minions = para_env%num_pe - 1
247  ELSE
248  num_minions = para_env%num_pe
249  END IF
250 
251  IF (output_unit > 0) THEN
252  WRITE (output_unit, fmt="(T2,A,T71,I10)") "FARMING| Number of jobs found", farming_env%njobs
253  IF (farming_env%ngroup_wish_set) THEN
254  WRITE (output_unit, fmt="(T2,A,T71,I10)") "FARMING| Ngroup wish:", farming_env%ngroup_wish
255  IF (ASSOCIATED(farming_env%group_partition)) THEN
256  WRITE (output_unit, fmt="(T2,A)", advance="NO") "FARMING| User partition:"
257  DO i = 0, SIZE(farming_env%group_partition) - 1
258  IF (modulo(i, 4) == 0) WRITE (output_unit, *)
259  WRITE (output_unit, fmt='(I4)', advance="NO") farming_env%group_partition(i)
260  END DO
261  WRITE (output_unit, *)
262  IF (sum(farming_env%group_partition) .NE. num_minions) THEN
263  WRITE (output_unit, fmt="(T2,A,T61,I10,T71,I10)") &
264  "FARMING| WARNING : group partition CPUs not equal to the available number (ignoring Captain) ", &
265  num_minions, sum(farming_env%group_partition)
266  WRITE (output_unit, fmt="(T2,A)") "FARMING| partition data ignored" ! any better idea ??
267  DEALLOCATE (farming_env%group_partition)
268  END IF
269  END IF
270  END IF
271  IF (farming_env%group_size_wish_set) THEN
272  WRITE (output_unit, fmt="(T2,A,T71,I10)") "FARMING| Group size wish:", &
273  farming_env%group_size_wish
274  END IF
275  WRITE (output_unit, fmt="(T2,A,T71,I10)") "FARMING| Max steps :", farming_env%max_steps
276  IF (farming_env%cycle) THEN
277  text = "YES"
278  ELSE
279  text = " NO"
280  END IF
281  WRITE (output_unit, fmt="(T2,A,T78,A3)") "FARMING| Cyclic jobs execution:", text
282  IF (farming_env%restart) THEN
283  text = "YES"
284  ELSE
285  text = " NO"
286  END IF
287  WRITE (output_unit, fmt="(T2,A,T78,A3)") "FARMING| Restarting farm:", text
288  farming_env%restart_n = 1
289  IF (farming_env%restart) THEN
290  iunit = get_unit_number()
291  OPEN (unit=iunit, file=farming_env%restart_file_name, iostat=stat)
292  IF (stat == 0) THEN
293  READ (unit=iunit, fmt=*, iostat=stat) farming_env%restart_n
294  IF (stat /= 0) THEN
295  WRITE (output_unit, "(T2,A)") &
296  "FARMING| ---- WARNING ---- failed to read from ("//trim(farming_env%restart_file_name)//") starting at 1"
297  ELSE
298  WRITE (output_unit, "(T2,A)") &
299  "FARMING| restarting from ("//trim(farming_env%restart_file_name)//")"
300  WRITE (output_unit, "(T2,A,T71,I10)") &
301  "FARMING| restarting at ", farming_env%restart_n
302  END IF
303  ELSE
304  WRITE (output_unit, "(T2,A)") &
305  "FARMING| ---- WARNING ---- failed to open ("//trim(farming_env%restart_file_name)//"), starting at 1"
306  END IF
307  CLOSE (iunit, iostat=stat)
308  END IF
309 
310  CALL cp_print_key_finished_output(output_unit, logger, farming_section, &
311  "PROGRAM_RUN_INFO")
312  END IF
313  CALL para_env%bcast(farming_env%restart_n)
314 
315  END SUBROUTINE
316 
317 END MODULE farming_methods
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
Definition: grid_common.h:117
Utility routines to open and close files. Tracking of preconnections.
Definition: cp_files.F:16
integer function, public get_unit_number(file_name)
Returns the first logical unit that is not preconnected.
Definition: cp_files.F:237
various routines to log and control the output. The idea is that decisions about where to log should ...
type(cp_logger_type) function, pointer, public cp_get_default_logger()
returns the default logger
routines to handle the output, The idea is to remove the decision of wheter to output and what to out...
integer function, public cp_print_key_unit_nr(logger, basis_section, print_key_path, extension, middle_name, local, log_filename, ignore_should_output, file_form, file_position, file_action, file_status, do_backup, on_file, is_new_file, mpi_io, fout)
...
character(len=default_path_length) function, public cp_print_key_generate_filename(logger, print_key, middle_name, extension, my_local)
Utility function that returns a unit number to write the print key. Might open a file with a unique f...
subroutine, public cp_print_key_finished_output(unit_nr, logger, basis_section, print_key_path, local, ignore_should_output, on_file, mpi_io)
should be called after you finish working with a unit obtained with cp_print_key_unit_nr,...
subroutine, public farming_parse_input(farming_env, root_section, para_env)
...
subroutine, public get_next_job(farming_env, start, END, current, todo)
...
integer, parameter, public do_nothing
integer, parameter, public do_wait
integer, parameter, public do_deadlock
elemental subroutine, public init_job_type(job)
provide a default initialization
Definition: farming_types.F:90
integer, parameter, public job_finished
Definition: farming_types.F:20
integer, parameter, public job_running
Definition: farming_types.F:20
integer, parameter, public job_pending
Definition: farming_types.F:20
objects that represent the structure of input sections and the data contained in an input section
recursive type(section_vals_type) function, pointer, public section_vals_get_subs_vals(section_vals, subsection_name, i_rep_section, can_return_null)
returns the values of the requested subsection
subroutine, public section_vals_get(section_vals, ref_count, n_repetition, n_subs_vals_rep, section, explicit)
returns various attributes about the section_vals
subroutine, public section_vals_val_get(section_vals, keyword_name, i_rep_section, i_rep_val, n_rep_val, val, l_val, i_val, r_val, c_val, l_vals, i_vals, r_vals, c_vals, explicit)
returns the requested value
Interface to the message passing library MPI.