(git:b279b6b)
timings_report.F
Go to the documentation of this file.
1 !--------------------------------------------------------------------------------------------------!
2 ! CP2K: A general program to perform molecular dynamics simulations !
3 ! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4 ! !
5 ! SPDX-License-Identifier: GPL-2.0-or-later !
6 !--------------------------------------------------------------------------------------------------!
7 
8 ! **************************************************************************************************
9 !> \brief Timing routines for accounting
10 !> \par History
11 !> 02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
12 !> 11.2004 storable timer_envs (for f77 interface) [fawzi]
13 !> 10.2005 binary search to speed up lookup in timeset [fawzi]
14 !> 12.2012 Complete rewrite based on dictionaries. [ole]
15 !> 01.2014 Collect statistics from all MPI ranks. [ole]
16 !> \author JGH
17 ! **************************************************************************************************
19  USE callgraph, ONLY: callgraph_item_type,&
21  USE cp_files, ONLY: close_file,&
22  open_file
23  USE kinds, ONLY: default_string_length,&
24  dp,&
25  int_8
26  USE list, ONLY: list_destroy,&
27  list_get,&
28  list_init,&
29  list_isready,&
30  list_pop,&
31  list_push,&
32  list_size
33  USE list_routinereport, ONLY: list_routinereport_type
34  USE message_passing, ONLY: mp_para_env_type
35  USE routine_map, ONLY: routine_map_get,&
37  USE timings, ONLY: get_timer_env
38  USE timings_base_type, ONLY: call_stat_type,&
39  routine_report_type,&
40  routine_stat_type
41  USE timings_types, ONLY: timer_env_type
42  USE util, ONLY: sort
43 #include "../base/base_uses.f90"
44 
45  IMPLICIT NONE
46  PRIVATE
47 
48  INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
49 
51 
52 CONTAINS
53 
54 ! **************************************************************************************************
55 !> \brief Print accumulated information on timers
56 !> \param iw ...
57 !> \param r_timings ...
58 !> \param sort_by_self_time ...
59 !> \param cost_type ...
60 !> \param report_maxloc ...
61 !> \param para_env is needed to collect statistics from other nodes.
62 !> \par History
63 !> none
64 !> \author JGH
65 ! **************************************************************************************************
66  SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
67  INTEGER, INTENT(IN) :: iw
68  REAL(kind=dp), INTENT(IN) :: r_timings
69  LOGICAL, INTENT(IN) :: sort_by_self_time
70  INTEGER, INTENT(IN) :: cost_type
71  LOGICAL, INTENT(IN) :: report_maxloc
72  TYPE(mp_para_env_type), INTENT(IN) :: para_env
73 
74  TYPE(list_routinereport_type) :: reports
75  TYPE(routine_report_type), POINTER :: r_report
76 
77  CALL list_init(reports)
78  CALL collect_reports_from_ranks(reports, cost_type, para_env)
79 
80  IF (list_size(reports) > 0 .AND. iw > 0) &
81  CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
82 
83  ! deallocate reports
84  DO WHILE (list_size(reports) > 0)
85  r_report => list_pop(reports)
86  DEALLOCATE (r_report)
87  END DO
88  CALL list_destroy(reports)
89 
90  END SUBROUTINE timings_report_print
91 
92 ! **************************************************************************************************
93 !> \brief Collects the timing or energy reports from all MPI ranks.
94 !> \param reports ...
95 !> \param cost_type ...
96 !> \param para_env ...
97 !> \author Ole Schuett
98 ! **************************************************************************************************
99  SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
100  TYPE(list_routinereport_type), INTENT(INOUT) :: reports
101  INTEGER, INTENT(IN) :: cost_type
102  TYPE(mp_para_env_type), INTENT(IN) :: para_env
103 
104  CHARACTER(LEN=default_string_length) :: routinen
105  INTEGER :: local_routine_id, sending_rank
106  INTEGER, ALLOCATABLE, DIMENSION(:) :: collected
107  REAL(kind=dp) :: foobar
108  REAL(kind=dp), DIMENSION(2) :: dbuf
109  TYPE(routine_report_type), POINTER :: r_report
110  TYPE(routine_stat_type), POINTER :: r_stat
111  TYPE(timer_env_type), POINTER :: timer_env
112 
113  NULLIFY (r_stat, r_report, timer_env)
114  IF (.NOT. list_isready(reports)) &
115  cpabort("BUG")
116 
117  timer_env => get_timer_env()
118 
119  ! make sure all functions have been called so that list_size(timer_env%routine_stats)
120  ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
121  ! this hack makes sure they are called before
122  routinen = ""
123  CALL para_env%bcast(routinen, 0)
124  sending_rank = 0
125  CALL para_env%max(sending_rank)
126  CALL para_env%sum(sending_rank)
127  foobar = 0.0_dp
128  CALL para_env%max(foobar)
129  dbuf = 0.0_dp
130  CALL para_env%maxloc(dbuf)
131  CALL para_env%sum(foobar)
132  ! end hack
133 
134  ! Array collected is used as a bit field.
135  ! It's of type integer in order to use the convenient MINLOC routine.
136  ALLOCATE (collected(list_size(timer_env%routine_stats)))
137  collected(:) = 0
138 
139  DO
140  ! does any rank have uncollected stats?
141  sending_rank = -1
142  IF (.NOT. all(collected == 1)) sending_rank = para_env%mepos
143  CALL para_env%max(sending_rank)
144  IF (sending_rank < 0) EXIT ! every rank got all routines collected
145  IF (sending_rank == para_env%mepos) THEN
146  local_routine_id = minloc(collected, dim=1)
147  r_stat => list_get(timer_env%routine_stats, local_routine_id)
148  routinen = r_stat%routineN
149  END IF
150  CALL para_env%bcast(routinen, sending_rank)
151 
152  ! Create new report for routineN
153  ALLOCATE (r_report)
154  CALL list_push(reports, r_report)
155  r_report%routineN = routinen
156 
157  ! If routineN was called on local node, add local stats
158  IF (routine_map_haskey(timer_env%routine_names, routinen)) THEN
159  local_routine_id = routine_map_get(timer_env%routine_names, routinen)
160  collected(local_routine_id) = 1
161  r_stat => list_get(timer_env%routine_stats, local_routine_id)
162  r_report%max_total_calls = r_stat%total_calls
163  r_report%sum_total_calls = r_stat%total_calls
164  r_report%sum_stackdepth = r_stat%stackdepth_accu
165  SELECT CASE (cost_type)
166  CASE (cost_type_energy)
167  r_report%max_icost = r_stat%incl_energy_accu
168  r_report%sum_icost = r_stat%incl_energy_accu
169  r_report%max_ecost = r_stat%excl_energy_accu
170  r_report%sum_ecost = r_stat%excl_energy_accu
171  CASE (cost_type_time)
172  r_report%max_icost = r_stat%incl_walltime_accu
173  r_report%sum_icost = r_stat%incl_walltime_accu
174  r_report%max_ecost = r_stat%excl_walltime_accu
175  r_report%sum_ecost = r_stat%excl_walltime_accu
176  CASE DEFAULT
177  cpabort("BUG")
178  END SELECT
179  END IF
180 
181  ! collect stats of routineN via MPI
182  CALL para_env%max(r_report%max_total_calls)
183  CALL para_env%sum(r_report%sum_total_calls)
184  CALL para_env%sum(r_report%sum_stackdepth)
185 
186  ! get value and rank of the maximum inclusive cost
187  dbuf = (/r_report%max_icost, real(para_env%mepos, kind=dp)/)
188  CALL para_env%maxloc(dbuf)
189  r_report%max_icost = dbuf(1)
190  r_report%max_irank = int(dbuf(2))
191 
192  CALL para_env%sum(r_report%sum_icost)
193 
194  ! get value and rank of the maximum exclusive cost
195  dbuf = (/r_report%max_ecost, real(para_env%mepos, kind=dp)/)
196  CALL para_env%maxloc(dbuf)
197  r_report%max_ecost = dbuf(1)
198  r_report%max_erank = int(dbuf(2))
199 
200  CALL para_env%sum(r_report%sum_ecost)
201  END DO
202 
203  END SUBROUTINE collect_reports_from_ranks
204 
205 ! **************************************************************************************************
206 !> \brief Print the collected reports
207 !> \param reports ...
208 !> \param iw ...
209 !> \param threshold ...
210 !> \param sort_by_exclusiv_cost ...
211 !> \param cost_type ...
212 !> \param report_maxloc ...
213 !> \param para_env ...
214 !> \par History
215 !> 01.2014 Refactored (Ole Schuett)
216 !> \author JGH
217 ! **************************************************************************************************
218  SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
219  TYPE(list_routinereport_type), INTENT(IN) :: reports
220  INTEGER, INTENT(IN) :: iw
221  REAL(kind=dp), INTENT(IN) :: threshold
222  LOGICAL, INTENT(IN) :: sort_by_exclusiv_cost
223  INTEGER, INTENT(IN) :: cost_type
224  LOGICAL, INTENT(IN) :: report_maxloc
225  TYPE(mp_para_env_type), INTENT(IN) :: para_env
226 
227  CHARACTER(LEN=4) :: label
228  CHARACTER(LEN=default_string_length) :: fmt, title
229  INTEGER :: decimals, i, j, num_routines
230  INTEGER, ALLOCATABLE, DIMENSION(:) :: indices
231  REAL(kind=dp) :: asd, maxcost, mincost
232  REAL(kind=dp), ALLOCATABLE, DIMENSION(:) :: max_costs
233  TYPE(routine_report_type), POINTER :: r_report_i, r_report_j
234 
235  NULLIFY (r_report_i, r_report_j)
236  IF (.NOT. list_isready(reports)) &
237  cpabort("BUG")
238 
239  ! are we printing timing or energy ?
240  SELECT CASE (cost_type)
241  CASE (cost_type_energy)
242  title = "E N E R G Y"
243  label = "ENER"
244  CASE (cost_type_time)
245  title = "T I M I N G"
246  label = "TIME"
247  CASE DEFAULT
248  cpabort("BUG")
249  END SELECT
250 
251  ! write banner
252  WRITE (unit=iw, fmt="(/,T2,A)") repeat("-", 79)
253  WRITE (unit=iw, fmt="(T2,A,T80,A)") "-", "-"
254  WRITE (unit=iw, fmt="(T2,A,T35,A,T80,A)") "-", trim(title), "-"
255  WRITE (unit=iw, fmt="(T2,A,T80,A)") "-", "-"
256  WRITE (unit=iw, fmt="(T2,A)") repeat("-", 79)
257  IF (report_maxloc) THEN
258  WRITE (unit=iw, fmt="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
259  "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
260  ELSE
261  WRITE (unit=iw, fmt="(T2,A,T35,A,T41,A,T45,2A18)") &
262  "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
263  END IF
264 
265  WRITE (unit=iw, fmt="(T33,A)") &
266  "MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM"
267 
268  ! sort statistics
269  num_routines = list_size(reports)
270  ALLOCATE (max_costs(num_routines))
271  DO i = 1, num_routines
272  r_report_i => list_get(reports, i)
273  IF (sort_by_exclusiv_cost) THEN
274  max_costs(i) = r_report_i%max_ecost
275  ELSE
276  max_costs(i) = r_report_i%max_icost
277  END IF
278  END DO
279  ALLOCATE (indices(num_routines))
280  CALL sort(max_costs, num_routines, indices)
281 
282  maxcost = maxval(max_costs)
283  mincost = maxcost*threshold
284 
285  ! adjust fmt dynamically based on the max walltime.
286  ! few clocks have more than 3 digits resolution, so stop there
287  decimals = 3
288  IF (maxcost >= 10000) decimals = 2
289  IF (maxcost >= 100000) decimals = 1
290  IF (maxcost >= 1000000) decimals = 0
291  IF (report_maxloc) THEN
292  WRITE (unit=fmt, fmt="(A,I0,A)") &
293  "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
294  ELSE
295  WRITE (unit=fmt, fmt="(A,I0,A)") &
296  "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
297  END IF
298 
299  !write output
300  DO i = num_routines, 1, -1
301  IF (max_costs(i) >= mincost) THEN
302  j = indices(i)
303  r_report_j => list_get(reports, j)
304  ! average stack depth
305  asd = real(r_report_j%sum_stackdepth, kind=dp)/ &
306  REAL(max(1_int_8, r_report_j%sum_total_calls), kind=dp)
307  IF (report_maxloc) THEN
308  WRITE (unit=iw, fmt=fmt) &
309  adjustl(r_report_j%routineN(1:31)), &
310  r_report_j%max_total_calls, &
311  asd, &
312  r_report_j%sum_ecost/para_env%num_pe, &
313  r_report_j%max_ecost, &
314  r_report_j%sum_icost/para_env%num_pe, &
315  r_report_j%max_icost, &
316  r_report_j%max_erank
317  ELSE
318  WRITE (unit=iw, fmt=fmt) &
319  adjustl(r_report_j%routineN(1:31)), &
320  r_report_j%max_total_calls, &
321  asd, &
322  r_report_j%sum_ecost/para_env%num_pe, &
323  r_report_j%max_ecost, &
324  r_report_j%sum_icost/para_env%num_pe, &
325  r_report_j%max_icost
326  END IF
327  END IF
328  END DO
329  WRITE (unit=iw, fmt="(T2,A,/)") repeat("-", 79)
330 
331  END SUBROUTINE print_reports
332 
333 ! **************************************************************************************************
334 !> \brief Write accumulated callgraph information as cachegrind-file.
335 !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
336 !> \param filename ...
337 !> \par History
338 !> 12.2012 initial version[ole]
339 !> \author Ole Schuett
340 ! **************************************************************************************************
341  SUBROUTINE timings_report_callgraph(filename)
342  CHARACTER(len=*), INTENT(in) :: filename
343 
344  INTEGER, PARAMETER :: e = 1000, t = 100000
345 
346  INTEGER :: i, unit
347  TYPE(call_stat_type), POINTER :: c_stat
348  TYPE(callgraph_item_type), DIMENSION(:), POINTER :: ct_items
349  TYPE(routine_stat_type), POINTER :: r_stat
350  TYPE(timer_env_type), POINTER :: timer_env
351 
352  CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
353  file_form="FORMATTED", unit_number=unit)
354  timer_env => get_timer_env()
355 
356  ! use outermost routine as total runtime
357  r_stat => list_get(timer_env%routine_stats, 1)
358  WRITE (unit=unit, fmt="(A)") "events: Walltime Energy"
359  WRITE (unit=unit, fmt="(A,I0,1X,I0)") "summary: ", &
360  int(t*r_stat%incl_walltime_accu, kind=int_8), &
361  int(e*r_stat%incl_energy_accu, kind=int_8)
362 
363  DO i = 1, list_size(timer_env%routine_stats)
364  r_stat => list_get(timer_env%routine_stats, i)
365  WRITE (unit=unit, fmt="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
366  WRITE (unit=unit, fmt="(A,I0,1X,I0)") "1 ", &
367  int(t*r_stat%excl_walltime_accu, kind=int_8), &
368  int(e*r_stat%excl_energy_accu, kind=int_8)
369  END DO
370 
371  ct_items => callgraph_items(timer_env%callgraph)
372  DO i = 1, SIZE(ct_items)
373  c_stat => ct_items(i)%value
374  WRITE (unit=unit, fmt="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
375  WRITE (unit=unit, fmt="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
376  WRITE (unit=unit, fmt="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
377  WRITE (unit=unit, fmt="(A,I0,1X,I0)") "1 ", &
378  int(t*c_stat%incl_walltime_accu, kind=int_8), &
379  int(e*c_stat%incl_energy_accu, kind=int_8)
380  END DO
381  DEALLOCATE (ct_items)
382 
383  CALL close_file(unit_number=unit, file_status="KEEP")
384 
385  END SUBROUTINE timings_report_callgraph
386 END MODULE timings_report
387 
type(callgraph_item_type) function, dimension(:), pointer, public callgraph_items(hash_map)
Returns a pointer to an array of all key/value-items stored in the hash map. Caution: The caller is r...
Definition: callgraph.F:383
Utility routines to open and close files. Tracking of preconnections.
Definition: cp_files.F:16
subroutine, public open_file(file_name, file_status, file_form, file_action, file_position, file_pad, unit_number, debug, skip_get_unit_number, file_access)
Opens the requested file using a free unit number.
Definition: cp_files.F:308
subroutine, public close_file(unit_number, file_status, keep_preconnection)
Close an open file given by its logical unit number. Optionally, keep the file and unit preconnected.
Definition: cp_files.F:119
Defines the basic variable types.
Definition: kinds.F:23
integer, parameter, public int_8
Definition: kinds.F:54
integer, parameter, public dp
Definition: kinds.F:34
integer, parameter, public default_string_length
Definition: kinds.F:57
An array-based list which grows on demand. When the internal array is full, a new array of twice the ...
An array-based list which grows on demand. When the internal array is full, a new array of twice the ...
Definition: list.F:24
Interface to the message passing library MPI.
integer(kind=int_4) function, public routine_map_get(hash_map, key, default_value)
Gets a value for a given key from the hash map. If the key is not found the default_value will be ret...
Definition: routine_map.F:254
logical function, public routine_map_haskey(hash_map, key)
Checks whether a given key is currently stored in the hash_map.
Definition: routine_map.F:347
Types used by timings.F and timings_report.F The types in this module are used within dict or list,...
Timing routines for accounting.
integer, parameter, public cost_type_energy
subroutine, public timings_report_callgraph(filename)
Write accumulated callgraph information as cachegrind-file. http://kcachegrind.sourceforge....
integer, parameter, public cost_type_time
subroutine, public timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
Print accumulated information on timers.
Types used by timings.F and timings_report.F Due to the fortran restriction on cicular module-depende...
Definition: timings_types.F:17
Timing routines for accounting.
Definition: timings.F:17
type(timer_env_type) function, pointer, public get_timer_env()
returns the current timer env from the stack
Definition: timings.F:147
All kind of helpful little routines.
Definition: util.F:14