(git:374b731)
Loading...
Searching...
No Matches
dbm_tests.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2024 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: BSD-3-Clause !
6!--------------------------------------------------------------------------------------------------!
7
9 USE omp_lib, ONLY: omp_get_wtime
10 USE dbm_api, ONLY: &
15 USE kinds, ONLY: dp,&
16 int_8
17 USE machine, ONLY: m_flush
18 USE message_passing, ONLY: mp_cart_type,&
21#include "../base/base_uses.f90"
22
23 IMPLICIT NONE
24
25 PRIVATE
26
28
29 INTEGER, PRIVATE, SAVE :: randmat_counter = 0
30
31CONTAINS
32
33! **************************************************************************************************
34!> \brief Tests the DBM library.
35!> \param mp_group MPI communicator
36!> \param io_unit Unit to write to, if not negative
37!> \param matrix_sizes Size of matrices to test
38!> \param trs Transposes of the two matrices
39!> \param bs_m Block sizes of the 1. dimension
40!> \param bs_n Block sizes of the 2. dimension
41!> \param bs_k Block sizes of the 3. dimension
42!> \param sparsities Sparsities of matrices to create
43!> \param alpha Alpha value to use in multiply
44!> \param beta Beta value to use in multiply
45!> \param n_loops Number of repetition for each multiplication
46!> \param eps Epsilon value for filtering
47!> \param retain_sparsity Retain the result matrix's sparsity
48!> \param always_checksum Checksum after each multiplication
49!> \author Ole Schuett
50! **************************************************************************************************
51 SUBROUTINE dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, &
52 bs_m, bs_n, bs_k, sparsities, alpha, beta, &
53 n_loops, eps, retain_sparsity, always_checksum)
54
55 CLASS(mp_comm_type), INTENT(IN) :: mp_group
56 INTEGER, INTENT(IN) :: io_unit
57 INTEGER, DIMENSION(:), INTENT(in) :: matrix_sizes
58 LOGICAL, DIMENSION(2), INTENT(in) :: trs
59 INTEGER, DIMENSION(:), POINTER :: bs_m, bs_n, bs_k
60 REAL(kind=dp), DIMENSION(3), INTENT(in) :: sparsities
61 REAL(kind=dp), INTENT(in) :: alpha, beta
62 INTEGER, INTENT(IN) :: n_loops
63 REAL(kind=dp), INTENT(in) :: eps
64 LOGICAL, INTENT(in) :: retain_sparsity, always_checksum
65
66 CHARACTER(len=*), PARAMETER :: routinen = 'dbm_run_tests'
67
68 INTEGER :: bmax, bmin, handle
69 INTEGER, CONTIGUOUS, DIMENSION(:), POINTER :: col_dist_a, col_dist_b, col_dist_c, &
70 row_dist_a, row_dist_b, row_dist_c, &
71 sizes_k, sizes_m, sizes_n
72 INTEGER, DIMENSION(2) :: npdims
73 TYPE(dbm_distribution_obj) :: dist_a, dist_b, dist_c
74 TYPE(dbm_type), TARGET :: matrix_a, matrix_b, matrix_c
75 TYPE(mp_cart_type) :: cart_group
76
77 CALL timeset(routinen, handle)
78
79 ! Create MPI processor grid.
80 npdims(:) = 0
81 CALL mp_dims_create(mp_group%num_pe, npdims)
82 CALL cart_group%create(mp_group, 2, npdims)
83 npdims = cart_group%num_pe_cart
84
85 ! Initialize random number generator.
86 randmat_counter = 12341313
87
88 ! Create the row/column block sizes.
89 NULLIFY (sizes_k, sizes_m, sizes_n)
90 IF (ASSOCIATED(bs_m)) THEN
91 bmin = minval(bs_m(2::2))
92 bmax = maxval(bs_m(2::2))
93 CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), bs_m)
94 ELSE
95 CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), (/1, 13, 2, 5/))
96 bmin = 5; bmax = 13
97 END IF
98 IF (ASSOCIATED(bs_n)) THEN
99 bmin = min(bmin, minval(bs_n(2::2)))
100 bmax = max(bmax, maxval(bs_n(2::2)))
101 CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), bs_n)
102 ELSE
103 CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), (/1, 13, 2, 5/))
104 bmin = min(bmin, 5); bmax = max(bmax, 13)
105 END IF
106 IF (ASSOCIATED(bs_k)) THEN
107 bmin = min(bmin, minval(bs_k(2::2)))
108 bmax = max(bmax, maxval(bs_k(2::2)))
109 CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), bs_k)
110 ELSE
111 CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), (/1, 13, 2, 5/))
112 bmin = min(bmin, 5); bmax = max(bmax, 13)
113 END IF
114
115 ! Create Matrix C
116 CALL generate_1d_dist(row_dist_c, SIZE(sizes_m), npdims(1), sizes_m)
117 CALL generate_1d_dist(col_dist_c, SIZE(sizes_n), npdims(2), sizes_n)
118 CALL dbm_distribution_new(dist_c, cart_group, row_dist_c, col_dist_c)
119 CALL dbm_create(matrix_c, "Matrix C", dist_c, sizes_m, sizes_n)
120 CALL fill_matrix(matrix_c, sparsity=sparsities(3), group=cart_group)
121 CALL dbm_distribution_release(dist_c)
122
123 ! Create Matrix A
124 IF (trs(1)) THEN
125 CALL generate_1d_dist(row_dist_a, SIZE(sizes_k), npdims(1), sizes_k)
126 CALL generate_1d_dist(col_dist_a, SIZE(sizes_m), npdims(2), sizes_m)
127 CALL dbm_distribution_new(dist_a, cart_group, row_dist_a, col_dist_a)
128 CALL dbm_create(matrix_a, "Matrix A", dist_a, sizes_k, sizes_m)
129 CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
130 DEALLOCATE (row_dist_a, col_dist_a)
131 ELSE
132 CALL generate_1d_dist(col_dist_a, SIZE(sizes_k), npdims(2), sizes_k)
133 CALL dbm_distribution_new(dist_a, cart_group, row_dist_c, col_dist_a)
134 CALL dbm_create(matrix_a, "Matrix A", dist_a, sizes_m, sizes_k)
135 CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
136 DEALLOCATE (col_dist_a)
137 END IF
138 CALL dbm_distribution_release(dist_a)
139
140 ! Create Matrix B
141 IF (trs(2)) THEN
142 CALL generate_1d_dist(row_dist_b, SIZE(sizes_n), npdims(1), sizes_n)
143 CALL generate_1d_dist(col_dist_b, SIZE(sizes_k), npdims(2), sizes_k)
144 CALL dbm_distribution_new(dist_b, cart_group, row_dist_b, col_dist_b)
145 CALL dbm_create(matrix_b, "Matrix B", dist_b, sizes_n, sizes_k)
146 CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
147 DEALLOCATE (row_dist_b, col_dist_b)
148 ELSE
149 CALL generate_1d_dist(row_dist_b, SIZE(sizes_k), npdims(1), sizes_k)
150 CALL dbm_distribution_new(dist_b, cart_group, row_dist_b, col_dist_c)
151 CALL dbm_create(matrix_b, "Matrix B", dist_b, sizes_k, sizes_n)
152 CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
153 DEALLOCATE (row_dist_b)
154 END IF
155 CALL dbm_distribution_release(dist_b)
156 DEALLOCATE (row_dist_c, col_dist_c, sizes_m, sizes_n, sizes_k)
157
158 ! Prepare test parameters
159 IF (io_unit > 0) THEN
160 WRITE (io_unit, '(A,3(1X,I6),1X,A,2(1X,I5),1X,A,2(1X,L1))') &
161 "Testing with sizes", matrix_sizes(1:3), &
162 "min/max block sizes", bmin, bmax, "transposed?", trs(1:2)
163 END IF
164
165 CALL run_multiply_test(matrix_a, matrix_b, matrix_c, &
166 transa=trs(1), transb=trs(2), &
167 alpha=alpha, beta=beta, &
168 n_loops=n_loops, &
169 eps=eps, &
170 group=cart_group, &
171 io_unit=io_unit, &
172 always_checksum=always_checksum, &
173 retain_sparsity=retain_sparsity)
174
175 CALL dbm_release(matrix_a)
176 CALL dbm_release(matrix_b)
177 CALL dbm_release(matrix_c)
178 CALL cart_group%free()
179
180 CALL timestop(handle)
181 END SUBROUTINE dbm_run_tests
182
183! **************************************************************************************************
184!> \brief Runs the multiplication test.
185!> \param matrix_a ...
186!> \param matrix_b ...
187!> \param matrix_c ...
188!> \param transa ...
189!> \param transb ...
190!> \param alpha ...
191!> \param beta ...
192!> \param retain_sparsity ...
193!> \param n_loops ...
194!> \param eps ...
195!> \param group ...
196!> \param io_unit ...
197!> \param always_checksum ...
198!> \author Ole Schuett
199! **************************************************************************************************
200 SUBROUTINE run_multiply_test(matrix_a, matrix_b, matrix_c, transa, transb, alpha, beta, &
201 retain_sparsity, n_loops, eps, group, io_unit, always_checksum)
202 TYPE(dbm_type), INTENT(in) :: matrix_a, matrix_b
203 TYPE(dbm_type), INTENT(inout) :: matrix_c
204 LOGICAL, INTENT(in) :: transa, transb
205 REAL(kind=dp), INTENT(in) :: alpha, beta
206 LOGICAL, INTENT(in) :: retain_sparsity
207 INTEGER, INTENT(IN) :: n_loops
208 REAL(kind=dp), INTENT(in) :: eps
209
210 CLASS(mp_comm_type), INTENT(IN) :: group
211 INTEGER, INTENT(IN) :: io_unit
212 LOGICAL, INTENT(in) :: always_checksum
213
214 CHARACTER(len=*), PARAMETER :: routinen = 'run_multiply_test'
215
216 INTEGER :: handle, loop_iter
217 INTEGER(kind=int_8) :: flop
218 REAL(kind=dp) :: cs, duration, flops_all, time_start
219 TYPE(dbm_type) :: matrix_c_orig
220
221 CALL timeset(routinen, handle)
222
223 CALL dbm_create_from_template(matrix_c_orig, "Original Matrix C", matrix_c)
224 CALL dbm_copy(matrix_c_orig, matrix_c)
225
226 associate(numnodes => group%num_pe)
227 DO loop_iter = 1, n_loops
228 CALL group%sync()
229 time_start = omp_get_wtime()
230 IF (eps < -0.0_dp) THEN
231 CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
232 retain_sparsity=retain_sparsity, flop=flop)
233 ELSE
234 CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
235 retain_sparsity=retain_sparsity, flop=flop, filter_eps=eps)
236 END IF
237 duration = omp_get_wtime() - time_start
238
239 CALL group%max(duration)
240 CALL group%sum(flop)
241 duration = max(duration, epsilon(duration)) ! avoid division by zero
242 flops_all = real(flop, kind=dp)/duration/numnodes/(1024*1024)
243 IF (io_unit .GT. 0) THEN
244 WRITE (io_unit, '(A,I5,A,I5,A,F12.3,A,I9,A)') &
245 " loop ", loop_iter, " with ", numnodes, " MPI ranks: using ", &
246 duration, "s ", int(flops_all), " Mflops/rank"
247 CALL m_flush(io_unit)
248 END IF
249
250 IF (loop_iter .EQ. n_loops .OR. always_checksum) THEN
251 cs = dbm_checksum(matrix_c)
252 IF (io_unit > 0) THEN
253 WRITE (io_unit, *) "Final checksums", cs
254 END IF
255 END IF
256
257 CALL dbm_copy(matrix_c, matrix_c_orig)
258 END DO
259 END associate
260
261 CALL dbm_release(matrix_c_orig)
262 CALL timestop(handle)
263 END SUBROUTINE run_multiply_test
264
265! **************************************************************************************************
266!> \brief Fills give matrix with random blocks.
267!> \param matrix ...
268!> \param sparsity ...
269!> \param group ...
270! **************************************************************************************************
271 SUBROUTINE fill_matrix(matrix, sparsity, group)
272 TYPE(dbm_type), INTENT(inout) :: matrix
273 REAL(kind=dp), INTENT(in) :: sparsity
274
275 CLASS(mp_comm_type), INTENT(IN) :: group
276
277 CHARACTER(len=*), PARAMETER :: routinen = 'fill_matrix'
278
279 INTEGER :: block_node, col, handle, ncol, &
280 nrow, row
281 INTEGER(KIND=int_8) :: counter, ele, increment, nmax
282 INTEGER, DIMENSION(4) :: iseed, jseed
283 INTEGER, DIMENSION(:), POINTER :: col_block_sizes, row_block_sizes
284 REAL(kind=dp) :: my_sparsity
285 REAL(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: block
286 REAL(kind=dp), DIMENSION(1) :: value
287
288 CALL timeset(routinen, handle)
289
290 ! Check that the counter was initialised (or has not overflowed)
291 cpassert(randmat_counter .NE. 0)
292 ! the counter goes into the seed. Every new call gives a new random matrix
293 randmat_counter = randmat_counter + 1
294
295 IF (sparsity .GT. 1) THEN
296 my_sparsity = sparsity/100.0
297 ELSE
298 my_sparsity = sparsity
299 END IF
300
301 row_block_sizes => dbm_get_row_block_sizes(matrix)
302 col_block_sizes => dbm_get_col_block_sizes(matrix)
303 nrow = SIZE(row_block_sizes)
304 ncol = SIZE(col_block_sizes)
305 nmax = int(nrow, kind=int_8)*int(ncol, kind=int_8)
306 ele = -1
307 counter = 0
308 jseed = generate_larnv_seed(7, 42, 3, 42, randmat_counter)
309
310 associate(mynode => group%mepos)
311 DO
312 ! find the next block to add, this is given by a geometrically distributed variable
313 ! we number the blocks of the matrix and jump to the next one
314 CALL dlarnv(1, jseed, 1, value)
315 IF (my_sparsity > 0) THEN
316 increment = 1 + floor(log(value(1))/log(my_sparsity), kind=int_8)
317 ELSE
318 increment = 1
319 END IF
320 ele = ele + increment
321 IF (ele >= nmax) EXIT
322 counter = counter + 1
323 row = int(ele/ncol) + 1
324 col = int(mod(ele, int(ncol, kind=kind(ele)))) + 1
325
326 ! Only deal with the local blocks.
327 CALL dbm_get_stored_coordinates(matrix, row, col, block_node)
328 IF (block_node == mynode) THEN
329 ! fill based on a block based seed, makes this the same values in parallel
330 iseed = generate_larnv_seed(row, nrow, col, ncol, randmat_counter)
331 ALLOCATE (block(row_block_sizes(row), col_block_sizes(col)))
332 CALL dlarnv(1, iseed, SIZE(block), block)
333 CALL dbm_put_block(matrix, row, col, block)
334 DEALLOCATE (block)
335 END IF
336 END DO
337 END associate
338
339 CALL timestop(handle)
340 END SUBROUTINE fill_matrix
341
342! **************************************************************************************************
343!> \brief Assigns given elements to bins. Uses given element_sizes for load balancing.
344!> \param bin_dist Distribution of elements to bins
345!> \param nelements Number of elements
346!> \param nbins Number of bins
347!> \param element_sizes sizes of elements
348!> \author Ole Schuett
349! **************************************************************************************************
350 SUBROUTINE generate_1d_dist(bin_dist, nelements, nbins, element_sizes)
351 INTEGER, DIMENSION(:), INTENT(OUT), POINTER :: bin_dist
352 INTEGER, INTENT(IN) :: nelements, nbins
353 INTEGER, DIMENSION(:), INTENT(IN) :: element_sizes
354
355 INTEGER :: bin, i
356 INTEGER, DIMENSION(nbins) :: bin_counts
357
358 cpassert(SIZE(element_sizes) == nelements)
359 ALLOCATE (bin_dist(nelements))
360
361 bin_counts(:) = [(0, bin=0, nbins - 1)]
362 DO i = 1, nelements
363 bin = minloc(bin_counts, dim=1) ! greedy algorithm
364 bin_dist(i) = bin - 1
365 bin_counts(bin) = bin_counts(bin) + element_sizes(i)
366 END DO
367 END SUBROUTINE generate_1d_dist
368
369! **************************************************************************************************
370!> \brief Generates a block_sizes by "randomly" selecting from size_mix.
371!> \param block_sizes ...
372!> \param size_sum ...
373!> \param size_mix ...
374!> \author Ole Schuett
375! **************************************************************************************************
376 SUBROUTINE generate_mixed_block_sizes(block_sizes, size_sum, size_mix)
377 INTEGER, DIMENSION(:), INTENT(inout), POINTER :: block_sizes
378 INTEGER, INTENT(in) :: size_sum
379 INTEGER, DIMENSION(:), INTENT(in) :: size_mix
380
381 INTEGER :: block_size, current_sum, ipass, nblocks, &
382 nsize_mix, selector
383 INTEGER, ALLOCATABLE, DIMENSION(:, :) :: mixer
384
385 cpassert(.NOT. ASSOCIATED(block_sizes))
386 nsize_mix = SIZE(size_mix)/2
387 ALLOCATE (mixer(3, nsize_mix))
388
389 ! 1st pass to compute nblocks and allocate block_sizes, 2nd pass to fill block_sizes.
390 DO ipass = 1, 2
391 mixer(1, :) = size_mix(1:nsize_mix*2 - 1:2)
392 mixer(2, :) = size_mix(2:nsize_mix*2:2)
393 mixer(3, :) = 1
394 selector = 1
395 nblocks = 0
396 current_sum = 0
397 DO WHILE (current_sum < size_sum)
398 nblocks = nblocks + 1
399 block_size = min(mixer(2, selector), size_sum - current_sum)
400 IF (ipass == 2) THEN
401 block_sizes(nblocks) = block_size
402 END IF
403 current_sum = current_sum + block_size
404 mixer(3, selector) = mixer(3, selector) + 1
405 IF (mixer(3, selector) > mixer(1, selector)) THEN
406 mixer(3, selector) = 1
407 selector = mod(selector, nsize_mix) + 1
408 END IF
409 END DO
410 IF (ipass == 1) THEN
411 ALLOCATE (block_sizes(nblocks))
412 END IF
413 END DO
414
415 current_sum = sum(block_sizes)
416 cpassert(current_sum == size_sum)
417 END SUBROUTINE generate_mixed_block_sizes
418
419! **************************************************************************************************
420!> \brief Generate a seed respecting the lapack constraints,
421!> - values between 0..4095 (2**12-1)
422!> - iseed(4) odd
423!> also try to avoid iseed that are zero.
424!> Have but with a unique 2D mapping (irow,icol), and a 'mini-seed' ival
425!>
426!> \param irow 1..nrow
427!> \param nrow ...
428!> \param icol 1..ncol
429!> \param ncol ...
430!> \param ival mini-seed
431!> \return a lapack compatible seed
432!> \author Patrick Seewald
433! **************************************************************************************************
434 FUNCTION generate_larnv_seed(irow, nrow, icol, ncol, ival) RESULT(iseed)
435
436 INTEGER, INTENT(IN) :: irow, nrow, icol, ncol, ival
437 INTEGER :: iseed(4)
438
439 INTEGER(KIND=int_8) :: map
440
441 map = ((irow - 1 + icol*int(nrow, int_8))*(1 + modulo(ival, 2**16)))*2 + 1 + 0*ncol ! ncol used
442 iseed(4) = int(modulo(map, 2_int_8**12)); map = map/2_int_8**12; ! keep odd
443 iseed(3) = int(modulo(ieor(map, 3541_int_8), 2_int_8**12)); map = map/2_int_8**12
444 iseed(2) = int(modulo(ieor(map, 1153_int_8), 2_int_8**12)); map = map/2_int_8**12
445 iseed(1) = int(modulo(ieor(map, 2029_int_8), 2_int_8**12)); map = map/2_int_8**12
446 END FUNCTION generate_larnv_seed
447
448END MODULE dbm_tests
void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col, const bool summation, const double *block)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
Definition dbm_matrix.c:228
int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
Definition dbm_matrix.c:605
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
subroutine, public dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, retain_sparsity, filter_eps, flop)
Computes matrix product: matrix_c = alpha * matrix_a * matrix_b + beta * matrix_c.
Definition dbm_api.F:736
subroutine, public dbm_create_from_template(matrix, name, template)
Creates a new matrix from given template, reusing dist and row/col_block_sizes.
Definition dbm_api.F:265
subroutine, public dbm_distribution_release(dist)
Decreases the reference counter of the given distribution.
Definition dbm_api.F:1401
subroutine, public dbm_get_stored_coordinates(matrix, row, column, processor)
Returns the MPI rank on which the given block should be stored.
Definition dbm_api.F:1233
subroutine, public dbm_create(matrix, name, dist, row_block_sizes, col_block_sizes)
Creates a new matrix.
Definition dbm_api.F:293
integer function, dimension(:), pointer, contiguous, public dbm_get_row_block_sizes(matrix)
Returns the row block sizes of the given matrix.
Definition dbm_api.F:1103
subroutine, public dbm_put_block(matrix, row, col, block, summation)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
Definition dbm_api.F:491
real(kind=dp) function, public dbm_checksum(matrix)
Computes a checksum of the given matrix.
Definition dbm_api.F:969
subroutine, public dbm_copy(matrix_a, matrix_b)
Copies content of matrix_b into matrix_a. Matrices must have the same row/col block sizes and distrib...
Definition dbm_api.F:380
subroutine, public dbm_release(matrix)
Releases a matrix and all its ressources.
Definition dbm_api.F:354
integer function, dimension(:), pointer, contiguous, public dbm_get_col_block_sizes(matrix)
Returns the column block sizes of the given matrix.
Definition dbm_api.F:1130
subroutine, public dbm_distribution_new(dist, mp_comm, row_dist_block, col_dist_block)
Creates a new two dimensional distribution.
Definition dbm_api.F:1294
subroutine, public dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, bs_m, bs_n, bs_k, sparsities, alpha, beta, n_loops, eps, retain_sparsity, always_checksum)
Tests the DBM library.
Definition dbm_tests.F:54
integer function, dimension(4), public generate_larnv_seed(irow, nrow, icol, ncol, ival)
Generate a seed respecting the lapack constraints,.
Definition dbm_tests.F:435
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public int_8
Definition kinds.F:54
integer, parameter, public dp
Definition kinds.F:34
Machine interface based on Fortran 2003 and POSIX.
Definition machine.F:17
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Definition machine.F:106
Interface to the message passing library MPI.
subroutine, public mp_dims_create(nodes, dims)
wrapper to MPI_Dims_create