9 USE omp_lib,
ONLY: omp_get_wtime
21 #include "../base/base_uses.f90"
29 INTEGER,
PRIVATE,
SAVE :: randmat_counter = 0
52 bs_m, bs_n, bs_k, sparsities, alpha, beta, &
53 n_loops, eps, retain_sparsity, always_checksum)
55 CLASS(mp_comm_type),
INTENT(IN) :: mp_group
56 INTEGER,
INTENT(IN) :: io_unit
57 INTEGER,
DIMENSION(:),
INTENT(in) :: matrix_sizes
58 LOGICAL,
DIMENSION(2),
INTENT(in) :: trs
59 INTEGER,
DIMENSION(:),
POINTER :: bs_m, bs_n, bs_k
60 REAL(kind=
dp),
DIMENSION(3),
INTENT(in) :: sparsities
61 REAL(kind=
dp),
INTENT(in) :: alpha, beta
62 INTEGER,
INTENT(IN) :: n_loops
63 REAL(kind=
dp),
INTENT(in) :: eps
64 LOGICAL,
INTENT(in) :: retain_sparsity, always_checksum
66 CHARACTER(len=*),
PARAMETER :: routinen =
'dbm_run_tests'
68 INTEGER :: bmax, bmin, handle
69 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: col_dist_a, col_dist_b, col_dist_c, &
70 row_dist_a, row_dist_b, row_dist_c, &
71 sizes_k, sizes_m, sizes_n
72 INTEGER,
DIMENSION(2) :: npdims
73 TYPE(dbm_distribution_obj) :: dist_a, dist_b, dist_c
74 TYPE(dbm_type),
TARGET :: matrix_a, matrix_b, matrix_c
75 TYPE(mp_cart_type) :: cart_group
77 CALL timeset(routinen, handle)
82 CALL cart_group%create(mp_group, 2, npdims)
83 npdims = cart_group%num_pe_cart
86 randmat_counter = 12341313
89 NULLIFY (sizes_k, sizes_m, sizes_n)
90 IF (
ASSOCIATED(bs_m))
THEN
91 bmin = minval(bs_m(2::2))
92 bmax = maxval(bs_m(2::2))
93 CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), bs_m)
95 CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), (/1, 13, 2, 5/))
98 IF (
ASSOCIATED(bs_n))
THEN
99 bmin = min(bmin, minval(bs_n(2::2)))
100 bmax = max(bmax, maxval(bs_n(2::2)))
101 CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), bs_n)
103 CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), (/1, 13, 2, 5/))
104 bmin = min(bmin, 5); bmax = max(bmax, 13)
106 IF (
ASSOCIATED(bs_k))
THEN
107 bmin = min(bmin, minval(bs_k(2::2)))
108 bmax = max(bmax, maxval(bs_k(2::2)))
109 CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), bs_k)
111 CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), (/1, 13, 2, 5/))
112 bmin = min(bmin, 5); bmax = max(bmax, 13)
116 CALL generate_1d_dist(row_dist_c,
SIZE(sizes_m), npdims(1), sizes_m)
117 CALL generate_1d_dist(col_dist_c,
SIZE(sizes_n), npdims(2), sizes_n)
119 CALL dbm_create(matrix_c,
"Matrix C", dist_c, sizes_m, sizes_n)
120 CALL fill_matrix(matrix_c, sparsity=sparsities(3), group=cart_group)
125 CALL generate_1d_dist(row_dist_a,
SIZE(sizes_k), npdims(1), sizes_k)
126 CALL generate_1d_dist(col_dist_a,
SIZE(sizes_m), npdims(2), sizes_m)
128 CALL dbm_create(matrix_a,
"Matrix A", dist_a, sizes_k, sizes_m)
129 CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
130 DEALLOCATE (row_dist_a, col_dist_a)
132 CALL generate_1d_dist(col_dist_a,
SIZE(sizes_k), npdims(2), sizes_k)
134 CALL dbm_create(matrix_a,
"Matrix A", dist_a, sizes_m, sizes_k)
135 CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
136 DEALLOCATE (col_dist_a)
142 CALL generate_1d_dist(row_dist_b,
SIZE(sizes_n), npdims(1), sizes_n)
143 CALL generate_1d_dist(col_dist_b,
SIZE(sizes_k), npdims(2), sizes_k)
145 CALL dbm_create(matrix_b,
"Matrix B", dist_b, sizes_n, sizes_k)
146 CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
147 DEALLOCATE (row_dist_b, col_dist_b)
149 CALL generate_1d_dist(row_dist_b,
SIZE(sizes_k), npdims(1), sizes_k)
151 CALL dbm_create(matrix_b,
"Matrix B", dist_b, sizes_k, sizes_n)
152 CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
153 DEALLOCATE (row_dist_b)
156 DEALLOCATE (row_dist_c, col_dist_c, sizes_m, sizes_n, sizes_k)
159 IF (io_unit > 0)
THEN
160 WRITE (io_unit,
'(A,3(1X,I6),1X,A,2(1X,I5),1X,A,2(1X,L1))') &
161 "Testing with sizes", matrix_sizes(1:3), &
162 "min/max block sizes", bmin, bmax,
"transposed?", trs(1:2)
165 CALL run_multiply_test(matrix_a, matrix_b, matrix_c, &
166 transa=trs(1), transb=trs(2), &
167 alpha=alpha, beta=beta, &
172 always_checksum=always_checksum, &
173 retain_sparsity=retain_sparsity)
178 CALL cart_group%free()
180 CALL timestop(handle)
200 SUBROUTINE run_multiply_test(matrix_a, matrix_b, matrix_c, transa, transb, alpha, beta, &
201 retain_sparsity, n_loops, eps, group, io_unit, always_checksum)
202 TYPE(dbm_type),
INTENT(in) :: matrix_a, matrix_b
203 TYPE(dbm_type),
INTENT(inout) :: matrix_c
204 LOGICAL,
INTENT(in) :: transa, transb
205 REAL(kind=
dp),
INTENT(in) :: alpha, beta
206 LOGICAL,
INTENT(in) :: retain_sparsity
207 INTEGER,
INTENT(IN) :: n_loops
208 REAL(kind=
dp),
INTENT(in) :: eps
210 CLASS(mp_comm_type),
INTENT(IN) :: group
211 INTEGER,
INTENT(IN) :: io_unit
212 LOGICAL,
INTENT(in) :: always_checksum
214 CHARACTER(len=*),
PARAMETER :: routinen =
'run_multiply_test'
216 INTEGER :: handle, loop_iter
217 INTEGER(kind=int_8) :: flop
218 REAL(kind=
dp) :: cs, duration, flops_all, time_start
219 TYPE(dbm_type) :: matrix_c_orig
221 CALL timeset(routinen, handle)
224 CALL dbm_copy(matrix_c_orig, matrix_c)
226 associate(numnodes => group%num_pe)
227 DO loop_iter = 1, n_loops
229 time_start = omp_get_wtime()
230 IF (eps < -0.0_dp)
THEN
231 CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
232 retain_sparsity=retain_sparsity, flop=flop)
234 CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
235 retain_sparsity=retain_sparsity, flop=flop, filter_eps=eps)
237 duration = omp_get_wtime() - time_start
239 CALL group%max(duration)
241 duration = max(duration, epsilon(duration))
242 flops_all = real(flop, kind=
dp)/duration/numnodes/(1024*1024)
243 IF (io_unit .GT. 0)
THEN
244 WRITE (io_unit,
'(A,I5,A,I5,A,F12.3,A,I9,A)') &
245 " loop ", loop_iter,
" with ", numnodes,
" MPI ranks: using ", &
246 duration,
"s ", int(flops_all),
" Mflops/rank"
250 IF (loop_iter .EQ. n_loops .OR. always_checksum)
THEN
252 IF (io_unit > 0)
THEN
253 WRITE (io_unit, *)
"Final checksums", cs
257 CALL dbm_copy(matrix_c, matrix_c_orig)
262 CALL timestop(handle)
263 END SUBROUTINE run_multiply_test
271 SUBROUTINE fill_matrix(matrix, sparsity, group)
272 TYPE(dbm_type),
INTENT(inout) :: matrix
273 REAL(kind=dp),
INTENT(in) :: sparsity
275 CLASS(mp_comm_type),
INTENT(IN) :: group
277 CHARACTER(len=*),
PARAMETER :: routinen =
'fill_matrix'
279 INTEGER :: block_node, col, handle, ncol, &
281 INTEGER(KIND=int_8) :: counter, ele, increment, nmax
282 INTEGER,
DIMENSION(4) :: iseed, jseed
283 INTEGER,
DIMENSION(:),
POINTER :: col_block_sizes, row_block_sizes
284 REAL(kind=dp) :: my_sparsity
285 REAL(kind=dp),
ALLOCATABLE,
DIMENSION(:, :) :: block
286 REAL(kind=dp),
DIMENSION(1) ::
value
288 CALL timeset(routinen, handle)
291 cpassert(randmat_counter .NE. 0)
293 randmat_counter = randmat_counter + 1
295 IF (sparsity .GT. 1)
THEN
296 my_sparsity = sparsity/100.0
298 my_sparsity = sparsity
301 row_block_sizes => dbm_get_row_block_sizes(matrix)
302 col_block_sizes => dbm_get_col_block_sizes(matrix)
303 nrow =
SIZE(row_block_sizes)
304 ncol =
SIZE(col_block_sizes)
305 nmax = int(nrow, kind=int_8)*int(ncol, kind=int_8)
310 associate(mynode => group%mepos)
314 CALL dlarnv(1, jseed, 1,
value)
315 IF (my_sparsity > 0)
THEN
316 increment = 1 + floor(log(value(1))/log(my_sparsity), kind=int_8)
320 ele = ele + increment
321 IF (ele >= nmax)
EXIT
322 counter = counter + 1
323 row = int(ele/ncol) + 1
324 col = int(mod(ele, int(ncol, kind=kind(ele)))) + 1
328 IF (block_node == mynode)
THEN
331 ALLOCATE (block(row_block_sizes(row), col_block_sizes(col)))
332 CALL dlarnv(1, iseed,
SIZE(block), block)
339 CALL timestop(handle)
340 END SUBROUTINE fill_matrix
350 SUBROUTINE generate_1d_dist(bin_dist, nelements, nbins, element_sizes)
351 INTEGER,
DIMENSION(:),
INTENT(OUT),
POINTER :: bin_dist
352 INTEGER,
INTENT(IN) :: nelements, nbins
353 INTEGER,
DIMENSION(:),
INTENT(IN) :: element_sizes
356 INTEGER,
DIMENSION(nbins) :: bin_counts
358 cpassert(
SIZE(element_sizes) == nelements)
359 ALLOCATE (bin_dist(nelements))
361 bin_counts(:) = [(0, bin=0, nbins - 1)]
363 bin = minloc(bin_counts, dim=1)
364 bin_dist(i) = bin - 1
365 bin_counts(bin) = bin_counts(bin) + element_sizes(i)
367 END SUBROUTINE generate_1d_dist
376 SUBROUTINE generate_mixed_block_sizes(block_sizes, size_sum, size_mix)
377 INTEGER,
DIMENSION(:),
INTENT(inout),
POINTER :: block_sizes
378 INTEGER,
INTENT(in) :: size_sum
379 INTEGER,
DIMENSION(:),
INTENT(in) :: size_mix
381 INTEGER :: block_size, current_sum, ipass, nblocks, &
383 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: mixer
385 cpassert(.NOT.
ASSOCIATED(block_sizes))
386 nsize_mix =
SIZE(size_mix)/2
387 ALLOCATE (mixer(3, nsize_mix))
391 mixer(1, :) = size_mix(1:nsize_mix*2 - 1:2)
392 mixer(2, :) = size_mix(2:nsize_mix*2:2)
397 DO WHILE (current_sum < size_sum)
398 nblocks = nblocks + 1
399 block_size = min(mixer(2, selector), size_sum - current_sum)
401 block_sizes(nblocks) = block_size
403 current_sum = current_sum + block_size
404 mixer(3, selector) = mixer(3, selector) + 1
405 IF (mixer(3, selector) > mixer(1, selector))
THEN
406 mixer(3, selector) = 1
407 selector = mod(selector, nsize_mix) + 1
411 ALLOCATE (block_sizes(nblocks))
415 current_sum = sum(block_sizes)
416 cpassert(current_sum == size_sum)
417 END SUBROUTINE generate_mixed_block_sizes
436 INTEGER,
INTENT(IN) :: irow, nrow, icol, ncol, ival
439 INTEGER(KIND=int_8) :: map
441 map = ((irow - 1 + icol*int(nrow, int_8))*(1 +
modulo(ival, 2**16)))*2 + 1 + 0*ncol
442 iseed(4) = int(
modulo(map, 2_int_8**12)); map = map/2_int_8**12;
443 iseed(3) = int(
modulo(ieor(map, 3541_int_8), 2_int_8**12)); map = map/2_int_8**12
444 iseed(2) = int(
modulo(ieor(map, 1153_int_8), 2_int_8**12)); map = map/2_int_8**12
445 iseed(1) = int(
modulo(ieor(map, 2029_int_8), 2_int_8**12)); map = map/2_int_8**12
void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col, const bool summation, const double *block)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
subroutine, public dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, retain_sparsity, filter_eps, flop)
Computes matrix product: matrix_c = alpha * matrix_a * matrix_b + beta * matrix_c.
subroutine, public dbm_create_from_template(matrix, name, template)
Creates a new matrix from given template, reusing dist and row/col_block_sizes.
subroutine, public dbm_distribution_release(dist)
Decreases the reference counter of the given distribution.
subroutine, public dbm_get_stored_coordinates(matrix, row, column, processor)
Returns the MPI rank on which the given block should be stored.
subroutine, public dbm_create(matrix, name, dist, row_block_sizes, col_block_sizes)
Creates a new matrix.
integer function, dimension(:), pointer, contiguous, public dbm_get_row_block_sizes(matrix)
Returns the row block sizes of the given matrix.
subroutine, public dbm_put_block(matrix, row, col, block, summation)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
real(kind=dp) function, public dbm_checksum(matrix)
Computes a checksum of the given matrix.
subroutine, public dbm_copy(matrix_a, matrix_b)
Copies content of matrix_b into matrix_a. Matrices must have the same row/col block sizes and distrib...
subroutine, public dbm_release(matrix)
Releases a matrix and all its ressources.
integer function, dimension(:), pointer, contiguous, public dbm_get_col_block_sizes(matrix)
Returns the column block sizes of the given matrix.
subroutine, public dbm_distribution_new(dist, mp_comm, row_dist_block, col_dist_block)
Creates a new two dimensional distribution.
subroutine, public dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, bs_m, bs_n, bs_k, sparsities, alpha, beta, n_loops, eps, retain_sparsity, always_checksum)
Tests the DBM library.
integer function, dimension(4), public generate_larnv_seed(irow, nrow, icol, ncol, ival)
Generate a seed respecting the lapack constraints,.
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Interface to the message passing library MPI.
subroutine, public mp_dims_create(nodes, dims)
wrapper to MPI_Dims_create