9   USE omp_lib,                         
ONLY: omp_get_wtime
 
   21#include "../base/base_uses.f90" 
   29   INTEGER, 
PRIVATE, 
SAVE                                      :: randmat_counter = 0
 
   52                            bs_m, bs_n, bs_k, sparsities, alpha, beta, &
 
   53                            n_loops, eps, retain_sparsity, always_checksum)
 
   56      INTEGER, 
INTENT(IN)                                :: io_unit
 
   57      INTEGER, 
DIMENSION(:), 
INTENT(in)                  :: matrix_sizes
 
   58      LOGICAL, 
DIMENSION(2), 
INTENT(in)                  :: trs
 
   59      INTEGER, 
DIMENSION(:), 
POINTER                     :: bs_m, bs_n, bs_k
 
   60      REAL(kind=
dp), 
DIMENSION(3), 
INTENT(in)            :: sparsities
 
   61      REAL(kind=
dp), 
INTENT(in)                          :: alpha, beta
 
   62      INTEGER, 
INTENT(IN)                                :: n_loops
 
   63      REAL(kind=
dp), 
INTENT(in)                          :: eps
 
   64      LOGICAL, 
INTENT(in)                                :: retain_sparsity, always_checksum
 
   66      CHARACTER(len=*), 
PARAMETER                        :: routinen = 
'dbm_run_tests' 
   68      INTEGER                                            :: bmax, bmin, handle
 
   69      INTEGER, 
CONTIGUOUS, 
DIMENSION(:), 
POINTER         :: col_dist_a, col_dist_b, col_dist_c, &
 
   70                                                            row_dist_a, row_dist_b, row_dist_c, &
 
   71                                                            sizes_k, sizes_m, sizes_n
 
   72      INTEGER, 
DIMENSION(2)                              :: npdims
 
   74      TYPE(
dbm_type), 
TARGET                             :: matrix_a, matrix_b, matrix_c
 
   77      CALL timeset(routinen, handle)
 
   82      CALL cart_group%create(mp_group, 2, npdims)
 
   83      npdims = cart_group%num_pe_cart
 
   86      randmat_counter = 12341313
 
   89      NULLIFY (sizes_k, sizes_m, sizes_n)
 
   90      IF (
ASSOCIATED(bs_m)) 
THEN 
   91         bmin = minval(bs_m(2::2))
 
   92         bmax = maxval(bs_m(2::2))
 
   93         CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), bs_m)
 
   95         CALL generate_mixed_block_sizes(sizes_m, matrix_sizes(1), (/1, 13, 2, 5/))
 
   98      IF (
ASSOCIATED(bs_n)) 
THEN 
   99         bmin = min(bmin, minval(bs_n(2::2)))
 
  100         bmax = max(bmax, maxval(bs_n(2::2)))
 
  101         CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), bs_n)
 
  103         CALL generate_mixed_block_sizes(sizes_n, matrix_sizes(2), (/1, 13, 2, 5/))
 
  104         bmin = min(bmin, 5); bmax = max(bmax, 13)
 
  106      IF (
ASSOCIATED(bs_k)) 
THEN 
  107         bmin = min(bmin, minval(bs_k(2::2)))
 
  108         bmax = max(bmax, maxval(bs_k(2::2)))
 
  109         CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), bs_k)
 
  111         CALL generate_mixed_block_sizes(sizes_k, matrix_sizes(3), (/1, 13, 2, 5/))
 
  112         bmin = min(bmin, 5); bmax = max(bmax, 13)
 
  116      CALL generate_1d_dist(row_dist_c, 
SIZE(sizes_m), npdims(1), sizes_m)
 
  117      CALL generate_1d_dist(col_dist_c, 
SIZE(sizes_n), npdims(2), sizes_n)
 
  119      CALL dbm_create(matrix_c, 
"Matrix C", dist_c, sizes_m, sizes_n)
 
  120      CALL fill_matrix(matrix_c, sparsity=sparsities(3), group=cart_group)
 
  125         CALL generate_1d_dist(row_dist_a, 
SIZE(sizes_k), npdims(1), sizes_k)
 
  126         CALL generate_1d_dist(col_dist_a, 
SIZE(sizes_m), npdims(2), sizes_m)
 
  128         CALL dbm_create(matrix_a, 
"Matrix A", dist_a, sizes_k, sizes_m)
 
  129         CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
 
  130         DEALLOCATE (row_dist_a, col_dist_a)
 
  132         CALL generate_1d_dist(col_dist_a, 
SIZE(sizes_k), npdims(2), sizes_k)
 
  134         CALL dbm_create(matrix_a, 
"Matrix A", dist_a, sizes_m, sizes_k)
 
  135         CALL fill_matrix(matrix_a, sparsity=sparsities(1), group=cart_group)
 
  136         DEALLOCATE (col_dist_a)
 
  142         CALL generate_1d_dist(row_dist_b, 
SIZE(sizes_n), npdims(1), sizes_n)
 
  143         CALL generate_1d_dist(col_dist_b, 
SIZE(sizes_k), npdims(2), sizes_k)
 
  145         CALL dbm_create(matrix_b, 
"Matrix B", dist_b, sizes_n, sizes_k)
 
  146         CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
 
  147         DEALLOCATE (row_dist_b, col_dist_b)
 
  149         CALL generate_1d_dist(row_dist_b, 
SIZE(sizes_k), npdims(1), sizes_k)
 
  151         CALL dbm_create(matrix_b, 
"Matrix B", dist_b, sizes_k, sizes_n)
 
  152         CALL fill_matrix(matrix_b, sparsity=sparsities(2), group=cart_group)
 
  153         DEALLOCATE (row_dist_b)
 
  156      DEALLOCATE (row_dist_c, col_dist_c, sizes_m, sizes_n, sizes_k)
 
  159      IF (io_unit > 0) 
THEN 
  160         WRITE (io_unit, 
'(A,3(1X,I6),1X,A,2(1X,I5),1X,A,2(1X,L1))') &
 
  161            "Testing with sizes", matrix_sizes(1:3), &
 
  162            "min/max block sizes", bmin, bmax, 
"transposed?", trs(1:2)
 
  165      CALL run_multiply_test(matrix_a, matrix_b, matrix_c, &
 
  166                             transa=trs(1), transb=trs(2), &
 
  167                             alpha=alpha, beta=beta, &
 
  172                             always_checksum=always_checksum, &
 
  173                             retain_sparsity=retain_sparsity)
 
  178      CALL cart_group%free()
 
  180      CALL timestop(handle)
 
 
  200   SUBROUTINE run_multiply_test(matrix_a, matrix_b, matrix_c, transa, transb, alpha, beta, &
 
  201                                retain_sparsity, n_loops, eps, group, io_unit, always_checksum)
 
  202      TYPE(
dbm_type), 
INTENT(in)                         :: matrix_a, matrix_b
 
  203      TYPE(
dbm_type), 
INTENT(inout)                      :: matrix_c
 
  204      LOGICAL, 
INTENT(in)                                :: transa, transb
 
  205      REAL(kind=
dp), 
INTENT(in)                          :: alpha, beta
 
  206      LOGICAL, 
INTENT(in)                                :: retain_sparsity
 
  207      INTEGER, 
INTENT(IN)                                :: n_loops
 
  208      REAL(kind=
dp), 
INTENT(in)                          :: eps
 
  211      INTEGER, 
INTENT(IN)                                :: io_unit
 
  212      LOGICAL, 
INTENT(in)                                :: always_checksum
 
  214      CHARACTER(len=*), 
PARAMETER                        :: routinen = 
'run_multiply_test' 
  216      INTEGER                                            :: handle, loop_iter
 
  217      INTEGER(kind=int_8)                                :: flop
 
  218      REAL(kind=
dp)                                      :: cs, duration, flops_all, time_start
 
  221      CALL timeset(routinen, handle)
 
  224      CALL dbm_copy(matrix_c_orig, matrix_c)
 
  226      associate(numnodes => group%num_pe)
 
  227         DO loop_iter = 1, n_loops
 
  229            time_start = omp_get_wtime()
 
  230            IF (eps < -0.0_dp) 
THEN 
  231               CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
 
  232                                 retain_sparsity=retain_sparsity, flop=flop)
 
  234               CALL dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, &
 
  235                                 retain_sparsity=retain_sparsity, flop=flop, filter_eps=eps)
 
  237            duration = omp_get_wtime() - time_start
 
  239            CALL group%max(duration)
 
  241            duration = max(duration, epsilon(duration))  
 
  242            flops_all = real(flop, kind=
dp)/duration/numnodes/(1024*1024)
 
  243            IF (io_unit .GT. 0) 
THEN 
  244               WRITE (io_unit, 
'(A,I5,A,I5,A,F12.3,A,I9,A)') &
 
  245                  " loop ", loop_iter, 
" with ", numnodes, 
" MPI ranks: using ", &
 
  246                  duration, 
"s ", int(flops_all), 
" Mflops/rank" 
  250            IF (loop_iter .EQ. n_loops .OR. always_checksum) 
THEN 
  252               IF (io_unit > 0) 
THEN 
  253                  WRITE (io_unit, *) 
"Final checksums", cs
 
  257            CALL dbm_copy(matrix_c, matrix_c_orig)
 
  262      CALL timestop(handle)
 
  263   END SUBROUTINE run_multiply_test
 
  271   SUBROUTINE fill_matrix(matrix, sparsity, group)
 
  272      TYPE(dbm_type), 
INTENT(inout)                      :: matrix
 
  273      REAL(kind=dp), 
INTENT(in)                          :: sparsity
 
  275      CLASS(mp_comm_type), 
INTENT(IN)                     :: group
 
  277      CHARACTER(len=*), 
PARAMETER                        :: routinen = 
'fill_matrix' 
  279      INTEGER                                            :: block_node, col, handle, ncol, &
 
  281      INTEGER(KIND=int_8)                                :: counter, ele, increment, nmax
 
  282      INTEGER, 
DIMENSION(4)                              :: iseed, jseed
 
  283      INTEGER, 
DIMENSION(:), 
POINTER                     :: col_block_sizes, row_block_sizes
 
  284      REAL(kind=dp)                                      :: my_sparsity
 
  285      REAL(kind=dp), 
ALLOCATABLE, 
DIMENSION(:, :)        :: block
 
  286      REAL(kind=dp), 
DIMENSION(1)                        :: 
value 
  288      CALL timeset(routinen, handle)
 
  291      cpassert(randmat_counter .NE. 0)
 
  293      randmat_counter = randmat_counter + 1
 
  295      IF (sparsity .GT. 1) 
THEN 
  296         my_sparsity = sparsity/100.0
 
  298         my_sparsity = sparsity
 
  301      row_block_sizes => dbm_get_row_block_sizes(matrix)
 
  302      col_block_sizes => dbm_get_col_block_sizes(matrix)
 
  303      nrow = 
SIZE(row_block_sizes)
 
  304      ncol = 
SIZE(col_block_sizes)
 
  305      nmax = int(nrow, kind=int_8)*int(ncol, kind=int_8)
 
  310      associate(mynode => group%mepos)
 
  314            CALL dlarnv(1, jseed, 1, 
value)
 
  315            IF (my_sparsity > 0) 
THEN 
  316               increment = 1 + floor(log(value(1))/log(my_sparsity), kind=int_8)
 
  320            ele = ele + increment
 
  321            IF (ele >= nmax) 
EXIT 
  322            counter = counter + 1
 
  323            row = int(ele/ncol) + 1
 
  324            col = int(mod(ele, int(ncol, kind=kind(ele)))) + 1
 
  328            IF (block_node == mynode) 
THEN 
  331               ALLOCATE (block(row_block_sizes(row), col_block_sizes(col)))
 
  332               CALL dlarnv(1, iseed, 
SIZE(block), block)
 
  339      CALL timestop(handle)
 
  340   END SUBROUTINE fill_matrix
 
  350   SUBROUTINE generate_1d_dist(bin_dist, nelements, nbins, element_sizes)
 
  351      INTEGER, 
DIMENSION(:), 
INTENT(OUT), 
POINTER        :: bin_dist
 
  352      INTEGER, 
INTENT(IN)                                :: nelements, nbins
 
  353      INTEGER, 
DIMENSION(:), 
INTENT(IN)                  :: element_sizes
 
  356      INTEGER, 
DIMENSION(nbins)                          :: bin_counts
 
  358      cpassert(
SIZE(element_sizes) == nelements)
 
  359      ALLOCATE (bin_dist(nelements))
 
  361      bin_counts(:) = [(0, bin=0, nbins - 1)]
 
  363         bin = minloc(bin_counts, dim=1) 
 
  364         bin_dist(i) = bin - 1
 
  365         bin_counts(bin) = bin_counts(bin) + element_sizes(i)
 
  367   END SUBROUTINE generate_1d_dist
 
  376   SUBROUTINE generate_mixed_block_sizes(block_sizes, size_sum, size_mix)
 
  377      INTEGER, 
DIMENSION(:), 
INTENT(inout), 
POINTER      :: block_sizes
 
  378      INTEGER, 
INTENT(in)                                :: size_sum
 
  379      INTEGER, 
DIMENSION(:), 
INTENT(in)                  :: size_mix
 
  381      INTEGER                                            :: block_size, current_sum, ipass, nblocks, &
 
  383      INTEGER, 
ALLOCATABLE, 
DIMENSION(:, :)              :: mixer
 
  385      cpassert(.NOT. 
ASSOCIATED(block_sizes))
 
  386      nsize_mix = 
SIZE(size_mix)/2
 
  387      ALLOCATE (mixer(3, nsize_mix))
 
  391         mixer(1, :) = size_mix(1:nsize_mix*2 - 1:2)
 
  392         mixer(2, :) = size_mix(2:nsize_mix*2:2)
 
  397         DO WHILE (current_sum < size_sum)
 
  398            nblocks = nblocks + 1
 
  399            block_size = min(mixer(2, selector), size_sum - current_sum)
 
  401               block_sizes(nblocks) = block_size
 
  403            current_sum = current_sum + block_size
 
  404            mixer(3, selector) = mixer(3, selector) + 1
 
  405            IF (mixer(3, selector) > mixer(1, selector)) 
THEN 
  406               mixer(3, selector) = 1
 
  407               selector = mod(selector, nsize_mix) + 1
 
  411            ALLOCATE (block_sizes(nblocks))
 
  415      current_sum = sum(block_sizes)
 
  416      cpassert(current_sum == size_sum)
 
  417   END SUBROUTINE generate_mixed_block_sizes
 
  436      INTEGER, 
INTENT(IN)                                :: irow, nrow, icol, ncol, ival
 
  439      INTEGER(KIND=int_8)                                :: map
 
  441      map = ((irow - 1 + icol*int(nrow, int_8))*(1 + 
modulo(ival, 2**16)))*2 + 1 + 0*ncol 
 
  442      iseed(4) = int(
modulo(map, 2_int_8**12)); map = map/2_int_8**12; 
 
  443      iseed(3) = int(
modulo(ieor(map, 3541_int_8), 2_int_8**12)); map = map/2_int_8**12
 
  444      iseed(2) = int(
modulo(ieor(map, 1153_int_8), 2_int_8**12)); map = map/2_int_8**12
 
  445      iseed(1) = int(
modulo(ieor(map, 2029_int_8), 2_int_8**12)); map = map/2_int_8**12
 
 
void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col, const bool summation, const double *block)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
static GRID_HOST_DEVICE int modulo(int a, int m)
Equivalent of Fortran's MODULO, which always return a positive number. https://gcc....
subroutine, public dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, retain_sparsity, filter_eps, flop)
Computes matrix product: matrix_c = alpha * matrix_a * matrix_b + beta * matrix_c.
subroutine, public dbm_create_from_template(matrix, name, template)
Creates a new matrix from given template, reusing dist and row/col_block_sizes.
subroutine, public dbm_distribution_release(dist)
Decreases the reference counter of the given distribution.
subroutine, public dbm_get_stored_coordinates(matrix, row, column, processor)
Returns the MPI rank on which the given block should be stored.
subroutine, public dbm_create(matrix, name, dist, row_block_sizes, col_block_sizes)
Creates a new matrix.
integer function, dimension(:), pointer, contiguous, public dbm_get_row_block_sizes(matrix)
Returns the row block sizes of the given matrix.
subroutine, public dbm_put_block(matrix, row, col, block, summation)
Adds a block to given matrix. This routine is thread-safe. If block already exist then it gets overwr...
real(kind=dp) function, public dbm_checksum(matrix)
Computes a checksum of the given matrix.
subroutine, public dbm_copy(matrix_a, matrix_b)
Copies content of matrix_b into matrix_a. Matrices must have the same row/col block sizes and distrib...
subroutine, public dbm_release(matrix)
Releases a matrix and all its ressources.
integer function, dimension(:), pointer, contiguous, public dbm_get_col_block_sizes(matrix)
Returns the column block sizes of the given matrix.
subroutine, public dbm_distribution_new(dist, mp_comm, row_dist_block, col_dist_block)
Creates a new two dimensional distribution.
subroutine, public dbm_run_tests(mp_group, io_unit, matrix_sizes, trs, bs_m, bs_n, bs_k, sparsities, alpha, beta, n_loops, eps, retain_sparsity, always_checksum)
Tests the DBM library.
integer function, dimension(4), public generate_larnv_seed(irow, nrow, icol, ncol, ival)
Generate a seed respecting the lapack constraints,.
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Machine interface based on Fortran 2003 and POSIX.
subroutine, public m_flush(lunit)
flushes units if the &GLOBAL flag is set accordingly
Interface to the message passing library MPI.
subroutine, public mp_dims_create(nodes, dims)
wrapper to MPI_Dims_create