35#include "../../base/base_uses.f90"
48 INTEGER,
SAVE :: randmat_counter = 0
49 INTEGER,
PARAMETER,
PRIVATE :: rand_seed_init = 12341313
69 cbsizes, dist_splitsize, name, sparsity, reuse_comm)
74 INTEGER(KIND=int_8),
INTENT(IN) :: nrows, ncols
75 INTEGER,
DIMENSION(nrows),
INTENT(IN) :: rbsizes
76 INTEGER,
DIMENSION(ncols),
INTENT(IN) :: cbsizes
77 INTEGER,
DIMENSION(2),
INTENT(IN) :: dist_splitsize
78 CHARACTER(len=*),
INTENT(IN) :: name
79 REAL(kind=
dp),
INTENT(IN) :: sparsity
80 LOGICAL,
INTENT(IN),
OPTIONAL :: reuse_comm
82 CHARACTER(LEN=*),
PARAMETER :: routinen =
'dbt_tas_setup_test_matrix'
84 INTEGER :: col_size, handle, max_col_size, max_nze, &
85 max_row_size, mynode, node_holds_blk, &
87 INTEGER(KIND=int_8) :: col, col_s, ncol, nrow, row, row_s
88 INTEGER,
DIMENSION(2) :: pdims
89 INTEGER,
DIMENSION(4) :: iseed, jseed
90 LOGICAL :: reuse_comm_prv, tr
91 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: values
92 REAL(kind=
dp),
DIMENSION(1) :: rn
99 CALL timeset(routinen, handle)
102 cpassert(randmat_counter .NE. 0)
104 randmat_counter = randmat_counter + 1
106 IF (
PRESENT(reuse_comm))
THEN
107 reuse_comm_prv = reuse_comm
109 reuse_comm_prv = .false.
112 IF (reuse_comm_prv)
THEN
113 mp_comm_out = mp_comm
118 mynode = mp_comm_out%mepos
119 pdims = mp_comm_out%num_pe_cart
129 row_blk_size=rbsize_obj, col_blk_size=cbsize_obj, own_dist=.true.)
131 max_row_size = maxval(rbsizes)
132 max_col_size = maxval(cbsizes)
133 max_nze = max_row_size*max_col_size
138 ALLOCATE (values(max_row_size, max_col_size))
144 CALL dlarnv(1, jseed, 1, rn)
145 IF (rn(1) .LT. sparsity)
THEN
147 row_s = row; col_s = col
150 IF (node_holds_blk .EQ. mynode)
THEN
151 row_size = rbsize_obj%data(row_s)
152 col_size = cbsize_obj%data(col_s)
153 nze = row_size*col_size
155 CALL dlarnv(1, iseed, max_nze, values)
164 CALL timestop(handle)
182 SUBROUTINE dbt_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, matrix_c, compare_dbm, filter_eps, io_unit)
184 LOGICAL,
INTENT(IN) :: transa, transb, transc
185 TYPE(
dbt_tas_type),
INTENT(INOUT) :: matrix_a, matrix_b, matrix_c
186 LOGICAL,
INTENT(IN) :: compare_dbm
187 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: filter_eps
188 INTEGER,
INTENT(IN),
OPTIONAL :: io_unit
190 INTEGER :: handle1, handle2
191 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: cd_a, cd_b, cd_c, col_block_sizes_a, &
192 col_block_sizes_b, col_block_sizes_c, rd_a, rd_b, rd_c, row_block_sizes_a, &
193 row_block_sizes_b, row_block_sizes_c
194 INTEGER,
DIMENSION(2) :: npdims
196 TYPE(
dbm_type) :: dbm_a, dbm_a_mm, dbm_b, dbm_b_mm, dbm_c, &
203 IF (
PRESENT(io_unit))
THEN
204 IF (io_unit > 0)
THEN
205 WRITE (io_unit,
"(A)")
"starting tall-and-skinny benchmark"
208 CALL timeset(
"benchmark_tas_mm", handle1)
211 filter_eps=filter_eps, unit_nr=io_unit)
212 CALL timestop(handle1)
213 IF (
PRESENT(io_unit))
THEN
214 IF (io_unit > 0)
THEN
215 WRITE (io_unit,
"(A)")
"tall-and-skinny benchmark completed"
219 IF (compare_dbm)
THEN
226 CALL comm_dbm%create(mp_comm, 2, npdims)
251 DEALLOCATE (rd_a, rd_b, rd_c, cd_a, cd_b, cd_c)
262 row_block_sizes=row_block_sizes_a, col_block_sizes=col_block_sizes_a)
265 row_block_sizes=row_block_sizes_b, col_block_sizes=col_block_sizes_b)
268 row_block_sizes=row_block_sizes_c, col_block_sizes=col_block_sizes_c)
276 IF (
PRESENT(io_unit))
THEN
277 IF (io_unit > 0)
THEN
278 WRITE (io_unit,
"(A)")
"starting dbm benchmark"
281 CALL timeset(
"benchmark_block_mm", handle2)
282 CALL dbm_multiply(transa, transb, 1.0_dp, dbm_a_mm, dbm_b_mm, &
283 0.0_dp, dbm_c_mm, filter_eps=filter_eps)
284 CALL timestop(handle2)
285 IF (
PRESENT(io_unit))
THEN
286 IF (io_unit > 0)
THEN
287 WRITE (io_unit,
"(A)")
"dbm benchmark completed"
319 SUBROUTINE dbt_tas_test_mm(transa, transb, transc, matrix_a, matrix_b, matrix_c, filter_eps, unit_nr, log_verbose)
320 LOGICAL,
INTENT(IN) :: transa, transb, transc
321 TYPE(
dbt_tas_type),
INTENT(INOUT) :: matrix_a, matrix_b, matrix_c
322 REAL(kind=
dp),
INTENT(IN),
OPTIONAL :: filter_eps
323 INTEGER,
INTENT(IN) :: unit_nr
324 LOGICAL,
INTENT(IN),
OPTIONAL :: log_verbose
326 REAL(kind=
dp),
PARAMETER :: test_tol = 1.0e-10_dp
328 CHARACTER(LEN=8) :: status_str
329 INTEGER :: io_unit, mynode
330 INTEGER,
CONTIGUOUS,
DIMENSION(:),
POINTER :: cd_a, cd_b, cd_c, col_block_sizes_a, &
331 col_block_sizes_b, col_block_sizes_c, rd_a, rd_b, rd_c, row_block_sizes_a, &
332 row_block_sizes_b, row_block_sizes_c
333 INTEGER,
DIMENSION(2) :: npdims
334 LOGICAL :: abort, transa_prv, transb_prv
335 REAL(kind=
dp) :: norm, rc_cs, sq_cs
337 TYPE(
dbm_type) :: dbm_a, dbm_a_mm, dbm_b, dbm_b_mm, dbm_c, &
338 dbm_c_mm, dbm_c_mm_check
346 mynode = mp_comm%mepos
349 IF (mynode .EQ. 0) io_unit = unit_nr
353 filter_eps=filter_eps, unit_nr=io_unit, log_verbose=log_verbose, optimize_dist=.true.)
360 CALL comm_dbm%create(mp_comm, 2, npdims)
385 DEALLOCATE (rd_a, rd_b, rd_c, cd_a, cd_b, cd_c)
395 CALL dbm_create(matrix=dbm_a_mm, name=
"matrix a", dist=dist_a, &
396 row_block_sizes=row_block_sizes_a, col_block_sizes=col_block_sizes_a)
398 CALL dbm_create(matrix=dbm_b_mm, name=
"matrix b", dist=dist_b, &
399 row_block_sizes=row_block_sizes_b, col_block_sizes=col_block_sizes_b)
401 CALL dbm_create(matrix=dbm_c_mm, name=
"matrix c", dist=dist_c, &
402 row_block_sizes=row_block_sizes_c, col_block_sizes=col_block_sizes_c)
404 CALL dbm_create(matrix=dbm_c_mm_check, name=
"matrix c check", dist=dist_c, &
405 row_block_sizes=row_block_sizes_c, col_block_sizes=col_block_sizes_c)
416 transa_prv = transa; transb_prv = transb
418 IF (.NOT. transc)
THEN
420 dbm_a_mm, dbm_b_mm, &
421 0.0_dp, dbm_c_mm, filter_eps=filter_eps)
423 transa_prv = .NOT. transa_prv
424 transb_prv = .NOT. transb_prv
426 dbm_b_mm, dbm_a_mm, &
427 0.0_dp, dbm_c_mm, filter_eps=filter_eps)
433 CALL dbm_add(dbm_c_mm_check, dbm_c_mm)
436 IF (io_unit > 0)
THEN
437 IF (abs(norm) > test_tol)
THEN
438 status_str =
" failed!"
441 status_str =
" passed!"
444 WRITE (io_unit,
"(A)") &
447 WRITE (io_unit,
"(A,1X,E9.2,1X,E9.2)")
"checksums", sq_cs, rc_cs
448 WRITE (io_unit,
"(A,1X,E9.2)")
"difference norm", norm
449 IF (abort) cpabort(
"DBT TAS test failed")
493 INTEGER,
DIMENSION(:),
INTENT(IN) :: sizes
494 INTEGER,
INTENT(IN) :: repeat
495 INTEGER,
DIMENSION(:),
INTENT(OUT) :: dbt_sizes
499 DO d = 1,
SIZE(dbt_sizes)
500 size_i = mod((d - 1)/repeat,
SIZE(sizes)) + 1
501 dbt_sizes(d) = sizes(size_i)
510 randmat_counter = rand_seed_init
subroutine, public dbm_multiply(transa, transb, alpha, matrix_a, matrix_b, beta, matrix_c, retain_sparsity, filter_eps, flop)
Computes matrix product: matrix_c = alpha * matrix_a * matrix_b + beta * matrix_c.
subroutine, public dbm_redistribute(matrix, redist)
Copies content of matrix_b into matrix_a. Matrices may have different distributions.
real(kind=dp) function, public dbm_maxabs(matrix)
Returns the absolute value of the larges element of the entire given matrix.
subroutine, public dbm_scale(matrix, alpha)
Multiplies all entries in the given matrix by the given factor alpha.
subroutine, public dbm_distribution_release(dist)
Decreases the reference counter of the given distribution.
subroutine, public dbm_create(matrix, name, dist, row_block_sizes, col_block_sizes)
Creates a new matrix.
integer function, dimension(:), pointer, contiguous, public dbm_get_row_block_sizes(matrix)
Returns the row block sizes of the given matrix.
character(len=default_string_length) function, public dbm_get_name(matrix)
Returns the name of the matrix of the given matrix.
real(kind=dp) function, public dbm_checksum(matrix)
Computes a checksum of the given matrix.
subroutine, public dbm_add(matrix_a, matrix_b)
Adds matrix_b to matrix_a.
subroutine, public dbm_finalize(matrix)
Needed to be called for DBCSR after blocks where inserted. For DBM it's a no-opt.
subroutine, public dbm_release(matrix)
Releases a matrix and all its ressources.
integer function, dimension(:), pointer, contiguous, public dbm_get_col_block_sizes(matrix)
Returns the column block sizes of the given matrix.
subroutine, public dbm_distribution_new(dist, mp_comm, row_dist_block, col_dist_block)
Creates a new two dimensional distribution.
integer function, dimension(4), public generate_larnv_seed(irow, nrow, icol, ncol, ival)
Generate a seed respecting the lapack constraints,.
Tall-and-skinny matrices: base routines similar to DBM API, mostly wrappers around existing DBM routi...
subroutine, public dbt_tas_convert_to_dbm(matrix_rect, matrix_dbm)
Convert a tall-and-skinny matrix into a normal DBM matrix. This is not recommended for matrices with ...
subroutine, public dbt_tas_get_stored_coordinates(matrix, row, column, processor)
As dbt_get_stored_coordinates.
pure integer(kind=int_8) function, public dbt_tas_nblkrows_total(matrix)
...
pure integer(kind=int_8) function, public dbt_tas_nblkcols_total(matrix)
...
subroutine, public dbt_tas_finalize(matrix)
...
subroutine, public dbt_tas_distribution_new(dist, mp_comm, row_dist, col_dist, split_info, nosplit)
create new distribution. Exactly like dbm_distribution_new but with custom types for row_dist and col...
type(dbt_tas_split_info) function, pointer, public dbt_tas_info(matrix)
get info on mpi grid splitting
subroutine, public dbt_tas_put_block(matrix, row, col, block, summation)
As dbm_put_block.
Global data (distribution and block sizes) for tall-and-skinny matrices For very sparse matrices with...
subroutine, public dbt_tas_default_distvec(nblk, nproc, blk_size, dist)
get a load-balanced and randomized distribution along one tensor dimension
Matrix multiplication for tall-and-skinny matrices. This uses the k-split (non-recursive) CARMA algor...
recursive subroutine, public dbt_tas_multiply(transa, transb, transc, alpha, matrix_a, matrix_b, beta, matrix_c, optimize_dist, split_opt, filter_eps, flop, move_data_a, move_data_b, retain_sparsity, simple_split, unit_nr, log_verbose)
tall-and-skinny matrix-matrix multiplication. Undocumented dummy arguments are identical to arguments...
methods to split tall-and-skinny matrices along longest dimension. Basically, we are splitting proces...
subroutine, public dbt_tas_get_split_info(info, mp_comm, nsplit, igroup, mp_comm_group, split_rowcol, pgrid_offset)
Get info on split.
testing infrastructure for tall-and-skinny matrices
subroutine, public dbt_tas_test_mm(transa, transb, transc, matrix_a, matrix_b, matrix_c, filter_eps, unit_nr, log_verbose)
Test tall-and-skinny matrix multiplication for accuracy.
subroutine, public dbt_tas_reset_randmat_seed()
Reset the seed used for generating random matrices to default value.
subroutine, public dbt_tas_benchmark_mm(transa, transb, transc, matrix_a, matrix_b, matrix_c, compare_dbm, filter_eps, io_unit)
Benchmark routine. Due to random sparsity (as opposed to structured sparsity pattern),...
subroutine, public dbt_tas_setup_test_matrix(matrix, mp_comm_out, mp_comm, nrows, ncols, rbsizes, cbsizes, dist_splitsize, name, sparsity, reuse_comm)
Setup tall-and-skinny matrix for testing.
real(kind=dp) function, public dbt_tas_checksum(matrix)
Calculate checksum of tall-and-skinny matrix consistent with dbm_checksum.
subroutine, public dbt_tas_random_bsizes(sizes, repeat, dbt_sizes)
Create random block sizes.
DBT tall-and-skinny base types. Mostly wrappers around existing DBM routines.
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
Interface to the message passing library MPI.
type for arbitrary block sizes
type for cyclic (round robin) distribution: