70#include "../base/base_uses.f90" 
   74   CHARACTER(len=*), 
PARAMETER, 
PRIVATE :: moduleN = 
'dbt_methods' 
  ! NOTE(review): extraction artifact — each original source line is split across
  ! several physical lines below and some original lines are missing (gaps in the
  ! embedded line numbers, e.g. 118-122, 125-129). Comments annotate visible code
  ! only; code bytes are untouched.
  !
  ! dbt_copy: copy tensor_in into tensor_out. Visible optional controls:
  !   order     - index permutation applied during the copy (length = tensor rank)
  !   summation - presumably: add into tensor_out instead of overwriting — TODO confirm
  !   bounds    - (2, rank) index bounds restricting the copied region
  !   move_data - presumably: source data may be moved/stolen rather than copied — TODO confirm
  !   unit_nr   - output unit for logging
  ! Delegates all work to dbt_copy_expert; the mp_comm_2d%sync() calls before and
  ! after fence the operation so the "dbt_total" timer covers the full exchange.
  113   SUBROUTINE dbt_copy(tensor_in, tensor_out, order, summation, bounds, move_data, unit_nr)

  114      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_in, tensor_out

  115      INTEGER, 
DIMENSION(ndims_tensor(tensor_in)), &

  116         INTENT(IN), 
OPTIONAL                        :: order

  117      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: summation, move_data

  118      INTEGER, 
DIMENSION(2, ndims_tensor(tensor_in)), &

  119         INTENT(IN), 
OPTIONAL                        :: bounds

  120      INTEGER, 
INTENT(IN), 
OPTIONAL                  :: unit_nr

  ! Barrier before timing so ranks enter the copy together.
  123      CALL tensor_in%pgrid%mp_comm_2d%sync()

  124      CALL timeset(
"dbt_total", handle)

  ! All optionals are forwarded positionally; dbt_copy_expert does the work.
  130      CALL dbt_copy_expert(tensor_in, tensor_out, order, summation, bounds, move_data, unit_nr)

  ! Barrier after the copy so timestop measures the collective operation.
  131      CALL tensor_in%pgrid%mp_comm_2d%sync()

  132      CALL timestop(handle)

 
  ! NOTE(review): extraction artifact — lines are fragmented and many original
  ! lines are missing (gaps in embedded numbering); annotations cover visible
  ! code only and do not guess at the missing statements.
  !
  ! dbt_copy_expert: implementation behind dbt_copy. Visible flow:
  !   1. optionally crop tensor_in to 'bounds' (dbt_crop),
  !   2. make block sizes compatible with tensor_out (reorder_arrays/check_equal,
  !      allocating temporaries in_tmp_2/out_tmp_1 when blocks differ),
  !   3. optionally permute indices per 'order' (dbt_permute_index),
  !   4. choose the cheapest transfer path:
  !        - dbt_tas_copy     when the underlying TAS matrix distributions match,
  !        - dbt_copy_nocomm  when the tensor distributions match,
  !        - dbt_reshape      otherwise (communicating fallback),
  !   5. deallocate whichever temporaries were created.
  139   SUBROUTINE dbt_copy_expert(tensor_in, tensor_out, order, summation, bounds, move_data, unit_nr)

  140      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_in, tensor_out

  141      INTEGER, 
DIMENSION(ndims_tensor(tensor_in)), &

  142         INTENT(IN), 
OPTIONAL                        :: order

  143      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: summation, move_data

  144      INTEGER, 
DIMENSION(2, ndims_tensor(tensor_in)), &

  145         INTENT(IN), 
OPTIONAL                        :: bounds

  146      INTEGER, 
INTENT(IN), 
OPTIONAL                  :: unit_nr

  ! Pointers to the current "working" tensors along the pipeline (in_tmp_1..);
  ! they alias tensor_in directly when no transformation is needed.
  148      TYPE(
dbt_type), 
POINTER                    :: in_tmp_1, in_tmp_2, &

  150      INTEGER                                        :: handle, unit_nr_prv

  151      INTEGER, 
DIMENSION(:), 
ALLOCATABLE             :: map1_in_1, map1_in_2, map2_in_1, map2_in_2

  153      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_copy' 
  154      LOGICAL                                        :: dist_compatible_tas, dist_compatible_tensor, &

  155                                                        summation_prv, new_in_1, new_in_2, &

  156                                                        new_in_3, new_out_1, block_compatible, &

  160      CALL timeset(routinen, handle)

  162      cpassert(tensor_out%valid)

  166      IF (
PRESENT(move_data)) 
THEN 
  ! Default: assume no fast path applies until proven otherwise below.
  172      dist_compatible_tas = .false.

  173      dist_compatible_tensor = .false.

  174      block_compatible = .false.

  180      IF (
PRESENT(summation)) 
THEN 
  181         summation_prv = summation

  183         summation_prv = .false.

  ! Step 1: restrict to 'bounds' via a cropped temporary, else alias the input.
  186      IF (
PRESENT(bounds)) 
THEN 
  188         CALL dbt_crop(tensor_in, in_tmp_1, bounds=bounds, move_data=move_prv)

  192         in_tmp_1 => tensor_in

  ! Step 2: block-size compatibility; with 'order' the sizes are compared after
  ! applying the permutation to the input's block sizes.
  195      IF (
PRESENT(order)) 
THEN 
  196         CALL reorder_arrays(in_tmp_1%blk_sizes, blk_sizes_in, order=order)

  197         block_compatible = 
check_equal(blk_sizes_in, tensor_out%blk_sizes)

  199         block_compatible = 
check_equal(in_tmp_1%blk_sizes, tensor_out%blk_sizes)

  ! Incompatible blocks: build temporaries with matching block structure.
  ! (The call whose continuation carries nodata2/move_data is partly missing here.)
  202      IF (.NOT. block_compatible) 
THEN 
  203         ALLOCATE (in_tmp_2, out_tmp_1)

  205                                         nodata2=.NOT. summation_prv, move_data=move_prv)

  206         new_in_2 = .true.; new_out_1 = .true.

  210         out_tmp_1 => tensor_out

  ! Step 3: apply the index permutation, if requested.
  213      IF (
PRESENT(order)) 
THEN 
  215         CALL dbt_permute_index(in_tmp_2, in_tmp_3, order)

  ! Step 4: fast-path detection (only meaningful without a permutation).
  ! Matching map1/map2 index maps + equal nd_dist => TAS-level copy is legal;
  ! matching concatenated maps => per-block local copy is legal.
  229      IF (.NOT. 
PRESENT(order)) 
THEN 
  230         IF (array_eq_i(map1_in_1, map2_in_1) .AND. array_eq_i(map1_in_2, map2_in_2)) 
THEN 
  231            dist_compatible_tas = 
check_equal(in_tmp_3%nd_dist, out_tmp_1%nd_dist)

  232         ELSEIF (array_eq_i([map1_in_1, map1_in_2], [map2_in_1, map2_in_2])) 
THEN 
  233            dist_compatible_tensor = 
check_equal(in_tmp_3%nd_dist, out_tmp_1%nd_dist)

  237      IF (dist_compatible_tas) 
THEN 
  238         CALL dbt_tas_copy(out_tmp_1%matrix_rep, in_tmp_3%matrix_rep, summation)

  240      ELSEIF (dist_compatible_tensor) 
THEN 
  241         CALL dbt_copy_nocomm(in_tmp_3, out_tmp_1, summation)

  ! Fallback: communicating reshape between differing distributions.
  244         CALL dbt_reshape(in_tmp_3, out_tmp_1, summation, move_data=move_prv)

  ! Step 5: release whichever temporaries this routine allocated.
  249         DEALLOCATE (in_tmp_1)

  254         DEALLOCATE (in_tmp_2)

  259         DEALLOCATE (in_tmp_3)

  263         IF (unit_nr_prv /= 0) 
THEN 
  268         DEALLOCATE (out_tmp_1)

  271      CALL timestop(handle)
 
  ! NOTE(review): fragmented extraction — the block-iteration loop body
  ! (original lines 297-309) is missing here; only setup, one dbt_put_block
  ! call, and teardown are visible.
  !
  ! dbt_copy_nocomm: copy tensor_in into tensor_out without MPI communication.
  ! Callers (see dbt_copy_expert) use it only when both tensors have identical
  ! distributions, so every block can be copied locally via dbt_put_block.
  ! With summation=.TRUE. blocks are accumulated into tensor_out; otherwise
  ! tensor_out is cleared first (dbt_clear).
  280   SUBROUTINE dbt_copy_nocomm(tensor_in, tensor_out, summation)

  281      TYPE(
dbt_type), 
INTENT(INOUT) :: tensor_in

  282      TYPE(
dbt_type), 
INTENT(INOUT) :: tensor_out

  283      LOGICAL, 
INTENT(IN), 
OPTIONAL                      :: summation

  285      INTEGER, 
DIMENSION(ndims_tensor(tensor_in))  :: ind_nd

  289      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_copy_nocomm' 
  292      CALL timeset(routinen, handle)

  293      cpassert(tensor_out%valid)

  ! Overwrite semantics: wipe the destination unless we are accumulating.
  295      IF (
PRESENT(summation)) 
THEN 
  296         IF (.NOT. summation) 
CALL dbt_clear(tensor_out)

  ! Presumably inside a loop over local blocks of tensor_in — TODO confirm
  ! (loop header is in the missing lines 297-309).
  310         CALL dbt_put_block(tensor_out, ind_nd, blk_data, summation=summation)

  316      CALL timestop(handle)
 
  ! NOTE(review): the SUBROUTINE header line of this routine is missing from the
  ! extraction; the name 'dbt_copy_matrix_to_tensor' is taken from the routinen
  ! parameter below. Lines are fragmented; annotations cover visible code only.
  !
  ! Copies a (DBCSR) 2-d matrix into a rank-2 DBT tensor. Visible behavior:
  ! a symmetric input is first desymmetrized into matrix_in_desym (allocated
  ! branch), otherwise matrix_in is aliased directly; each 2-d block is copied
  ! into block_arr and inserted with dbt_put_block, honoring 'summation'.
  325      TYPE(
dbcsr_type), 
TARGET, 
INTENT(IN)               :: matrix_in

  326      TYPE(
dbt_type), 
INTENT(INOUT)             :: tensor_out

  327      LOGICAL, 
INTENT(IN), 
OPTIONAL                      :: summation

  330      INTEGER, 
DIMENSION(2)                              :: ind_2d

  331      REAL(kind=
dp), 
ALLOCATABLE, 
DIMENSION(:, :)    :: block_arr

  332      REAL(kind=
dp), 
DIMENSION(:, :), 
POINTER        :: block

  336      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_copy_matrix_to_tensor' 
  338      CALL timeset(routinen, handle)

  339      cpassert(tensor_out%valid)

  ! Branch condition is in missing lines; presumably taken for a symmetric
  ! matrix that must be desymmetrized before the copy — TODO confirm.
  344         ALLOCATE (matrix_in_desym)

  347         matrix_in_desym => matrix_in

  ! Overwrite semantics unless accumulating.
  350      IF (
PRESENT(summation)) 
THEN 
  351         IF (.NOT. summation) 
CALL dbt_clear(tensor_out)

  ! Per-block insertion; block_arr is a local copy sized by shape(block_arr).
  364         CALL dbt_put_block(tensor_out, ind_2d, shape(block_arr), block_arr, summation=summation)

  365         DEALLOCATE (block_arr)

  ! Release the desymmetrized copy if one was made.
  372         DEALLOCATE (matrix_in_desym)

  375      CALL timestop(handle)
 
 
  ! NOTE(review): the SUBROUTINE header and the declaration of matrix_out are
  ! missing from the extraction; the name 'dbt_copy_tensor_to_matrix' comes from
  ! the routinen parameter below. Annotations cover visible code only.
  !
  ! Copies a rank-2 DBT tensor into a (DBCSR) matrix. Visible behavior: each 2-d
  ! tensor block is written into matrix_out via dbcsr_put_block; one branch
  ! writes the transpose at swapped indices (ind_2d(2), ind_2d(1)) — presumably
  ! to fill the other triangle of a symmetric output matrix — TODO confirm.
  385      TYPE(
dbt_type), 
INTENT(INOUT)      :: tensor_in

  387      LOGICAL, 
INTENT(IN), 
OPTIONAL          :: summation

  390      INTEGER, 
DIMENSION(2)                  :: ind_2d

  391      REAL(kind=
dp), 
DIMENSION(:, :), 
ALLOCATABLE :: block

  392      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_copy_tensor_to_matrix' 
  395      CALL timeset(routinen, handle)

  397      IF (
PRESENT(summation)) 
THEN 
  ! Transposed insertion at swapped block indices (branch condition missing).
  416            CALL dbcsr_put_block(matrix_out, ind_2d(2), ind_2d(1), transpose(block), summation=summation)

  ! Direct insertion at the block's own indices.
  418            CALL dbcsr_put_block(matrix_out, ind_2d(1), ind_2d(2), block, summation=summation)

  425      CALL timestop(handle)

 
 
  ! NOTE(review): the SUBROUTINE header line (with the routine name and leading
  ! arguments alpha, tensor_1, tensor_2, beta, tensor_3, map_1, map_2) is
  ! missing from this extraction; what follows starts mid-argument-list. Judging
  ! from the dbt_contract_expert call below, this is the public dbt_contract
  ! wrapper. Annotations cover visible code only.
  !
  ! Contraction driver: computes (per the visible expert call)
  !    tensor_3 = alpha * contract(tensor_1, tensor_2) + beta * tensor_3
  ! where contract_1/contract_2 name the summed indices of tensors 1/2,
  ! notcontract_1/notcontract_2 the surviving indices, and map_1/map_2 their
  ! placement in tensor_3. Optionals visible here: per-index bounds_1..3,
  ! distribution optimization (optimize_dist, pgrid_opt_1..3), filtering
  ! (filter_eps), flop counting, data movement, sparsity retention, logging.
  ! The wrapper only syncs, times ("dbt_total"), and forwards everything to
  ! dbt_contract_expert by keyword.
  492                           contract_1, notcontract_1, &

  493                           contract_2, notcontract_2, &

  495                           bounds_1, bounds_2, bounds_3, &

  496                           optimize_dist, pgrid_opt_1, pgrid_opt_2, pgrid_opt_3, &

  497                           filter_eps, flop, move_data, retain_sparsity, unit_nr, log_verbose)

  498      REAL(
dp), 
INTENT(IN)            :: alpha

  499      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_1

  500      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_2

  501      REAL(
dp), 
INTENT(IN)            :: beta

  502      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: contract_1

  503      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: contract_2

  504      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: map_1

  505      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: map_2

  506      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: notcontract_1

  507      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: notcontract_2

  508      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_3

  ! Bounds arrays are shaped against the corresponding index lists.
  509      INTEGER, 
DIMENSION(2, SIZE(contract_1)), &

  510         INTENT(IN), 
OPTIONAL                        :: bounds_1

  511      INTEGER, 
DIMENSION(2, SIZE(notcontract_1)), &

  512         INTENT(IN), 
OPTIONAL                        :: bounds_2

  513      INTEGER, 
DIMENSION(2, SIZE(notcontract_2)), &

  514         INTENT(IN), 
OPTIONAL                        :: bounds_3

  515      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: optimize_dist

  ! pgrid_opt_* type specifiers are on missing lines; only the POINTER,
  ! OPTIONAL attribute fragments survive here.
  517         POINTER, 
OPTIONAL                           :: pgrid_opt_1

  519         POINTER, 
OPTIONAL                           :: pgrid_opt_2

  521         POINTER, 
OPTIONAL                           :: pgrid_opt_3

  522      REAL(kind=
dp), 
INTENT(IN), 
OPTIONAL        :: filter_eps

  523      INTEGER(KIND=int_8), 
INTENT(OUT), 
OPTIONAL     :: flop

  524      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: move_data

  525      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: retain_sparsity

  526      INTEGER, 
OPTIONAL, 
INTENT(IN)                  :: unit_nr

  527      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: log_verbose

  ! Fence + time the whole contraction, mirroring dbt_copy.
  531      CALL tensor_1%pgrid%mp_comm_2d%sync()

  532      CALL timeset(
"dbt_total", handle)

  ! Forward everything to the expert routine (keyword form keeps absent
  ! optionals absent). Some continuation lines (536-539, 545, 548) are missing.
  533      CALL dbt_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, &

  534                               contract_1, notcontract_1, &

  535                               contract_2, notcontract_2, &

  540                               optimize_dist=optimize_dist, &

  541                               pgrid_opt_1=pgrid_opt_1, &

  542                               pgrid_opt_2=pgrid_opt_2, &

  543                               pgrid_opt_3=pgrid_opt_3, &

  544                               filter_eps=filter_eps, &

  546                               move_data=move_data, &

  547                               retain_sparsity=retain_sparsity, &

  549                               log_verbose=log_verbose)

  550      CALL tensor_1%pgrid%mp_comm_2d%sync()

  551      CALL timestop(handle)

 
 
  560   SUBROUTINE dbt_contract_expert(alpha, tensor_1, tensor_2, beta, tensor_3, &
 
  561                                  contract_1, notcontract_1, &
 
  562                                  contract_2, notcontract_2, &
 
  564                                  bounds_1, bounds_2, bounds_3, &
 
  565                                  optimize_dist, pgrid_opt_1, pgrid_opt_2, pgrid_opt_3, &
 
  566                                  filter_eps, flop, move_data, retain_sparsity, &
 
  567                                  nblks_local, unit_nr, log_verbose)
 
  568      REAL(
dp), 
INTENT(IN)            :: alpha
 
  569      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_1
 
  570      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_2
 
  571      REAL(
dp), 
INTENT(IN)            :: beta
 
  572      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: contract_1
 
  573      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: contract_2
 
  574      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: map_1
 
  575      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: map_2
 
  576      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: notcontract_1
 
  577      INTEGER, 
DIMENSION(:), 
INTENT(IN)              :: notcontract_2
 
  578      TYPE(
dbt_type), 
INTENT(INOUT), 
TARGET      :: tensor_3
 
  579      INTEGER, 
DIMENSION(2, SIZE(contract_1)), &
 
  580         INTENT(IN), 
OPTIONAL                        :: bounds_1
 
  581      INTEGER, 
DIMENSION(2, SIZE(notcontract_1)), &
 
  582         INTENT(IN), 
OPTIONAL                        :: bounds_2
 
  583      INTEGER, 
DIMENSION(2, SIZE(notcontract_2)), &
 
  584         INTENT(IN), 
OPTIONAL                        :: bounds_3
 
  585      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: optimize_dist
 
  587         POINTER, 
OPTIONAL                           :: pgrid_opt_1
 
  589         POINTER, 
OPTIONAL                           :: pgrid_opt_2
 
  591         POINTER, 
OPTIONAL                           :: pgrid_opt_3
 
  592      REAL(kind=
dp), 
INTENT(IN), 
OPTIONAL        :: filter_eps
 
  593      INTEGER(KIND=int_8), 
INTENT(OUT), 
OPTIONAL     :: flop
 
  594      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: move_data
 
  595      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: retain_sparsity
 
  596      INTEGER, 
INTENT(OUT), 
OPTIONAL                 :: nblks_local
 
  597      INTEGER, 
OPTIONAL, 
INTENT(IN)                  :: unit_nr
 
  598      LOGICAL, 
INTENT(IN), 
OPTIONAL                  :: log_verbose
 
  600      TYPE(
dbt_type), 
POINTER                    :: tensor_contr_1, tensor_contr_2, tensor_contr_3
 
  601      TYPE(
dbt_type), 
TARGET                     :: tensor_algn_1, tensor_algn_2, tensor_algn_3
 
  602      TYPE(
dbt_type), 
POINTER                    :: tensor_crop_1, tensor_crop_2
 
  603      TYPE(
dbt_type), 
POINTER                    :: tensor_small, tensor_large
 
  605      LOGICAL                                        :: assert_stmt, tensors_remapped
 
  606      INTEGER                                        :: max_mm_dim, max_tensor, &
 
  607                                                        unit_nr_prv, ref_tensor, handle
 
  609      INTEGER, 
DIMENSION(SIZE(contract_1))           :: contract_1_mod
 
  610      INTEGER, 
DIMENSION(SIZE(notcontract_1))        :: notcontract_1_mod
 
  611      INTEGER, 
DIMENSION(SIZE(contract_2))           :: contract_2_mod
 
  612      INTEGER, 
DIMENSION(SIZE(notcontract_2))        :: notcontract_2_mod
 
  613      INTEGER, 
DIMENSION(SIZE(map_1))                :: map_1_mod
 
  614      INTEGER, 
DIMENSION(SIZE(map_2))                :: map_2_mod
 
  615      LOGICAL                                        :: trans_1, trans_2, trans_3
 
  616      LOGICAL                                        :: new_1, new_2, new_3, move_data_1, move_data_2
 
  617      INTEGER                                        :: ndims1, ndims2, ndims3
 
  618      INTEGER                                        :: occ_1, occ_2
 
  619      INTEGER, 
DIMENSION(:), 
ALLOCATABLE             :: dims1, dims2, dims3
 
  621      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_contract' 
  622      CHARACTER(LEN=1), 
DIMENSION(:), 
ALLOCATABLE    :: indchar1, indchar2, indchar3, indchar1_mod, &
 
  623                                                        indchar2_mod, indchar3_mod
 
  624      CHARACTER(LEN=1), 
DIMENSION(15), 
SAVE :: alph = &
 
  625                                               [
'a', 
'b', 
'c', 
'd', 
'e', 
'f', 
'g', 
'h', 
'i', 
'j', 
'k', 
'l', 
'm', 
'n', 
'o']
 
  626      INTEGER, 
DIMENSION(2, ndims_tensor(tensor_1)) :: bounds_t1
 
  627      INTEGER, 
DIMENSION(2, ndims_tensor(tensor_2)) :: bounds_t2
 
  628      LOGICAL                                        :: do_crop_1, do_crop_2, do_write_3, nodata_3, do_batched, pgrid_changed, &
 
  629                                                        pgrid_changed_any, do_change_pgrid(2)
 
  631      INTEGER, 
DIMENSION(2) :: pdims_2d_opt, pdims_sub, pdims_sub_opt
 
  632      REAL(
dp) :: pdim_ratio, pdim_ratio_opt
 
  634      NULLIFY (tensor_contr_1, tensor_contr_2, tensor_contr_3, tensor_crop_1, tensor_crop_2, &
 
  637      CALL timeset(routinen, handle)
 
  639      cpassert(tensor_1%valid)
 
  640      cpassert(tensor_2%valid)
 
  641      cpassert(tensor_3%valid)
 
  643      assert_stmt = 
SIZE(contract_1) .EQ. 
SIZE(contract_2)
 
  644      cpassert(assert_stmt)
 
  646      assert_stmt = 
SIZE(map_1) .EQ. 
SIZE(notcontract_1)
 
  647      cpassert(assert_stmt)
 
  649      assert_stmt = 
SIZE(map_2) .EQ. 
SIZE(notcontract_2)
 
  650      cpassert(assert_stmt)
 
  652      assert_stmt = 
SIZE(notcontract_1) + 
SIZE(contract_1) .EQ. 
ndims_tensor(tensor_1)
 
  653      cpassert(assert_stmt)
 
  655      assert_stmt = 
SIZE(notcontract_2) + 
SIZE(contract_2) .EQ. 
ndims_tensor(tensor_2)
 
  656      cpassert(assert_stmt)
 
  658      assert_stmt = 
SIZE(map_1) + 
SIZE(map_2) .EQ. 
ndims_tensor(tensor_3)
 
  659      cpassert(assert_stmt)
 
  663      IF (
PRESENT(flop)) flop = 0
 
  664      IF (
PRESENT(nblks_local)) nblks_local = 0
 
  666      IF (
PRESENT(move_data)) 
THEN 
  667         move_data_1 = move_data
 
  668         move_data_2 = move_data
 
  670         move_data_1 = .false.
 
  671         move_data_2 = .false.
 
  675      IF (
PRESENT(retain_sparsity)) 
THEN 
  676         IF (retain_sparsity) nodata_3 = .false.
 
  679      CALL dbt_map_bounds_to_tensors(tensor_1, tensor_2, &
 
  680                                     contract_1, notcontract_1, &
 
  681                                     contract_2, notcontract_2, &
 
  682                                     bounds_t1, bounds_t2, &
 
  683                                     bounds_1=bounds_1, bounds_2=bounds_2, bounds_3=bounds_3, &
 
  684                                     do_crop_1=do_crop_1, do_crop_2=do_crop_2)
 
  687         ALLOCATE (tensor_crop_1)
 
  688         CALL dbt_crop(tensor_1, tensor_crop_1, bounds_t1, move_data=move_data_1)
 
  691         tensor_crop_1 => tensor_1
 
  695         ALLOCATE (tensor_crop_2)
 
  696         CALL dbt_crop(tensor_2, tensor_crop_2, bounds_t2, move_data=move_data_2)
 
  699         tensor_crop_2 => tensor_2
 
  705      associate(mp_comm => tensor_crop_1%pgrid%mp_comm_2d)
 
  707         CALL mp_comm%max(occ_1)
 
  709         CALL mp_comm%max(occ_2)
 
  712      IF (occ_1 == 0 .OR. occ_2 == 0) 
THEN 
  716            DEALLOCATE (tensor_crop_1)
 
  720            DEALLOCATE (tensor_crop_2)
 
  723         CALL timestop(handle)
 
  727      IF (unit_nr_prv /= 0) 
THEN 
  728         IF (unit_nr_prv > 0) 
THEN 
  729            WRITE (unit_nr_prv, 
'(A)') repeat(
"-", 80)
 
  730            WRITE (unit_nr_prv, 
'(A,1X,A,1X,A,1X,A,1X,A,1X,A)') 
"DBT TENSOR CONTRACTION:", &
 
  731               trim(tensor_crop_1%name), 
'x', trim(tensor_crop_2%name), 
'=', trim(tensor_3%name)
 
  732            WRITE (unit_nr_prv, 
'(A)') repeat(
"-", 80)
 
  744      ALLOCATE (indchar1(ndims1), indchar1_mod(ndims1))
 
  745      ALLOCATE (indchar2(ndims2), indchar2_mod(ndims2))
 
  746      ALLOCATE (indchar3(ndims3), indchar3_mod(ndims3))
 
  750      indchar1([notcontract_1, contract_1]) = alph(1:ndims1) 
 
  751      indchar2(notcontract_2) = alph(ndims1 + 1:ndims1 + 
SIZE(notcontract_2)) 
 
  752      indchar2(contract_2) = indchar1(contract_1)
 
  753      indchar3(map_1) = indchar1(notcontract_1)
 
  754      indchar3(map_2) = indchar2(notcontract_2)
 
  756      IF (unit_nr_prv /= 0) 
CALL dbt_print_contraction_index(tensor_crop_1, indchar1, &
 
  757                                                             tensor_crop_2, indchar2, &
 
  758                                                             tensor_3, indchar3, unit_nr_prv)
 
  759      IF (unit_nr_prv > 0) 
THEN 
  760         WRITE (unit_nr_prv, 
'(T2,A)') 
"aligning tensor index with data" 
  763      CALL align_tensor(tensor_crop_1, contract_1, notcontract_1, &
 
  764                        tensor_algn_1, contract_1_mod, notcontract_1_mod, indchar1, indchar1_mod)
 
  766      CALL align_tensor(tensor_crop_2, contract_2, notcontract_2, &
 
  767                        tensor_algn_2, contract_2_mod, notcontract_2_mod, indchar2, indchar2_mod)
 
  769      CALL align_tensor(tensor_3, map_1, map_2, &
 
  770                        tensor_algn_3, map_1_mod, map_2_mod, indchar3, indchar3_mod)
 
  772      IF (unit_nr_prv /= 0) 
CALL dbt_print_contraction_index(tensor_algn_1, indchar1_mod, &
 
  773                                                             tensor_algn_2, indchar2_mod, &
 
  774                                                             tensor_algn_3, indchar3_mod, unit_nr_prv)
 
  776      ALLOCATE (dims1(ndims1))
 
  777      ALLOCATE (dims2(ndims2))
 
  778      ALLOCATE (dims3(ndims3))
 
  786      max_mm_dim = maxloc([product(int(dims1(notcontract_1), 
int_8)), &
 
  787                           product(int(dims1(contract_1), 
int_8)), &
 
  788                           product(int(dims2(notcontract_2), 
int_8))], dim=1)
 
  789      max_tensor = maxloc([product(int(dims1, 
int_8)), product(int(dims2, 
int_8)), product(int(dims3, 
int_8))], dim=1)
 
  790      SELECT CASE (max_mm_dim)
 
  792         IF (unit_nr_prv > 0) 
THEN 
  793            WRITE (unit_nr_prv, 
'(T2,A)') 
"large tensors: 1, 3; small tensor: 2" 
  794            WRITE (unit_nr_prv, 
'(T2,A)') 
"sorting contraction indices" 
  796         CALL index_linked_sort(contract_1_mod, contract_2_mod)
 
  797         CALL index_linked_sort(map_2_mod, notcontract_2_mod)
 
  798         SELECT CASE (max_tensor)
 
  800            CALL index_linked_sort(notcontract_1_mod, map_1_mod)
 
  802            CALL index_linked_sort(map_1_mod, notcontract_1_mod)
 
  804            cpabort(
"should not happen")
 
  807         CALL reshape_mm_compatible(tensor_algn_1, tensor_algn_3, tensor_contr_1, tensor_contr_3, &
 
  808                                    contract_1_mod, notcontract_1_mod, map_2_mod, map_1_mod, &
 
  809                                    trans_1, trans_3, new_1, new_3, ref_tensor, nodata2=nodata_3, optimize_dist=optimize_dist, &
 
  810                                    move_data_1=move_data_1, unit_nr=unit_nr_prv)
 
  812         CALL reshape_mm_small(tensor_algn_2, contract_2_mod, notcontract_2_mod, tensor_contr_2, trans_2, &
 
  813                               new_2, move_data=move_data_2, unit_nr=unit_nr_prv)
 
  815         SELECT CASE (ref_tensor)
 
  817            tensor_large => tensor_contr_1
 
  819            tensor_large => tensor_contr_3
 
  821         tensor_small => tensor_contr_2
 
  824         IF (unit_nr_prv > 0) 
THEN 
  825            WRITE (unit_nr_prv, 
'(T2,A)') 
"large tensors: 1, 2; small tensor: 3" 
  826            WRITE (unit_nr_prv, 
'(T2,A)') 
"sorting contraction indices" 
  829         CALL index_linked_sort(notcontract_1_mod, map_1_mod)
 
  830         CALL index_linked_sort(notcontract_2_mod, map_2_mod)
 
  831         SELECT CASE (max_tensor)
 
  833            CALL index_linked_sort(contract_1_mod, contract_2_mod)
 
  835            CALL index_linked_sort(contract_2_mod, contract_1_mod)
 
  837            cpabort(
"should not happen")
 
  840         CALL reshape_mm_compatible(tensor_algn_1, tensor_algn_2, tensor_contr_1, tensor_contr_2, &
 
  841                                    notcontract_1_mod, contract_1_mod, notcontract_2_mod, contract_2_mod, &
 
  842                                    trans_1, trans_2, new_1, new_2, ref_tensor, optimize_dist=optimize_dist, &
 
  843                                    move_data_1=move_data_1, move_data_2=move_data_2, unit_nr=unit_nr_prv)
 
  844         trans_1 = .NOT. trans_1
 
  846         CALL reshape_mm_small(tensor_algn_3, map_1_mod, map_2_mod, tensor_contr_3, trans_3, &
 
  847                               new_3, nodata=nodata_3, unit_nr=unit_nr_prv)
 
  849         SELECT CASE (ref_tensor)
 
  851            tensor_large => tensor_contr_1
 
  853            tensor_large => tensor_contr_2
 
  855         tensor_small => tensor_contr_3
 
  858         IF (unit_nr_prv > 0) 
THEN 
  859            WRITE (unit_nr_prv, 
'(T2,A)') 
"large tensors: 2, 3; small tensor: 1" 
  860            WRITE (unit_nr_prv, 
'(T2,A)') 
"sorting contraction indices" 
  862         CALL index_linked_sort(map_1_mod, notcontract_1_mod)
 
  863         CALL index_linked_sort(contract_2_mod, contract_1_mod)
 
  864         SELECT CASE (max_tensor)
 
  866            CALL index_linked_sort(notcontract_2_mod, map_2_mod)
 
  868            CALL index_linked_sort(map_2_mod, notcontract_2_mod)
 
  870            cpabort(
"should not happen")
 
  873         CALL reshape_mm_compatible(tensor_algn_2, tensor_algn_3, tensor_contr_2, tensor_contr_3, &
 
  874                                    contract_2_mod, notcontract_2_mod, map_1_mod, map_2_mod, &
 
  875                                    trans_2, trans_3, new_2, new_3, ref_tensor, nodata2=nodata_3, optimize_dist=optimize_dist, &
 
  876                                    move_data_1=move_data_2, unit_nr=unit_nr_prv)
 
  878         trans_2 = .NOT. trans_2
 
  879         trans_3 = .NOT. trans_3
 
  881         CALL reshape_mm_small(tensor_algn_1, notcontract_1_mod, contract_1_mod, tensor_contr_1, &
 
  882                               trans_1, new_1, move_data=move_data_1, unit_nr=unit_nr_prv)
 
  884         SELECT CASE (ref_tensor)
 
  886            tensor_large => tensor_contr_2
 
  888            tensor_large => tensor_contr_3
 
  890         tensor_small => tensor_contr_1
 
  894      IF (unit_nr_prv /= 0) 
CALL dbt_print_contraction_index(tensor_contr_1, indchar1_mod, &
 
  895                                                             tensor_contr_2, indchar2_mod, &
 
  896                                                             tensor_contr_3, indchar3_mod, unit_nr_prv)
 
  897      IF (unit_nr_prv /= 0) 
THEN 
  905                            tensor_contr_1%matrix_rep, tensor_contr_2%matrix_rep, &
 
  907                            tensor_contr_3%matrix_rep, filter_eps=filter_eps, flop=flop, &
 
  908                            unit_nr=unit_nr_prv, log_verbose=log_verbose, &
 
  909                            split_opt=split_opt, &
 
  910                            move_data_a=move_data_1, move_data_b=move_data_2, retain_sparsity=retain_sparsity)
 
  912      IF (
PRESENT(pgrid_opt_1)) 
THEN 
  913         IF (.NOT. new_1) 
THEN 
  914            ALLOCATE (pgrid_opt_1)
 
  915            pgrid_opt_1 = opt_pgrid(tensor_1, split_opt)
 
  919      IF (
PRESENT(pgrid_opt_2)) 
THEN 
  920         IF (.NOT. new_2) 
THEN 
  921            ALLOCATE (pgrid_opt_2)
 
  922            pgrid_opt_2 = opt_pgrid(tensor_2, split_opt)
 
  926      IF (
PRESENT(pgrid_opt_3)) 
THEN 
  927         IF (.NOT. new_3) 
THEN 
  928            ALLOCATE (pgrid_opt_3)
 
  929            pgrid_opt_3 = opt_pgrid(tensor_3, split_opt)
 
  933      do_batched = tensor_small%matrix_rep%do_batched > 0
 
  935      tensors_remapped = .false.
 
  936      IF (new_1 .OR. new_2 .OR. new_3) tensors_remapped = .true.
 
  938      IF (tensors_remapped .AND. do_batched) 
THEN 
  939         CALL cp_warn(__location__, &
 
  940                      "Internal process grid optimization disabled because tensors are not in contraction-compatible format")
 
  944      do_change_pgrid(:) = .false.
 
  945      IF ((.NOT. tensors_remapped) .AND. do_batched) 
THEN 
  946         associate(storage => tensor_small%contraction_storage)
 
  947            cpassert(storage%static)
 
  949            do_change_pgrid(:) = &
 
  950               update_contraction_storage(storage, split_opt, split)
 
  952            IF (any(do_change_pgrid)) 
THEN 
  953               mp_comm_opt = 
dbt_tas_mp_comm(tensor_small%pgrid%mp_comm_2d, split_opt%split_rowcol, nint(storage%nsplit_avg))
 
  955                                         nint(storage%nsplit_avg), own_comm=.true.)
 
  956               pdims_2d_opt = split_opt_avg%mp_comm%num_pe_cart
 
  961         IF (do_change_pgrid(1) .AND. .NOT. do_change_pgrid(2)) 
THEN 
  963            pdims_sub_opt = split_opt_avg%mp_comm_group%num_pe_cart
 
  964            pdims_sub = split%mp_comm_group%num_pe_cart
 
  966            pdim_ratio = maxval(real(pdims_sub, dp))/minval(pdims_sub)
 
  967            pdim_ratio_opt = maxval(real(pdims_sub_opt, dp))/minval(pdims_sub_opt)
 
  968            IF (pdim_ratio/pdim_ratio_opt <= default_pdims_accept_ratio**2) 
THEN 
  969               do_change_pgrid(1) = .false.
 
  970               CALL dbt_tas_release_info(split_opt_avg)
 
  975      IF (unit_nr_prv /= 0) 
THEN 
  977         IF (tensor_contr_3%matrix_rep%do_batched > 0) 
THEN 
  978            IF (tensor_contr_3%matrix_rep%mm_storage%batched_out) do_write_3 = .false.
 
  981            CALL dbt_write_tensor_info(tensor_contr_3, unit_nr_prv, full_info=log_verbose)
 
  982            CALL dbt_write_tensor_dist(tensor_contr_3, unit_nr_prv)
 
  988         CALL dbt_scale(tensor_algn_3, beta)
 
  989         CALL dbt_copy_expert(tensor_contr_3, tensor_algn_3, summation=.true., move_data=.true.)
 
  990         IF (
PRESENT(filter_eps)) 
CALL dbt_filter(tensor_algn_3, filter_eps)
 
  996      CALL dbt_copy_contraction_storage(tensor_contr_1, tensor_1)
 
  997      CALL dbt_copy_contraction_storage(tensor_contr_2, tensor_2)
 
  998      CALL dbt_copy_contraction_storage(tensor_contr_3, tensor_3)
 
 1000      IF (unit_nr_prv /= 0) 
THEN 
 1001         IF (new_3 .AND. do_write_3) 
CALL dbt_write_tensor_info(tensor_3, unit_nr_prv, full_info=log_verbose)
 
 1002         IF (new_3 .AND. do_write_3) 
CALL dbt_write_tensor_dist(tensor_3, unit_nr_prv)
 
 1005      CALL dbt_destroy(tensor_algn_1)
 
 1006      CALL dbt_destroy(tensor_algn_2)
 
 1007      CALL dbt_destroy(tensor_algn_3)
 
 1010         CALL dbt_destroy(tensor_crop_1)
 
 1011         DEALLOCATE (tensor_crop_1)
 
 1015         CALL dbt_destroy(tensor_crop_2)
 
 1016         DEALLOCATE (tensor_crop_2)
 
 1020         CALL dbt_destroy(tensor_contr_1)
 
 1021         DEALLOCATE (tensor_contr_1)
 
 1024         CALL dbt_destroy(tensor_contr_2)
 
 1025         DEALLOCATE (tensor_contr_2)
 
 1028         CALL dbt_destroy(tensor_contr_3)
 
 1029         DEALLOCATE (tensor_contr_3)
 
 1032      IF (
PRESENT(move_data)) 
THEN 
 1034            CALL dbt_clear(tensor_1)
 
 1035            CALL dbt_clear(tensor_2)
 
 1039      IF (unit_nr_prv > 0) 
THEN 
 1040         WRITE (unit_nr_prv, 
'(A)') repeat(
"-", 80)
 
 1041         WRITE (unit_nr_prv, 
'(A)') 
"TENSOR CONTRACTION DONE" 
 1042         WRITE (unit_nr_prv, 
'(A)') repeat(
"-", 80)
 
 1045      IF (any(do_change_pgrid)) 
THEN 
 1046         pgrid_changed_any = .false.
 
 1047         SELECT CASE (max_mm_dim)
 
 1049            IF (
ALLOCATED(tensor_1%contraction_storage) .AND. 
ALLOCATED(tensor_3%contraction_storage)) 
THEN 
 1050               CALL dbt_change_pgrid_2d(tensor_1, tensor_1%pgrid%mp_comm_2d, pdims=pdims_2d_opt, &
 
 1051                                        nsplit=split_opt_avg%ngroup, dimsplit=split_opt_avg%split_rowcol, &
 
 1052                                        pgrid_changed=pgrid_changed, &
 
 1053                                        unit_nr=unit_nr_prv)
 
 1054               IF (pgrid_changed) pgrid_changed_any = .true.
 
 1055               CALL dbt_change_pgrid_2d(tensor_3, tensor_3%pgrid%mp_comm_2d, pdims=pdims_2d_opt, &
 
 1056                                        nsplit=split_opt_avg%ngroup, dimsplit=split_opt_avg%split_rowcol, &
 
 1057                                        pgrid_changed=pgrid_changed, &
 
 1058                                        unit_nr=unit_nr_prv)
 
 1059               IF (pgrid_changed) pgrid_changed_any = .true.
 
 1061            IF (pgrid_changed_any) 
THEN 
 1062               IF (tensor_2%matrix_rep%do_batched == 3) 
THEN 
 1065                  CALL dbt_tas_batched_mm_complete(tensor_2%matrix_rep)
 
 1069            IF (
ALLOCATED(tensor_1%contraction_storage) .AND. 
ALLOCATED(tensor_2%contraction_storage)) 
THEN 
 1070               CALL dbt_change_pgrid_2d(tensor_1, tensor_1%pgrid%mp_comm_2d, pdims=pdims_2d_opt, &
 
 1071                                        nsplit=split_opt_avg%ngroup, dimsplit=split_opt_avg%split_rowcol, &
 
 1072                                        pgrid_changed=pgrid_changed, &
 
 1073                                        unit_nr=unit_nr_prv)
 
 1074               IF (pgrid_changed) pgrid_changed_any = .true.
 
 1075               CALL dbt_change_pgrid_2d(tensor_2, tensor_2%pgrid%mp_comm_2d, pdims=pdims_2d_opt, &
 
 1076                                        nsplit=split_opt_avg%ngroup, dimsplit=split_opt_avg%split_rowcol, &
 
 1077                                        pgrid_changed=pgrid_changed, &
 
 1078                                        unit_nr=unit_nr_prv)
 
 1079               IF (pgrid_changed) pgrid_changed_any = .true.
 
 1081            IF (pgrid_changed_any) 
THEN 
 1082               IF (tensor_3%matrix_rep%do_batched == 3) 
THEN 
 1083                  CALL dbt_tas_batched_mm_complete(tensor_3%matrix_rep)
 
 1087            IF (
ALLOCATED(tensor_2%contraction_storage) .AND. 
ALLOCATED(tensor_3%contraction_storage)) 
THEN 
 1088               CALL dbt_change_pgrid_2d(tensor_2, tensor_2%pgrid%mp_comm_2d, pdims=pdims_2d_opt, &
 
 1089                                        nsplit=split_opt_avg%ngroup, dimsplit=split_opt_avg%split_rowcol, &
 
 1090                                        pgrid_changed=pgrid_changed, &
 
 1091                                        unit_nr=unit_nr_prv)
 
 1092               IF (pgrid_changed) pgrid_changed_any = .true.
 
 1093               CALL dbt_change_pgrid_2d(tensor_3, tensor_3%pgrid%mp_comm_2d, pdims=pdims_2d_opt, &
 
 1094                                        nsplit=split_opt_avg%ngroup, dimsplit=split_opt_avg%split_rowcol, &
 
 1095                                        pgrid_changed=pgrid_changed, &
 
 1096                                        unit_nr=unit_nr_prv)
 
 1097               IF (pgrid_changed) pgrid_changed_any = .true.
 
 1099            IF (pgrid_changed_any) 
THEN 
 1100               IF (tensor_1%matrix_rep%do_batched == 3) 
THEN 
 1101                  CALL dbt_tas_batched_mm_complete(tensor_1%matrix_rep)
 
 1105         CALL dbt_tas_release_info(split_opt_avg)
 
 1108      IF ((.NOT. tensors_remapped) .AND. do_batched) 
THEN 
 1110         CALL dbt_tas_set_batched_state(tensor_1%matrix_rep, opt_grid=.true.)
 
 1111         CALL dbt_tas_set_batched_state(tensor_2%matrix_rep, opt_grid=.true.)
 
 1112         CALL dbt_tas_set_batched_state(tensor_3%matrix_rep, opt_grid=.true.)
 
 1115      CALL dbt_tas_release_info(split_opt)
 
 1117      CALL timestop(handle)
 
 1125   SUBROUTINE align_tensor(tensor_in, contract_in, notcontract_in, &
 
 1126                           tensor_out, contract_out, notcontract_out, indp_in, indp_out)
 
 1127      TYPE(dbt_type), 
INTENT(INOUT)               :: tensor_in
 
 1128      INTEGER, 
DIMENSION(:), 
INTENT(IN)            :: contract_in, notcontract_in
 
 1129      TYPE(dbt_type), 
INTENT(OUT)              :: tensor_out
 
 1130      INTEGER, 
DIMENSION(SIZE(contract_in)), &
 
 1131         INTENT(OUT)                               :: contract_out
 
 1132      INTEGER, 
DIMENSION(SIZE(notcontract_in)), &
 
 1133         INTENT(OUT)                               :: notcontract_out
 
 1134      CHARACTER(LEN=1), 
DIMENSION(ndims_tensor(tensor_in)), 
INTENT(IN) :: indp_in
 
 1135      CHARACTER(LEN=1), 
DIMENSION(ndims_tensor(tensor_in)), 
INTENT(OUT) :: indp_out
 
 1136      INTEGER, 
DIMENSION(ndims_tensor(tensor_in)) :: align
 
 1138      CALL dbt_align_index(tensor_in, tensor_out, order=align)
 
 1139      contract_out = align(contract_in)
 
 1140      notcontract_out = align(notcontract_in)
 
 1141      indp_out(align) = indp_in
 
 1164   SUBROUTINE reshape_mm_compatible(tensor1, tensor2, tensor1_out, tensor2_out, ind1_free, ind1_linked, &
 
 1165                                    ind2_free, ind2_linked, trans1, trans2, new1, new2, ref_tensor, &
 
 1166                                    nodata1, nodata2, move_data_1, &
 
 1167                                    move_data_2, optimize_dist, unit_nr)
 
 1168      TYPE(dbt_type), 
TARGET, 
INTENT(INOUT)   :: tensor1
 
 1169      TYPE(dbt_type), 
TARGET, 
INTENT(INOUT)   :: tensor2
 
 1170      TYPE(dbt_type), 
POINTER, 
INTENT(OUT)    :: tensor1_out, tensor2_out
 
 1171      INTEGER, 
DIMENSION(:), 
INTENT(IN)           :: ind1_free, ind2_free
 
 1172      INTEGER, 
DIMENSION(:), 
INTENT(IN)           :: ind1_linked, ind2_linked
 
 1173      LOGICAL, 
INTENT(OUT)                        :: trans1, trans2
 
 1174      LOGICAL, 
INTENT(OUT)                        :: new1, new2
 
 1175      INTEGER, 
INTENT(OUT) :: ref_tensor
 
 1176      LOGICAL, 
INTENT(IN), 
OPTIONAL               :: nodata1, nodata2
 
 1177      LOGICAL, 
INTENT(INOUT), 
OPTIONAL            :: move_data_1, move_data_2
 
 1178      LOGICAL, 
INTENT(IN), 
OPTIONAL               :: optimize_dist
 
 1179      INTEGER, 
INTENT(IN), 
OPTIONAL               :: unit_nr
 
 1180      INTEGER                                     :: compat1, compat1_old, compat2, compat2_old, &
 
 1182      TYPE(mp_cart_type)                          :: comm_2d
 
 1183      TYPE(array_list)                            :: dist_list
 
 1184      INTEGER, 
DIMENSION(:), 
ALLOCATABLE          :: mp_dims
 
 1185      TYPE(dbt_distribution_type)             :: dist_in
 
 1186      INTEGER(KIND=int_8)                         :: nblkrows, nblkcols
 
 1187      LOGICAL                                     :: optimize_dist_prv
 
 1188      INTEGER, 
DIMENSION(ndims_tensor(tensor1)) :: dims1
 
 1189      INTEGER, 
DIMENSION(ndims_tensor(tensor2)) :: dims2
 
 1191      NULLIFY (tensor1_out, tensor2_out)
 
 1193      unit_nr_prv = prep_output_unit(unit_nr)
 
 1195      CALL blk_dims_tensor(tensor1, dims1)
 
 1196      CALL blk_dims_tensor(tensor2, dims2)
 
 1198      IF (product(int(dims1, int_8)) .GE. product(int(dims2, int_8))) 
THEN 
 1204      IF (
PRESENT(optimize_dist)) 
THEN 
 1205         optimize_dist_prv = optimize_dist
 
 1207         optimize_dist_prv = .false.
 
 1210      compat1 = compat_map(tensor1%nd_index, ind1_linked)
 
 1211      compat2 = compat_map(tensor2%nd_index, ind2_linked)
 
 1212      compat1_old = compat1
 
 1213      compat2_old = compat2
 
 1215      IF (unit_nr_prv > 0) 
THEN 
 1216         WRITE (unit_nr_prv, 
'(T2,A,1X,A,A,1X)', advance=
'no') 
"compatibility of", trim(tensor1%name), 
":" 
 1217         SELECT CASE (compat1)
 
 1219            WRITE (unit_nr_prv, 
'(A)') 
"Not compatible" 
 1221            WRITE (unit_nr_prv, 
'(A)') 
"Normal" 
 1223            WRITE (unit_nr_prv, 
'(A)') 
"Transposed" 
 1225         WRITE (unit_nr_prv, 
'(T2,A,1X,A,A,1X)', advance=
'no') 
"compatibility of", trim(tensor2%name), 
":" 
 1226         SELECT CASE (compat2)
 
 1228            WRITE (unit_nr_prv, 
'(A)') 
"Not compatible" 
 1230            WRITE (unit_nr_prv, 
'(A)') 
"Normal" 
 1232            WRITE (unit_nr_prv, 
'(A)') 
"Transposed" 
 1239      IF (compat1 == 0 .OR. optimize_dist_prv) 
THEN 
 1243      IF (compat2 == 0 .OR. optimize_dist_prv) 
THEN 
 1247      IF (ref_tensor == 1) 
THEN  
 1248         IF (compat1 == 0 .OR. optimize_dist_prv) 
THEN  
 1249            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"Redistribution of", trim(tensor1%name)
 
 1250            nblkrows = product(int(dims1(ind1_linked), kind=int_8))
 
 1251            nblkcols = product(int(dims1(ind1_free), kind=int_8))
 
 1252            comm_2d = dbt_tas_mp_comm(tensor1%pgrid%mp_comm_2d, nblkrows, nblkcols)
 
 1253            ALLOCATE (tensor1_out)
 
 1254            CALL dbt_remap(tensor1, ind1_linked, ind1_free, tensor1_out, comm_2d=comm_2d, &
 
 1255                           nodata=nodata1, move_data=move_data_1)
 
 1259            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"No redistribution of", trim(tensor1%name)
 
 1260            tensor1_out => tensor1
 
 1262         IF (compat2 == 0 .OR. optimize_dist_prv) 
THEN  
 1263            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A,1X,A,1X,A)') 
"Redistribution of", &
 
 1264               trim(tensor2%name), 
"compatible with", trim(tensor1%name)
 
 1265            dist_in = dbt_distribution(tensor1_out)
 
 1266            dist_list = array_sublist(dist_in%nd_dist, ind1_linked)
 
 1267            IF (compat1 == 1) 
THEN  
 1270               ALLOCATE (mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid)))
 
 1271               CALL dbt_get_mapping_info(dist_in%pgrid%nd_index_grid, dims1_2d=mp_dims)
 
 1272               ALLOCATE (tensor2_out)
 
 1273               CALL dbt_remap(tensor2, ind2_linked, ind2_free, tensor2_out, comm_2d=dist_in%pgrid%mp_comm_2d, &
 
 1274                              dist1=dist_list, mp_dims_1=mp_dims, nodata=nodata2, move_data=move_data_2)
 
 1275            ELSEIF (compat1 == 2) 
THEN  
 1278               ALLOCATE (mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid)))
 
 1279               CALL dbt_get_mapping_info(dist_in%pgrid%nd_index_grid, dims2_2d=mp_dims)
 
 1280               ALLOCATE (tensor2_out)
 
 1281               CALL dbt_remap(tensor2, ind2_free, ind2_linked, tensor2_out, comm_2d=dist_in%pgrid%mp_comm_2d, &
 
 1282                              dist2=dist_list, mp_dims_2=mp_dims, nodata=nodata2, move_data=move_data_2)
 
 1284               cpabort(
"should not happen")
 
 1288            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"No redistribution of", trim(tensor2%name)
 
 1289            tensor2_out => tensor2
 
 1292         IF (compat2 == 0 .OR. optimize_dist_prv) 
THEN  
 1293            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"Redistribution of", trim(tensor2%name)
 
 1294            nblkrows = product(int(dims2(ind2_linked), kind=int_8))
 
 1295            nblkcols = product(int(dims2(ind2_free), kind=int_8))
 
 1296            comm_2d = dbt_tas_mp_comm(tensor2%pgrid%mp_comm_2d, nblkrows, nblkcols)
 
 1297            ALLOCATE (tensor2_out)
 
 1298            CALL dbt_remap(tensor2, ind2_linked, ind2_free, tensor2_out, nodata=nodata2, move_data=move_data_2)
 
 1302            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"No redistribution of", trim(tensor2%name)
 
 1303            tensor2_out => tensor2
 
 1305         IF (compat1 == 0 .OR. optimize_dist_prv) 
THEN  
 1306            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A,1X,A,1X,A)') 
"Redistribution of", trim(tensor1%name), &
 
 1307               "compatible with", trim(tensor2%name)
 
 1308            dist_in = dbt_distribution(tensor2_out)
 
 1309            dist_list = array_sublist(dist_in%nd_dist, ind2_linked)
 
 1310            IF (compat2 == 1) 
THEN 
 1311               ALLOCATE (mp_dims(ndims_mapping_row(dist_in%pgrid%nd_index_grid)))
 
 1312               CALL dbt_get_mapping_info(dist_in%pgrid%nd_index_grid, dims1_2d=mp_dims)
 
 1313               ALLOCATE (tensor1_out)
 
 1314               CALL dbt_remap(tensor1, ind1_linked, ind1_free, tensor1_out, comm_2d=dist_in%pgrid%mp_comm_2d, &
 
 1315                              dist1=dist_list, mp_dims_1=mp_dims, nodata=nodata1, move_data=move_data_1)
 
 1316            ELSEIF (compat2 == 2) 
THEN 
 1317               ALLOCATE (mp_dims(ndims_mapping_column(dist_in%pgrid%nd_index_grid)))
 
 1318               CALL dbt_get_mapping_info(dist_in%pgrid%nd_index_grid, dims2_2d=mp_dims)
 
 1319               ALLOCATE (tensor1_out)
 
 1320               CALL dbt_remap(tensor1, ind1_free, ind1_linked, tensor1_out, comm_2d=dist_in%pgrid%mp_comm_2d, &
 
 1321                              dist2=dist_list, mp_dims_2=mp_dims, nodata=nodata1, move_data=move_data_1)
 
 1323               cpabort(
"should not happen")
 
 1327            IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"No redistribution of", trim(tensor1%name)
 
 1328            tensor1_out => tensor1
 
 1332      SELECT CASE (compat1)
 
 1338         cpabort(
"should not happen")
 
 1341      SELECT CASE (compat2)
 
 1347         cpabort(
"should not happen")
 
 1350      IF (unit_nr_prv > 0) 
THEN 
 1351         IF (compat1 .NE. compat1_old) 
THEN 
 1352            WRITE (unit_nr_prv, 
'(T2,A,1X,A,A,1X)', advance=
'no') 
"compatibility of", trim(tensor1_out%name), 
":" 
 1353            SELECT CASE (compat1)
 
 1355               WRITE (unit_nr_prv, 
'(A)') 
"Not compatible" 
 1357               WRITE (unit_nr_prv, 
'(A)') 
"Normal" 
 1359               WRITE (unit_nr_prv, 
'(A)') 
"Transposed" 
 1362         IF (compat2 .NE. compat2_old) 
THEN 
 1363            WRITE (unit_nr_prv, 
'(T2,A,1X,A,A,1X)', advance=
'no') 
"compatibility of", trim(tensor2_out%name), 
":" 
 1364            SELECT CASE (compat2)
 
 1366               WRITE (unit_nr_prv, 
'(A)') 
"Not compatible" 
 1368               WRITE (unit_nr_prv, 
'(A)') 
"Normal" 
 1370               WRITE (unit_nr_prv, 
'(A)') 
"Transposed" 
 1375      IF (new1 .AND. 
PRESENT(move_data_1)) move_data_1 = .true.
 
 1376      IF (new2 .AND. 
PRESENT(move_data_2)) move_data_2 = .true.
 
 1392   SUBROUTINE reshape_mm_small(tensor_in, ind1, ind2, tensor_out, trans, new, nodata, move_data, unit_nr)
 
 1393      TYPE(dbt_type), 
TARGET, 
INTENT(INOUT)   :: tensor_in
 
 1394      INTEGER, 
DIMENSION(:), 
INTENT(IN)           :: ind1, ind2
 
 1395      TYPE(dbt_type), 
POINTER, 
INTENT(OUT)    :: tensor_out
 
 1396      LOGICAL, 
INTENT(OUT)                        :: trans
 
 1397      LOGICAL, 
INTENT(OUT)                        :: new
 
 1398      LOGICAL, 
INTENT(IN), 
OPTIONAL               :: nodata, move_data
 
 1399      INTEGER, 
INTENT(IN), 
OPTIONAL               :: unit_nr
 
 1400      INTEGER                                     :: compat1, compat2, compat1_old, compat2_old, unit_nr_prv
 
 1401      LOGICAL                                     :: nodata_prv
 
 1403      NULLIFY (tensor_out)
 
 1404      IF (
PRESENT(nodata)) 
THEN 
 1407         nodata_prv = .false.
 
 1410      unit_nr_prv = prep_output_unit(unit_nr)
 
 1413      compat1 = compat_map(tensor_in%nd_index, ind1)
 
 1414      compat2 = compat_map(tensor_in%nd_index, ind2)
 
 1415      compat1_old = compat1; compat2_old = compat2
 
 1416      IF (unit_nr_prv > 0) 
THEN 
 1417         WRITE (unit_nr_prv, 
'(T2,A,1X,A,A,1X)', advance=
'no') 
"compatibility of", trim(tensor_in%name), 
":" 
 1418         IF (compat1 == 1 .AND. compat2 == 2) 
THEN 
 1419            WRITE (unit_nr_prv, 
'(A)') 
"Normal" 
 1420         ELSEIF (compat1 == 2 .AND. compat2 == 1) 
THEN 
 1421            WRITE (unit_nr_prv, 
'(A)') 
"Transposed" 
 1423            WRITE (unit_nr_prv, 
'(A)') 
"Not compatible" 
 1426      IF (compat1 == 0 .or. compat2 == 0) 
THEN  
 1428         IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"Redistribution of", trim(tensor_in%name)
 
 1430         ALLOCATE (tensor_out)
 
 1431         CALL dbt_remap(tensor_in, ind1, ind2, tensor_out, nodata=nodata, move_data=move_data)
 
 1432         CALL dbt_copy_contraction_storage(tensor_in, tensor_out)
 
 1437         IF (unit_nr_prv > 0) 
WRITE (unit_nr_prv, 
'(T2,A,1X,A)') 
"No redistribution of", trim(tensor_in%name)
 
 1438         tensor_out => tensor_in
 
 1441      IF (compat1 == 1 .AND. compat2 == 2) 
THEN 
 1443      ELSEIF (compat1 == 2 .AND. compat2 == 1) 
THEN 
 1446         cpabort(
"this should not happen")
 
 1449      IF (unit_nr_prv > 0) 
THEN 
 1450         IF (compat1_old .NE. compat1 .OR. compat2_old .NE. compat2) 
THEN 
 1451            WRITE (unit_nr_prv, 
'(T2,A,1X,A,A,1X)', advance=
'no') 
"compatibility of", trim(tensor_out%name), 
":" 
 1452            IF (compat1 == 1 .AND. compat2 == 2) 
THEN 
 1453               WRITE (unit_nr_prv, 
'(A)') 
"Normal" 
 1454            ELSEIF (compat1 == 2 .AND. compat2 == 1) 
THEN 
 1455               WRITE (unit_nr_prv, 
'(A)') 
"Transposed" 
 1457               WRITE (unit_nr_prv, 
'(A)') 
"Not compatible" 
 1471   FUNCTION update_contraction_storage(storage, split_opt, split) 
RESULT(do_change_pgrid)
 
 1472      TYPE(dbt_contraction_storage), 
INTENT(INOUT) :: storage
 
 1473      TYPE(dbt_tas_split_info), 
INTENT(IN)           :: split_opt
 
 1474      TYPE(dbt_tas_split_info), 
INTENT(IN)           :: split
 
 1475      INTEGER, 
DIMENSION(2) :: pdims, pdims_sub
 
 1476      LOGICAL, 
DIMENSION(2) :: do_change_pgrid
 
 1477      REAL(kind=dp) :: change_criterion, pdims_ratio
 
 1478      INTEGER :: nsplit_opt, nsplit
 
 1480      cpassert(
ALLOCATED(split_opt%ngroup_opt))
 
 1481      nsplit_opt = split_opt%ngroup_opt
 
 1482      nsplit = split%ngroup
 
 1484      pdims = split%mp_comm%num_pe_cart
 
 1486      storage%ibatch = storage%ibatch + 1
 
 1488      storage%nsplit_avg = (storage%nsplit_avg*real(storage%ibatch - 1, dp) + real(nsplit_opt, dp)) &
 
 1489                           /real(storage%ibatch, dp)
 
 1491      SELECT CASE (split_opt%split_rowcol)
 
 1493         pdims_ratio = real(pdims(1), dp)/pdims(2)
 
 1495         pdims_ratio = real(pdims(2), dp)/pdims(1)
 
 1498      do_change_pgrid(:) = .false.
 
 1501      pdims_sub = split%mp_comm_group%num_pe_cart
 
 1502      change_criterion = maxval(real(pdims_sub, dp))/minval(pdims_sub)
 
 1503      IF (change_criterion > default_pdims_accept_ratio**2) do_change_pgrid(1) = .true.
 
 1506      change_criterion = max(real(nsplit, dp)/storage%nsplit_avg, real(storage%nsplit_avg, dp)/nsplit)
 
 1507      IF (change_criterion > default_nsplit_accept_ratio) do_change_pgrid(2) = .true.
 
 1515   FUNCTION compat_map(nd_index, compat_ind)
 
 1516      TYPE(nd_to_2d_mapping), 
INTENT(IN) :: nd_index
 
 1517      INTEGER, 
DIMENSION(:), 
INTENT(IN)  :: compat_ind
 
 1518      INTEGER, 
DIMENSION(ndims_mapping_row(nd_index)) :: map1
 
 1519      INTEGER, 
DIMENSION(ndims_mapping_column(nd_index)) :: map2
 
 1520      INTEGER                            :: compat_map
 
 1522      CALL dbt_get_mapping_info(nd_index, map1_2d=map1, map2_2d=map2)
 
 1525      IF (array_eq_i(map1, compat_ind)) 
THEN 
 1527      ELSEIF (array_eq_i(map2, compat_ind)) 
THEN 
 1537   SUBROUTINE index_linked_sort(ind_ref, ind_dep)
 
 1538      INTEGER, 
DIMENSION(:), 
INTENT(INOUT) :: ind_ref, ind_dep
 
 1539      INTEGER, 
DIMENSION(SIZE(ind_ref))    :: sort_indices
 
 1541      CALL sort(ind_ref, 
SIZE(ind_ref), sort_indices)
 
 1542      ind_dep(:) = ind_dep(sort_indices)
 
 1550   FUNCTION opt_pgrid(tensor, tas_split_info)
 
 1551      TYPE(dbt_type), 
INTENT(IN) :: 
tensor 
 1552      TYPE(dbt_tas_split_info), 
INTENT(IN) :: tas_split_info
 
 1553      INTEGER, 
DIMENSION(ndims_matrix_row(tensor)) :: map1
 
 1554      INTEGER, 
DIMENSION(ndims_matrix_column(tensor)) :: map2
 
 1555      TYPE(dbt_pgrid_type) :: opt_pgrid
 
 1556      INTEGER, 
DIMENSION(ndims_tensor(tensor)) :: dims
 
 1558      CALL dbt_get_mapping_info(
tensor%pgrid%nd_index_grid, map1_2d=map1, map2_2d=map2)
 
 1559      CALL blk_dims_tensor(
tensor, dims)
 
 1560      opt_pgrid = dbt_nd_mp_comm(tas_split_info%mp_comm, map1, map2, tdims=dims)
 
 1562      ALLOCATE (opt_pgrid%tas_split_info, source=tas_split_info)
 
 1563      CALL dbt_tas_info_hold(opt_pgrid%tas_split_info)
 
 1572   SUBROUTINE dbt_remap(tensor_in, map1_2d, map2_2d, tensor_out, comm_2d, dist1, dist2, &
 
 1573                        mp_dims_1, mp_dims_2, name, nodata, move_data)
 
 1574      TYPE(dbt_type), 
INTENT(INOUT)      :: tensor_in
 
 1575      INTEGER, 
DIMENSION(:), 
INTENT(IN)      :: map1_2d, map2_2d
 
 1576      TYPE(dbt_type), 
INTENT(OUT)        :: tensor_out
 
 1577      CHARACTER(len=*), 
INTENT(IN), 
OPTIONAL :: name
 
 1578      LOGICAL, 
INTENT(IN), 
OPTIONAL          :: nodata, move_data
 
 1579      CLASS(mp_comm_type), 
INTENT(IN), 
OPTIONAL          :: comm_2d
 
 1580      TYPE(array_list), 
INTENT(IN), 
OPTIONAL :: dist1, dist2
 
 1581      INTEGER, 
DIMENSION(SIZE(map1_2d)), 
OPTIONAL :: mp_dims_1
 
 1582      INTEGER, 
DIMENSION(SIZE(map2_2d)), 
OPTIONAL :: mp_dims_2
 
 1583      CHARACTER(len=default_string_length)   :: name_tmp
 
 1584      INTEGER, 
DIMENSION(:), 
ALLOCATABLE     :: blk_sizes_1, blk_sizes_2, blk_sizes_3, blk_sizes_4, &
 
 1585                                                nd_dist_1, nd_dist_2, nd_dist_3, nd_dist_4
 
 1586      TYPE(dbt_distribution_type)        :: dist
 
 1587      TYPE(mp_cart_type) :: comm_2d_prv
 
 1588      INTEGER                                :: handle, i
 
 1589      INTEGER, 
DIMENSION(ndims_tensor(tensor_in)) :: pdims, myploc
 
 1590      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_remap' 
 1591      LOGICAL                               :: nodata_prv
 
 1592      TYPE(dbt_pgrid_type)              :: comm_nd
 
 1594      CALL timeset(routinen, handle)
 
 1596      IF (
PRESENT(name)) 
THEN 
 1599         name_tmp = tensor_in%name
 
 1601      IF (
PRESENT(dist1)) 
THEN 
 1602         cpassert(
PRESENT(mp_dims_1))
 
 1605      IF (
PRESENT(dist2)) 
THEN 
 1606         cpassert(
PRESENT(mp_dims_2))
 
 1609      IF (
PRESENT(comm_2d)) 
THEN 
 1610         comm_2d_prv = comm_2d
 
 1612         comm_2d_prv = tensor_in%pgrid%mp_comm_2d
 
 1615      comm_nd = dbt_nd_mp_comm(comm_2d_prv, map1_2d, map2_2d, dims1_nd=mp_dims_1, dims2_nd=mp_dims_2)
 
 1616      CALL mp_environ_pgrid(comm_nd, pdims, myploc)
 
 1618         IF (ndims_tensor(tensor_in) == 2) 
THEN 
 1619            CALL get_arrays(tensor_in%blk_sizes, blk_sizes_1, blk_sizes_2)
 
 1621         IF (ndims_tensor(tensor_in) == 3) 
THEN 
 1622            CALL get_arrays(tensor_in%blk_sizes, blk_sizes_1, blk_sizes_2, blk_sizes_3)
 
 1624         IF (ndims_tensor(tensor_in) == 4) 
THEN 
 1625            CALL get_arrays(tensor_in%blk_sizes, blk_sizes_1, blk_sizes_2, blk_sizes_3, blk_sizes_4)
 
 1628         IF (ndims_tensor(tensor_in) == 2) 
THEN 
 1629               IF (
PRESENT(dist1)) 
THEN 
 1630                  IF (any(map1_2d == 1)) 
THEN 
 1631                     i = minloc(map1_2d, dim=1, mask=map1_2d == 1) 
 
 1632                     CALL get_ith_array(dist1, i, nd_dist_1)
 
 1636               IF (
PRESENT(dist2)) 
THEN 
 1637                  IF (any(map2_2d == 1)) 
THEN 
 1638                     i = minloc(map2_2d, dim=1, mask=map2_2d == 1) 
 
 1639                     CALL get_ith_array(dist2, i, nd_dist_1)
 
 1643               IF (.NOT. 
ALLOCATED(nd_dist_1)) 
THEN 
 1644                  ALLOCATE (nd_dist_1(
SIZE(blk_sizes_1)))
 
 1645                  CALL dbt_default_distvec(
SIZE(blk_sizes_1), pdims(1), blk_sizes_1, nd_dist_1)
 
 1647               IF (
PRESENT(dist1)) 
THEN 
 1648                  IF (any(map1_2d == 2)) 
THEN 
 1649                     i = minloc(map1_2d, dim=1, mask=map1_2d == 2) 
 
 1650                     CALL get_ith_array(dist1, i, nd_dist_2)
 
 1654               IF (
PRESENT(dist2)) 
THEN 
 1655                  IF (any(map2_2d == 2)) 
THEN 
 1656                     i = minloc(map2_2d, dim=1, mask=map2_2d == 2) 
 
 1657                     CALL get_ith_array(dist2, i, nd_dist_2)
 
 1661               IF (.NOT. 
ALLOCATED(nd_dist_2)) 
THEN 
 1662                  ALLOCATE (nd_dist_2(
SIZE(blk_sizes_2)))
 
 1663                  CALL dbt_default_distvec(
SIZE(blk_sizes_2), pdims(2), blk_sizes_2, nd_dist_2)
 
 1665            CALL dbt_distribution_new_expert(dist, comm_nd, map1_2d, map2_2d, &
 
 1666                                             nd_dist_1, nd_dist_2, own_comm=.true.)
 
 1668         IF (ndims_tensor(tensor_in) == 3) 
THEN 
 1669               IF (
PRESENT(dist1)) 
THEN 
 1670                  IF (any(map1_2d == 1)) 
THEN 
 1671                     i = minloc(map1_2d, dim=1, mask=map1_2d == 1) 
 
 1672                     CALL get_ith_array(dist1, i, nd_dist_1)
 
 1676               IF (
PRESENT(dist2)) 
THEN 
 1677                  IF (any(map2_2d == 1)) 
THEN 
 1678                     i = minloc(map2_2d, dim=1, mask=map2_2d == 1) 
 
 1679                     CALL get_ith_array(dist2, i, nd_dist_1)
 
 1683               IF (.NOT. 
ALLOCATED(nd_dist_1)) 
THEN 
 1684                  ALLOCATE (nd_dist_1(
SIZE(blk_sizes_1)))
 
 1685                  CALL dbt_default_distvec(
SIZE(blk_sizes_1), pdims(1), blk_sizes_1, nd_dist_1)
 
 1687               IF (
PRESENT(dist1)) 
THEN 
 1688                  IF (any(map1_2d == 2)) 
THEN 
 1689                     i = minloc(map1_2d, dim=1, mask=map1_2d == 2) 
 
 1690                     CALL get_ith_array(dist1, i, nd_dist_2)
 
 1694               IF (
PRESENT(dist2)) 
THEN 
 1695                  IF (any(map2_2d == 2)) 
THEN 
 1696                     i = minloc(map2_2d, dim=1, mask=map2_2d == 2) 
 
 1697                     CALL get_ith_array(dist2, i, nd_dist_2)
 
 1701               IF (.NOT. 
ALLOCATED(nd_dist_2)) 
THEN 
 1702                  ALLOCATE (nd_dist_2(
SIZE(blk_sizes_2)))
 
 1703                  CALL dbt_default_distvec(
SIZE(blk_sizes_2), pdims(2), blk_sizes_2, nd_dist_2)
 
 1705               IF (
PRESENT(dist1)) 
THEN 
 1706                  IF (any(map1_2d == 3)) 
THEN 
 1707                     i = minloc(map1_2d, dim=1, mask=map1_2d == 3) 
 
 1708                     CALL get_ith_array(dist1, i, nd_dist_3)
 
 1712               IF (
PRESENT(dist2)) 
THEN 
 1713                  IF (any(map2_2d == 3)) 
THEN 
 1714                     i = minloc(map2_2d, dim=1, mask=map2_2d == 3) 
 
 1715                     CALL get_ith_array(dist2, i, nd_dist_3)
 
 1719               IF (.NOT. 
ALLOCATED(nd_dist_3)) 
THEN 
 1720                  ALLOCATE (nd_dist_3(
SIZE(blk_sizes_3)))
 
 1721                  CALL dbt_default_distvec(
SIZE(blk_sizes_3), pdims(3), blk_sizes_3, nd_dist_3)
 
 1723            CALL dbt_distribution_new_expert(dist, comm_nd, map1_2d, map2_2d, &
 
 1724                                             nd_dist_1, nd_dist_2, nd_dist_3, own_comm=.true.)
 
 1726         IF (ndims_tensor(tensor_in) == 4) 
THEN 
 1727               IF (
PRESENT(dist1)) 
THEN 
 1728                  IF (any(map1_2d == 1)) 
THEN 
 1729                     i = minloc(map1_2d, dim=1, mask=map1_2d == 1) 
 
 1730                     CALL get_ith_array(dist1, i, nd_dist_1)
 
 1734               IF (
PRESENT(dist2)) 
THEN 
 1735                  IF (any(map2_2d == 1)) 
THEN 
 1736                     i = minloc(map2_2d, dim=1, mask=map2_2d == 1) 
 
 1737                     CALL get_ith_array(dist2, i, nd_dist_1)
 
 1741               IF (.NOT. 
ALLOCATED(nd_dist_1)) 
THEN 
 1742                  ALLOCATE (nd_dist_1(
SIZE(blk_sizes_1)))
 
 1743                  CALL dbt_default_distvec(
SIZE(blk_sizes_1), pdims(1), blk_sizes_1, nd_dist_1)
 
 1745               IF (
PRESENT(dist1)) 
THEN 
 1746                  IF (any(map1_2d == 2)) 
THEN 
 1747                     i = minloc(map1_2d, dim=1, mask=map1_2d == 2) 
 
 1748                     CALL get_ith_array(dist1, i, nd_dist_2)
 
 1752               IF (
PRESENT(dist2)) 
THEN 
 1753                  IF (any(map2_2d == 2)) 
THEN 
 1754                     i = minloc(map2_2d, dim=1, mask=map2_2d == 2) 
 
 1755                     CALL get_ith_array(dist2, i, nd_dist_2)
 
 1759               IF (.NOT. 
ALLOCATED(nd_dist_2)) 
THEN 
 1760                  ALLOCATE (nd_dist_2(
SIZE(blk_sizes_2)))
 
 1761                  CALL dbt_default_distvec(
SIZE(blk_sizes_2), pdims(2), blk_sizes_2, nd_dist_2)
 
 1763               IF (
PRESENT(dist1)) 
THEN 
 1764                  IF (any(map1_2d == 3)) 
THEN 
 1765                     i = minloc(map1_2d, dim=1, mask=map1_2d == 3) 
 
 1766                     CALL get_ith_array(dist1, i, nd_dist_3)
 
 1770               IF (
PRESENT(dist2)) 
THEN 
 1771                  IF (any(map2_2d == 3)) 
THEN 
 1772                     i = minloc(map2_2d, dim=1, mask=map2_2d == 3) 
 
 1773                     CALL get_ith_array(dist2, i, nd_dist_3)
 
 1777               IF (.NOT. 
ALLOCATED(nd_dist_3)) 
THEN 
 1778                  ALLOCATE (nd_dist_3(
SIZE(blk_sizes_3)))
 
 1779                  CALL dbt_default_distvec(
SIZE(blk_sizes_3), pdims(3), blk_sizes_3, nd_dist_3)
 
 1781               IF (
PRESENT(dist1)) 
THEN 
 1782                  IF (any(map1_2d == 4)) 
THEN 
 1783                     i = minloc(map1_2d, dim=1, mask=map1_2d == 4) 
 
 1784                     CALL get_ith_array(dist1, i, nd_dist_4)
 
 1788               IF (
PRESENT(dist2)) 
THEN 
 1789                  IF (any(map2_2d == 4)) 
THEN 
 1790                     i = minloc(map2_2d, dim=1, mask=map2_2d == 4) 
 
 1791                     CALL get_ith_array(dist2, i, nd_dist_4)
 
 1795               IF (.NOT. 
ALLOCATED(nd_dist_4)) 
THEN 
 1796                  ALLOCATE (nd_dist_4(
SIZE(blk_sizes_4)))
 
 1797                  CALL dbt_default_distvec(
SIZE(blk_sizes_4), pdims(4), blk_sizes_4, nd_dist_4)
 
 1799            CALL dbt_distribution_new_expert(dist, comm_nd, map1_2d, map2_2d, &
 
 1800                                             nd_dist_1, nd_dist_2, nd_dist_3, nd_dist_4, own_comm=.true.)
 
 1803         IF (ndims_tensor(tensor_in) == 2) 
THEN 
 1804            CALL dbt_create(tensor_out, name_tmp, dist, map1_2d, map2_2d, &
 
 1805                            blk_sizes_1, blk_sizes_2)
 
 1807         IF (ndims_tensor(tensor_in) == 3) 
THEN 
 1808            CALL dbt_create(tensor_out, name_tmp, dist, map1_2d, map2_2d, &
 
 1809                            blk_sizes_1, blk_sizes_2, blk_sizes_3)
 
 1811         IF (ndims_tensor(tensor_in) == 4) 
THEN 
 1812            CALL dbt_create(tensor_out, name_tmp, dist, map1_2d, map2_2d, &
 
 1813                            blk_sizes_1, blk_sizes_2, blk_sizes_3, blk_sizes_4)
 
 1816      IF (
PRESENT(nodata)) 
THEN 
 1819         nodata_prv = .false.
 
 1822      IF (.NOT. nodata_prv) 
CALL dbt_copy_expert(tensor_in, tensor_out, move_data=move_data)
 
 1823      CALL dbt_distribution_destroy(dist)
 
 1825      CALL timestop(handle)
 
 1833   SUBROUTINE dbt_align_index(tensor_in, tensor_out, order)
 
 1834      TYPE(dbt_type), 
INTENT(INOUT)               :: tensor_in
 
 1835      TYPE(dbt_type), 
INTENT(OUT)                 :: tensor_out
 
 1836      INTEGER, 
DIMENSION(ndims_matrix_row(tensor_in)) :: map1_2d
 
 1837      INTEGER, 
DIMENSION(ndims_matrix_column(tensor_in)) :: map2_2d
 
 1838      INTEGER, 
DIMENSION(ndims_tensor(tensor_in)), &
 
 1839         INTENT(OUT), 
OPTIONAL                        :: order
 
 1840      INTEGER, 
DIMENSION(ndims_tensor(tensor_in))     :: order_prv
 
 1841      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_align_index' 
 1844      CALL timeset(routinen, handle)
 
 1846      CALL dbt_get_mapping_info(tensor_in%nd_index_blk, map1_2d=map1_2d, map2_2d=map2_2d)
 
 1847      order_prv = dbt_inverse_order([map1_2d, map2_2d])
 
 1848      CALL dbt_permute_index(tensor_in, tensor_out, order=order_prv)
 
 1850      IF (
PRESENT(order)) order = order_prv
 
 1852      CALL timestop(handle)
 
 1859   SUBROUTINE dbt_permute_index(tensor_in, tensor_out, order)
 
 1860      TYPE(dbt_type), 
INTENT(INOUT)                  :: tensor_in
 
 1861      TYPE(dbt_type), 
INTENT(OUT)                 :: tensor_out
 
 1862      INTEGER, 
DIMENSION(ndims_tensor(tensor_in)), &
 
 1865      TYPE(nd_to_2d_mapping)                          :: nd_index_blk_rs, nd_index_rs
 
 1866      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_permute_index' 
 1870      CALL timeset(routinen, handle)
 
 1872      ndims = ndims_tensor(tensor_in)
 
 1874      CALL permute_index(tensor_in%nd_index, nd_index_rs, order)
 
 1875      CALL permute_index(tensor_in%nd_index_blk, nd_index_blk_rs, order)
 
 1876      CALL permute_index(tensor_in%pgrid%nd_index_grid, tensor_out%pgrid%nd_index_grid, order)
 
 1878      tensor_out%matrix_rep => tensor_in%matrix_rep
 
 1879      tensor_out%owns_matrix = .false.
 
 1881      tensor_out%nd_index = nd_index_rs
 
 1882      tensor_out%nd_index_blk = nd_index_blk_rs
 
 1883      tensor_out%pgrid%mp_comm_2d = tensor_in%pgrid%mp_comm_2d
 
 1884      IF (
ALLOCATED(tensor_in%pgrid%tas_split_info)) 
THEN 
 1885         ALLOCATE (tensor_out%pgrid%tas_split_info, source=tensor_in%pgrid%tas_split_info)
 
 1887      tensor_out%refcount => tensor_in%refcount
 
 1888      CALL dbt_hold(tensor_out)
 
 1890      CALL reorder_arrays(tensor_in%blk_sizes, tensor_out%blk_sizes, order)
 
 1891      CALL reorder_arrays(tensor_in%blk_offsets, tensor_out%blk_offsets, order)
 
 1892      CALL reorder_arrays(tensor_in%nd_dist, tensor_out%nd_dist, order)
 
 1893      CALL reorder_arrays(tensor_in%blks_local, tensor_out%blks_local, order)
 
 1894      ALLOCATE (tensor_out%nblks_local(ndims))
 
 1895      ALLOCATE (tensor_out%nfull_local(ndims))
 
 1896      tensor_out%nblks_local(order) = tensor_in%nblks_local(:)
 
 1897      tensor_out%nfull_local(order) = tensor_in%nfull_local(:)
 
 1898      tensor_out%name = tensor_in%name
 
 1899      tensor_out%valid = .true.
 
 1901      IF (
ALLOCATED(tensor_in%contraction_storage)) 
THEN 
 1902         ALLOCATE (tensor_out%contraction_storage, source=tensor_in%contraction_storage)
 
 1903         CALL destroy_array_list(tensor_out%contraction_storage%batch_ranges)
 
 1904         CALL reorder_arrays(tensor_in%contraction_storage%batch_ranges, tensor_out%contraction_storage%batch_ranges, order)
 
 1907      CALL timestop(handle)
 
 1919   SUBROUTINE dbt_map_bounds_to_tensors(tensor_1, tensor_2, &
 
 1920                                        contract_1, notcontract_1, &
 
 1921                                        contract_2, notcontract_2, &
 
 1922                                        bounds_t1, bounds_t2, &
 
 1923                                        bounds_1, bounds_2, bounds_3, &
 
 1924                                        do_crop_1, do_crop_2)
 
 1926      TYPE(dbt_type), 
INTENT(IN)      :: tensor_1, tensor_2
 
 1927      INTEGER, 
DIMENSION(:), 
INTENT(IN)   :: contract_1, contract_2, &
 
 1928                                             notcontract_1, notcontract_2
 
 1929      INTEGER, 
DIMENSION(2, ndims_tensor(tensor_1)), &
 
 1930         INTENT(OUT)                                 :: bounds_t1
 
 1931      INTEGER, 
DIMENSION(2, ndims_tensor(tensor_2)), &
 
 1932         INTENT(OUT)                                 :: bounds_t2
 
 1933      INTEGER, 
DIMENSION(2, SIZE(contract_1)), &
 
 1934         INTENT(IN), 
OPTIONAL                        :: bounds_1
 
 1935      INTEGER, 
DIMENSION(2, SIZE(notcontract_1)), &
 
 1936         INTENT(IN), 
OPTIONAL                        :: bounds_2
 
 1937      INTEGER, 
DIMENSION(2, SIZE(notcontract_2)), &
 
 1938         INTENT(IN), 
OPTIONAL                        :: bounds_3
 
 1939      LOGICAL, 
INTENT(OUT), 
OPTIONAL                 :: do_crop_1, do_crop_2
 
 1940      LOGICAL, 
DIMENSION(2)                          :: do_crop
 
 1945      CALL dbt_get_info(tensor_1, nfull_total=bounds_t1(2, :))
 
 1948      CALL dbt_get_info(tensor_2, nfull_total=bounds_t2(2, :))
 
 1950      IF (
PRESENT(bounds_1)) 
THEN 
 1951         bounds_t1(:, contract_1) = bounds_1
 
 1953         bounds_t2(:, contract_2) = bounds_1
 
 1957      IF (
PRESENT(bounds_2)) 
THEN 
 1958         bounds_t1(:, notcontract_1) = bounds_2
 
 1962      IF (
PRESENT(bounds_3)) 
THEN 
 1963         bounds_t2(:, notcontract_2) = bounds_3
 
 1967      IF (
PRESENT(do_crop_1)) do_crop_1 = do_crop(1)
 
 1968      IF (
PRESENT(do_crop_2)) do_crop_2 = do_crop(2)
 
 1980   SUBROUTINE dbt_print_contraction_index(tensor_1, indchar1, tensor_2, indchar2, tensor_3, indchar3, unit_nr)
 
 1981      TYPE(dbt_type), 
INTENT(IN) :: tensor_1, tensor_2, tensor_3
 
 1982      CHARACTER(LEN=1), 
DIMENSION(ndims_tensor(tensor_1)), 
INTENT(IN) :: indchar1
 
 1983      CHARACTER(LEN=1), 
DIMENSION(ndims_tensor(tensor_2)), 
INTENT(IN) :: indchar2
 
 1984      CHARACTER(LEN=1), 
DIMENSION(ndims_tensor(tensor_3)), 
INTENT(IN) :: indchar3
 
 1985      INTEGER, 
INTENT(IN) :: unit_nr
 
 1986      INTEGER, 
DIMENSION(ndims_matrix_row(tensor_1)) :: map11
 
 1987      INTEGER, 
DIMENSION(ndims_matrix_column(tensor_1)) :: map12
 
 1988      INTEGER, 
DIMENSION(ndims_matrix_row(tensor_2)) :: map21
 
 1989      INTEGER, 
DIMENSION(ndims_matrix_column(tensor_2)) :: map22
 
 1990      INTEGER, 
DIMENSION(ndims_matrix_row(tensor_3)) :: map31
 
 1991      INTEGER, 
DIMENSION(ndims_matrix_column(tensor_3)) :: map32
 
 1992      INTEGER :: ichar1, ichar2, ichar3, unit_nr_prv
 
 1994      unit_nr_prv = prep_output_unit(unit_nr)
 
 1996      IF (unit_nr_prv /= 0) 
THEN 
 1997         CALL dbt_get_mapping_info(tensor_1%nd_index_blk, map1_2d=map11, map2_2d=map12)
 
 1998         CALL dbt_get_mapping_info(tensor_2%nd_index_blk, map1_2d=map21, map2_2d=map22)
 
 1999         CALL dbt_get_mapping_info(tensor_3%nd_index_blk, map1_2d=map31, map2_2d=map32)
 
 2002      IF (unit_nr_prv > 0) 
THEN 
 2003         WRITE (unit_nr_prv, 
'(T2,A)') 
"INDEX INFO" 
 2004         WRITE (unit_nr_prv, 
'(T15,A)', advance=
'no') 
"tensor index: (" 
 2005         DO ichar1 = 1, 
SIZE(indchar1)
 
 2006            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar1(ichar1)
 
 2008         WRITE (unit_nr_prv, 
'(A)', advance=
'no') 
") x (" 
 2009         DO ichar2 = 1, 
SIZE(indchar2)
 
 2010            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar2(ichar2)
 
 2012         WRITE (unit_nr_prv, 
'(A)', advance=
'no') 
") = (" 
 2013         DO ichar3 = 1, 
SIZE(indchar3)
 
 2014            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar3(ichar3)
 
 2016         WRITE (unit_nr_prv, 
'(A)') 
")" 
 2018         WRITE (unit_nr_prv, 
'(T15,A)', advance=
'no') 
"matrix index: (" 
 2019         DO ichar1 = 1, 
SIZE(map11)
 
 2020            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar1(map11(ichar1))
 
 2022         WRITE (unit_nr_prv, 
'(A1)', advance=
'no') 
"|" 
 2023         DO ichar1 = 1, 
SIZE(map12)
 
 2024            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar1(map12(ichar1))
 
 2026         WRITE (unit_nr_prv, 
'(A)', advance=
'no') 
") x (" 
 2027         DO ichar2 = 1, 
SIZE(map21)
 
 2028            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar2(map21(ichar2))
 
 2030         WRITE (unit_nr_prv, 
'(A1)', advance=
'no') 
"|" 
 2031         DO ichar2 = 1, 
SIZE(map22)
 
 2032            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar2(map22(ichar2))
 
 2034         WRITE (unit_nr_prv, 
'(A)', advance=
'no') 
") = (" 
 2035         DO ichar3 = 1, 
SIZE(map31)
 
 2036            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar3(map31(ichar3))
 
 2038         WRITE (unit_nr_prv, 
'(A1)', advance=
'no') 
"|" 
 2039         DO ichar3 = 1, 
SIZE(map32)
 
 2040            WRITE (unit_nr_prv, 
'(A1)', advance=
'no') indchar3(map32(ichar3))
 
 2042         WRITE (unit_nr_prv, 
'(A)') 
")" 
 2091      TYPE(dbt_type), 
INTENT(INOUT) :: 
tensor 
 2092      INTEGER, 
DIMENSION(:), 
OPTIONAL, 
INTENT(IN)        :: batch_range_1, batch_range_2, batch_range_3, batch_range_4
 
 2093      INTEGER, 
DIMENSION(ndims_tensor(tensor)) :: tdims
 
 2094      INTEGER, 
DIMENSION(:), 
ALLOCATABLE                 :: batch_range_prv_1, batch_range_prv_2, batch_range_prv_3,&
 
 2096      LOGICAL :: static_range
 
 2098      CALL dbt_get_info(
tensor, nblks_total=tdims)
 
 2100      static_range = .true.
 
 2101         IF (ndims_tensor(
tensor) >= 1) 
THEN 
 2102            IF (
PRESENT(batch_range_1)) 
THEN 
 2103               ALLOCATE (batch_range_prv_1, source=batch_range_1)
 
 2104               static_range = .false.
 
 2106               ALLOCATE (batch_range_prv_1(2))
 
 2107               batch_range_prv_1(1) = 1
 
 2108               batch_range_prv_1(2) = tdims(1) + 1
 
 2111         IF (ndims_tensor(
tensor) >= 2) 
THEN 
 2112            IF (
PRESENT(batch_range_2)) 
THEN 
 2113               ALLOCATE (batch_range_prv_2, source=batch_range_2)
 
 2114               static_range = .false.
 
 2116               ALLOCATE (batch_range_prv_2(2))
 
 2117               batch_range_prv_2(1) = 1
 
 2118               batch_range_prv_2(2) = tdims(2) + 1
 
 2121         IF (ndims_tensor(
tensor) >= 3) 
THEN 
 2122            IF (
PRESENT(batch_range_3)) 
THEN 
 2123               ALLOCATE (batch_range_prv_3, source=batch_range_3)
 
 2124               static_range = .false.
 
 2126               ALLOCATE (batch_range_prv_3(2))
 
 2127               batch_range_prv_3(1) = 1
 
 2128               batch_range_prv_3(2) = tdims(3) + 1
 
 2131         IF (ndims_tensor(
tensor) >= 4) 
THEN 
 2132            IF (
PRESENT(batch_range_4)) 
THEN 
 2133               ALLOCATE (batch_range_prv_4, source=batch_range_4)
 
 2134               static_range = .false.
 
 2136               ALLOCATE (batch_range_prv_4(2))
 
 2137               batch_range_prv_4(1) = 1
 
 2138               batch_range_prv_4(2) = tdims(4) + 1
 
 2142      ALLOCATE (
tensor%contraction_storage)
 
 2143      tensor%contraction_storage%static = static_range
 
 2144      IF (static_range) 
THEN 
 2145         CALL dbt_tas_batched_mm_init(
tensor%matrix_rep)
 
 2147      tensor%contraction_storage%nsplit_avg = 0.0_dp
 
 2148      tensor%contraction_storage%ibatch = 0
 
 2150         IF (ndims_tensor(
tensor) == 1) 
THEN 
 2151            CALL create_array_list(
tensor%contraction_storage%batch_ranges, 1, &
 
 2154         IF (ndims_tensor(
tensor) == 2) 
THEN 
 2155            CALL create_array_list(
tensor%contraction_storage%batch_ranges, 2, &
 
 2156                                   batch_range_prv_1, batch_range_prv_2)
 
 2158         IF (ndims_tensor(
tensor) == 3) 
THEN 
 2159            CALL create_array_list(
tensor%contraction_storage%batch_ranges, 3, &
 
 2160                                   batch_range_prv_1, batch_range_prv_2, batch_range_prv_3)
 
 2162         IF (ndims_tensor(
tensor) == 4) 
THEN 
 2163            CALL create_array_list(
tensor%contraction_storage%batch_ranges, 4, &
 
 2164                                   batch_range_prv_1, batch_range_prv_2, batch_range_prv_3, batch_range_prv_4)
 
 
 2175      TYPE(dbt_type), 
INTENT(INOUT) :: 
tensor 
 2176      INTEGER, 
INTENT(IN), 
OPTIONAL :: unit_nr
 
 2178      INTEGER :: unit_nr_prv, handle
 
 2180      CALL tensor%pgrid%mp_comm_2d%sync()
 
 2181      CALL timeset(
"dbt_total", handle)
 
 2182      unit_nr_prv = prep_output_unit(unit_nr)
 
 2186      IF (
tensor%contraction_storage%static) 
THEN 
 2187         IF (
tensor%matrix_rep%do_batched > 0) 
THEN 
 2188            IF (
tensor%matrix_rep%mm_storage%batched_out) do_write = .true.
 
 2190         CALL dbt_tas_batched_mm_finalize(
tensor%matrix_rep)
 
 2193      IF (do_write .AND. unit_nr_prv /= 0) 
THEN 
 2194         IF (unit_nr_prv > 0) 
THEN 
 2195            WRITE (unit_nr_prv, 
"(T2,A)") &
 
 2196               "FINALIZING BATCHED PROCESSING OF MATMUL" 
 2198         CALL dbt_write_tensor_info(
tensor, unit_nr_prv)
 
 2199         CALL dbt_write_tensor_dist(
tensor, unit_nr_prv)
 
 2202      CALL destroy_array_list(
tensor%contraction_storage%batch_ranges)
 
 2203      DEALLOCATE (
tensor%contraction_storage)
 
 2204      CALL tensor%pgrid%mp_comm_2d%sync()
 
 2205      CALL timestop(handle)
 
 
 2219   SUBROUTINE dbt_change_pgrid(tensor, pgrid, batch_range_1, batch_range_2, batch_range_3, batch_range_4, &
 
 2220                               nodata, pgrid_changed, unit_nr)
 
 2221      TYPE(dbt_type), 
INTENT(INOUT)                  :: 
tensor 
 2222      TYPE(dbt_pgrid_type), 
INTENT(IN)               :: pgrid
 
 2223      INTEGER, 
DIMENSION(:), 
OPTIONAL, 
INTENT(IN)        :: batch_range_1, batch_range_2, batch_range_3, batch_range_4
 
 2225      LOGICAL, 
INTENT(IN), 
OPTIONAL                      :: nodata
 
 2226      LOGICAL, 
INTENT(OUT), 
OPTIONAL                     :: pgrid_changed
 
 2227      INTEGER, 
INTENT(IN), 
OPTIONAL                      :: unit_nr
 
 2228      CHARACTER(LEN=*), 
PARAMETER :: routinen = 
'dbt_change_pgrid' 
 2229      CHARACTER(default_string_length)                   :: name
 
 2231      INTEGER, 
ALLOCATABLE, 
DIMENSION(:)                 :: bs_1, bs_2, bs_3, bs_4, &
 
 2232                                                            dist_1, dist_2, dist_3, dist_4
 
 2233      INTEGER, 
DIMENSION(ndims_tensor(tensor))           :: pcoord, pcoord_ref, pdims, pdims_ref, &
 
 2235      TYPE(dbt_type)                                 :: t_tmp
 
 2236      TYPE(dbt_distribution_type)                    :: dist
 
 2237      INTEGER, 
DIMENSION(ndims_matrix_row(tensor)) :: map1
 
 2239         DIMENSION(ndims_matrix_column(tensor))    :: map2
 
 2240      LOGICAL, 
DIMENSION(ndims_tensor(tensor))             :: mem_aware
 
 2241      INTEGER, 
DIMENSION(ndims_tensor(tensor)) :: nbatch
 
 2242      INTEGER :: ind1, ind2, batch_size, ibatch
 
 2244      IF (
PRESENT(pgrid_changed)) pgrid_changed = .false.
 
 2245      CALL mp_environ_pgrid(pgrid, pdims, pcoord)
 
 2246      CALL mp_environ_pgrid(
tensor%pgrid, pdims_ref, pcoord_ref)
 
 2248      IF (all(pdims == pdims_ref)) 
THEN 
 2249         IF (
ALLOCATED(pgrid%tas_split_info) .AND. 
ALLOCATED(
tensor%pgrid%tas_split_info)) 
THEN 
 2250            IF (pgrid%tas_split_info%ngroup == 
tensor%pgrid%tas_split_info%ngroup) 
THEN 
 2256      CALL timeset(routinen, handle)
 
 2258         IF (ndims_tensor(
tensor) >= 1) 
THEN 
 2259            mem_aware(1) = 
PRESENT(batch_range_1)
 
 2260            IF (mem_aware(1)) nbatch(1) = 
SIZE(batch_range_1) - 1
 
 2262         IF (ndims_tensor(
tensor) >= 2) 
THEN 
 2263            mem_aware(2) = 
PRESENT(batch_range_2)
 
 2264            IF (mem_aware(2)) nbatch(2) = 
SIZE(batch_range_2) - 1
 
 2266         IF (ndims_tensor(
tensor) >= 3) 
THEN 
 2267            mem_aware(3) = 
PRESENT(batch_range_3)
 
 2268            IF (mem_aware(3)) nbatch(3) = 
SIZE(batch_range_3) - 1
 
 2270         IF (ndims_tensor(
tensor) >= 4) 
THEN 
 2271            mem_aware(4) = 
PRESENT(batch_range_4)
 
 2272            IF (mem_aware(4)) nbatch(4) = 
SIZE(batch_range_4) - 1
 
 2275      CALL dbt_get_info(
tensor, nblks_total=tdims, name=name)
 
 2277         IF (ndims_tensor(
tensor) >= 1) 
THEN 
 2278            ALLOCATE (bs_1(dbt_nblks_total(
tensor, 1)))
 
 2279            CALL get_ith_array(
tensor%blk_sizes, 1, bs_1)
 
 2280            ALLOCATE (dist_1(tdims(1)))
 
 2282            IF (mem_aware(1)) 
THEN 
 2283               DO ibatch = 1, nbatch(1)
 
 2284                  ind1 = batch_range_1(ibatch)
 
 2285                  ind2 = batch_range_1(ibatch + 1) - 1
 
 2286                  batch_size = ind2 - ind1 + 1
 
 2287                  CALL dbt_default_distvec(batch_size, pdims(1), &
 
 2288                                           bs_1(ind1:ind2), dist_1(ind1:ind2))
 
 2291               CALL dbt_default_distvec(tdims(1), pdims(1), bs_1, dist_1)
 
 2294         IF (ndims_tensor(
tensor) >= 2) 
THEN 
 2295            ALLOCATE (bs_2(dbt_nblks_total(
tensor, 2)))
 
 2296            CALL get_ith_array(
tensor%blk_sizes, 2, bs_2)
 
 2297            ALLOCATE (dist_2(tdims(2)))
 
 2299            IF (mem_aware(2)) 
THEN 
 2300               DO ibatch = 1, nbatch(2)
 
 2301                  ind1 = batch_range_2(ibatch)
 
 2302                  ind2 = batch_range_2(ibatch + 1) - 1
 
 2303                  batch_size = ind2 - ind1 + 1
 
 2304                  CALL dbt_default_distvec(batch_size, pdims(2), &
 
 2305                                           bs_2(ind1:ind2), dist_2(ind1:ind2))
 
 2308               CALL dbt_default_distvec(tdims(2), pdims(2), bs_2, dist_2)
 
 2311         IF (ndims_tensor(
tensor) >= 3) 
THEN 
 2312            ALLOCATE (bs_3(dbt_nblks_total(
tensor, 3)))
 
 2313            CALL get_ith_array(
tensor%blk_sizes, 3, bs_3)
 
 2314            ALLOCATE (dist_3(tdims(3)))
 
 2316            IF (mem_aware(3)) 
THEN 
 2317               DO ibatch = 1, nbatch(3)
 
 2318                  ind1 = batch_range_3(ibatch)
 
 2319                  ind2 = batch_range_3(ibatch + 1) - 1
 
 2320                  batch_size = ind2 - ind1 + 1
 
 2321                  CALL dbt_default_distvec(batch_size, pdims(3), &
 
 2322                                           bs_3(ind1:ind2), dist_3(ind1:ind2))
 
 2325               CALL dbt_default_distvec(tdims(3), pdims(3), bs_3, dist_3)
 
 2328         IF (ndims_tensor(
tensor) >= 4) 
THEN 
 2329            ALLOCATE (bs_4(dbt_nblks_total(
tensor, 4)))
 
 2330            CALL get_ith_array(
tensor%blk_sizes, 4, bs_4)
 
 2331            ALLOCATE (dist_4(tdims(4)))
 
 2333            IF (mem_aware(4)) 
THEN 
 2334               DO ibatch = 1, nbatch(4)
 
 2335                  ind1 = batch_range_4(ibatch)
 
 2336                  ind2 = batch_range_4(ibatch + 1) - 1
 
 2337                  batch_size = ind2 - ind1 + 1
 
 2338                  CALL dbt_default_distvec(batch_size, pdims(4), &
 
 2339                                           bs_4(ind1:ind2), dist_4(ind1:ind2))
 
 2342               CALL dbt_default_distvec(tdims(4), pdims(4), bs_4, dist_4)
 
 2346      CALL dbt_get_mapping_info(
tensor%nd_index_blk, map1_2d=map1, map2_2d=map2)
 
 2347         IF (ndims_tensor(
tensor) == 2) 
THEN 
 2348            CALL dbt_distribution_new(dist, pgrid, dist_1, dist_2)
 
 2349            CALL dbt_create(t_tmp, name, dist, map1, map2, bs_1, bs_2)
 
 2351         IF (ndims_tensor(
tensor) == 3) 
THEN 
 2352            CALL dbt_distribution_new(dist, pgrid, dist_1, dist_2, dist_3)
 
 2353            CALL dbt_create(t_tmp, name, dist, map1, map2, bs_1, bs_2, bs_3)
 
 2355         IF (ndims_tensor(
tensor) == 4) 
THEN 
 2356            CALL dbt_distribution_new(dist, pgrid, dist_1, dist_2, dist_3, dist_4)
 
 2357            CALL dbt_create(t_tmp, name, dist, map1, map2, bs_1, bs_2, bs_3, bs_4)
 
 2359      CALL dbt_distribution_destroy(dist)
 
 2361      IF (
PRESENT(nodata)) 
THEN 
 2362         IF (.NOT. nodata) 
CALL dbt_copy_expert(
tensor, t_tmp, move_data=.true.)
 
 2364         CALL dbt_copy_expert(
tensor, t_tmp, move_data=.true.)
 
 2367      CALL dbt_copy_contraction_storage(
tensor, t_tmp)
 
 2372      IF (
PRESENT(unit_nr)) 
THEN 
 2373         IF (unit_nr > 0) 
THEN 
 2374            WRITE (unit_nr, 
"(T2,A,1X,A)") 
"OPTIMIZED PGRID INFO FOR", trim(
tensor%name)
 
 2375            WRITE (unit_nr, 
"(T4,A,1X,3I6)") 
"process grid dimensions:", pdims
 
 2376            CALL dbt_write_split_info(pgrid, unit_nr)
 
 2380      IF (
PRESENT(pgrid_changed)) pgrid_changed = .true.
 
 2382      CALL timestop(handle)
 
 2389   SUBROUTINE dbt_change_pgrid_2d(tensor, mp_comm, pdims, nodata, nsplit, dimsplit, pgrid_changed, unit_nr)
 
 2390      TYPE(dbt_type), 
INTENT(INOUT)                  :: 
tensor 
 2391      TYPE(mp_cart_type), 
INTENT(IN)               :: mp_comm
 
 2392      INTEGER, 
DIMENSION(2), 
INTENT(IN), 
OPTIONAL :: pdims
 
 2393      LOGICAL, 
INTENT(IN), 
OPTIONAL                      :: nodata
 
 2394      INTEGER, 
INTENT(IN), 
OPTIONAL :: nsplit, dimsplit
 
 2395      LOGICAL, 
INTENT(OUT), 
OPTIONAL :: pgrid_changed
 
 2396      INTEGER, 
INTENT(IN), 
OPTIONAL                      :: unit_nr
 
 2397      INTEGER, 
DIMENSION(ndims_matrix_row(tensor)) :: map1
 
 2398      INTEGER, 
DIMENSION(ndims_matrix_column(tensor)) :: map2
 
 2399      INTEGER, 
DIMENSION(ndims_tensor(tensor)) :: dims, nbatches
 
 2400      TYPE(dbt_pgrid_type) :: pgrid
 
 2401      INTEGER, 
DIMENSION(:), 
ALLOCATABLE :: batch_range_1, batch_range_2, batch_range_3, batch_range_4
 
 2402      INTEGER, 
DIMENSION(:), 
ALLOCATABLE :: array
 
 2405      CALL dbt_get_mapping_info(
tensor%pgrid%nd_index_grid, map1_2d=map1, map2_2d=map2)
 
 2406      CALL blk_dims_tensor(
tensor, dims)
 
 2408      IF (
ALLOCATED(
tensor%contraction_storage)) 
THEN 
 2409         associate(batch_ranges => 
tensor%contraction_storage%batch_ranges)
 
 2410            nbatches = sizes_of_arrays(
tensor%contraction_storage%batch_ranges) - 1
 
 2414            DO idim = 1, ndims_tensor(
tensor)
 
 2415               CALL get_ith_array(
tensor%contraction_storage%batch_ranges, idim, array)
 
 2416               dims(idim) = array(nbatches(idim) + 1) - array(1)
 
 2418               dims(idim) = dims(idim)/nbatches(idim)
 
 2419               IF (dims(idim) <= 0) dims(idim) = 1
 
 2424      pgrid = dbt_nd_mp_comm(mp_comm, map1, map2, pdims_2d=pdims, tdims=dims, nsplit=nsplit, dimsplit=dimsplit)
 
 2425      IF (
ALLOCATED(
tensor%contraction_storage)) 
THEN 
 2426            IF (ndims_tensor(
tensor) == 1) 
THEN 
 2427               CALL get_arrays(
tensor%contraction_storage%batch_ranges, batch_range_1)
 
 2428               CALL dbt_change_pgrid(
tensor, pgrid, batch_range_1, &
 
 2429                                     nodata=nodata, pgrid_changed=pgrid_changed, unit_nr=unit_nr)
 
 2431            IF (ndims_tensor(
tensor) == 2) 
THEN 
 2432               CALL get_arrays(
tensor%contraction_storage%batch_ranges, batch_range_1, batch_range_2)
 
 2433               CALL dbt_change_pgrid(
tensor, pgrid, batch_range_1, batch_range_2, &
 
 2434                                     nodata=nodata, pgrid_changed=pgrid_changed, unit_nr=unit_nr)
 
 2436            IF (ndims_tensor(
tensor) == 3) 
THEN 
 2437               CALL get_arrays(
tensor%contraction_storage%batch_ranges, batch_range_1, batch_range_2, batch_range_3)
 
 2438               CALL dbt_change_pgrid(
tensor, pgrid, batch_range_1, batch_range_2, batch_range_3, &
 
 2439                                     nodata=nodata, pgrid_changed=pgrid_changed, unit_nr=unit_nr)
 
 2441            IF (ndims_tensor(
tensor) == 4) 
THEN 
 2442               CALL get_arrays(
tensor%contraction_storage%batch_ranges, batch_range_1, batch_range_2, batch_range_3, batch_range_4)
 
 2443               CALL dbt_change_pgrid(
tensor, pgrid, batch_range_1, batch_range_2, batch_range_3, batch_range_4, &
 
 2444                                     nodata=nodata, pgrid_changed=pgrid_changed, unit_nr=unit_nr)
 
 2447         CALL dbt_change_pgrid(
tensor, pgrid, nodata=nodata, pgrid_changed=pgrid_changed, unit_nr=unit_nr)
 
 2449      CALL dbt_pgrid_destroy(pgrid)
 
logical function, public dbcsr_has_symmetry(matrix)
...
subroutine, public dbcsr_iterator_next_block(iterator, row, column, block, block_number_argument_has_been_removed, row_size, col_size, row_offset, col_offset)
...
logical function, public dbcsr_iterator_blocks_left(iterator)
...
subroutine, public dbcsr_iterator_stop(iterator)
...
subroutine, public dbcsr_desymmetrize(matrix_a, matrix_b)
...
subroutine, public dbcsr_iterator_start(iterator, matrix, shared, dynamic, dynamic_byrows)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_clear(matrix)
...
subroutine, public dbcsr_put_block(matrix, row, col, block, summation)
...
subroutine, public dbm_clear(matrix)
Remove all blocks from given matrix, but does not release the underlying memory.
Wrapper for allocating, copying and reshaping arrays.
Representation of arbitrary number of 1d integer arrays with arbitrary sizes. This is needed for gene...
subroutine, public get_arrays(list, data_1, data_2, data_3, data_4, i_selected)
Get all arrays contained in list.
subroutine, public create_array_list(list, ndata, data_1, data_2, data_3, data_4)
collects any number of arrays of different sizes into a single array (list%col_data), ...
subroutine, public destroy_array_list(list)
destroy array list.
integer function, dimension(:), allocatable, public sizes_of_arrays(list)
sizes of arrays stored in list
type(array_list) function, public array_sublist(list, i_selected)
extract a subset of arrays
subroutine, public reorder_arrays(list_in, list_out, order)
reorder array list.
logical function, public check_equal(list1, list2)
check whether two array lists are equal
Methods to operate on n-dimensional tensor blocks.
elemental logical function, public checker_tr(row, column)
Determines whether a transpose must be applied.
logical function, public dbt_iterator_blocks_left(iterator)
Generalization of block_iterator_blocks_left for tensors.
subroutine, public destroy_block(block)
pure integer function, public ndims_iterator(iterator)
Number of dimensions.
subroutine, public dbt_iterator_stop(iterator)
Generalization of block_iterator_stop for tensors.
subroutine, public dbt_iterator_start(iterator, tensor)
Generalization of block_iterator_start for tensors.
subroutine, public dbt_iterator_next_block(iterator, ind_nd, blk_size, blk_offset)
iterate over nd blocks of an nd rank tensor, index only (blocks must be retrieved by calling dbt_get_...
tensor index and mapping to DBM index
pure integer function, public ndims_mapping_row(map)
how many tensor dimensions are mapped to matrix row
pure integer function, dimension(size(order)), public dbt_inverse_order(order)
Invert order.
pure integer function, public ndims_mapping(map)
subroutine, public permute_index(map_in, map_out, order)
reorder tensor index (no data)
pure subroutine, public dbt_get_mapping_info(map, ndim_nd, ndim1_2d, ndim2_2d, dims_2d_i8, dims_2d, dims_nd, dims1_2d, dims2_2d, map1_2d, map2_2d, map_nd, base, col_major)
get mapping info
pure integer function, public ndims_mapping_column(map)
how many tensor dimensions are mapped to matrix column
pure integer function, dimension(map%ndim_nd), public get_nd_indices_tensor(map, ind_in)
transform 2d index to nd index, using info from index mapping.
DBT tensor Input / Output.
subroutine, public dbt_write_tensor_info(tensor, unit_nr, full_info)
Write tensor global info: block dimensions, full dimensions and process grid dimensions.
subroutine, public dbt_write_tensor_dist(tensor, unit_nr)
Write info on tensor distribution & load balance.
subroutine, public dbt_write_split_info(pgrid, unit_nr)
integer function, public prep_output_unit(unit_nr)
DBT tensor framework for block-sparse tensor contraction. Representation of n-rank tensors as DBT tal...
subroutine, public dbt_copy(tensor_in, tensor_out, order, summation, bounds, move_data, unit_nr)
Copy tensor data. Redistributes tensor data according to distributions of target and source tensor....
subroutine, public dbt_batched_contract_finalize(tensor, unit_nr)
finalize batched contraction. This performs all communication that has been postponed in the contract...
subroutine, public dbt_copy_matrix_to_tensor(matrix_in, tensor_out, summation)
copy matrix to tensor.
subroutine, public dbt_contract(alpha, tensor_1, tensor_2, beta, tensor_3, contract_1, notcontract_1, contract_2, notcontract_2, map_1, map_2, bounds_1, bounds_2, bounds_3, optimize_dist, pgrid_opt_1, pgrid_opt_2, pgrid_opt_3, filter_eps, flop, move_data, retain_sparsity, unit_nr, log_verbose)
Contract tensors by multiplying matrix representations. tensor_3(map_1, map_2) := alpha * tensor_1(no...
subroutine, public dbt_copy_tensor_to_matrix(tensor_in, matrix_out, summation)
copy tensor to matrix
subroutine, public dbt_batched_contract_init(tensor, batch_range_1, batch_range_2, batch_range_3, batch_range_4)
Initialize batched contraction for this tensor.
Routines to reshape / redistribute tensors.
subroutine, public dbt_reshape(tensor_in, tensor_out, summation, move_data)
copy data (involves reshape) tensor_out = tensor_out + tensor_in move_data memory optimization: trans...
Routines to split blocks and to convert between tensors with different block sizes.
subroutine, public dbt_split_copyback(tensor_split_in, tensor_out, summation)
Copy tensor with split blocks to tensor with original block sizes.
subroutine, public dbt_make_compatible_blocks(tensor1, tensor2, tensor1_split, tensor2_split, order, nodata1, nodata2, move_data)
split two tensors with same total sizes but different block sizes such that they have equal block siz...
subroutine, public dbt_crop(tensor_in, tensor_out, bounds, move_data)
Tall-and-skinny matrices: base routines similar to DBM API, mostly wrappers around existing DBM routi...
subroutine, public dbt_tas_get_info(matrix, nblkrows_total, nblkcols_total, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, distribution, name)
...
subroutine, public dbt_tas_copy(matrix_b, matrix_a, summation)
Copy matrix_a to matrix_b.
subroutine, public dbt_tas_finalize(matrix)
...
type(dbt_tas_split_info) function, pointer, public dbt_tas_info(matrix)
get info on mpi grid splitting
Matrix multiplication for tall-and-skinny matrices. This uses the k-split (non-recursive) CARMA algor...
subroutine, public dbt_tas_set_batched_state(matrix, state, opt_grid)
set state flags during batched multiplication
subroutine, public dbt_tas_batched_mm_init(matrix)
...
subroutine, public dbt_tas_batched_mm_finalize(matrix)
...
recursive subroutine, public dbt_tas_multiply(transa, transb, transc, alpha, matrix_a, matrix_b, beta, matrix_c, optimize_dist, split_opt, filter_eps, flop, move_data_a, move_data_b, retain_sparsity, simple_split, unit_nr, log_verbose)
tall-and-skinny matrix-matrix multiplication. Undocumented dummy arguments are identical to arguments...
subroutine, public dbt_tas_batched_mm_complete(matrix, warn)
...
methods to split tall-and-skinny matrices along longest dimension. Basically, we are splitting proces...
subroutine, public dbt_tas_release_info(split_info)
...
integer, parameter, public rowsplit
integer, parameter, public colsplit
subroutine, public dbt_tas_create_split(split_info, mp_comm, split_rowcol, nsplit, own_comm, opt_nsplit)
Split Cartesian process grid using a default split heuristic.
real(dp), parameter, public default_nsplit_accept_ratio
real(dp), parameter, public default_pdims_accept_ratio
subroutine, public dbt_tas_info_hold(split_info)
...
DBT tall-and-skinny base types. Mostly wrappers around existing DBM routines.
DBT tensor framework for block-sparse tensor contraction: Types and create/destroy routines.
subroutine, public dbt_pgrid_destroy(pgrid, keep_comm)
destroy process grid
subroutine, public dbt_distribution_new(dist, pgrid, nd_dist_1, nd_dist_2, nd_dist_3, nd_dist_4)
Create a tensor distribution.
subroutine, public blk_dims_tensor(tensor, dims)
tensor block dimensions
subroutine, public dims_tensor(tensor, dims)
tensor dimensions
subroutine, public dbt_copy_contraction_storage(tensor_in, tensor_out)
type(dbt_pgrid_type) function, public dbt_nd_mp_comm(comm_2d, map1_2d, map2_2d, dims_nd, dims1_nd, dims2_nd, pdims_2d, tdims, nsplit, dimsplit)
Create a default nd process topology that is consistent with a given 2d topology. Purpose: a nd tenso...
subroutine, public dbt_destroy(tensor)
Destroy a tensor.
pure integer function, public dbt_max_nblks_local(tensor)
returns an estimate of maximum number of local blocks in tensor (irrespective of the actual number of...
subroutine, public dbt_get_info(tensor, nblks_total, nfull_total, nblks_local, nfull_local, pdims, my_ploc, blks_local_1, blks_local_2, blks_local_3, blks_local_4, proc_dist_1, proc_dist_2, proc_dist_3, proc_dist_4, blk_size_1, blk_size_2, blk_size_3, blk_size_4, blk_offset_1, blk_offset_2, blk_offset_3, blk_offset_4, distribution, name)
As block_get_info but for tensors.
subroutine, public dbt_distribution_new_expert(dist, pgrid, map1_2d, map2_2d, nd_dist_1, nd_dist_2, nd_dist_3, nd_dist_4, own_comm)
Create a tensor distribution.
type(dbt_distribution_type) function, public dbt_distribution(tensor)
get distribution from tensor
pure integer function, public ndims_tensor(tensor)
tensor rank
pure integer function, public dbt_nblks_total(tensor, idim)
total numbers of blocks along dimension idim
pure integer function, public dbt_get_num_blocks(tensor)
As block_get_num_blocks: get number of local blocks.
subroutine, public dbt_default_distvec(nblk, nproc, blk_size, dist)
get a load-balanced and randomized distribution along one tensor dimension
subroutine, public dbt_hold(tensor)
reference counting for tensors (only needed for communicator handle that must be freed when no longer...
subroutine, public dbt_clear(tensor)
Clear tensor (s.t. it does not contain any blocks)
subroutine, public dbt_finalize(tensor)
Finalize tensor, as block_finalize. This should be taken care of internally in DBT tensors,...
subroutine, public mp_environ_pgrid(pgrid, dims, task_coor)
as mp_environ but for special pgrid type
subroutine, public dbt_get_stored_coordinates(tensor, ind_nd, processor)
Generalization of block_get_stored_coordinates for tensors.
integer(kind=int_8) function, public dbt_get_num_blocks_total(tensor)
Get total number of blocks.
pure integer(int_8) function, public ndims_matrix_row(tensor)
how many tensor dimensions are mapped to matrix row
pure integer(int_8) function, public ndims_matrix_column(tensor)
how many tensor dimensions are mapped to matrix column
subroutine, public dbt_filter(tensor, eps)
As block_filter.
subroutine, public dbt_distribution_destroy(dist)
Destroy tensor distribution.
subroutine, public dbt_scale(tensor, alpha)
as block_scale
Defines the basic variable types.
integer, parameter, public int_8
integer, parameter, public dp
integer, parameter, public default_string_length
Interface to the message passing library MPI.
All kind of helpful little routines.