837 node_of_domain, job_type)
841 INTENT(INOUT) :: submatrix
844 INTEGER,
DIMENSION(:),
INTENT(IN) :: node_of_domain
845 INTEGER,
INTENT(IN) :: job_type
847 CHARACTER(len=*),
PARAMETER :: routinen =
'construct_submatrices'
849 CHARACTER :: matrix_type
850 INTEGER :: block_node, block_offset, col, col_offset, col_size, dest_node, groupid, handle, &
851 iblock, icol, idomain, index_col, index_ec, index_er, index_row, index_sc, index_sr, &
852 inode, ldesc, mynode, nblkcols_tot, nblkrows_tot, ndomains, ndomains2, nnodes, &
853 recv_size2_total, recv_size_total, row, row_size, send_size2_total, send_size_total, &
854 smcol, smrow, start_data
855 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: first_col, first_row, offset2_block, offset_block, &
856 recv_data2, recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
857 send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
858 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: recv_descriptor, send_descriptor
859 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, row_blk_size
860 LOGICAL :: found, transp
861 REAL(kind=
dp) :: antifactor
862 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recv_data, send_data
863 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block_p
872 CALL timeset(routinen, handle)
874 CALL dbcsr_get_info(matrix, nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
875 ndomains = nblkcols_tot
879 CALL group%set_handle(groupid)
884 ALLOCATE (send_descriptor(ldesc, nnodes))
885 ALLOCATE (recv_descriptor(ldesc, nnodes))
886 send_descriptor(:, :) = 0
890 DO idomain = 1, ndomains
892 dest_node = node_of_domain(idomain)
896 IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
897 index_er = domain_map%index1(idomain) - 1
899 DO index_row = index_sr, index_er
901 row = domain_map%pairs(index_row, 1)
906 IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
907 index_ec = domain_map%index1(idomain) - 1
914 DO index_col = index_sc, index_ec
917 col = domain_map%pairs(index_col, 1)
924 row, col, block_node)
925 IF (block_node .EQ. mynode)
THEN
928 send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
929 send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
968 CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
970 ALLOCATE (send_size_cpu(nnodes), send_offset_cpu(nnodes))
971 send_offset_cpu(1) = 0
972 send_size_cpu(1) = send_descriptor(2, 1)
974 send_size_cpu(inode) = send_descriptor(2, inode)
975 send_offset_cpu(inode) = send_offset_cpu(inode - 1) + &
976 send_size_cpu(inode - 1)
978 send_size_total = send_offset_cpu(nnodes) + send_size_cpu(nnodes)
980 ALLOCATE (recv_size_cpu(nnodes), recv_offset_cpu(nnodes))
981 recv_offset_cpu(1) = 0
982 recv_size_cpu(1) = recv_descriptor(2, 1)
984 recv_size_cpu(inode) = recv_descriptor(2, inode)
985 recv_offset_cpu(inode) = recv_offset_cpu(inode - 1) + &
986 recv_size_cpu(inode - 1)
988 recv_size_total = recv_offset_cpu(nnodes) + recv_size_cpu(nnodes)
990 ALLOCATE (send_size2_cpu(nnodes), send_offset2_cpu(nnodes))
991 send_offset2_cpu(1) = 0
992 send_size2_cpu(1) = 2*send_descriptor(1, 1)
994 send_size2_cpu(inode) = 2*send_descriptor(1, inode)
995 send_offset2_cpu(inode) = send_offset2_cpu(inode - 1) + &
996 send_size2_cpu(inode - 1)
998 send_size2_total = send_offset2_cpu(nnodes) + send_size2_cpu(nnodes)
1000 ALLOCATE (recv_size2_cpu(nnodes), recv_offset2_cpu(nnodes))
1001 recv_offset2_cpu(1) = 0
1002 recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
1003 DO inode = 2, nnodes
1004 recv_size2_cpu(inode) = 2*recv_descriptor(1, inode)
1005 recv_offset2_cpu(inode) = recv_offset2_cpu(inode - 1) + &
1006 recv_size2_cpu(inode - 1)
1008 recv_size2_total = recv_offset2_cpu(nnodes) + recv_size2_cpu(nnodes)
1010 DEALLOCATE (send_descriptor)
1011 DEALLOCATE (recv_descriptor)
1014 ALLOCATE (send_data(send_size_total))
1015 ALLOCATE (recv_data(recv_size_total))
1016 ALLOCATE (send_data2(send_size2_total))
1017 ALLOCATE (recv_data2(recv_size2_total))
1018 ALLOCATE (offset_block(nnodes))
1019 ALLOCATE (offset2_block(nnodes))
1021 offset2_block(:) = 0
1023 DO idomain = 1, ndomains
1025 dest_node = node_of_domain(idomain)
1029 IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
1030 index_er = domain_map%index1(idomain) - 1
1032 DO index_row = index_sr, index_er
1034 row = domain_map%pairs(index_row, 1)
1039 IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
1040 index_ec = domain_map%index1(idomain) - 1
1047 DO index_col = index_sc, index_ec
1050 col = domain_map%pairs(index_col, 1)
1057 row, col, block_node)
1058 IF (block_node .EQ. mynode)
THEN
1070 col_offset = row_size*col_size
1071 start_data = send_offset_cpu(dest_node + 1) + &
1072 offset_block(dest_node + 1)
1073 send_data(start_data + 1:start_data + col_offset) = reshape(block_p, [col_offset])
1074 offset_block(dest_node + 1) = offset_block(dest_node + 1) + col_offset
1076 send_data2(send_offset2_cpu(dest_node + 1) + &
1077 offset2_block(dest_node + 1) + 1) = row
1078 send_data2(send_offset2_cpu(dest_node + 1) + &
1079 offset2_block(dest_node + 1) + 2) = col
1080 offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
1130 CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
1131 recv_data, recv_size_cpu, recv_offset_cpu)
1133 CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
1134 recv_data2, recv_size2_cpu, recv_offset2_cpu)
1136 DEALLOCATE (send_size_cpu, send_offset_cpu)
1137 DEALLOCATE (send_size2_cpu, send_offset2_cpu)
1138 DEALLOCATE (send_data)
1139 DEALLOCATE (send_data2)
1140 DEALLOCATE (offset_block)
1141 DEALLOCATE (offset2_block)
1144 CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
1148 ndomains2 =
SIZE(submatrix)
1149 IF (ndomains2 .NE. ndomains)
THEN
1150 cpabort(
"wrong submatrix size")
1153 submatrix(:)%nnodes = nnodes
1154 submatrix(:)%group = group
1155 submatrix(:)%nrows = 0
1156 submatrix(:)%ncols = 0
1158 ALLOCATE (first_row(nblkrows_tot), first_col(nblkcols_tot))
1159 submatrix(:)%domain = -1
1160 DO idomain = 1, ndomains
1161 dest_node = node_of_domain(idomain)
1165 IF (dest_node .EQ. mynode)
THEN
1166 submatrix(idomain)%domain = idomain
1167 submatrix(idomain)%nbrows = 0
1168 submatrix(idomain)%nbcols = 0
1173 IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
1174 index_er = domain_map%index1(idomain) - 1
1175 DO index_row = index_sr, index_er
1176 row = domain_map%pairs(index_row, 1)
1179 first_row(row) = submatrix(idomain)%nrows + 1
1180 submatrix(idomain)%nrows = submatrix(idomain)%nrows + row_blk_size(row)
1181 submatrix(idomain)%nbrows = submatrix(idomain)%nbrows + 1
1184 ALLOCATE (submatrix(idomain)%dbcsr_row(submatrix(idomain)%nbrows))
1185 ALLOCATE (submatrix(idomain)%size_brow(submatrix(idomain)%nbrows))
1189 IF (idomain .GT. 1) index_sr = domain_map%index1(idomain - 1)
1190 index_er = domain_map%index1(idomain) - 1
1191 DO index_row = index_sr, index_er
1192 row = domain_map%pairs(index_row, 1)
1195 submatrix(idomain)%dbcsr_row(smrow) = row
1196 submatrix(idomain)%size_brow(smrow) = row_blk_size(row)
1206 IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
1207 index_ec = domain_map%index1(idomain) - 1
1213 DO index_col = index_sc, index_ec
1215 col = domain_map%pairs(index_col, 1)
1226 first_col(col) = submatrix(idomain)%ncols + 1
1227 submatrix(idomain)%ncols = submatrix(idomain)%ncols + col_blk_size(col)
1228 submatrix(idomain)%nbcols = submatrix(idomain)%nbcols + 1
1232 ALLOCATE (submatrix(idomain)%dbcsr_col(submatrix(idomain)%nbcols))
1233 ALLOCATE (submatrix(idomain)%size_bcol(submatrix(idomain)%nbcols))
1240 IF (idomain .GT. 1) index_sc = domain_map%index1(idomain - 1)
1241 index_ec = domain_map%index1(idomain) - 1
1247 DO index_col = index_sc, index_ec
1249 col = domain_map%pairs(index_col, 1)
1255 submatrix(idomain)%dbcsr_col(smcol) = col
1256 submatrix(idomain)%size_bcol(smcol) = col_blk_size(col)
1261 ALLOCATE (submatrix(idomain)%mdata( &
1262 submatrix(idomain)%nrows, &
1263 submatrix(idomain)%ncols))
1264 submatrix(idomain)%mdata(:, :) = 0.0_dp
1265 DO inode = 1, nnodes
1267 DO iblock = 1, recv_size2_cpu(inode)/2
1269 row = recv_data2(recv_offset2_cpu(inode) + (iblock - 1)*2 + 1)
1270 col = recv_data2(recv_offset2_cpu(inode) + (iblock - 1)*2 + 2)
1272 IF ((first_col(col) .NE. -1) .AND. (first_row(row) .NE. -1))
THEN
1274 start_data = recv_offset_cpu(inode) + block_offset + 1
1275 DO icol = 0, col_blk_size(col) - 1
1276 submatrix(idomain)%mdata(first_row(row): &
1277 first_row(row) + row_blk_size(row) - 1, &
1278 first_col(col) + icol) = &
1279 recv_data(start_data:start_data + row_blk_size(row) - 1)
1280 start_data = start_data + row_blk_size(row)
1283 IF (matrix_type == dbcsr_type_symmetric .OR. &
1284 matrix_type == dbcsr_type_antisymmetric)
THEN
1287 IF (matrix_type == dbcsr_type_antisymmetric)
THEN
1288 antifactor = -1.0_dp
1290 start_data = recv_offset_cpu(inode) + block_offset + 1
1291 DO icol = 0, col_blk_size(col) - 1
1292 submatrix(idomain)%mdata(first_row(col) + icol, &
1294 first_col(row) + row_blk_size(row) - 1) = &
1295 antifactor*recv_data(start_data: &
1296 start_data + row_blk_size(row) - 1)
1297 start_data = start_data + row_blk_size(row)
1299 ELSE IF (matrix_type == dbcsr_type_no_symmetry)
THEN
1301 cpabort(
"matrix type is NYI")
1305 block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
1311 DEALLOCATE (recv_size_cpu, recv_offset_cpu)
1312 DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
1313 DEALLOCATE (recv_data)
1314 DEALLOCATE (recv_data2)
1316 DEALLOCATE (first_row, first_col)
1318 CALL timestop(handle)
1335 INTENT(IN) :: submatrix
1336 TYPE(
dbcsr_type),
INTENT(IN) :: distr_pattern
1338 CHARACTER(len=*),
PARAMETER :: routinen =
'construct_dbcsr_from_submatrices'
1340 CHARACTER :: matrix_type
1341 INTEGER :: block_offset, col, col_offset, colsize, dest_node, groupid, handle, iblock, icol, &
1342 idomain, inode, irow_subm, ldesc, mynode, nblkcols_tot, nblkrows_tot, ndomains, &
1343 ndomains2, nnodes, recv_size2_total, recv_size_total, row, rowsize, send_size2_total, &
1344 send_size_total, smroff, start_data, unit_nr
1345 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: offset2_block, offset_block, recv_data2, &
1346 recv_offset2_cpu, recv_offset_cpu, recv_size2_cpu, recv_size_cpu, send_data2, &
1347 send_offset2_cpu, send_offset_cpu, send_size2_cpu, send_size_cpu
1348 INTEGER,
ALLOCATABLE,
DIMENSION(:, :) :: recv_descriptor, send_descriptor
1349 INTEGER,
DIMENSION(:),
POINTER :: col_blk_size, row_blk_size
1351 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:) :: recv_data, send_data
1352 REAL(kind=
dp),
ALLOCATABLE,
DIMENSION(:, :) :: new_block
1353 REAL(kind=
dp),
DIMENSION(:, :),
POINTER :: block_p
1359 CALL timeset(routinen, handle)
1363 IF (logger%para_env%is_source())
THEN
1369 CALL dbcsr_get_info(matrix, nblkrows_total=nblkrows_tot, nblkcols_total=nblkcols_tot)
1370 ndomains = nblkcols_tot
1371 ndomains2 =
SIZE(submatrix)
1373 IF (ndomains .NE. ndomains2)
THEN
1374 cpabort(
"domain mismatch")
1380 CALL group%set_handle(groupid)
1383 IF (matrix_type .NE. dbcsr_type_no_symmetry)
THEN
1384 cpabort(
"only non-symmetric matrices so far")
1391 block_p(:, :) = 0.0_dp
1399 ALLOCATE (send_descriptor(ldesc, nnodes))
1400 ALLOCATE (recv_descriptor(ldesc, nnodes))
1401 send_descriptor(:, :) = 0
1404 DO idomain = 1, ndomains
1406 IF (submatrix(idomain)%domain .GT. 0)
THEN
1408 DO irow_subm = 1, submatrix(idomain)%nbrows
1410 IF (submatrix(idomain)%nbcols .NE. 1)
THEN
1411 cpabort(
"corrupt submatrix structure")
1414 row = submatrix(idomain)%dbcsr_row(irow_subm)
1415 col = submatrix(idomain)%dbcsr_col(1)
1417 IF (col .NE. idomain)
THEN
1418 cpabort(
"corrupt submatrix structure")
1423 row, idomain, dest_node)
1425 send_descriptor(1, dest_node + 1) = send_descriptor(1, dest_node + 1) + 1
1426 send_descriptor(2, dest_node + 1) = send_descriptor(2, dest_node + 1) + &
1427 submatrix(idomain)%size_brow(irow_subm)* &
1428 submatrix(idomain)%size_bcol(1)
1437 CALL group%alltoall(send_descriptor, recv_descriptor, ldesc)
1439 ALLOCATE (send_size_cpu(nnodes), send_offset_cpu(nnodes))
1440 send_offset_cpu(1) = 0
1441 send_size_cpu(1) = send_descriptor(2, 1)
1442 DO inode = 2, nnodes
1443 send_size_cpu(inode) = send_descriptor(2, inode)
1444 send_offset_cpu(inode) = send_offset_cpu(inode - 1) + &
1445 send_size_cpu(inode - 1)
1447 send_size_total = send_offset_cpu(nnodes) + send_size_cpu(nnodes)
1449 ALLOCATE (recv_size_cpu(nnodes), recv_offset_cpu(nnodes))
1450 recv_offset_cpu(1) = 0
1451 recv_size_cpu(1) = recv_descriptor(2, 1)
1452 DO inode = 2, nnodes
1453 recv_size_cpu(inode) = recv_descriptor(2, inode)
1454 recv_offset_cpu(inode) = recv_offset_cpu(inode - 1) + &
1455 recv_size_cpu(inode - 1)
1457 recv_size_total = recv_offset_cpu(nnodes) + recv_size_cpu(nnodes)
1459 ALLOCATE (send_size2_cpu(nnodes), send_offset2_cpu(nnodes))
1460 send_offset2_cpu(1) = 0
1461 send_size2_cpu(1) = 2*send_descriptor(1, 1)
1462 DO inode = 2, nnodes
1463 send_size2_cpu(inode) = 2*send_descriptor(1, inode)
1464 send_offset2_cpu(inode) = send_offset2_cpu(inode - 1) + &
1465 send_size2_cpu(inode - 1)
1467 send_size2_total = send_offset2_cpu(nnodes) + send_size2_cpu(nnodes)
1469 ALLOCATE (recv_size2_cpu(nnodes), recv_offset2_cpu(nnodes))
1470 recv_offset2_cpu(1) = 0
1471 recv_size2_cpu(1) = 2*recv_descriptor(1, 1)
1472 DO inode = 2, nnodes
1473 recv_size2_cpu(inode) = 2*recv_descriptor(1, inode)
1474 recv_offset2_cpu(inode) = recv_offset2_cpu(inode - 1) + &
1475 recv_size2_cpu(inode - 1)
1477 recv_size2_total = recv_offset2_cpu(nnodes) + recv_size2_cpu(nnodes)
1479 DEALLOCATE (send_descriptor)
1480 DEALLOCATE (recv_descriptor)
1483 ALLOCATE (send_data(send_size_total))
1484 ALLOCATE (recv_data(recv_size_total))
1485 ALLOCATE (send_data2(send_size2_total))
1486 ALLOCATE (recv_data2(recv_size2_total))
1487 ALLOCATE (offset_block(nnodes))
1488 ALLOCATE (offset2_block(nnodes))
1490 offset2_block(:) = 0
1492 DO idomain = 1, ndomains
1494 IF (submatrix(idomain)%domain .GT. 0)
THEN
1497 DO irow_subm = 1, submatrix(idomain)%nbrows
1499 row = submatrix(idomain)%dbcsr_row(irow_subm)
1500 col = submatrix(idomain)%dbcsr_col(1)
1502 rowsize = submatrix(idomain)%size_brow(irow_subm)
1503 colsize = submatrix(idomain)%size_bcol(1)
1507 row, idomain, dest_node)
1511 DO icol = 1, colsize
1512 start_data = send_offset_cpu(dest_node + 1) + &
1513 offset_block(dest_node + 1) + &
1515 send_data(start_data + 1:start_data + rowsize) = &
1516 submatrix(idomain)%mdata(smroff + 1:smroff + rowsize, icol)
1517 col_offset = col_offset + rowsize
1519 offset_block(dest_node + 1) = offset_block(dest_node + 1) + &
1522 send_data2(send_offset2_cpu(dest_node + 1) + &
1523 offset2_block(dest_node + 1) + 1) = row
1524 send_data2(send_offset2_cpu(dest_node + 1) + &
1525 offset2_block(dest_node + 1) + 2) = col
1526 offset2_block(dest_node + 1) = offset2_block(dest_node + 1) + 2
1528 smroff = smroff + rowsize
1537 CALL group%alltoall(send_data, send_size_cpu, send_offset_cpu, &
1538 recv_data, recv_size_cpu, recv_offset_cpu)
1540 CALL group%alltoall(send_data2, send_size2_cpu, send_offset2_cpu, &
1541 recv_data2, recv_size2_cpu, recv_offset2_cpu)
1543 DEALLOCATE (send_size_cpu, send_offset_cpu)
1544 DEALLOCATE (send_size2_cpu, send_offset2_cpu)
1545 DEALLOCATE (send_data)
1546 DEALLOCATE (send_data2)
1547 DEALLOCATE (offset_block)
1548 DEALLOCATE (offset2_block)
1551 CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, row_blk_size=row_blk_size)
1552 DO inode = 1, nnodes
1554 DO iblock = 1, recv_size2_cpu(inode)/2
1556 row = recv_data2(recv_offset2_cpu(inode) + (iblock - 1)*2 + 1)
1557 col = recv_data2(recv_offset2_cpu(inode) + (iblock - 1)*2 + 2)
1559 start_data = recv_offset_cpu(inode) + block_offset + 1
1560 ALLOCATE (new_block(row_blk_size(row), col_blk_size(col)))
1561 DO icol = 1, col_blk_size(col)
1562 new_block(:, icol) = &
1563 recv_data(start_data:start_data + row_blk_size(row) - 1)
1564 start_data = start_data + row_blk_size(row)
1567 DEALLOCATE (new_block)
1568 block_offset = block_offset + col_blk_size(col)*row_blk_size(row)
1572 DEALLOCATE (recv_size_cpu, recv_offset_cpu)
1573 DEALLOCATE (recv_size2_cpu, recv_offset2_cpu)
1574 DEALLOCATE (recv_data)
1575 DEALLOCATE (recv_data2)
1579 CALL timestop(handle)