cp_cfm_types.F
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                             !
!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Represents a complex full matrix distributed on many processors.
!> \author Joost VandeVondele, based on Fawzi's cp_fm_* routines
! **************************************************************************************************
MODULE cp_cfm_types
   USE cp_blacs_env, ONLY: cp_blacs_env_type
   USE cp_fm_struct, ONLY: cp_fm_struct_create, &
                           cp_fm_struct_equivalent, &
                           cp_fm_struct_get, &
                           cp_fm_struct_release, &
                           cp_fm_struct_retain, &
                           cp_fm_struct_type
   USE cp_fm_types, ONLY: cp_fm_type
   USE kinds, ONLY: dp
   USE mathconstants, ONLY: z_one, &
                            z_zero
   USE message_passing, ONLY: cp2k_is_parallel, &
                              mp_any_source, &
                              mp_para_env_type, &
                              mp_proc_null, &
                              mp_request_null, &
                              mp_request_type, &
                              mp_waitall

#include "../base/base_uses.f90"

   IMPLICIT NONE
   PRIVATE

   LOGICAL, PRIVATE, PARAMETER :: debug_this_module = .true.
   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'cp_cfm_types'
   INTEGER, PARAMETER, PRIVATE :: src_tag = 3, dest_tag = 5, send_tag = 7, recv_tag = 11

   PUBLIC :: cp_cfm_type, cp_cfm_p_type, copy_cfm_info_type
   PUBLIC :: cp_cfm_create, cp_cfm_release, &
             cp_cfm_set_all, &
             cp_cfm_get_element, cp_cfm_set_element, &
             cp_cfm_get_submatrix, cp_cfm_set_submatrix, &
             cp_cfm_get_info, &
             cp_cfm_to_cfm, cp_cfm_to_cfm_triangular, &
             cp_cfm_to_fm, cp_fm_to_cfm, &
             cp_cfm_start_copy_general, cp_cfm_finish_copy_general, cp_cfm_cleanup_copy_general

   INTERFACE cp_cfm_to_cfm
      MODULE PROCEDURE cp_cfm_to_cfm_matrix, & ! a full matrix
         cp_cfm_to_cfm_columns ! just a number of columns
   END INTERFACE

! **************************************************************************************************
!> \brief Represent a complex full matrix.
!> \param name the name of the matrix, used for printing
!> \param matrix_struct structure of this matrix
!> \param local_data array with the data of the matrix (its content depends on
!>        the matrix type used: in a parallel run it is stored in
!>        ScaLAPACK format, in a sequential run it simply contains the matrix)
! **************************************************************************************************
   TYPE cp_cfm_type
      CHARACTER(len=60) :: name = ""
      TYPE(cp_fm_struct_type), POINTER :: matrix_struct => null()
      COMPLEX(kind=dp), DIMENSION(:, :), POINTER, CONTIGUOUS :: local_data => null()
   END TYPE cp_cfm_type

! **************************************************************************************************
!> \brief Just to build arrays of pointers to matrices.
!> \param matrix the pointer to the matrix
! **************************************************************************************************
   TYPE cp_cfm_p_type
      TYPE(cp_cfm_type), POINTER :: matrix => null()
   END TYPE cp_cfm_p_type

! **************************************************************************************************
!> \brief Stores the state of a copy between cp_cfm_start_copy_general
!>        and cp_cfm_finish_copy_general.
!> \par History
!>      Jan 2017 derived type 'copy_info_type' has been created [Mark T]
!>      Jan 2018 the type 'copy_info_type' has been adapted for complex matrices [Sergey Chulkov]
! **************************************************************************************************
   TYPE copy_cfm_info_type
      !> number of MPI processes that send data
      INTEGER :: send_size = -1
      !> number of locally stored rows (1) and columns (2) of the destination matrix
      INTEGER, DIMENSION(2) :: nlocal_recv = -1
      !> number of rows (1) and columns (2) of the ScaLAPACK block of the source matrix
      INTEGER, DIMENSION(2) :: nblock_src = -1
      !> BLACS process grid shape of the source matrix: (1) nproc_row, (2) nproc_col
      INTEGER, DIMENSION(2) :: src_num_pe = -1
      !> displacements into recv_buf
      INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_disp
      !> MPI requests for non-blocking receive and send operations
      TYPE(mp_request_type), ALLOCATABLE, DIMENSION(:) :: recv_request, send_request
      !> global column and row indices of locally stored elements of the destination matrix
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices => null(), recv_row_indices => null()
      !> rank of the MPI process with BLACS coordinates (prow, pcol)
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: src_blacs2mpi
      !> receive and send buffers for non-blocking MPI communication
      COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:) :: recv_buf, send_buf
   END TYPE copy_cfm_info_type

CONTAINS

! **************************************************************************************************
!> \brief Creates a new full matrix with the given structure.
!> \param matrix matrix to be created
!> \param matrix_struct structure of the matrix
!> \param name name of the matrix
!> \param nrow global number of rows (defaults to the value from matrix_struct)
!> \param ncol global number of columns (defaults to the value from matrix_struct)
!> \param set_zero if .true., set all elements of the new matrix to zero
!> \note
!>      preferred allocation routine
! **************************************************************************************************
   SUBROUTINE cp_cfm_create(matrix, matrix_struct, name, nrow, ncol, set_zero)
      TYPE(cp_cfm_type), INTENT(OUT) :: matrix
      TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: matrix_struct
      CHARACTER(len=*), INTENT(in), OPTIONAL :: name
      INTEGER, INTENT(IN), OPTIONAL :: nrow, ncol
      LOGICAL, INTENT(IN), OPTIONAL :: set_zero

      INTEGER :: ncol_global, ncol_local, nrow_global, &
                 nrow_local
      TYPE(cp_blacs_env_type), POINTER :: context
      TYPE(cp_fm_struct_type), POINTER :: cfm_struct

      IF (PRESENT(nrow) .OR. PRESENT(ncol)) THEN
         ! a different global size was requested: derive a new structure from the template
         CALL cp_fm_struct_get(matrix_struct, nrow_global=nrow_global, ncol_global=ncol_global)
         IF (PRESENT(nrow)) nrow_global = nrow
         IF (PRESENT(ncol)) ncol_global = ncol
         CALL cp_fm_struct_create(cfm_struct, template_fmstruct=matrix_struct, &
                                  nrow_global=nrow_global, ncol_global=ncol_global)

         context => cfm_struct%context
         matrix%matrix_struct => cfm_struct
         CALL cp_fm_struct_retain(matrix%matrix_struct)

         nrow_local = cfm_struct%local_leading_dimension
         ncol_local = max(1, cfm_struct%ncol_locals(context%mepos(2)))

         CALL cp_fm_struct_release(cfm_struct)
      ELSE
         context => matrix_struct%context
         matrix%matrix_struct => matrix_struct
         CALL cp_fm_struct_retain(matrix%matrix_struct)

         nrow_local = matrix_struct%local_leading_dimension
         ncol_local = max(1, matrix_struct%ncol_locals(context%mepos(2)))
      END IF

      NULLIFY (matrix%local_data)
      ALLOCATE (matrix%local_data(nrow_local, ncol_local))

      IF (PRESENT(set_zero)) THEN
         IF (set_zero) THEN
            matrix%local_data(1:nrow_local, 1:ncol_local) = z_zero
         END IF
      END IF

      IF (PRESENT(name)) THEN
         matrix%name = name
      ELSE
         matrix%name = 'full complex matrix'
      END IF

   END SUBROUTINE cp_cfm_create
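
! **************************************************************************************************
!> \brief [Editor's example] Minimal usage sketch for cp_cfm_create/cp_cfm_release: allocate a
!>        zero-initialised complex work matrix on an existing distribution and free it again.
!>        The dummy argument 'fmstruct' stands for any valid cp_fm_struct_type created elsewhere
!>        (e.g. with cp_fm_struct_create); this routine itself is hypothetical.
! **************************************************************************************************
   SUBROUTINE example_cfm_create_release(fmstruct)
      TYPE(cp_fm_struct_type), INTENT(IN), TARGET :: fmstruct
      TYPE(cp_cfm_type) :: work

      ! allocate local_data according to the distribution described by fmstruct
      CALL cp_cfm_create(work, fmstruct, name="work matrix", set_zero=.true.)
      ! ... use 'work' here ...
      ! release local_data and drop the reference to the matrix structure
      CALL cp_cfm_release(work)
   END SUBROUTINE example_cfm_create_release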

! **************************************************************************************************
!> \brief Releases a full matrix.
!> \param matrix the matrix to release
! **************************************************************************************************
   SUBROUTINE cp_cfm_release(matrix)
      TYPE(cp_cfm_type), INTENT(INOUT) :: matrix

      IF (ASSOCIATED(matrix%local_data)) THEN
         DEALLOCATE (matrix%local_data)
      END IF
      matrix%name = ""
      CALL cp_fm_struct_release(matrix%matrix_struct)
   END SUBROUTINE cp_cfm_release

! **************************************************************************************************
!> \brief Set all elements of the full matrix to alpha. In addition, set all
!>        diagonal matrix elements to beta (if given explicitly).
!> \param matrix matrix to initialise
!> \param alpha value of off-diagonal matrix elements
!> \param beta value of diagonal matrix elements (equal to alpha if absent)
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_all(matrix, alpha, beta)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), INTENT(in) :: alpha
      COMPLEX(kind=dp), INTENT(in), OPTIONAL :: beta

      INTEGER :: irow_local, nrow_local
#if defined(__parallel)
      INTEGER :: icol_local, ncol_local
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
#endif

      ! fill the entire local block with alpha (increment 0 replicates the scalar)
      CALL zcopy(SIZE(matrix%local_data), alpha, 0, matrix%local_data(1, 1), 1)

      IF (PRESENT(beta)) THEN
#if defined(__parallel)
         CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
                              row_indices=row_indices, col_indices=col_indices)

         icol_local = 1
         irow_local = 1

         ! row_indices/col_indices are sorted, so walk both lists merge-style
         ! and overwrite the locally stored diagonal elements
         DO WHILE (irow_local <= nrow_local .AND. icol_local <= ncol_local)
            IF (row_indices(irow_local) < col_indices(icol_local)) THEN
               irow_local = irow_local + 1
            ELSE IF (row_indices(irow_local) > col_indices(icol_local)) THEN
               icol_local = icol_local + 1
            ELSE
               matrix%local_data(irow_local, icol_local) = beta
               irow_local = irow_local + 1
               icol_local = icol_local + 1
            END IF
         END DO
#else
         nrow_local = min(matrix%matrix_struct%nrow_global, matrix%matrix_struct%ncol_global)

         DO irow_local = 1, nrow_local
            matrix%local_data(irow_local, irow_local) = beta
         END DO
#endif
      END IF

   END SUBROUTINE cp_cfm_set_all
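
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical helper showing cp_cfm_set_all: turn 'matrix' into the
!>        unit matrix by setting all elements to zero and the diagonal to one.
! **************************************************************************************************
   SUBROUTINE example_cfm_set_identity(matrix)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix

      ! off-diagonal elements get z_zero, diagonal elements get z_one
      CALL cp_cfm_set_all(matrix, z_zero, beta=z_one)
   END SUBROUTINE example_cfm_set_identity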

! **************************************************************************************************
!> \brief Get the matrix element by its global index.
!> \param matrix full matrix
!> \param irow_global global row index
!> \param icol_global global column index
!> \param alpha matrix element
!> \par History
!>      TCH, created
!>      always return the answer
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_element(matrix, irow_global, icol_global, alpha)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      INTEGER, INTENT(in) :: irow_global, icol_global
      COMPLEX(kind=dp), INTENT(out) :: alpha

#if defined(__parallel)
      INTEGER :: icol_local, ipcol, iprow, irow_local, &
                 mypcol, myprow, npcol, nprow
      INTEGER, DIMENSION(9) :: desca
      TYPE(cp_blacs_env_type), POINTER :: context
#endif

#if defined(__parallel)
      context => matrix%matrix_struct%context
      myprow = context%mepos(1)
      mypcol = context%mepos(2)
      nprow = context%num_pe(1)
      npcol = context%num_pe(2)

      desca(:) = matrix%matrix_struct%descriptor(:)

      ! map the global indices to local indices and the owning process coordinates
      CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
                   irow_local, icol_local, iprow, ipcol)

      ! the owner broadcasts the element so that every process returns the answer
      IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
         alpha = matrix%local_data(irow_local, icol_local)
         CALL context%ZGEBS2D('All', ' ', 1, 1, alpha, 1)
      ELSE
         CALL context%ZGEBR2D('All', ' ', 1, 1, alpha, 1, iprow, ipcol)
      END IF

#else
      alpha = matrix%local_data(irow_global, icol_global)
#endif

   END SUBROUTINE cp_cfm_get_element

! **************************************************************************************************
!> \brief Set the matrix element (irow_global,icol_global) of the full matrix to alpha.
!> \param matrix full matrix
!> \param irow_global global row index
!> \param icol_global global column index
!> \param alpha value of the matrix element
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_element(matrix, irow_global, icol_global, alpha)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      INTEGER, INTENT(in) :: irow_global, icol_global
      COMPLEX(kind=dp), INTENT(in) :: alpha

#if defined(__parallel)
      INTEGER :: icol_local, ipcol, iprow, irow_local, &
                 mypcol, myprow, npcol, nprow
      INTEGER, DIMENSION(9) :: desca
      TYPE(cp_blacs_env_type), POINTER :: context
#endif

#if defined(__parallel)
      context => matrix%matrix_struct%context
      myprow = context%mepos(1)
      mypcol = context%mepos(2)
      nprow = context%num_pe(1)
      npcol = context%num_pe(2)

      desca(:) = matrix%matrix_struct%descriptor(:)

      CALL infog2l(irow_global, icol_global, desca, nprow, npcol, myprow, mypcol, &
                   irow_local, icol_local, iprow, ipcol)

      ! only the process that owns the element updates it
      IF ((iprow == myprow) .AND. (ipcol == mypcol)) THEN
         matrix%local_data(irow_local, icol_local) = alpha
      END IF

#else
      matrix%local_data(irow_global, icol_global) = alpha
#endif

   END SUBROUTINE cp_cfm_set_element
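
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical round trip for the element accessors: set a single
!>        element on whichever process owns it, then read it back on every process.
! **************************************************************************************************
   SUBROUTINE example_cfm_element_roundtrip(matrix)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix

      COMPLEX(kind=dp) :: val

      ! set element (1,1); only the owning process writes its local_data
      CALL cp_cfm_set_element(matrix, 1, 1, z_one)
      ! read it back; the owner broadcasts, so 'val' is valid on all processes
      CALL cp_cfm_get_element(matrix, 1, 1, val)
   END SUBROUTINE example_cfm_element_roundtrip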

! **************************************************************************************************
!> \brief Extract a sub-matrix from the full matrix:
!>        op(target_m)(1:n_rows,1:n_cols) = fm(start_row:start_row+n_rows,start_col:start_col+n_cols).
!>        Sub-matrix 'target_m' is replicated on each CPU. Using this call is expensive.
!> \param fm full matrix you want to get the elements from
!> \param target_m 2-D array to store the extracted sub-matrix
!> \param start_row global row index of the matrix element target_m(1,1) (defaults to 1)
!> \param start_col global column index of the matrix element target_m(1,1) (defaults to 1)
!> \param n_rows number of rows to extract (defaults to size(op(target_m),1))
!> \param n_cols number of columns to extract (defaults to size(op(target_m),2))
!> \param transpose indicates that the extracted sub-matrix target_m should be transposed:
!>        op(target_m) = target_m^T if .TRUE.,
!>        op(target_m) = target_m if .FALSE. (defaults to false)
!> \par History
!>      * 04.2016 created borrowing from Fawzi's cp_fm_get_submatrix [Lianheng Tong]
!>      * 01.2018 drop innermost conditional branching [Sergey Chulkov]
!> \author Lianheng Tong
!> \note
!>      Optimized for full column updates. The matrix target_m is replicated and valid on all CPUs.
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_submatrix(fm, target_m, start_row, start_col, n_rows, n_cols, transpose)
      TYPE(cp_cfm_type), INTENT(IN) :: fm
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(out) :: target_m
      INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
      LOGICAL, INTENT(in), OPTIONAL :: transpose

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_get_submatrix'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
      INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
                 ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
                 start_row_global, start_row_local, this_col
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: do_zero, tr_a
      TYPE(mp_para_env_type), POINTER :: para_env

      CALL timeset(routineN, handle)

      IF (SIZE(target_m) /= 0) THEN
#if defined(__parallel)
         do_zero = .true.
#else
         do_zero = .false.
#endif

         tr_a = .false.
         IF (PRESENT(transpose)) tr_a = transpose

         ! find out the first and last global row/column indices
         start_row_global = 1
         start_col_global = 1
         IF (PRESENT(start_row)) start_row_global = start_row
         IF (PRESENT(start_col)) start_col_global = start_col

         IF (tr_a) THEN
            end_row_global = SIZE(target_m, 2)
            end_col_global = SIZE(target_m, 1)
         ELSE
            end_row_global = SIZE(target_m, 1)
            end_col_global = SIZE(target_m, 2)
         END IF
         IF (PRESENT(n_rows)) end_row_global = n_rows
         IF (PRESENT(n_cols)) end_col_global = n_cols

         end_row_global = end_row_global + start_row_global - 1
         end_col_global = end_col_global + start_col_global - 1

         CALL cp_cfm_get_info(matrix=fm, &
                              nrow_global=nrow_global, ncol_global=ncol_global, &
                              nrow_local=nrow_local, ncol_local=ncol_local, &
                              row_indices=row_indices, col_indices=col_indices)
         IF (end_row_global > nrow_global) THEN
            end_row_global = nrow_global
            do_zero = .true.
         END IF
         IF (end_col_global > ncol_global) THEN
            end_col_global = ncol_global
            do_zero = .true.
         END IF

         ! find out row/column indices of locally stored matrix elements that need to be copied.
         ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
         DO start_row_local = 1, nrow_local
            IF (row_indices(start_row_local) >= start_row_global) EXIT
         END DO

         DO end_row_local = start_row_local, nrow_local
            IF (row_indices(end_row_local) > end_row_global) EXIT
         END DO
         end_row_local = end_row_local - 1

         DO start_col_local = 1, ncol_local
            IF (col_indices(start_col_local) >= start_col_global) EXIT
         END DO

         DO end_col_local = start_col_local, ncol_local
            IF (col_indices(end_col_local) > end_col_global) EXIT
         END DO
         end_col_local = end_col_local - 1

         para_env => fm%matrix_struct%para_env
         local_data => fm%local_data

         ! wipe the content of the target matrix if:
         ! * the source matrix is distributed across a number of processes, or
         ! * not all elements of the target matrix will be assigned, e.g.
         !   when the target matrix is larger than the source matrix
         IF (do_zero) &
            CALL zcopy(SIZE(target_m), z_zero, 0, target_m(1, 1), 1)

         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  target_m(this_col, row_indices(i) - start_row_global + 1) = local_data(i, j)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  target_m(row_indices(i) - start_row_global + 1, this_col) = local_data(i, j)
               END DO
            END DO
         END IF

         ! every process contributed only its local elements; the sum replicates the result
         CALL para_env%sum(target_m)
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_get_submatrix
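
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical use of cp_cfm_get_submatrix: replicate the leading
!>        n x n block of a distributed matrix on every process. The local array 'sub_block' is
!>        an assumption of this sketch, not part of the module API.
! **************************************************************************************************
   SUBROUTINE example_cfm_gather_block(fm, n)
      TYPE(cp_cfm_type), INTENT(IN) :: fm
      INTEGER, INTENT(IN) :: n

      COMPLEX(kind=dp), ALLOCATABLE, DIMENSION(:, :) :: sub_block

      ALLOCATE (sub_block(n, n))
      ! gather fm(1:n,1:n); afterwards 'sub_block' holds identical data on all processes
      CALL cp_cfm_get_submatrix(fm, sub_block, start_row=1, start_col=1, n_rows=n, n_cols=n)
      ! ... use 'sub_block' ...
      DEALLOCATE (sub_block)
   END SUBROUTINE example_cfm_gather_block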

! **************************************************************************************************
!> \brief Set a sub-matrix of the full matrix:
!>        matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
!>        = alpha*op(new_values)(1:n_rows,1:n_cols) +
!>          beta*matrix(start_row:start_row+n_rows,start_col:start_col+n_cols)
!> \param matrix full matrix to update
!> \param new_values replicated 2-D array that holds new elements of the updated sub-matrix
!> \param start_row global row index of the matrix element new_values(1,1) (defaults to 1)
!> \param start_col global column index of the matrix element new_values(1,1) (defaults to 1)
!> \param n_rows number of rows to update (defaults to size(op(new_values),1))
!> \param n_cols number of columns to update (defaults to size(op(new_values),2))
!> \param alpha scale factor for the new values (defaults to (1.0,0.0))
!> \param beta scale factor for the old values (defaults to (0.0,0.0))
!> \param transpose indicates that the matrix new_values should be transposed:
!>        op(new_values) = new_values^T if .TRUE.,
!>        op(new_values) = new_values if .FALSE. (defaults to false)
!> \par History
!>      * 04.2016 created borrowing from Fawzi's cp_fm_set_submatrix [Lianheng Tong]
!>      * 01.2018 drop innermost conditional branching [Sergey Chulkov]
!> \author Lianheng Tong
!> \note
!>      Optimized for alpha=(1.0,0.0), beta=(0.0,0.0).
!>      All matrix elements 'new_values' need to be valid on all CPUs.
! **************************************************************************************************
   SUBROUTINE cp_cfm_set_submatrix(matrix, new_values, start_row, &
                                   start_col, n_rows, n_cols, alpha, beta, transpose)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: new_values
      INTEGER, INTENT(in), OPTIONAL :: start_row, start_col, n_rows, n_cols
      COMPLEX(kind=dp), INTENT(in), OPTIONAL :: alpha, beta
      LOGICAL, INTENT(in), OPTIONAL :: transpose

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_set_submatrix'

      COMPLEX(kind=dp) :: al, be
      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: local_data
      INTEGER :: end_col_global, end_col_local, end_row_global, end_row_local, handle, i, j, &
                 ncol_global, ncol_local, nrow_global, nrow_local, start_col_global, start_col_local, &
                 start_row_global, start_row_local, this_col
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices
      LOGICAL :: tr_a

      CALL timeset(routineN, handle)

      al = z_one
      be = z_zero
      IF (PRESENT(alpha)) al = alpha
      IF (PRESENT(beta)) be = beta

      ! find out the first and last global row/column indices
      start_row_global = 1
      start_col_global = 1
      IF (PRESENT(start_row)) start_row_global = start_row
      IF (PRESENT(start_col)) start_col_global = start_col

      tr_a = .false.
      IF (PRESENT(transpose)) tr_a = transpose

      IF (tr_a) THEN
         end_row_global = SIZE(new_values, 2)
         end_col_global = SIZE(new_values, 1)
      ELSE
         end_row_global = SIZE(new_values, 1)
         end_col_global = SIZE(new_values, 2)
      END IF
      IF (PRESENT(n_rows)) end_row_global = n_rows
      IF (PRESENT(n_cols)) end_col_global = n_cols

      end_row_global = end_row_global + start_row_global - 1
      end_col_global = end_col_global + start_col_global - 1

      CALL cp_cfm_get_info(matrix=matrix, &
                           nrow_global=nrow_global, ncol_global=ncol_global, &
                           nrow_local=nrow_local, ncol_local=ncol_local, &
                           row_indices=row_indices, col_indices=col_indices)
      IF (end_row_global > nrow_global) end_row_global = nrow_global
      IF (end_col_global > ncol_global) end_col_global = ncol_global

      ! find out row/column indices of locally stored matrix elements that need to be set.
      ! Arrays row_indices and col_indices are assumed to be sorted in ascending order
      DO start_row_local = 1, nrow_local
         IF (row_indices(start_row_local) >= start_row_global) EXIT
      END DO

      DO end_row_local = start_row_local, nrow_local
         IF (row_indices(end_row_local) > end_row_global) EXIT
      END DO
      end_row_local = end_row_local - 1

      DO start_col_local = 1, ncol_local
         IF (col_indices(start_col_local) >= start_col_global) EXIT
      END DO

      DO end_col_local = start_col_local, ncol_local
         IF (col_indices(end_col_local) > end_col_global) EXIT
      END DO
      end_col_local = end_col_local - 1

      local_data => matrix%local_data

      IF (al == z_one .AND. be == z_zero) THEN
         ! fast path: plain assignment without scaling
         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = new_values(this_col, row_indices(i) - start_row_global + 1)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = new_values(row_indices(i) - start_row_global + 1, this_col)
               END DO
            END DO
         END IF
      ELSE
         IF (tr_a) THEN
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = al*new_values(this_col, row_indices(i) - start_row_global + 1) + &
                                     be*local_data(i, j)
               END DO
            END DO
         ELSE
            DO j = start_col_local, end_col_local
               this_col = col_indices(j) - start_col_global + 1
               DO i = start_row_local, end_row_local
                  local_data(i, j) = al*new_values(row_indices(i) - start_row_global + 1, this_col) + &
                                     be*local_data(i, j)
               END DO
            END DO
         END IF
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_set_submatrix
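
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical use of cp_cfm_set_submatrix: accumulate a replicated
!>        block into a distributed matrix, matrix(1:n,1:n) = sub_block + matrix(1:n,1:n),
!>        by choosing alpha = beta = (1.0,0.0).
! **************************************************************************************************
   SUBROUTINE example_cfm_accumulate_block(matrix, sub_block)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      COMPLEX(kind=dp), DIMENSION(:, :), INTENT(in) :: sub_block

      ! 'sub_block' must hold identical data on all processes
      CALL cp_cfm_set_submatrix(matrix, sub_block, alpha=z_one, beta=z_one)
   END SUBROUTINE example_cfm_accumulate_block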

! **************************************************************************************************
!> \brief Returns information about a full matrix.
!> \param matrix matrix
!> \param name name of the matrix
!> \param nrow_global total number of rows
!> \param ncol_global total number of columns
!> \param nrow_block number of rows per ScaLAPACK block
!> \param ncol_block number of columns per ScaLAPACK block
!> \param nrow_local number of locally stored rows
!> \param ncol_local number of locally stored columns
!> \param row_indices global indices of locally stored rows
!> \param col_indices global indices of locally stored columns
!> \param local_data locally stored matrix elements
!> \param context BLACS context
!> \param matrix_struct matrix structure
!> \param para_env parallel environment
!> \date 12.06.2001
!> \author Matthias Krack
!> \version 1.0
! **************************************************************************************************
   SUBROUTINE cp_cfm_get_info(matrix, name, nrow_global, ncol_global, &
                              nrow_block, ncol_block, nrow_local, ncol_local, &
                              row_indices, col_indices, local_data, context, &
                              matrix_struct, para_env)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix
      CHARACTER(len=*), INTENT(OUT), OPTIONAL :: name
      INTEGER, INTENT(OUT), OPTIONAL :: nrow_global, ncol_global, nrow_block, &
                                        ncol_block, nrow_local, ncol_local
      INTEGER, DIMENSION(:), OPTIONAL, POINTER :: row_indices, col_indices
      COMPLEX(kind=dp), CONTIGUOUS, DIMENSION(:, :), &
         OPTIONAL, POINTER :: local_data
      TYPE(cp_blacs_env_type), OPTIONAL, POINTER :: context
      TYPE(cp_fm_struct_type), OPTIONAL, POINTER :: matrix_struct
      TYPE(mp_para_env_type), OPTIONAL, POINTER :: para_env

      IF (PRESENT(name)) name = matrix%name
      IF (PRESENT(matrix_struct)) matrix_struct => matrix%matrix_struct
      IF (PRESENT(local_data)) local_data => matrix%local_data ! not hiding things anymore :-(

      ! everything else is delegated to the matrix structure
      CALL cp_fm_struct_get(matrix%matrix_struct, nrow_local=nrow_local, &
                            ncol_local=ncol_local, nrow_global=nrow_global, &
                            ncol_global=ncol_global, nrow_block=nrow_block, &
                            ncol_block=ncol_block, context=context, &
                            row_indices=row_indices, col_indices=col_indices, para_env=para_env)

   END SUBROUTINE cp_cfm_get_info
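
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical query sketch for cp_cfm_get_info: all arguments are
!>        optional, so ask only for what is needed, e.g. the local extent of the distributed data.
! **************************************************************************************************
   SUBROUTINE example_cfm_query(matrix)
      TYPE(cp_cfm_type), INTENT(IN) :: matrix

      INTEGER :: ncol_local, nrow_local
      INTEGER, DIMENSION(:), POINTER :: col_indices, row_indices

      ! row_indices(i)/col_indices(j) give the global indices of local element (i,j)
      CALL cp_cfm_get_info(matrix, nrow_local=nrow_local, ncol_local=ncol_local, &
                           row_indices=row_indices, col_indices=col_indices)
   END SUBROUTINE example_cfm_query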

! **************************************************************************************************
!> \brief Copy the content of a full matrix into another full matrix of the same size.
!> \param source source matrix
!> \param destination destination matrix
!> \author Joost VandeVondele
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_matrix(source, destination)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination

      INTEGER :: npcol, nprow

      nprow = source%matrix_struct%context%num_pe(1)
      npcol = source%matrix_struct%context%num_pe(2)

      IF (.NOT. cp2k_is_parallel .OR. &
          cp_fm_struct_equivalent(source%matrix_struct, &
                                  destination%matrix_struct)) THEN
         ! equivalent distributions: a flat copy of the local data suffices
         IF (SIZE(source%local_data, 1) /= SIZE(destination%local_data, 1) .OR. &
             SIZE(source%local_data, 2) /= SIZE(destination%local_data, 2)) &
            CPABORT("internal local_data has different sizes")
         CALL zcopy(SIZE(source%local_data), source%local_data(1, 1), 1, destination%local_data(1, 1), 1)
      ELSE
         IF (source%matrix_struct%nrow_global /= destination%matrix_struct%nrow_global) &
            CPABORT("cannot copy between full matrices of different sizes")
         IF (source%matrix_struct%ncol_global /= destination%matrix_struct%ncol_global) &
            CPABORT("cannot copy between full matrices of different sizes")
#if defined(__parallel)
         ! different distributions: let ScaLAPACK redistribute the elements
         CALL pzcopy(source%matrix_struct%nrow_global* &
                     source%matrix_struct%ncol_global, &
                     source%local_data(1, 1), 1, 1, source%matrix_struct%descriptor, 1, &
                     destination%local_data(1, 1), 1, 1, destination%matrix_struct%descriptor, 1)
#else
         CPABORT("")
#endif
      END IF
   END SUBROUTINE cp_cfm_to_cfm_matrix

! **************************************************************************************************
!> \brief Copy a number of sequential columns of a full matrix into another full matrix.
!> \param msource source matrix
!> \param mtarget destination matrix
!> \param ncol number of columns to copy
!> \param source_start global index of the first column to copy within the source matrix
!> \param target_start global index of the first column to copy within the destination matrix
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_columns(msource, mtarget, ncol, source_start, &
                                    target_start)

      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
      INTEGER, INTENT(IN) :: ncol
      INTEGER, INTENT(IN), OPTIONAL :: source_start, target_start

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_columns'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: a, b
      INTEGER :: handle, n, ss, ts
#if defined(__parallel)
      INTEGER :: i
      INTEGER, DIMENSION(9) :: desca, descb
#endif

      CALL timeset(routineN, handle)

      ss = 1
      ts = 1

      IF (PRESENT(source_start)) ss = source_start
      IF (PRESENT(target_start)) ts = target_start

      n = msource%matrix_struct%nrow_global

      a => msource%local_data
      b => mtarget%local_data

#if defined(__parallel)
      desca(:) = msource%matrix_struct%descriptor(:)
      descb(:) = mtarget%matrix_struct%descriptor(:)
      ! copy the distributed columns one by one
      DO i = 0, ncol - 1
         CALL pzcopy(n, a(1, 1), 1, ss + i, desca, 1, b(1, 1), 1, ts + i, descb, 1)
      END DO
#else
      CALL zcopy(ncol*n, a(1, ss), 1, b(1, ts), 1)
#endif

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_to_cfm_columns
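
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical sketch for the cp_cfm_to_cfm generic: copy the first
!>        ncol columns of 'mos' into 'work' starting at column 1 of the target.
! **************************************************************************************************
   SUBROUTINE example_cfm_copy_columns(mos, work, ncol)
      TYPE(cp_cfm_type), INTENT(IN) :: mos, work
      INTEGER, INTENT(IN) :: ncol

      ! resolves to cp_cfm_to_cfm_columns through the cp_cfm_to_cfm interface
      CALL cp_cfm_to_cfm(mos, work, ncol, source_start=1, target_start=1)
   END SUBROUTINE example_cfm_copy_columns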

! **************************************************************************************************
!> \brief Copy just a triangular part of a full matrix.
!> \param msource source matrix
!> \param mtarget target matrix
!> \param uplo 'U' for upper triangular, 'L' for lower triangular
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_cfm_triangular(msource, mtarget, uplo)
      TYPE(cp_cfm_type), INTENT(IN) :: msource, mtarget
      CHARACTER(len=*), INTENT(IN) :: uplo

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_cfm_triangular'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: aa, bb
      INTEGER :: handle, ncol, nrow
#if defined(__parallel)
      INTEGER, DIMENSION(9) :: desca, descb
#endif

      CALL timeset(routineN, handle)

      nrow = msource%matrix_struct%nrow_global
      ncol = msource%matrix_struct%ncol_global

      aa => msource%local_data
      bb => mtarget%local_data

#if defined(__parallel)
      desca(:) = msource%matrix_struct%descriptor(:)
      descb(:) = mtarget%matrix_struct%descriptor(:)
      CALL pzlacpy(uplo, nrow, ncol, aa(1, 1), 1, 1, desca, bb(1, 1), 1, 1, descb)
#else
      CALL zlacpy(uplo, nrow, ncol, aa(1, 1), nrow, bb(1, 1), nrow)
#endif

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_to_cfm_triangular
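
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical sketch for cp_cfm_to_cfm_triangular: copy only the
!>        upper triangle of 'amat' into 'bmat', leaving the rest of 'bmat' untouched.
! **************************************************************************************************
   SUBROUTINE example_cfm_copy_upper(amat, bmat)
      TYPE(cp_cfm_type), INTENT(IN) :: amat, bmat

      CALL cp_cfm_to_cfm_triangular(amat, bmat, 'U')
   END SUBROUTINE example_cfm_copy_upper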

! **************************************************************************************************
!> \brief Copy real and imaginary parts of a complex full matrix into
!>        separate real-valued full matrices.
!> \param msource complex matrix
!> \param mtargetr (optional) real part of the source matrix
!> \param mtargeti (optional) imaginary part of the source matrix
!> \note
!>      Matrix structures are assumed to be equivalent.
! **************************************************************************************************
   SUBROUTINE cp_cfm_to_fm(msource, mtargetr, mtargeti)

      TYPE(cp_cfm_type), INTENT(IN) :: msource
      TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: mtargetr, mtargeti

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_to_fm'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
      INTEGER :: handle
      REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat

      CALL timeset(routineN, handle)

      zmat => msource%local_data
      IF (PRESENT(mtargetr)) THEN
         rmat => mtargetr%local_data
         IF ((.NOT. cp_fm_struct_equivalent(mtargetr%matrix_struct, msource%matrix_struct)) .OR. &
             (SIZE(rmat, 1) /= SIZE(zmat, 1)) .OR. &
             (SIZE(rmat, 2) /= SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of mtargetr differs from msource")
         END IF
         ! copy local data
         rmat = real(zmat, kind=dp)
      ELSE
         NULLIFY (rmat)
      END IF
      IF (PRESENT(mtargeti)) THEN
         imat => mtargeti%local_data
         IF ((.NOT. cp_fm_struct_equivalent(mtargeti%matrix_struct, msource%matrix_struct)) .OR. &
             (SIZE(imat, 1) /= SIZE(zmat, 1)) .OR. &
             (SIZE(imat, 2) /= SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of mtargeti differs from msource")
         END IF
         ! copy local data
         imat = real(aimag(zmat), kind=dp)
      ELSE
         NULLIFY (imat)
      END IF

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_to_fm
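
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical sketch for cp_cfm_to_fm: split a complex matrix into
!>        its real and imaginary parts; either target may be omitted if not needed.
! **************************************************************************************************
   SUBROUTINE example_cfm_split(cmat, re_part, im_part)
      TYPE(cp_cfm_type), INTENT(IN) :: cmat
      TYPE(cp_fm_type), INTENT(IN) :: re_part, im_part

      ! both targets must share an equivalent matrix structure with 'cmat'
      CALL cp_cfm_to_fm(cmat, mtargetr=re_part, mtargeti=im_part)
   END SUBROUTINE example_cfm_split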

! **************************************************************************************************
!> \brief Construct a complex full matrix by taking its real and imaginary parts from
!>        two separate real-valued full matrices.
!> \param msourcer (optional) real part of the complex matrix (defaults to 0.0)
!> \param msourcei (optional) imaginary part of the complex matrix (defaults to 0.0)
!> \param mtarget resulting complex matrix
!> \note
!>      Matrix structures are assumed to be equivalent.
! **************************************************************************************************
   SUBROUTINE cp_fm_to_cfm(msourcer, msourcei, mtarget)
      TYPE(cp_fm_type), INTENT(IN), OPTIONAL :: msourcer, msourcei
      TYPE(cp_cfm_type), INTENT(IN) :: mtarget

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_fm_to_cfm'

      COMPLEX(kind=dp), DIMENSION(:, :), POINTER :: zmat
      INTEGER :: handle, mode
      REAL(kind=dp), DIMENSION(:, :), POINTER :: imat, rmat

      CALL timeset(routineN, handle)

      ! 'mode' encodes which sources are present: bit 1 = real part, bit 2 = imaginary part
      mode = 0
      zmat => mtarget%local_data
      IF (PRESENT(msourcer)) THEN
         rmat => msourcer%local_data
         IF ((.NOT. cp_fm_struct_equivalent(msourcer%matrix_struct, mtarget%matrix_struct)) .OR. &
             (SIZE(rmat, 1) /= SIZE(zmat, 1)) .OR. &
             (SIZE(rmat, 2) /= SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of msourcer differs from mtarget")
         END IF
         mode = mode + 1
      ELSE
         NULLIFY (rmat)
      END IF
      IF (PRESENT(msourcei)) THEN
         imat => msourcei%local_data
         IF ((.NOT. cp_fm_struct_equivalent(msourcei%matrix_struct, mtarget%matrix_struct)) .OR. &
             (SIZE(imat, 1) /= SIZE(zmat, 1)) .OR. &
             (SIZE(imat, 2) /= SIZE(zmat, 2))) THEN
            CPABORT("size of local_data of msourcei differs from mtarget")
         END IF
         mode = mode + 2
      ELSE
         NULLIFY (imat)
      END IF
      ! copy local data
      SELECT CASE (mode)
      CASE (0)
         zmat(:, :) = z_zero
      CASE (1)
         zmat(:, :) = cmplx(rmat(:, :), 0.0_dp, kind=dp)
      CASE (2)
         zmat(:, :) = cmplx(0.0_dp, imat(:, :), kind=dp)
      CASE (3)
         zmat(:, :) = cmplx(rmat(:, :), imat(:, :), kind=dp)
      END SELECT

      CALL timestop(handle)

   END SUBROUTINE cp_fm_to_cfm
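
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical sketch for cp_fm_to_cfm: build a purely real complex
!>        matrix from one real-valued matrix; the imaginary part defaults to zero.
! **************************************************************************************************
   SUBROUTINE example_fm_to_cfm(re_part, cmat)
      TYPE(cp_fm_type), INTENT(IN) :: re_part
      TYPE(cp_cfm_type), INTENT(IN) :: cmat

      ! mode 1 of the SELECT CASE above: cmat = cmplx(re_part, 0)
      CALL cp_fm_to_cfm(msourcer=re_part, mtarget=cmat)
   END SUBROUTINE example_fm_to_cfm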

! **************************************************************************************************
!> \brief Initiate the copy operation: get distribution data, post MPI isends and irecvs.
!> \param source input complex-valued fm matrix
!> \param destination output complex-valued fm matrix
!> \param para_env parallel environment corresponding to the BLACS env that covers all parts
!>        of the input and output matrices
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_start_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_start_copy_general(source, destination, para_env, info)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env
      TYPE(copy_cfm_info_type), INTENT(out) :: info

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_start_copy_general'

      INTEGER :: dest_p_i, dest_q_j, global_rank, global_size, handle, i, j, k, mpi_rank, &
                 ncol_block_dest, ncol_block_src, ncol_local_recv, ncol_local_send, ncols, &
                 nrow_block_dest, nrow_block_src, nrow_local_recv, nrow_local_send, nrows, p, q, &
                 recv_rank, recv_size, send_rank, send_size
      INTEGER, ALLOCATABLE, DIMENSION(:) :: all_ranks, dest2global, dest_p, dest_q, &
                                            recv_count, send_count, send_disp, &
                                            source2global, src_p, src_q
      INTEGER, ALLOCATABLE, DIMENSION(:, :) :: dest_blacs2mpi
      INTEGER, DIMENSION(2) :: dest_block, dest_block_tmp, dest_num_pe, &
                               src_block, src_block_tmp, src_num_pe
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices, &
                                        send_col_indices, send_row_indices
      TYPE(cp_fm_struct_type), POINTER :: recv_dist, send_dist
      TYPE(mp_request_type), DIMENSION(6) :: recv_req, send_req

      CALL timeset(routineN, handle)

      IF (.NOT. cp2k_is_parallel) THEN
         ! Just copy all of the matrix data into a 'send buffer', to be unpacked later
         nrow_local_send = SIZE(source%local_data, 1)
         ncol_local_send = SIZE(source%local_data, 2)
         ALLOCATE (info%send_buf(nrow_local_send*ncol_local_send))
         k = 0
         DO j = 1, ncol_local_send
            DO i = 1, nrow_local_send
               k = k + 1
               info%send_buf(k) = source%local_data(i, j)
            END DO
         END DO
      ELSE
         NULLIFY (recv_dist, send_dist)
         NULLIFY (recv_col_indices, recv_row_indices, send_col_indices, send_row_indices)

         ! The 'global' communicator contains both the source and destination decompositions
         global_size = para_env%num_pe
         global_rank = para_env%mepos

         ! The source/send and destination/recv decompositions may only exist
         ! on a subset of the processes involved in the communication.
         ! Check if the source and/or destination arguments are .not. ASSOCIATED():
         ! if so, skip the send / recv parts (since these processes do not participate in the sending/receiving distribution)
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            recv_dist => destination%matrix_struct
            recv_rank = recv_dist%para_env%mepos
         ELSE
            recv_rank = mp_proc_null
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            send_dist => source%matrix_struct
            send_rank = send_dist%para_env%mepos
         ELSE
            send_rank = mp_proc_null
         END IF

         ! Map the rank in the source/dest communicator to the global rank
         ALLOCATE (all_ranks(0:global_size - 1))

         CALL para_env%allgather(send_rank, all_ranks)
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            ALLOCATE (source2global(0:count(all_ranks /= mp_proc_null) - 1))
            DO i = 0, global_size - 1
               IF (all_ranks(i) /= mp_proc_null) THEN
                  source2global(all_ranks(i)) = i
               END IF
            END DO
         END IF

         CALL para_env%allgather(recv_rank, all_ranks)
         IF (ASSOCIATED(source%matrix_struct)) THEN
            ALLOCATE (dest2global(0:count(all_ranks /= mp_proc_null) - 1))
            DO i = 0, global_size - 1
               IF (all_ranks(i) /= mp_proc_null) THEN
                  dest2global(all_ranks(i)) = i
               END IF
            END DO
         END IF
         DEALLOCATE (all_ranks)

         ! Some data from the two decompositions will be needed by all processes in the global group:
         ! process grid shape, block size, and the BLACS-to-MPI mapping

         ! The global root process will receive the data (from the root process in each decomposition)
         send_req(:) = mp_request_null
         IF (global_rank == 0) THEN
            recv_req(:) = mp_request_null
            CALL para_env%irecv(src_block, mp_any_source, recv_req(1), tag=src_tag)
            CALL para_env%irecv(dest_block, mp_any_source, recv_req(2), tag=dest_tag)
            CALL para_env%irecv(src_num_pe, mp_any_source, recv_req(3), tag=src_tag)
            CALL para_env%irecv(dest_num_pe, mp_any_source, recv_req(4), tag=dest_tag)
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            IF ((send_rank == 0)) THEN
               ! need to use separate buffers here in case this is actually global rank 0
               src_block_tmp = [send_dist%nrow_block, send_dist%ncol_block]
               CALL para_env%isend(src_block_tmp, 0, send_req(1), tag=src_tag)
               CALL para_env%isend(send_dist%context%num_pe, 0, send_req(2), tag=src_tag)
            END IF
         END IF

         IF (ASSOCIATED(destination%matrix_struct)) THEN
            IF ((recv_rank == 0)) THEN
               dest_block_tmp = [recv_dist%nrow_block, recv_dist%ncol_block]
               CALL para_env%isend(dest_block_tmp, 0, send_req(3), tag=dest_tag)
               CALL para_env%isend(recv_dist%context%num_pe, 0, send_req(4), tag=dest_tag)
            END IF
         END IF

         IF (global_rank == 0) THEN
            CALL mp_waitall(recv_req(1:4))
            ! Now we know the process decomposition, we can allocate the arrays to hold the blacs2mpi mapping
            ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
                      dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
            CALL para_env%irecv(info%src_blacs2mpi, mp_any_source, recv_req(5), tag=src_tag)
            CALL para_env%irecv(dest_blacs2mpi, mp_any_source, recv_req(6), tag=dest_tag)
         END IF

         IF (ASSOCIATED(source%matrix_struct)) THEN
            IF ((send_rank == 0)) THEN
               CALL para_env%isend(send_dist%context%blacs2mpi(:, :), 0, send_req(5), tag=src_tag)
            END IF
         END IF

         IF (ASSOCIATED(destination%matrix_struct)) THEN
            IF ((recv_rank == 0)) THEN
               CALL para_env%isend(recv_dist%context%blacs2mpi(:, :), 0, send_req(6), tag=dest_tag)
            END IF
         END IF

         IF (global_rank == 0) THEN
            CALL mp_waitall(recv_req(5:6))
         END IF

         ! Finally, broadcast the data to all processes in the global communicator
         CALL para_env%bcast(src_block, 0)
         CALL para_env%bcast(dest_block, 0)
         CALL para_env%bcast(src_num_pe, 0)
         CALL para_env%bcast(dest_num_pe, 0)
         info%src_num_pe(1:2) = src_num_pe(1:2)
         info%nblock_src(1:2) = src_block(1:2)
         IF (global_rank /= 0) THEN
            ALLOCATE (info%src_blacs2mpi(0:src_num_pe(1) - 1, 0:src_num_pe(2) - 1), &
                      dest_blacs2mpi(0:dest_num_pe(1) - 1, 0:dest_num_pe(2) - 1))
         END IF
         CALL para_env%bcast(info%src_blacs2mpi, 0)
         CALL para_env%bcast(dest_blacs2mpi, 0)

         recv_size = dest_num_pe(1)*dest_num_pe(2)
         send_size = src_num_pe(1)*src_num_pe(2)
         info%send_size = send_size
         CALL mp_waitall(send_req(:))

         ! Setup is now complete, we can start the actual communication here.
         ! The order implemented here is:
         !  DEST_1
         !    compute recv sizes
         !    call irecv
         !  SRC_1
         !    compute send sizes
         !    pack send buffers
         !    call isend
         !  DEST_2
         !    wait for the recvs and unpack buffers (this part eventually will go into another routine to allow comms to run concurrently)
         !  SRC_2
         !    wait for the sends

         ! DEST_1
         IF (ASSOCIATED(destination%matrix_struct)) THEN
            CALL cp_fm_struct_get(recv_dist, row_indices=recv_row_indices, &
                                  col_indices=recv_col_indices)
            info%recv_col_indices => recv_col_indices
            info%recv_row_indices => recv_row_indices
            nrow_block_src = src_block(1)
            ncol_block_src = src_block(2)
            ALLOCATE (recv_count(0:send_size - 1), info%recv_disp(0:send_size - 1), info%recv_request(0:send_size - 1))

            ! Determine the recv counts, allocate the receive buffers, call mpi_irecv for all the non-zero sized receives
            nrow_local_recv = recv_dist%nrow_locals(recv_dist%context%mepos(1))
            ncol_local_recv = recv_dist%ncol_locals(recv_dist%context%mepos(2))
            info%nlocal_recv(1) = nrow_local_recv
            info%nlocal_recv(2) = ncol_local_recv
            ! Initialise src_p, src_q arrays (sized using number of rows/cols in the receiving distribution)
            ALLOCATE (src_p(nrow_local_recv), src_q(ncol_local_recv))
            DO i = 1, nrow_local_recv
               ! For each local row we will receive, we look up its global row (in recv_row_indices),
               ! then work out which row block it comes from, and which process row that row block comes from.
               src_p(i) = mod(((recv_row_indices(i) - 1)/nrow_block_src), src_num_pe(1))
            END DO
            DO j = 1, ncol_local_recv
               ! Similarly for the columns
               src_q(j) = mod(((recv_col_indices(j) - 1)/ncol_block_src), src_num_pe(2))
            END DO
            ! src_p/q now contain the process row/column ID that will send data to that row/column

            DO q = 0, src_num_pe(2) - 1
               ncols = count(src_q == q)
               DO p = 0, src_num_pe(1) - 1
                  nrows = count(src_p == p)
                  ! Use the send_dist here as we are looking up the processes where the data comes from
                  recv_count(info%src_blacs2mpi(p, q)) = nrows*ncols
               END DO
            END DO
            DEALLOCATE (src_p, src_q)

            ! Use one long buffer (and displacements into that buffer);
            ! this prevents the need for a rectangular array where not all elements will be populated
            ALLOCATE (info%recv_buf(sum(recv_count(:))))
            info%recv_disp(0) = 0
            DO i = 1, send_size - 1
               info%recv_disp(i) = info%recv_disp(i - 1) + recv_count(i - 1)
            END DO

            ! Issue receive calls on ranks which expect data
            DO k = 0, send_size - 1
               IF (recv_count(k) > 0) THEN
                  CALL para_env%irecv(info%recv_buf(info%recv_disp(k) + 1:info%recv_disp(k) + recv_count(k)), &
                                      source2global(k), info%recv_request(k))
               ELSE
                  info%recv_request(k) = mp_request_null
               END IF
            END DO
            DEALLOCATE (source2global)
         END IF ! ASSOCIATED(destination)

         ! SRC_1
         IF (ASSOCIATED(source%matrix_struct)) THEN
            CALL cp_fm_struct_get(send_dist, row_indices=send_row_indices, &
                                  col_indices=send_col_indices)
            nrow_block_dest = dest_block(1)
            ncol_block_dest = dest_block(2)
            ALLOCATE (send_count(0:recv_size - 1), send_disp(0:recv_size - 1), info%send_request(0:recv_size - 1))

            ! Determine the send counts, allocate the send buffers
            nrow_local_send = send_dist%nrow_locals(send_dist%context%mepos(1))
            ncol_local_send = send_dist%ncol_locals(send_dist%context%mepos(2))

            ! Initialise dest_p, dest_q arrays (sized nrow_local, ncol_local),
            ! i.e. number of rows,cols in the sending distribution
            ALLOCATE (dest_p(nrow_local_send), dest_q(ncol_local_send))

            DO i = 1, nrow_local_send
               ! Use the send_dist%row_indices() here (we are looping over the local rows we will send)
               dest_p(i) = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
            END DO
            DO j = 1, ncol_local_send
               dest_q(j) = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
            END DO
            ! dest_p/q now contain the process row/column ID that will receive data from that row/column

            DO q = 0, dest_num_pe(2) - 1
               ncols = count(dest_q == q)
               DO p = 0, dest_num_pe(1) - 1
                  nrows = count(dest_p == p)
                  send_count(dest_blacs2mpi(p, q)) = nrows*ncols
               END DO
            END DO
            DEALLOCATE (dest_p, dest_q)

            ! Allocate the send buffer using send_count -- and calculate the offset into the buffer for each process
            ALLOCATE (info%send_buf(sum(send_count(:))))
            send_disp(0) = 0
            DO k = 1, recv_size - 1
               send_disp(k) = send_disp(k - 1) + send_count(k - 1)
            END DO

            ! Loop over the source matrix, pack the send buffers
            send_count(:) = 0
            DO j = 1, ncol_local_send
               ! Use send_col_indices and row_indices here, as we are looking up the global row/column number of local rows.
               dest_q_j = mod(((send_col_indices(j) - 1)/ncol_block_dest), dest_num_pe(2))
               DO i = 1, nrow_local_send
                  dest_p_i = mod(((send_row_indices(i) - 1)/nrow_block_dest), dest_num_pe(1))
                  mpi_rank = dest_blacs2mpi(dest_p_i, dest_q_j)
                  send_count(mpi_rank) = send_count(mpi_rank) + 1
                  info%send_buf(send_disp(mpi_rank) + send_count(mpi_rank)) = source%local_data(i, j)
               END DO
            END DO

            ! For each non-zero send_count, call mpi_isend
            DO k = 0, recv_size - 1
               IF (send_count(k) > 0) THEN
                  CALL para_env%isend(info%send_buf(send_disp(k) + 1:send_disp(k) + send_count(k)), &
                                      dest2global(k), info%send_request(k))
               ELSE
                  info%send_request(k) = mp_request_null
               END IF
            END DO
            DEALLOCATE (send_count, send_disp, dest2global)
         END IF ! ASSOCIATED(source)
         DEALLOCATE (dest_blacs2mpi)

      END IF ! IF (.NOT. cp2k_is_parallel)

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_start_copy_general

! **************************************************************************************************
!> \brief Complete the copy operation: wait for comms, unpack, clean up MPI state.
!> \param destination output cfm matrix
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_finish_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_finish_copy_general(destination, info)
      TYPE(cp_cfm_type), INTENT(IN) :: destination
      TYPE(copy_cfm_info_type), INTENT(inout) :: info

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_finish_copy_general'

      INTEGER :: handle, i, j, k, mpi_rank, ni, nj, &
                 src_q_j
      INTEGER, ALLOCATABLE, DIMENSION(:) :: recv_count, src_p_i
      INTEGER, DIMENSION(:), POINTER :: recv_col_indices, recv_row_indices

      CALL timeset(routineN, handle)

      IF (.NOT. cp2k_is_parallel) THEN
         ! Now unpack the data from the 'send buffer'
         k = 0
         DO j = 1, SIZE(destination%local_data, 2)
            DO i = 1, SIZE(destination%local_data, 1)
               k = k + 1
               destination%local_data(i, j) = info%send_buf(k)
            END DO
         END DO
         DEALLOCATE (info%send_buf)
      ELSE
         ! Set up local variables ...
         recv_col_indices => info%recv_col_indices
         recv_row_indices => info%recv_row_indices

         ! ... use the local variables to do the work
         ! DEST_2
         CALL mp_waitall(info%recv_request(:))

         nj = info%nlocal_recv(2)
         ni = info%nlocal_recv(1)
         ALLOCATE (recv_count(0:info%send_size - 1), src_p_i(ni))
         ! Loop over the destination matrix, filling it in with data from the recv buffers
         ! (here the block sizes, num_pes refer to the distribution of the source matrix)
         recv_count(:) = 0
         DO i = 1, ni
            src_p_i(i) = mod(((recv_row_indices(i) - 1)/info%nblock_src(1)), info%src_num_pe(1))
         END DO

         DO j = 1, nj
            src_q_j = mod(((recv_col_indices(j) - 1)/info%nblock_src(2)), info%src_num_pe(2))
            DO i = 1, ni
               mpi_rank = info%src_blacs2mpi(src_p_i(i), src_q_j)
               recv_count(mpi_rank) = recv_count(mpi_rank) + 1
               destination%local_data(i, j) = info%recv_buf(info%recv_disp(mpi_rank) + recv_count(mpi_rank))
            END DO
         END DO

         DEALLOCATE (recv_count, src_p_i)
         ! Invalidate the stored state
         NULLIFY (info%recv_col_indices, info%recv_row_indices)
         DEALLOCATE (info%recv_disp, info%recv_request, info%recv_buf, info%src_blacs2mpi)
      END IF

      CALL timestop(handle)

   END SUBROUTINE cp_cfm_finish_copy_general

! **************************************************************************************************
!> \brief Complete the copy operation: wait for comms, clean up MPI state.
!> \param info all of the data that will be needed to complete the copy operation
!> \note a slightly modified version of the subroutine cp_fm_cleanup_copy_general() that uses
!>       allocatable arrays instead of pointers wherever possible.
! **************************************************************************************************
   SUBROUTINE cp_cfm_cleanup_copy_general(info)
      TYPE(copy_cfm_info_type), INTENT(inout) :: info

      CHARACTER(len=*), PARAMETER :: routineN = 'cp_cfm_cleanup_copy_general'

      INTEGER :: handle

      CALL timeset(routineN, handle)

      IF (cp2k_is_parallel) THEN
         ! SRC_2
         ! If this process is also in the destination decomposition, this deallocate
         ! was already done in cp_cfm_finish_copy_general
         IF (ALLOCATED(info%src_blacs2mpi)) DEALLOCATE (info%src_blacs2mpi)
         CALL mp_waitall(info%send_request(:))
         DEALLOCATE (info%send_request, info%send_buf)
      END IF

      CALL timestop(handle)
   END SUBROUTINE cp_cfm_cleanup_copy_general
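
! **************************************************************************************************
!> \brief [Editor's example] Hypothetical sketch of the general-copy protocol: the three calls
!>        are issued in this order on every process of 'para_env', which has to span the
!>        BLACS environments of both matrices.
! **************************************************************************************************
   SUBROUTINE example_cfm_copy_general(source, destination, para_env)
      TYPE(cp_cfm_type), INTENT(IN) :: source, destination
      TYPE(mp_para_env_type), INTENT(IN), POINTER :: para_env

      TYPE(copy_cfm_info_type) :: info

      CALL cp_cfm_start_copy_general(source, destination, para_env, info) ! post isends/irecvs
      ! ... independent work may overlap with the communication here ...
      CALL cp_cfm_finish_copy_general(destination, info) ! wait for recvs, unpack the buffers
      CALL cp_cfm_cleanup_copy_general(info) ! wait for sends, free the remaining buffers
   END SUBROUTINE example_cfm_copy_general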

END MODULE cp_cfm_types