arnoldi_vector.F
!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                               !
!   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                    !
!                                                                                                    !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                        !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief operations for skinny matrices/vectors expressed in dbcsr form
!> \par History
!>      2014.10 created [Florian Schiffmann]
!>      2023.12 Removed support for single-precision [Ole Schuett]
!>      2024.12 Removed support for complex input matrices [Ole Schuett]
!> \author Florian Schiffmann
! **************************************************************************************************
MODULE arnoldi_vector
   USE cp_dbcsr_api, ONLY: &
      dbcsr_copy, dbcsr_create, dbcsr_distribution_get, dbcsr_distribution_new, &
      dbcsr_distribution_release, dbcsr_distribution_type, dbcsr_get_data_p, dbcsr_get_info, &
      dbcsr_iterator_blocks_left, dbcsr_iterator_next_block, dbcsr_iterator_start, &
      dbcsr_iterator_stop, dbcsr_iterator_type, dbcsr_release, dbcsr_reserve_all_blocks, &
      dbcsr_set, dbcsr_type, dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, dbcsr_type_symmetric
   USE kinds, ONLY: dp, &
                    real_8
   USE message_passing, ONLY: mp_comm_type
#include "../base/base_uses.f90"

!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads

   IMPLICIT NONE

   PRIVATE

   CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_vector'

! **************************************************************************************************
!> \brief Types needed for the hashtable.
! **************************************************************************************************
   TYPE ele_type
      INTEGER :: c = 0
      INTEGER :: p = 0
   END TYPE ele_type

   TYPE hash_table_type
      TYPE(ele_type), DIMENSION(:), POINTER :: table => null()
      INTEGER :: nele = 0
      INTEGER :: nmax = 0
      INTEGER :: prime = 0
   END TYPE hash_table_type

! **************************************************************************************************
!> \brief Types needed for fast access to distributed dbcsr vectors.
! **************************************************************************************************
   TYPE block_ptr
      REAL(real_8), DIMENSION(:, :), POINTER :: ptr => null()
      INTEGER :: assigned_thread = -1
   END TYPE block_ptr

   TYPE fast_vec_access_type
      TYPE(hash_table_type) :: hash_table = hash_table_type()
      TYPE(block_ptr), DIMENSION(:), ALLOCATABLE :: blk_map
   END TYPE
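
! The fast_vec_access_type gives O(1) access to a vector block by its block-row (or
! block-col) index: the hash table maps the block index to a slot in blk_map, which
! holds a pointer into the dbcsr block data. A minimal lookup sketch (my annotation,
! with illustrative variable names):
!
!    iblock = hash_table_get(fast_vec%hash_table, row)   ! 0 if the block is absent
!    IF (ASSOCIATED(fast_vec%blk_map(iblock)%ptr)) THEN
!       block => fast_vec%blk_map(iblock)%ptr            ! direct pointer to the block data
!    END IF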

   PUBLIC :: create_col_vec_from_matrix, &
             create_row_vec_from_matrix, &
             create_replicated_col_vec_from_matrix, &
             create_replicated_row_vec_from_matrix, &
             dbcsr_matrix_colvec_multiply

CONTAINS

! **************************************************************************************************
!> \brief creates a dbcsr col vector like object which lives on proc_col 0
!>        and has the same row dist as the template matrix
!>        the returned matrix is fully allocated and all blocks are set to 0
!>        this is not a sparse object (and must never be)
!> \param dbcsr_vec the vector object to create; must be allocated but not initialized
!> \param matrix a dbcsr matrix used as template
!> \param ncol number of vectors in the dbcsr object (1 for vector, n for skinny matrix)
! **************************************************************************************************
   SUBROUTINE create_col_vec_from_matrix(dbcsr_vec, matrix, ncol)
      TYPE(dbcsr_type) :: dbcsr_vec, matrix
      INTEGER :: ncol

      CHARACTER(LEN=*), PARAMETER :: routineN = 'create_col_vec_from_matrix'

      INTEGER :: handle, npcols
      INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
                                        row_dist
      TYPE(dbcsr_distribution_type) :: dist, dist_col_vec

      CALL timeset(routineN, handle)

      CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, distribution=dist)
      CALL dbcsr_distribution_get(dist, npcols=npcols, row_dist=row_dist)

      ALLOCATE (col_dist(1), col_blk_size(1))
      col_dist(1) = 0
      col_blk_size(1) = ncol
      CALL dbcsr_distribution_new(dist_col_vec, template=dist, row_dist=row_dist, col_dist=col_dist)

      CALL dbcsr_create(dbcsr_vec, "D", dist_col_vec, matrix_type=dbcsr_type_no_symmetry, &
                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
      CALL dbcsr_reserve_all_blocks(dbcsr_vec)

      CALL dbcsr_distribution_release(dist_col_vec)
      DEALLOCATE (col_dist, col_blk_size)
      CALL timestop(handle)

   END SUBROUTINE create_col_vec_from_matrix
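
! A minimal usage sketch for the creation routines (my annotation; "mat" is an already
! initialized dbcsr matrix and all names are illustrative). The vector inherits its row
! distribution and row block sizes from the template matrix:
!
!    TYPE(dbcsr_type) :: mat, x
!    CALL create_col_vec_from_matrix(x, mat, 1)   ! column vector with a single column
!    CALL dbcsr_set(x, 1.0_real_8)                ! fully allocated, so this touches all blocks
!    ! ... use x ...
!    CALL dbcsr_release(x)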

! **************************************************************************************************
!> \brief creates a dbcsr row vector like object which lives on proc_row 0
!>        and has the same col dist as the template matrix
!>        the returned matrix is fully allocated and all blocks are set to 0
!>        this is not a sparse object (and must never be)
!> \param dbcsr_vec the vector object to create; must be allocated but not initialized
!> \param matrix a dbcsr matrix used as template
!> \param nrow number of vectors in the dbcsr object (1 for vector, n for skinny matrix)
! **************************************************************************************************
   SUBROUTINE create_row_vec_from_matrix(dbcsr_vec, matrix, nrow)
      TYPE(dbcsr_type) :: dbcsr_vec, matrix
      INTEGER :: nrow

      CHARACTER(LEN=*), PARAMETER :: routineN = 'create_row_vec_from_matrix'

      INTEGER :: handle, nprows
      INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
                                        row_dist
      TYPE(dbcsr_distribution_type) :: dist, dist_row_vec

      CALL timeset(routineN, handle)

      CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, distribution=dist)
      CALL dbcsr_distribution_get(dist, nprows=nprows, col_dist=col_dist)

      ALLOCATE (row_dist(1), row_blk_size(1))
      row_dist(1) = 0
      row_blk_size(1) = nrow
      CALL dbcsr_distribution_new(dist_row_vec, template=dist, row_dist=row_dist, col_dist=col_dist)

      CALL dbcsr_create(dbcsr_vec, "D", dist_row_vec, matrix_type=dbcsr_type_no_symmetry, &
                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
      CALL dbcsr_reserve_all_blocks(dbcsr_vec)

      CALL dbcsr_distribution_release(dist_row_vec)
      DEALLOCATE (row_dist, row_blk_size)
      CALL timestop(handle)

   END SUBROUTINE create_row_vec_from_matrix

! **************************************************************************************************
!> \brief creates a col vector like object whose blocks can be replicated
!>        along the processor row and has the same row dist as the template matrix
!>        the returned matrix is fully allocated and all blocks are set to 0
!>        this is not a sparse object (and must never be)
!> \param dbcsr_vec the vector object to create; must be allocated but not initialized
!> \param matrix a dbcsr matrix used as template
!> \param ncol number of vectors in the dbcsr object (1 for vector, n for skinny matrix)
! **************************************************************************************************
   SUBROUTINE create_replicated_col_vec_from_matrix(dbcsr_vec, matrix, ncol)
      TYPE(dbcsr_type) :: dbcsr_vec, matrix
      INTEGER :: ncol

      CHARACTER(LEN=*), PARAMETER :: routineN = 'create_replicated_col_vec_from_matrix'

      INTEGER :: handle, i, npcols
      INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
                                        row_dist
      TYPE(dbcsr_distribution_type) :: dist, dist_col_vec

      CALL timeset(routineN, handle)

      CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, distribution=dist)
      CALL dbcsr_distribution_get(dist, npcols=npcols, row_dist=row_dist)

      ALLOCATE (col_dist(npcols), col_blk_size(npcols))
      col_blk_size(:) = ncol
      DO i = 0, npcols - 1
         col_dist(i + 1) = i
      END DO
      CALL dbcsr_distribution_new(dist_col_vec, template=dist, row_dist=row_dist, col_dist=col_dist)

      CALL dbcsr_create(dbcsr_vec, "D", dist_col_vec, matrix_type=dbcsr_type_no_symmetry, &
                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
      CALL dbcsr_reserve_all_blocks(dbcsr_vec)

      CALL dbcsr_distribution_release(dist_col_vec)
      DEALLOCATE (col_dist, col_blk_size)
      CALL timestop(handle)

   END SUBROUTINE create_replicated_col_vec_from_matrix

! **************************************************************************************************
!> \brief creates a row vector like object whose blocks can be replicated
!>        along the processor col and has the same col dist as the template matrix
!>        the returned matrix is fully allocated and all blocks are set to 0
!>        this is not a sparse object (and must never be)
!> \param dbcsr_vec the vector object to create; must be allocated but not initialized
!> \param matrix a dbcsr matrix used as template
!> \param nrow number of vectors in the dbcsr object (1 for vector, n for skinny matrix)
! **************************************************************************************************
   SUBROUTINE create_replicated_row_vec_from_matrix(dbcsr_vec, matrix, nrow)
      TYPE(dbcsr_type) :: dbcsr_vec, matrix
      INTEGER :: nrow

      CHARACTER(LEN=*), PARAMETER :: routineN = 'create_replicated_row_vec_from_matrix'

      INTEGER :: handle, i, nprows
      INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
                                        row_dist
      TYPE(dbcsr_distribution_type) :: dist, dist_row_vec

      CALL timeset(routineN, handle)

      CALL dbcsr_get_info(matrix, distribution=dist, col_blk_size=col_blk_size)
      CALL dbcsr_distribution_get(dist, nprows=nprows, col_dist=col_dist)

      ALLOCATE (row_dist(nprows), row_blk_size(nprows))
      row_blk_size(:) = nrow
      DO i = 0, nprows - 1
         row_dist(i + 1) = i
      END DO
      CALL dbcsr_distribution_new(dist_row_vec, template=dist, row_dist=row_dist, col_dist=col_dist)

      CALL dbcsr_create(dbcsr_vec, "D", dist_row_vec, dbcsr_type_no_symmetry, &
                        row_blk_size=row_blk_size, col_blk_size=col_blk_size)
      CALL dbcsr_reserve_all_blocks(dbcsr_vec)

      CALL dbcsr_distribution_release(dist_row_vec)
      DEALLOCATE (row_dist, row_blk_size)
      CALL timestop(handle)

   END SUBROUTINE create_replicated_row_vec_from_matrix

! **************************************************************************************************
!> \brief given a column vector, prepare the fast_vec_access container
!> \param vec ...
!> \param fast_vec_access ...
! **************************************************************************************************
   SUBROUTINE create_fast_col_vec_access(vec, fast_vec_access)
      TYPE(dbcsr_type) :: vec
      TYPE(fast_vec_access_type) :: fast_vec_access

      CHARACTER(LEN=*), PARAMETER :: routineN = 'create_fast_col_vec_access'

      INTEGER :: col, handle, iblock, nblk_local, &
                 nthreads, row
      REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_bl
      TYPE(dbcsr_iterator_type) :: iter

      CALL timeset(routineN, handle)

      ! figure out the number of threads
      nthreads = 1
!$OMP PARALLEL DEFAULT(NONE) SHARED(nthreads)
!$OMP MASTER
!$ nthreads = OMP_GET_NUM_THREADS()
!$OMP END MASTER
!$OMP END PARALLEL

      CALL dbcsr_get_info(matrix=vec, nblkrows_local=nblk_local)
      ! 4 times makes sure the table is big enough to limit collisions.
      CALL hash_table_create(fast_vec_access%hash_table, 4*nblk_local)
      ! include zero for effective dealing with values not in the hash table (will return 0)
      ALLOCATE (fast_vec_access%blk_map(0:nblk_local))

      ! sanity check: a column vector must have a single block column
      CALL dbcsr_get_info(matrix=vec, nblkcols_local=col)
      IF (col .GT. 1) CPABORT("BUG")

      ! go through the blocks of the vector
      iblock = 0
      CALL dbcsr_iterator_start(iter, vec)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_bl)
         iblock = iblock + 1
         CALL hash_table_add(fast_vec_access%hash_table, row, iblock)
         fast_vec_access%blk_map(iblock)%ptr => vec_bl
         fast_vec_access%blk_map(iblock)%assigned_thread = mod(iblock, nthreads)
      END DO
      CALL dbcsr_iterator_stop(iter)
      CALL timestop(handle)

   END SUBROUTINE create_fast_col_vec_access

! **************************************************************************************************
!> \brief given a row vector, prepare the fast_vec_access container
!> \param vec ...
!> \param fast_vec_access ...
! **************************************************************************************************
   SUBROUTINE create_fast_row_vec_access(vec, fast_vec_access)
      TYPE(dbcsr_type) :: vec
      TYPE(fast_vec_access_type) :: fast_vec_access

      CHARACTER(LEN=*), PARAMETER :: routineN = 'create_fast_row_vec_access'

      INTEGER :: col, handle, iblock, nblk_local, &
                 nthreads, row
      REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_bl
      TYPE(dbcsr_iterator_type) :: iter

      CALL timeset(routineN, handle)

      ! figure out the number of threads
      nthreads = 1
!$OMP PARALLEL DEFAULT(NONE) SHARED(nthreads)
!$OMP MASTER
!$ nthreads = OMP_GET_NUM_THREADS()
!$OMP END MASTER
!$OMP END PARALLEL

      CALL dbcsr_get_info(matrix=vec, nblkcols_local=nblk_local)
      ! 4 times makes sure the table is big enough to limit collisions.
      CALL hash_table_create(fast_vec_access%hash_table, 4*nblk_local)
      ! include zero for effective dealing with values not in the hash table (will return 0)
      ALLOCATE (fast_vec_access%blk_map(0:nblk_local))

      ! sanity check: a row vector must have a single block row
      CALL dbcsr_get_info(matrix=vec, nblkrows_local=row)
      IF (row .GT. 1) CPABORT("BUG")

      ! go through the blocks of the vector
      iblock = 0
      CALL dbcsr_iterator_start(iter, vec)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_bl)
         iblock = iblock + 1
         CALL hash_table_add(fast_vec_access%hash_table, col, iblock)
         fast_vec_access%blk_map(iblock)%ptr => vec_bl
         fast_vec_access%blk_map(iblock)%assigned_thread = mod(iblock, nthreads)
      END DO
      CALL dbcsr_iterator_stop(iter)

      CALL timestop(handle)

   END SUBROUTINE create_fast_row_vec_access

! **************************************************************************************************
!> \brief release all memory associated with the fast_vec_access type
!> \param fast_vec_access ...
! **************************************************************************************************
   SUBROUTINE release_fast_vec_access(fast_vec_access)
      TYPE(fast_vec_access_type) :: fast_vec_access

      CHARACTER(LEN=*), PARAMETER :: routineN = 'release_fast_vec_access'

      INTEGER :: handle

      CALL timeset(routineN, handle)

      CALL hash_table_release(fast_vec_access%hash_table)
      DEALLOCATE (fast_vec_access%blk_map)

      CALL timestop(handle)

   END SUBROUTINE release_fast_vec_access

! --------------------------------------------------------------------------------------------------
! Beginning of hashtable.
! This file can be 'INCLUDE'ed verbatim in various places, where it needs to be
! part of the module to guarantee inlining.
! It hashes (c,p) pairs, where p is assumed to be > 0;
! on lookup, 0 is returned as a flag for 'not present'.
!
! **************************************************************************************************
!> \brief finds a prime equal to or larger than i, needed at creation
!> \param i ...
!> \return ...
! **************************************************************************************************
   FUNCTION matching_prime(i) RESULT(res)
      INTEGER, INTENT(IN) :: i
      INTEGER :: res

      INTEGER :: j

      res = i
      j = 0
      DO WHILE (j < res)
         DO j = 2, res - 1
            IF (mod(res, j) == 0) THEN
               res = res + 1
               EXIT
            END IF
         END DO
      END DO
   END FUNCTION
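
! A short worked example of matching_prime (my annotation): for i = 15, trial division
! finds 15 = 3*5 and bumps res to 16, then 16 = 2*8 bumps res to 17; for 17 the inner
! loop runs to completion, leaving j = res, so the outer DO WHILE exits and the function
! returns 17. Hence matching_prime(15) = 17, which is exactly how hash_table_create
! derives its prime from nmax = 2**j - 1.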

! **************************************************************************************************
!> \brief create a hash_table of given initial size.
!>        the hash table will expand as needed (but this requires rehashing)
!> \param hash_table ...
!> \param table_size ...
! **************************************************************************************************
   SUBROUTINE hash_table_create(hash_table, table_size)
      TYPE(hash_table_type) :: hash_table
      INTEGER, INTENT(IN) :: table_size

      INTEGER :: j

      ! guarantee a minimal hash table size (8), so that expansion works

      j = 3
      DO WHILE (2**j - 1 < table_size)
         j = j + 1
      END DO
      hash_table%nmax = 2**j - 1
      hash_table%prime = matching_prime(hash_table%nmax)
      hash_table%nele = 0
      ALLOCATE (hash_table%table(0:hash_table%nmax))
   END SUBROUTINE hash_table_create
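
! Sizing example (my annotation): table_size = 10 gives j = 4, so nmax = 2**4 - 1 = 15
! (slots 0..15) and prime = matching_prime(15) = 17. Because nmax + 1 is a power of two,
! the index computation iand(c*prime, nmax) used below is equivalent to
! mod(c*prime, nmax + 1), i.e. a cheap bit mask.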

! **************************************************************************************************
!> \brief ...
!> \param hash_table ...
! **************************************************************************************************
   SUBROUTINE hash_table_release(hash_table)
      TYPE(hash_table_type) :: hash_table

      hash_table%nmax = 0
      hash_table%nele = 0
      DEALLOCATE (hash_table%table)

   END SUBROUTINE hash_table_release

! **************************************************************************************************
!> \brief add a pair (c,p) to the hash table
!> \param hash_table ...
!> \param c this value is being hashed
!> \param p this is being stored
! **************************************************************************************************
   RECURSIVE SUBROUTINE hash_table_add(hash_table, c, p)
      TYPE(hash_table_type), INTENT(INOUT) :: hash_table
      INTEGER, INTENT(IN) :: c, p

      REAL(kind=real_8), PARAMETER :: hash_table_expand = 1.5_real_8, &
                                      inv_hash_table_fill = 2.5_real_8

      INTEGER :: i, j
      TYPE(ele_type), ALLOCATABLE, DIMENSION(:) :: tmp_hash

      ! if too small, make a copy and rehash in a larger table

      IF (hash_table%nele*inv_hash_table_fill > hash_table%nmax) THEN
         ALLOCATE (tmp_hash(lbound(hash_table%table, 1):ubound(hash_table%table, 1)))
         tmp_hash(:) = hash_table%table
         CALL hash_table_release(hash_table)
         CALL hash_table_create(hash_table, int((ubound(tmp_hash, 1) + 8)*hash_table_expand))
         DO i = lbound(tmp_hash, 1), ubound(tmp_hash, 1)
            IF (tmp_hash(i)%c .NE. 0) THEN
               CALL hash_table_add(hash_table, tmp_hash(i)%c, tmp_hash(i)%p)
            END IF
         END DO
         DEALLOCATE (tmp_hash)
      END IF

      hash_table%nele = hash_table%nele + 1
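      ! Open addressing with linear probing (my annotation): the mask iand(..., nmax)
      ! works because nmax = 2**j - 1; the two DO loops below scan from the home slot
      ! to the end of the table and then wrap around from slot 0.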
      i = iand(c*hash_table%prime, hash_table%nmax)

      DO j = i, hash_table%nmax
         IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
            hash_table%table(j)%c = c
            hash_table%table(j)%p = p
            RETURN
         END IF
      END DO
      DO j = 0, i - 1
         IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
            hash_table%table(j)%c = c
            hash_table%table(j)%p = p
            RETURN
         END IF
      END DO

   END SUBROUTINE hash_table_add

! **************************************************************************************************
!> \brief ...
!> \param hash_table ...
!> \param c ...
!> \return ...
! **************************************************************************************************
   PURE FUNCTION hash_table_get(hash_table, c) RESULT(p)
      TYPE(hash_table_type), INTENT(IN) :: hash_table
      INTEGER, INTENT(IN) :: c
      INTEGER :: p

      INTEGER :: i, j

      i = iand(c*hash_table%prime, hash_table%nmax)

      ! catch the likely case first
      IF (hash_table%table(i)%c == c) THEN
         p = hash_table%table(i)%p
         RETURN
      END IF

      DO j = i, hash_table%nmax
         IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
            p = hash_table%table(j)%p
            RETURN
         END IF
      END DO
      DO j = 0, i - 1
         IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
            p = hash_table%table(j)%p
            RETURN
         END IF
      END DO

      ! we should never reach this point.
      p = huge(p)

   END FUNCTION hash_table_get
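
! A minimal usage sketch for the hashtable (my annotation; variables are illustrative):
!
!    TYPE(hash_table_type) :: ht
!    CALL hash_table_create(ht, 16)     ! rounds up to nmax = 2**j - 1 >= 16
!    CALL hash_table_add(ht, 42, 7)     ! store p = 7 under key c = 42
!    iblock = hash_table_get(ht, 42)    ! -> 7
!    iblock = hash_table_get(ht, 99)    ! -> 0, i.e. 'not present'
!    CALL hash_table_release(ht)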

! End of hashtable
! --------------------------------------------------------------------------------------------------

! **************************************************************************************************
!> \brief the real driver routine for the multiply, not all symmetries implemented yet
!> \param matrix ...
!> \param vec_in ...
!> \param vec_out ...
!> \param alpha ...
!> \param beta ...
!> \param work_row ...
!> \param work_col ...
! **************************************************************************************************
   SUBROUTINE dbcsr_matrix_colvec_multiply(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
      TYPE(dbcsr_type) :: matrix, vec_in, vec_out
      REAL(kind=real_8) :: alpha, beta
      TYPE(dbcsr_type) :: work_row, work_col

      CHARACTER :: matrix_type

      CALL dbcsr_get_info(matrix, matrix_type=matrix_type)

      SELECT CASE (matrix_type)
      CASE (dbcsr_type_no_symmetry)
         CALL dbcsr_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
      CASE (dbcsr_type_symmetric)
         CALL dbcsr_sym_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
      CASE (dbcsr_type_antisymmetric)
         ! Not yet implemented; should mainly be some prefactor magic, but it is unclear how antisymmetric matrices are stored
         CPABORT("NYI, antisymmetric matrix not permitted")
      CASE DEFAULT
         CPABORT("Unknown matrix type, ...")
      END SELECT

   END SUBROUTINE dbcsr_matrix_colvec_multiply
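
! A minimal end-to-end sketch of computing vec_out = alpha*matrix*vec_in + beta*vec_out
! (my annotation; "mat" is an existing dbcsr matrix and all names are illustrative).
! The replicated work vectors carry the communication pattern and can be reused across
! repeated multiplies, e.g. within an Arnoldi iteration:
!
!    TYPE(dbcsr_type) :: mat, x, y, work_row, work_col
!    CALL create_col_vec_from_matrix(x, mat, 1)
!    CALL create_col_vec_from_matrix(y, mat, 1)
!    CALL create_replicated_row_vec_from_matrix(work_row, mat, 1)
!    CALL create_replicated_col_vec_from_matrix(work_col, mat, 1)
!    ! ... fill x ...
!    CALL dbcsr_matrix_colvec_multiply(mat, x, y, 1.0_real_8, 0.0_real_8, work_row, work_col)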

! **************************************************************************************************
!> \brief low level routines for matrix vector multiplies
!> \param matrix ...
!> \param vec_in ...
!> \param vec_out ...
!> \param alpha ...
!> \param beta ...
!> \param work_row ...
!> \param work_col ...
! **************************************************************************************************
   SUBROUTINE dbcsr_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
      TYPE(dbcsr_type) :: matrix, vec_in, vec_out
      REAL(kind=real_8) :: alpha, beta
      TYPE(dbcsr_type) :: work_row, work_col

      CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_matrix_vector_mult'

      INTEGER :: col, handle, handle1, ithread, mypcol, &
                 myprow, ncols, nrows, pcol, prow, &
                 prow_handle, row
      REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
      REAL(kind=real_8), DIMENSION(:, :), POINTER :: data_d, vec_res
      TYPE(dbcsr_distribution_type) :: dist
      TYPE(dbcsr_iterator_type) :: iter
      TYPE(fast_vec_access_type) :: fast_vec_col, fast_vec_row
      TYPE(mp_comm_type) :: prow_group

      CALL timeset(routineN, handle)
      ithread = 0

      ! Collect some data about the parallel environment. We will use it later to move the vector around
      CALL dbcsr_get_info(matrix, distribution=dist)
      CALL dbcsr_distribution_get(dist, prow_group=prow_handle, myprow=myprow, mypcol=mypcol)

      CALL prow_group%set_handle(prow_handle)

      CALL create_fast_row_vec_access(work_row, fast_vec_row)
      CALL create_fast_col_vec_access(work_col, fast_vec_col)

      ! Transfer the correct parts of the input vector to the correct locations so we can do a local multiply
      CALL dbcsr_col_vec_to_rep_row(vec_in, work_col, work_row, fast_vec_col)

      ! Set the work vector for the results to 0
      CALL dbcsr_set(work_col, 0.0_real_8)

      ! Perform the local multiply. Here we exploit that the blocks are replicated on the MPI processes.
      ! Note that the input and result vectors are distributed differently (row wise and col wise, respectively)
      CALL timeset(routineN//"_local_mm", handle1)

!$OMP PARALLEL DEFAULT(NONE) PRIVATE(row,col,iter,data_d,ithread,pcol,prow) &
!$OMP          SHARED(matrix,fast_vec_col,fast_vec_row)
!$ ithread = omp_get_thread_num()
      CALL dbcsr_iterator_start(iter, matrix, shared=.false.)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, data_d)
         prow = hash_table_get(fast_vec_col%hash_table, row)
         IF (fast_vec_col%blk_map(prow)%assigned_thread .NE. ithread) CYCLE
         pcol = hash_table_get(fast_vec_row%hash_table, col)
         IF (SIZE(fast_vec_col%blk_map(prow)%ptr, 1) .EQ. 0 .OR. &
             SIZE(fast_vec_col%blk_map(prow)%ptr, 2) .EQ. 0 .OR. &
             SIZE(data_d, 2) .EQ. 0) CYCLE
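         ! block update work_col(row) += matrix(row,col) * work_row(col)^T; the row-vector
         ! block stores the vector transposed, hence transb = 'T' (my annotation)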
         CALL dgemm('N', 'T', SIZE(fast_vec_col%blk_map(prow)%ptr, 1), &
                    SIZE(fast_vec_col%blk_map(prow)%ptr, 2), &
                    SIZE(data_d, 2), &
                    1.0_dp, &
                    data_d, &
                    SIZE(fast_vec_col%blk_map(prow)%ptr, 1), &
                    fast_vec_row%blk_map(pcol)%ptr, &
                    SIZE(fast_vec_col%blk_map(prow)%ptr, 2), &
                    1.0_dp, &
                    fast_vec_col%blk_map(prow)%ptr, &
                    SIZE(fast_vec_col%blk_map(prow)%ptr, 1))
      END DO
      CALL dbcsr_iterator_stop(iter)
!$OMP END PARALLEL

      CALL timestop(handle1)

      ! sum all the data onto the first processor col where the original vector is stored
      data_vec => dbcsr_get_data_p(work_col)
      CALL dbcsr_get_info(matrix=work_col, nfullrows_local=nrows, nfullcols_local=ncols)
      CALL prow_group%sum(data_vec(1:nrows*ncols))

      ! Local copy on the first mpi col (as this is the location of the vec_res blocks) of the result vector
      ! from the replicated to the original vector. Let's play it safe and use the iterator
      CALL dbcsr_iterator_start(iter, vec_out)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_res)
         prow = hash_table_get(fast_vec_col%hash_table, row)
         IF (ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
            vec_res(:, :) = beta*vec_res(:, :) + alpha*fast_vec_col%blk_map(prow)%ptr(:, :)
         ELSE
            vec_res(:, :) = beta*vec_res(:, :)
         END IF
      END DO
      CALL dbcsr_iterator_stop(iter)

      CALL release_fast_vec_access(fast_vec_row)
      CALL release_fast_vec_access(fast_vec_col)

      CALL timestop(handle)

   END SUBROUTINE dbcsr_matrix_vector_mult

! **************************************************************************************************
!> \brief ...
!> \param matrix ...
!> \param vec_in ...
!> \param vec_out ...
!> \param alpha ...
!> \param beta ...
!> \param work_row ...
!> \param work_col ...
!> \param skip_diag ...
! **************************************************************************************************
   SUBROUTINE dbcsr_matrixt_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col, skip_diag)
      TYPE(dbcsr_type) :: matrix, vec_in, vec_out
      REAL(kind=real_8) :: alpha, beta
      TYPE(dbcsr_type) :: work_row, work_col
      LOGICAL :: skip_diag

      CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_matrixT_vector_mult'

      INTEGER :: col, col_size, handle, handle1, ithread, &
                 mypcol, myprow, ncols, nrows, pcol, &
                 pcol_handle, prow, prow_handle, row, &
                 row_size
      REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
      REAL(kind=real_8), DIMENSION(:, :), POINTER :: data_d, vec_bl, vec_res
      TYPE(dbcsr_distribution_type) :: dist
      TYPE(dbcsr_iterator_type) :: iter
      TYPE(fast_vec_access_type) :: fast_vec_col, fast_vec_row
      TYPE(mp_comm_type) :: pcol_group, prow_group

      CALL timeset(routineN, handle)
      ithread = 0

      ! Collect some data about the parallel environment. We will use it later to move the vector around
      CALL dbcsr_get_info(matrix, distribution=dist)
      CALL dbcsr_distribution_get(dist, prow_group=prow_handle, pcol_group=pcol_handle, myprow=myprow, mypcol=mypcol)

      CALL prow_group%set_handle(prow_handle)
      CALL pcol_group%set_handle(pcol_handle)

      CALL create_fast_row_vec_access(work_row, fast_vec_row)
      CALL create_fast_col_vec_access(work_col, fast_vec_col)

      ! Set the work vector for the results to 0
      CALL dbcsr_set(work_row, 0.0_real_8)

      ! Transfer the correct parts of the input vector to the replicated vector on proc_col 0
      CALL dbcsr_iterator_start(iter, vec_in)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_bl, row_size=row_size, col_size=col_size)
         prow = hash_table_get(fast_vec_col%hash_table, row)
         fast_vec_col%blk_map(prow)%ptr(1:row_size, 1:col_size) = vec_bl(1:row_size, 1:col_size)
      END DO
      CALL dbcsr_iterator_stop(iter)
      ! Replicate the data on all processors in the row
      data_vec => dbcsr_get_data_p(work_col)
      CALL prow_group%bcast(data_vec, 0)

      ! Perform the local multiply. Here it is obvious why the vectors are replicated on the mpi rows and cols
      CALL timeset(routineN//"local_mm", handle1)
      CALL dbcsr_get_info(matrix=work_col, nfullcols_local=ncols)
!$OMP PARALLEL DEFAULT(NONE) PRIVATE(row,col,iter,data_d,row_size,col_size,ithread,prow,pcol) &
!$OMP          SHARED(matrix,fast_vec_row,fast_vec_col,skip_diag,ncols)
!$ ithread = omp_get_thread_num()
      CALL dbcsr_iterator_start(iter, matrix, shared=.false.)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, data_d, row_size=row_size, col_size=col_size)
         IF (skip_diag .AND. col == row) CYCLE
         prow = hash_table_get(fast_vec_col%hash_table, row)
         pcol = hash_table_get(fast_vec_row%hash_table, col)
         IF (ASSOCIATED(fast_vec_row%blk_map(pcol)%ptr) .AND. &
             ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
            IF (fast_vec_row%blk_map(pcol)%assigned_thread .NE. ithread) CYCLE
            fast_vec_row%blk_map(pcol)%ptr = fast_vec_row%blk_map(pcol)%ptr + &
                                             matmul(transpose(fast_vec_col%blk_map(prow)%ptr), data_d)
         ELSE
            prow = hash_table_get(fast_vec_row%hash_table, row)
            pcol = hash_table_get(fast_vec_col%hash_table, col)
            IF (fast_vec_row%blk_map(prow)%assigned_thread .NE. ithread) CYCLE
            fast_vec_row%blk_map(prow)%ptr = fast_vec_row%blk_map(prow)%ptr + &
                                             matmul(transpose(fast_vec_col%blk_map(pcol)%ptr), transpose(data_d))
         END IF
      END DO
      CALL dbcsr_iterator_stop(iter)
!$OMP END PARALLEL

      CALL timestop(handle1)

      ! sum all the data within a processor column to obtain the replicated result
      data_vec => dbcsr_get_data_p(work_row)
      ! we use the replicated vector but the final answer is only summed to proc_col 0 for efficiency
      CALL dbcsr_get_info(matrix=work_row, nfullrows_local=nrows, nfullcols_local=ncols)
      CALL pcol_group%sum(data_vec(1:nrows*ncols))

      ! Convert the result to a column wise distribution
      CALL dbcsr_rep_row_to_rep_col_vec(work_col, work_row, fast_vec_row)

      ! Create the final vector by summing it into the result vector which lives on proc_col 0
      CALL dbcsr_iterator_start(iter, vec_out)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_res, row_size=row_size)
         prow = hash_table_get(fast_vec_col%hash_table, row)
         IF (ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
            vec_res(:, :) = beta*vec_res(:, :) + alpha*fast_vec_col%blk_map(prow)%ptr(:, :)
         ELSE
            vec_res(:, :) = beta*vec_res(:, :)
         END IF
      END DO
      CALL dbcsr_iterator_stop(iter)

      CALL timestop(handle)

   END SUBROUTINE dbcsr_matrixt_vector_mult

! **************************************************************************************************
!> \brief ...
!> \param vec_in ...
!> \param rep_col_vec ...
!> \param rep_row_vec ...
!> \param fast_vec_col ...
! **************************************************************************************************
   SUBROUTINE dbcsr_col_vec_to_rep_row(vec_in, rep_col_vec, rep_row_vec, fast_vec_col)
      TYPE(dbcsr_type) :: vec_in, rep_col_vec, rep_row_vec
      TYPE(fast_vec_access_type), INTENT(IN) :: fast_vec_col

      CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_col_vec_to_rep_row'

      INTEGER :: col, handle, mypcol, myprow, ncols, &
                 nrows, pcol_handle, prow_handle, row
      INTEGER, DIMENSION(:), POINTER :: local_cols, row_dist
      REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec, data_vec_rep
      REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_row
      TYPE(dbcsr_distribution_type) :: dist_in, dist_rep_col
      TYPE(dbcsr_iterator_type) :: iter
      TYPE(mp_comm_type) :: pcol_group, prow_group

      CALL timeset(routineN, handle)

      ! get information about the parallel environment
      CALL dbcsr_get_info(vec_in, distribution=dist_in)
      CALL dbcsr_distribution_get(dist_in, &
                                  prow_group=prow_handle, &
                                  pcol_group=pcol_handle, &
                                  myprow=myprow, &
                                  mypcol=mypcol)

      CALL prow_group%set_handle(prow_handle)
      CALL pcol_group%set_handle(pcol_handle)

      ! Get the vector which tells us which blocks are local to which processor row in the col vec
      CALL dbcsr_get_info(rep_col_vec, distribution=dist_rep_col)
      CALL dbcsr_distribution_get(dist_rep_col, row_dist=row_dist)

      ! Copy the local vector to the replicated one on the first processor column (this is where vec_in lives)
      CALL dbcsr_get_info(matrix=rep_col_vec, nfullrows_local=nrows, nfullcols_local=ncols)
      data_vec_rep => dbcsr_get_data_p(rep_col_vec)
      data_vec => dbcsr_get_data_p(vec_in)
      IF (mypcol == 0) data_vec_rep(1:nrows*ncols) = data_vec(1:nrows*ncols)
      ! Replicate the data along the row
      CALL prow_group%bcast(data_vec_rep(1:nrows*ncols), 0)

      ! Here it gets a bit tricky, as we are dealing with two different parallel layouts:
      ! The rep_col_vec contains all blocks local to the row distribution of the vector.
      ! The rep_row_vec only needs the fraction which is local to the col distribution.
      ! However, in most cases this won't be the complete set of blocks obtainable from col_vector p_row i.
      ! Anyway, as the blocks don't repeat in the col_vec, a different fraction of the row vec is available
      ! on every replica in the processor column; by summing along the column we end up with the complete vector everywhere.
      ! Hope this clarifies the idea
      CALL dbcsr_set(rep_row_vec, 0.0_real_8)
      CALL dbcsr_get_info(matrix=rep_row_vec, nfullrows_local=nrows, local_cols=local_cols, nfullcols_local=ncols)
      CALL dbcsr_iterator_start(iter, rep_row_vec)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_row)
         IF (row_dist(col) == myprow) THEN
            vec_row = transpose(fast_vec_col%blk_map(hash_table_get(fast_vec_col%hash_table, col))%ptr)
         END IF
      END DO
      CALL dbcsr_iterator_stop(iter)
      CALL dbcsr_get_info(matrix=rep_row_vec, nfullrows_local=nrows, nfullcols_local=ncols)
      data_vec_rep => dbcsr_get_data_p(rep_row_vec)
      CALL pcol_group%sum(data_vec_rep(1:ncols*nrows))

      CALL timestop(handle)

   END SUBROUTINE dbcsr_col_vec_to_rep_row

! **************************************************************************************************
!> \brief ...
!> \param rep_col_vec ...
!> \param rep_row_vec ...
!> \param fast_vec_row ...
!> \param fast_vec_col_add ...
! **************************************************************************************************
   SUBROUTINE dbcsr_rep_row_to_rep_col_vec(rep_col_vec, rep_row_vec, fast_vec_row, fast_vec_col_add)
      TYPE(dbcsr_type) :: rep_col_vec, rep_row_vec
      TYPE(fast_vec_access_type) :: fast_vec_row
      TYPE(fast_vec_access_type), OPTIONAL :: fast_vec_col_add

      CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_rep_row_to_rep_col_vec'

      INTEGER :: col, handle, mypcol, myprow, ncols, &
                 nrows, prow_handle, row
      INTEGER, DIMENSION(:), POINTER :: col_dist
      REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec_rep
      REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_col
      TYPE(dbcsr_distribution_type) :: dist_rep_col, dist_rep_row
      TYPE(dbcsr_iterator_type) :: iter
      TYPE(mp_comm_type) :: prow_group

      CALL timeset(routineN, handle)

      ! get information about the parallel environment
      CALL dbcsr_get_info(matrix=rep_col_vec, distribution=dist_rep_col)
      CALL dbcsr_distribution_get(dist_rep_col, &
                                  prow_group=prow_handle, &
                                  myprow=myprow, &
                                  mypcol=mypcol)

      CALL prow_group%set_handle(prow_handle)

      ! Get the vector which tells us which blocks are local to which processor col in the row vec
      CALL dbcsr_get_info(matrix=rep_row_vec, distribution=dist_rep_row)
      CALL dbcsr_distribution_get(dist_rep_row, col_dist=col_dist)

      ! The same trick as described above, in the opposite direction
      CALL dbcsr_set(rep_col_vec, 0.0_real_8)
      CALL dbcsr_iterator_start(iter, rep_col_vec)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_col)
         IF (col_dist(row) == mypcol) THEN
            vec_col = transpose(fast_vec_row%blk_map(hash_table_get(fast_vec_row%hash_table, row))%ptr)
         END IF
         ! this one is special: it allows adding the elements of a not yet summed replicated
         ! column vector, as it appears as the result of M*V(row_rep). This saves a parallel summation in the symmetric case
         IF (PRESENT(fast_vec_col_add)) THEN
            vec_col = vec_col + fast_vec_col_add%blk_map(hash_table_get(fast_vec_col_add%hash_table, row))%ptr(:, :)
         END IF
      END DO
      CALL dbcsr_iterator_stop(iter)
      CALL dbcsr_get_info(matrix=rep_col_vec, nfullrows_local=nrows, nfullcols_local=ncols)
      data_vec_rep => dbcsr_get_data_p(rep_col_vec)
      CALL prow_group%sum(data_vec_rep(1:nrows*ncols))

      CALL timestop(handle)

   END SUBROUTINE dbcsr_rep_row_to_rep_col_vec

! **************************************************************************************************
!> \brief ...
!> \param matrix ...
!> \param vec_in ...
!> \param vec_out ...
!> \param alpha ...
!> \param beta ...
!> \param work_row ...
!> \param work_col ...
! **************************************************************************************************
   SUBROUTINE dbcsr_sym_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
      TYPE(dbcsr_type) :: matrix, vec_in, vec_out
      REAL(kind=real_8) :: alpha, beta
      TYPE(dbcsr_type) :: work_row, work_col

      CHARACTER(LEN=*), PARAMETER :: routineN = 'dbcsr_sym_matrix_vector_mult'

      INTEGER :: col, handle, handle1, ithread, mypcol, &
                 myprow, ncols, nrows, pcol, &
                 pcol_handle, prow, row, rpcol, rprow, &
                 vec_dim
      REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
      REAL(kind=real_8), DIMENSION(:, :), POINTER :: data_d, vec_res
      TYPE(dbcsr_distribution_type) :: dist
      TYPE(dbcsr_iterator_type) :: iter
      TYPE(dbcsr_type) :: result_col, result_row
      TYPE(fast_vec_access_type) :: fast_vec_col, fast_vec_row, &
                                    res_fast_vec_col, res_fast_vec_row
      TYPE(mp_comm_type) :: pcol_group

      CALL timeset(routineN, handle)
      ithread = 0
      ! We need some work matrices, as we try to exploit operations on the replicated vectors which would otherwise be duplicated
      CALL dbcsr_get_info(matrix=vec_in, nfullcols_total=vec_dim)
      ! This is a performance hack, as creating a new replicated vector is a fair bit more expensive
      CALL dbcsr_set(work_col, 0.0_real_8)
      CALL dbcsr_copy(result_col, work_col)
      CALL dbcsr_set(work_row, 0.0_real_8)
      CALL dbcsr_copy(result_row, work_row)

      ! Collect some data about the parallel environment. We will use it later to move the vector around
      CALL dbcsr_get_info(matrix=matrix, distribution=dist)
      CALL dbcsr_distribution_get(dist, pcol_group=pcol_handle, myprow=myprow, mypcol=mypcol)

      CALL pcol_group%set_handle(pcol_handle)

      CALL create_fast_row_vec_access(work_row, fast_vec_row)
      CALL create_fast_col_vec_access(work_col, fast_vec_col)
      CALL create_fast_row_vec_access(result_row, res_fast_vec_row)
      CALL create_fast_col_vec_access(result_col, res_fast_vec_col)

      ! Transfer the correct parts of the input vector to the correct locations so we can do a local multiply
      ! (the routine delivers both the replicated row and column vector)
      CALL dbcsr_col_vec_to_rep_row(vec_in, work_col, work_row, fast_vec_col)

      ! Perform the local multiply. Here we exploit that the blocks are replicated on the MPI processes.
      ! Note that the input and result vectors are distributed differently (row wise and col wise, respectively)
      CALL timeset(routineN//"_local_mm", handle1)

      !------ perform the multiplication; we have to take care to pick the correct blocks ----------

!$OMP PARALLEL DEFAULT(NONE) PRIVATE(row,col,iter,data_d,ithread,pcol,prow,rpcol,rprow) &
!$OMP          SHARED(matrix,fast_vec_row,res_fast_vec_col,res_fast_vec_row,fast_vec_col)
!$ ithread = omp_get_thread_num()
      CALL dbcsr_iterator_start(iter, matrix, shared=.false.)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, data_d)
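         ! Only one triangle of the symmetric matrix is stored, so each stored block
         ! (row,col) contributes twice (my annotation): to the result at 'row' via
         ! M(row,col)*x(col) and, off the diagonal, to the result at 'col' via
         ! M(row,col)^T*x(row). The ELSE branch handles blocks whose replicated
         ! work-vector counterparts are associated in the opposite layout.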
         pcol = hash_table_get(fast_vec_row%hash_table, col)
         rprow = hash_table_get(res_fast_vec_col%hash_table, row)
         IF (ASSOCIATED(fast_vec_row%blk_map(pcol)%ptr) .AND. &
             ASSOCIATED(res_fast_vec_col%blk_map(rprow)%ptr)) THEN
            IF (res_fast_vec_col%blk_map(rprow)%assigned_thread .EQ. ithread) THEN
               res_fast_vec_col%blk_map(rprow)%ptr = res_fast_vec_col%blk_map(rprow)%ptr + &
                                                     matmul(data_d, transpose(fast_vec_row%blk_map(pcol)%ptr))
            END IF
            prow = hash_table_get(fast_vec_col%hash_table, row)
            rpcol = hash_table_get(res_fast_vec_row%hash_table, col)
            IF (res_fast_vec_row%blk_map(rpcol)%assigned_thread .EQ. ithread .AND. row .NE. col) THEN
               res_fast_vec_row%blk_map(rpcol)%ptr = res_fast_vec_row%blk_map(rpcol)%ptr + &
                                                     matmul(transpose(fast_vec_col%blk_map(prow)%ptr), data_d)
            END IF
         ELSE
            rpcol = hash_table_get(res_fast_vec_col%hash_table, col)
            prow = hash_table_get(fast_vec_row%hash_table, row)
            IF (res_fast_vec_col%blk_map(rpcol)%assigned_thread .EQ. ithread) THEN
               res_fast_vec_col%blk_map(rpcol)%ptr = res_fast_vec_col%blk_map(rpcol)%ptr + &
                                                     transpose(matmul(fast_vec_row%blk_map(prow)%ptr, data_d))
            END IF
            rprow = hash_table_get(res_fast_vec_row%hash_table, row)
            pcol = hash_table_get(fast_vec_col%hash_table, col)
            IF (res_fast_vec_row%blk_map(rprow)%assigned_thread .EQ. ithread .AND. row .NE. col) THEN
               res_fast_vec_row%blk_map(rprow)%ptr = res_fast_vec_row%blk_map(rprow)%ptr + &
                                                     transpose(matmul(data_d, fast_vec_col%blk_map(pcol)%ptr))
            END IF
         END IF
      END DO
      CALL dbcsr_iterator_stop(iter)
!$OMP END PARALLEL

      CALL timestop(handle1)

      ! sum all the data within a processor column to obtain the replicated result
      data_vec => dbcsr_get_data_p(result_row)
      CALL dbcsr_get_info(matrix=result_row, nfullrows_local=nrows, nfullcols_local=ncols)

      CALL pcol_group%sum(data_vec(1:nrows*ncols))

      ! Convert the results to a column wise distribution. This is a bit involved, as result_row is fully replicated
      ! while result_col still holds the partial results in parallel. The routine below takes care of that and saves a
      ! parallel summation: the res_row vectors are converted taking only the appropriate elements (0 otherwise), while the
      ! res_col partial results are locally added. The sum then yields the correct vector
      CALL dbcsr_rep_row_to_rep_col_vec(work_col, result_row, res_fast_vec_row, res_fast_vec_col)

      ! Create the final vector by summing it into the result vector which lives on proc_col 0
      CALL dbcsr_iterator_start(iter, vec_out)
      DO WHILE (dbcsr_iterator_blocks_left(iter))
         CALL dbcsr_iterator_next_block(iter, row, col, vec_res)
         prow = hash_table_get(fast_vec_col%hash_table, row)
         IF (ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
            vec_res(:, :) = beta*vec_res(:, :) + alpha*(fast_vec_col%blk_map(prow)%ptr(:, :))
         ELSE
            vec_res(:, :) = beta*vec_res(:, :)
         END IF
      END DO
      CALL dbcsr_iterator_stop(iter)

      CALL release_fast_vec_access(fast_vec_row)
      CALL release_fast_vec_access(fast_vec_col)
      CALL release_fast_vec_access(res_fast_vec_row)
      CALL release_fast_vec_access(res_fast_vec_col)

      CALL dbcsr_release(result_row); CALL dbcsr_release(result_col)

      CALL timestop(handle)

   END SUBROUTINE dbcsr_sym_matrix_vector_mult

END MODULE arnoldi_vector