(git:da6e80d)
Loading...
Searching...
No Matches
arnoldi_vector.F
Go to the documentation of this file.
1!--------------------------------------------------------------------------------------------------!
2! CP2K: A general program to perform molecular dynamics simulations !
3! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4! !
5! SPDX-License-Identifier: GPL-2.0-or-later !
6!--------------------------------------------------------------------------------------------------!
7
8! **************************************************************************************************
9!> \brief operations for skinny matrices/vectors expressed in dbcsr form
10!> \par History
11!> 2014.10 created [Florian Schiffmann]
12!> 2023.12 Removed support for single-precision [Ole Schuett]
13!> 2024.12 Removed support for complex input matrices [Ole Schuett]
14!> \author Florian Schiffmann
15! **************************************************************************************************
17 USE cp_dbcsr_api, ONLY: &
22 dbcsr_type_antisymmetric, dbcsr_type_no_symmetry, dbcsr_type_symmetric
24 USE kinds, ONLY: dp,&
25 real_8
27#include "../base/base_uses.f90"
28
29!$ USE OMP_LIB, ONLY: omp_get_max_threads, omp_get_thread_num, omp_get_num_threads
30
31 IMPLICIT NONE
32
33 PRIVATE
34
35 CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'arnoldi_vector'
36
37! **************************************************************************************************
38!> \brief Types needed for the hashtable.
39! **************************************************************************************************
40 TYPE ele_type
41 INTEGER :: c = 0
42 INTEGER :: p = 0
43 END TYPE ele_type
44
45 TYPE hash_table_type
46 TYPE(ele_type), DIMENSION(:), POINTER :: table => null()
47 INTEGER :: nele = 0
48 INTEGER :: nmax = 0
49 INTEGER :: prime = 0
50 END TYPE hash_table_type
51
52! **************************************************************************************************
53!> \brief Types needed for fast access to distributed dbcsr vectors.
54! **************************************************************************************************
55 TYPE block_ptr
56 REAL(real_8), DIMENSION(:, :), POINTER :: ptr => null()
57 INTEGER :: assigned_thread = -1
58 END TYPE block_ptr
59
60 TYPE fast_vec_access_type
61 TYPE(hash_table_type) :: hash_table = hash_table_type()
62 TYPE(block_ptr), DIMENSION(:), ALLOCATABLE :: blk_map
63 END TYPE
64
70
71CONTAINS
72
73! **************************************************************************************************
74!> \brief creates a dbcsr col vector like object which lives on proc_col 0
75!> and has the same row dist as the template matrix
76!> the returned matrix is fully allocated and all blocks are set to 0
77!> this is not a sparse object (and must never be)
78!> \param dbcsr_vec the vector object to create must be allocated but not initialized
79!> \param matrix a dbcsr matrix used as template
80!> \param ncol number of vectors in the dbcsr_typeect (1 for vector, n for skinny matrix)
81! **************************************************************************************************
82 SUBROUTINE create_col_vec_from_matrix(dbcsr_vec, matrix, ncol)
83 TYPE(dbcsr_type) :: dbcsr_vec, matrix
84 INTEGER :: ncol
85
86 CHARACTER(LEN=*), PARAMETER :: routinen = 'create_col_vec_from_matrix'
87
88 INTEGER :: handle, npcols
89 INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
90 row_dist
91 TYPE(dbcsr_distribution_type) :: dist, dist_col_vec
92
93 CALL timeset(routinen, handle)
94
95 CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, distribution=dist)
96 CALL dbcsr_distribution_get(dist, npcols=npcols, row_dist=row_dist)
97
98 ALLOCATE (col_dist(1), col_blk_size(1))
99 col_dist(1) = 0
100 col_blk_size(1) = ncol
101 CALL dbcsr_distribution_new(dist_col_vec, template=dist, row_dist=row_dist, col_dist=col_dist)
102
103 CALL dbcsr_create(dbcsr_vec, "D", dist_col_vec, matrix_type=dbcsr_type_no_symmetry, &
104 row_blk_size=row_blk_size, col_blk_size=col_blk_size)
105 CALL dbcsr_reserve_all_blocks(dbcsr_vec)
106
107 CALL dbcsr_distribution_release(dist_col_vec)
108 DEALLOCATE (col_dist, col_blk_size)
109 CALL timestop(handle)
110
111 END SUBROUTINE create_col_vec_from_matrix
112
113! **************************************************************************************************
114!> \brief creates a dbcsr row vector like object which lives on proc_row 0
115!> and has the same row dist as the template matrix
116!> the returned matrix is fully allocated and all blocks are set to 0
117!> this is not a sparse object (and must never be)
118!> \param dbcsr_vec ...
119!> \param matrix a dbcsr matrix used as template
120!> \param nrow number of vectors in the dbcsr_typeect (1 for vector, n for skinny matrix)
121! **************************************************************************************************
122 SUBROUTINE create_row_vec_from_matrix(dbcsr_vec, matrix, nrow)
123 TYPE(dbcsr_type) :: dbcsr_vec, matrix
124 INTEGER :: nrow
125
126 CHARACTER(LEN=*), PARAMETER :: routinen = 'create_row_vec_from_matrix'
127
128 INTEGER :: handle, nprows
129 INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
130 row_dist
131 TYPE(dbcsr_distribution_type) :: dist, dist_row_vec
132
133 CALL timeset(routinen, handle)
134
135 CALL dbcsr_get_info(matrix, col_blk_size=col_blk_size, distribution=dist)
136 CALL dbcsr_distribution_get(dist, nprows=nprows, col_dist=col_dist)
137
138 ALLOCATE (row_dist(1), row_blk_size(1))
139 row_dist(1) = 0
140 row_blk_size(1) = nrow
141 CALL dbcsr_distribution_new(dist_row_vec, template=dist, row_dist=row_dist, col_dist=col_dist)
142
143 CALL dbcsr_create(dbcsr_vec, "D", dist_row_vec, matrix_type=dbcsr_type_no_symmetry, &
144 row_blk_size=row_blk_size, col_blk_size=col_blk_size)
145 CALL dbcsr_reserve_all_blocks(dbcsr_vec)
146
147 CALL dbcsr_distribution_release(dist_row_vec)
148 DEALLOCATE (row_dist, row_blk_size)
149 CALL timestop(handle)
150
151 END SUBROUTINE create_row_vec_from_matrix
152
153! **************************************************************************************************
154!> \brief creates a col vector like object whose blocks can be replicated
155!> along the processor row and has the same row dist as the template matrix
156!> the returned matrix is fully allocated and all blocks are set to 0
157!> this is not a sparse object (and must never be)
158!> \param dbcsr_vec the vector object to create must be allocated but not initialized
159!> \param matrix a dbcsr matrix used as template
160!> \param ncol number of vectors in the dbcsr_typeect (1 for vector, n for skinny matrix)
161! **************************************************************************************************
162 SUBROUTINE create_replicated_col_vec_from_matrix(dbcsr_vec, matrix, ncol)
163 TYPE(dbcsr_type) :: dbcsr_vec, matrix
164 INTEGER :: ncol
165
166 CHARACTER(LEN=*), PARAMETER :: routinen = 'create_replicated_col_vec_from_matrix'
167
168 INTEGER :: handle, i, npcols
169 INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
170 row_dist
171 TYPE(dbcsr_distribution_type) :: dist, dist_col_vec
172
173 CALL timeset(routinen, handle)
174
175 CALL dbcsr_get_info(matrix, row_blk_size=row_blk_size, distribution=dist)
176 CALL dbcsr_distribution_get(dist, npcols=npcols, row_dist=row_dist)
177
178 ALLOCATE (col_dist(npcols), col_blk_size(npcols))
179 col_blk_size(:) = ncol
180 DO i = 0, npcols - 1
181 col_dist(i + 1) = i
182 END DO
183 CALL dbcsr_distribution_new(dist_col_vec, template=dist, row_dist=row_dist, col_dist=col_dist)
184
185 CALL dbcsr_create(dbcsr_vec, "D", dist_col_vec, matrix_type=dbcsr_type_no_symmetry, &
186 row_blk_size=row_blk_size, col_blk_size=col_blk_size)
187 CALL dbcsr_reserve_all_blocks(dbcsr_vec)
188
189 CALL dbcsr_distribution_release(dist_col_vec)
190 DEALLOCATE (col_dist, col_blk_size)
191 CALL timestop(handle)
192
194
195! **************************************************************************************************
196!> \brief creates a row vector like object whose blocks can be replicated
197!> along the processor col and has the same col dist as the template matrix
198!> the returned matrix is fully allocated and all blocks are set to 0
199!> this is not a sparse object (and must never be)
200!> \param dbcsr_vec the vector object to create must be allocated but not initialized
201!> \param matrix a dbcsr matrix used as template
202!> \param nrow number of vectors in the dbcsr_typeect (1 for vector, n for skinny matrix)
203! **************************************************************************************************
204 SUBROUTINE create_replicated_row_vec_from_matrix(dbcsr_vec, matrix, nrow)
205 TYPE(dbcsr_type) :: dbcsr_vec, matrix
206 INTEGER :: nrow
207
208 CHARACTER(LEN=*), PARAMETER :: routinen = 'create_replicated_row_vec_from_matrix'
209
210 INTEGER :: handle, i, nprows
211 INTEGER, DIMENSION(:), POINTER :: col_blk_size, col_dist, row_blk_size, &
212 row_dist
213 TYPE(dbcsr_distribution_type) :: dist, dist_row_vec
214
215 CALL timeset(routinen, handle)
216
217 CALL dbcsr_get_info(matrix, distribution=dist, col_blk_size=col_blk_size)
218 CALL dbcsr_distribution_get(dist, nprows=nprows, col_dist=col_dist)
219
220 ALLOCATE (row_dist(nprows), row_blk_size(nprows))
221 row_blk_size(:) = nrow
222 DO i = 0, nprows - 1
223 row_dist(i + 1) = i
224 END DO
225 CALL dbcsr_distribution_new(dist_row_vec, template=dist, row_dist=row_dist, col_dist=col_dist)
226
227 CALL dbcsr_create(dbcsr_vec, "D", dist_row_vec, dbcsr_type_no_symmetry, &
228 row_blk_size=row_blk_size, col_blk_size=col_blk_size)
229 CALL dbcsr_reserve_all_blocks(dbcsr_vec)
230
231 CALL dbcsr_distribution_release(dist_row_vec)
232 DEALLOCATE (row_dist, row_blk_size)
233 CALL timestop(handle)
234
236
237! **************************************************************************************************
238!> \brief given a column vector, prepare the fast_vec_access container
239!> \param vec ...
240!> \param fast_vec_access ...
241! **************************************************************************************************
242 SUBROUTINE create_fast_col_vec_access(vec, fast_vec_access)
243 TYPE(dbcsr_type) :: vec
244 TYPE(fast_vec_access_type) :: fast_vec_access
245
246 CHARACTER(LEN=*), PARAMETER :: routinen = 'create_fast_col_vec_access'
247
248 INTEGER :: col, handle, iblock, nblk_local, &
249 nthreads, row
250 REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_bl
251 TYPE(dbcsr_iterator_type) :: iter
252
253 CALL timeset(routinen, handle)
254
255 ! figure out the number of threads
256 nthreads = 1
257!$OMP PARALLEL DEFAULT(NONE) SHARED(nthreads)
258!$OMP MASTER
259!$ nthreads = OMP_GET_NUM_THREADS()
260!$OMP END MASTER
261!$OMP END PARALLEL
262
263 CALL dbcsr_get_info(matrix=vec, nblkrows_local=nblk_local)
264 ! 4 times makes sure the table is big enough to limit collisions.
265 CALL hash_table_create(fast_vec_access%hash_table, 4*nblk_local)
266 ! include zero for effective dealing with values not in the hash table (will return 0)
267 ALLOCATE (fast_vec_access%blk_map(0:nblk_local))
268
269 CALL dbcsr_get_info(matrix=vec, nblkcols_local=col)
270 IF (col .GT. 1) cpabort("BUG")
271
272 ! go through the blocks of the vector
273 iblock = 0
274 CALL dbcsr_iterator_start(iter, vec)
275 DO WHILE (dbcsr_iterator_blocks_left(iter))
276 CALL dbcsr_iterator_next_block(iter, row, col, vec_bl)
277 iblock = iblock + 1
278 CALL hash_table_add(fast_vec_access%hash_table, row, iblock)
279 fast_vec_access%blk_map(iblock)%ptr => vec_bl
280 fast_vec_access%blk_map(iblock)%assigned_thread = mod(iblock, nthreads)
281 END DO
282 CALL dbcsr_iterator_stop(iter)
283 CALL timestop(handle)
284
285 END SUBROUTINE create_fast_col_vec_access
286
287! **************************************************************************************************
288!> \brief given a row vector, prepare the fast_vec_access_container
289!> \param vec ...
290!> \param fast_vec_access ...
291! **************************************************************************************************
292 SUBROUTINE create_fast_row_vec_access(vec, fast_vec_access)
293 TYPE(dbcsr_type) :: vec
294 TYPE(fast_vec_access_type) :: fast_vec_access
295
296 CHARACTER(LEN=*), PARAMETER :: routinen = 'create_fast_row_vec_access'
297
298 INTEGER :: col, handle, iblock, nblk_local, &
299 nthreads, row
300 REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_bl
301 TYPE(dbcsr_iterator_type) :: iter
302
303 CALL timeset(routinen, handle)
304
305 ! figure out the number of threads
306 nthreads = 1
307!$OMP PARALLEL DEFAULT(NONE) SHARED(nthreads)
308!$OMP MASTER
309!$ nthreads = OMP_GET_NUM_THREADS()
310!$OMP END MASTER
311!$OMP END PARALLEL
312
313 CALL dbcsr_get_info(matrix=vec, nblkcols_local=nblk_local)
314 ! 4 times makes sure the table is big enough to limit collisions.
315 CALL hash_table_create(fast_vec_access%hash_table, 4*nblk_local)
316 ! include zero for effective dealing with values not in the hash table (will return 0)
317 ALLOCATE (fast_vec_access%blk_map(0:nblk_local))
318
319 ! sanity check
320 CALL dbcsr_get_info(matrix=vec, nblkrows_local=row)
321 IF (row .GT. 1) cpabort("BUG")
322
323 ! go through the blocks of the vector
324 iblock = 0
325 CALL dbcsr_iterator_start(iter, vec)
326 DO WHILE (dbcsr_iterator_blocks_left(iter))
327 CALL dbcsr_iterator_next_block(iter, row, col, vec_bl)
328 iblock = iblock + 1
329 CALL hash_table_add(fast_vec_access%hash_table, col, iblock)
330 fast_vec_access%blk_map(iblock)%ptr => vec_bl
331 fast_vec_access%blk_map(iblock)%assigned_thread = mod(iblock, nthreads)
332 END DO
333 CALL dbcsr_iterator_stop(iter)
334
335 CALL timestop(handle)
336
337 END SUBROUTINE create_fast_row_vec_access
338
339! **************************************************************************************************
340!> \brief release all memory associated with the fast_vec_access type
341!> \param fast_vec_access ...
342! **************************************************************************************************
343 SUBROUTINE release_fast_vec_access(fast_vec_access)
344 TYPE(fast_vec_access_type) :: fast_vec_access
345
346 CHARACTER(LEN=*), PARAMETER :: routinen = 'release_fast_vec_access'
347
348 INTEGER :: handle
349
350 CALL timeset(routinen, handle)
351
352 CALL hash_table_release(fast_vec_access%hash_table)
353 DEALLOCATE (fast_vec_access%blk_map)
354
355 CALL timestop(handle)
356
357 END SUBROUTINE release_fast_vec_access
358
359! --------------------------------------------------------------------------------------------------
360! Beginning of hashtable.
361! this file can be 'INCLUDE'ed verbatim in various place, where it needs to be
362! part of the module to guarantee inlining
363! hashes (c,p) pairs, where p is assumed to be >0
364! on return (0 is used as a flag for not present)
365!
366!
367! **************************************************************************************************
368!> \brief finds a prime equal or larger than i, needed at creation
369!> \param i ...
370!> \return ...
371! **************************************************************************************************
372 FUNCTION matching_prime(i) RESULT(res)
373 INTEGER, INTENT(IN) :: i
374 INTEGER :: res
375
376 INTEGER :: j
377
378 res = i
379 j = 0
380 DO WHILE (j < res)
381 DO j = 2, res - 1
382 IF (mod(res, j) == 0) THEN
383 res = res + 1
384 EXIT
385 END IF
386 END DO
387 END DO
388 END FUNCTION
389
390! **************************************************************************************************
391!> \brief create a hash_table of given initial size.
392!> the hash table will expand as needed (but this requires rehashing)
393!> \param hash_table ...
394!> \param table_size ...
395! **************************************************************************************************
396 SUBROUTINE hash_table_create(hash_table, table_size)
397 TYPE(hash_table_type) :: hash_table
398 INTEGER, INTENT(IN) :: table_size
399
400 INTEGER :: j
401
402 ! guarantee a minimal hash table size (8), so that expansion works
403
404 j = 3
405 DO WHILE (2**j - 1 < table_size)
406 j = j + 1
407 END DO
408 hash_table%nmax = 2**j - 1
409 hash_table%prime = matching_prime(hash_table%nmax)
410 hash_table%nele = 0
411 ALLOCATE (hash_table%table(0:hash_table%nmax))
412 END SUBROUTINE hash_table_create
413
414! **************************************************************************************************
415!> \brief ...
416!> \param hash_table ...
417! **************************************************************************************************
418 SUBROUTINE hash_table_release(hash_table)
419 TYPE(hash_table_type) :: hash_table
420
421 hash_table%nmax = 0
422 hash_table%nele = 0
423 DEALLOCATE (hash_table%table)
424
425 END SUBROUTINE hash_table_release
426
427! **************************************************************************************************
428!> \brief add a pair (c,p) to the hash table
429!> \param hash_table ...
430!> \param c this value is being hashed
431!> \param p this is being stored
432! **************************************************************************************************
433 RECURSIVE SUBROUTINE hash_table_add(hash_table, c, p)
434 TYPE(hash_table_type), INTENT(INOUT) :: hash_table
435 INTEGER, INTENT(IN) :: c, p
436
437 REAL(kind=real_8), PARAMETER :: hash_table_expand = 1.5_real_8, &
438 inv_hash_table_fill = 2.5_real_8
439
440 INTEGER :: i, j
441 TYPE(ele_type), ALLOCATABLE, DIMENSION(:) :: tmp_hash
442
443 ! if too small, make a copy and rehash in a larger table
444
445 IF (hash_table%nele*inv_hash_table_fill > hash_table%nmax) THEN
446 ALLOCATE (tmp_hash(lbound(hash_table%table, 1):ubound(hash_table%table, 1)))
447 tmp_hash(:) = hash_table%table
448 CALL hash_table_release(hash_table)
449 CALL hash_table_create(hash_table, int((ubound(tmp_hash, 1) + 8)*hash_table_expand))
450 DO i = lbound(tmp_hash, 1), ubound(tmp_hash, 1)
451 IF (tmp_hash(i)%c .NE. 0) THEN
452 CALL hash_table_add(hash_table, tmp_hash(i)%c, tmp_hash(i)%p)
453 END IF
454 END DO
455 DEALLOCATE (tmp_hash)
456 END IF
457
458 hash_table%nele = hash_table%nele + 1
459 i = iand(c*hash_table%prime, hash_table%nmax)
460
461 DO j = i, hash_table%nmax
462 IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
463 hash_table%table(j)%c = c
464 hash_table%table(j)%p = p
465 RETURN
466 END IF
467 END DO
468 DO j = 0, i - 1
469 IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
470 hash_table%table(j)%c = c
471 hash_table%table(j)%p = p
472 RETURN
473 END IF
474 END DO
475
476 END SUBROUTINE hash_table_add
477
478! **************************************************************************************************
479!> \brief ...
480!> \param hash_table ...
481!> \param c ...
482!> \return ...
483! **************************************************************************************************
484 PURE FUNCTION hash_table_get(hash_table, c) RESULT(p)
485 TYPE(hash_table_type), INTENT(IN) :: hash_table
486 INTEGER, INTENT(IN) :: c
487 INTEGER :: p
488
489 INTEGER :: i, j
490
491 i = iand(c*hash_table%prime, hash_table%nmax)
492
493 ! catch the likely case first
494 IF (hash_table%table(i)%c == c) THEN
495 p = hash_table%table(i)%p
496 RETURN
497 END IF
498
499 DO j = i, hash_table%nmax
500 IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
501 p = hash_table%table(j)%p
502 RETURN
503 END IF
504 END DO
505 DO j = 0, i - 1
506 IF (hash_table%table(j)%c == 0 .OR. hash_table%table(j)%c == c) THEN
507 p = hash_table%table(j)%p
508 RETURN
509 END IF
510 END DO
511
512 ! we should never reach this point.
513 p = huge(p)
514
515 END FUNCTION hash_table_get
516
517! End of hashtable
518! --------------------------------------------------------------------------------------------------
519
520! **************************************************************************************************
521!> \brief the real driver routine for the multiply, not all symmetries implemented yet
522!> \param matrix ...
523!> \param vec_in ...
524!> \param vec_out ...
525!> \param alpha ...
526!> \param beta ...
527!> \param work_row ...
528!> \param work_col ...
529! **************************************************************************************************
530 SUBROUTINE dbcsr_matrix_colvec_multiply(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
531 TYPE(dbcsr_type) :: matrix, vec_in, vec_out
532 REAL(kind=real_8) :: alpha, beta
533 TYPE(dbcsr_type) :: work_row, work_col
534
535 CHARACTER :: matrix_type
536
537 CALL dbcsr_get_info(matrix, matrix_type=matrix_type)
538
539 SELECT CASE (matrix_type)
540 CASE (dbcsr_type_no_symmetry)
541 CALL dbcsr_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
542 CASE (dbcsr_type_symmetric)
543 CALL dbcsr_sym_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
544 CASE (dbcsr_type_antisymmetric)
545 ! Not yet implemented, should mainly be some prefactor magic, but who knows how antisymmetric matrices are stored???
546 cpabort("NYI, antisymmetric matrix not permitted")
547 CASE DEFAULT
548 cpabort("Unknown matrix type, ...")
549 END SELECT
550
551 END SUBROUTINE dbcsr_matrix_colvec_multiply
552
553! **************************************************************************************************
554!> \brief low level routines for matrix vector multiplies
555!> \param matrix ...
556!> \param vec_in ...
557!> \param vec_out ...
558!> \param alpha ...
559!> \param beta ...
560!> \param work_row ...
561!> \param work_col ...
562! **************************************************************************************************
563 SUBROUTINE dbcsr_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
564 TYPE(dbcsr_type) :: matrix, vec_in, vec_out
565 REAL(kind=real_8) :: alpha, beta
566 TYPE(dbcsr_type) :: work_row, work_col
567
568 CHARACTER(LEN=*), PARAMETER :: routinen = 'dbcsr_matrix_vector_mult'
569
570 INTEGER :: col, handle, handle1, ithread, k, m, &
571 mypcol, myprow, n, ncols, nrows, pcol, &
572 prow, prow_handle, row
573 REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
574 REAL(kind=real_8), DIMENSION(:, :), POINTER :: data_d, vec_res
575 TYPE(dbcsr_distribution_type) :: dist
576 TYPE(dbcsr_iterator_type) :: iter
577 TYPE(fast_vec_access_type) :: fast_vec_col, fast_vec_row
578 TYPE(mp_comm_type) :: prow_group
579
580 CALL timeset(routinen, handle)
581 ithread = 0
582
583 ! Collect some data about the parallel environment. We will use them later to move the vector around
584 CALL dbcsr_get_info(matrix, distribution=dist)
585 CALL dbcsr_distribution_get(dist, prow_group=prow_handle, myprow=myprow, mypcol=mypcol)
586
587 CALL prow_group%set_handle(prow_handle)
588
589 CALL create_fast_row_vec_access(work_row, fast_vec_row)
590 CALL create_fast_col_vec_access(work_col, fast_vec_col)
591
592 ! Transfer the correct parts of the input vector to the correct locations so we can do a local multiply
593 CALL dbcsr_col_vec_to_rep_row(vec_in, work_col, work_row, fast_vec_col)
594
595 ! Set the work vector for the results to 0
596 CALL dbcsr_set(work_col, 0.0_real_8)
597
598 ! Perform the local multiply. Here we exploit, that we have the blocks replicated on the mpi processes
599 ! It is important to note, that the input and result vector are distributed differently (row wise, col wise respectively)
600 CALL timeset(routinen//"_local_mm", handle1)
601
602!$OMP PARALLEL DEFAULT(NONE) PRIVATE(row,col,iter,data_d,ithread,pcol,prow,m,n,k) &
603!$OMP SHARED(matrix,fast_vec_col,fast_vec_row)
604!$ ithread = omp_get_thread_num()
605 CALL dbcsr_iterator_start(iter, matrix, shared=.false.)
606 DO WHILE (dbcsr_iterator_blocks_left(iter))
607 CALL dbcsr_iterator_next_block(iter, row, col, data_d)
608 prow = hash_table_get(fast_vec_col%hash_table, row)
609 IF (fast_vec_col%blk_map(prow)%assigned_thread .NE. ithread) cycle
610 m = SIZE(fast_vec_col%blk_map(prow)%ptr, 1)
611 n = SIZE(fast_vec_col%blk_map(prow)%ptr, 2)
612 k = SIZE(data_d, 2)
613 IF ((m .EQ. 0) .OR. (n .EQ. 0) .OR. (k .EQ. 0)) cycle
614 cpassert(n .EQ. 1)
615 pcol = hash_table_get(fast_vec_row%hash_table, col)
616 CALL dgemm('N', 'T', m, n, k, &
617 1.0_dp, data_d, m, &
618 fast_vec_row%blk_map(pcol)%ptr, n, &
619 1.0_dp, &
620 fast_vec_col%blk_map(prow)%ptr, m)
621 END DO
622 CALL dbcsr_iterator_stop(iter)
623!$OMP END PARALLEL
624
625 CALL timestop(handle1)
626
627 ! sum all the data onto the first processor col where the original vector is stored
628 data_vec => dbcsr_get_data_p(work_col)
629 CALL dbcsr_get_info(matrix=work_col, nfullrows_local=nrows, nfullcols_local=ncols)
630 CALL prow_group%sum(data_vec(1:nrows*ncols))
631
632 ! Local copy on the first mpi col (as this is the localtion of the vec_res blocks) of the result vector
633 ! from the replicated to the original vector. Let's play it safe and use the iterator
634 CALL dbcsr_iterator_start(iter, vec_out)
635 DO WHILE (dbcsr_iterator_blocks_left(iter))
636 CALL dbcsr_iterator_next_block(iter, row, col, vec_res)
637 prow = hash_table_get(fast_vec_col%hash_table, row)
638 IF (ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
639 vec_res(:, :) = beta*vec_res(:, :) + alpha*fast_vec_col%blk_map(prow)%ptr(:, :)
640 ELSE
641 vec_res(:, :) = beta*vec_res(:, :)
642 END IF
643 END DO
644 CALL dbcsr_iterator_stop(iter)
645
646 CALL release_fast_vec_access(fast_vec_row)
647 CALL release_fast_vec_access(fast_vec_col)
648
649 CALL timestop(handle)
650
651 END SUBROUTINE dbcsr_matrix_vector_mult
652
653! **************************************************************************************************
654!> \brief ...
655!> \param matrix ...
656!> \param vec_in ...
657!> \param vec_out ...
658!> \param alpha ...
659!> \param beta ...
660!> \param work_row ...
661!> \param work_col ...
662!> \param skip_diag ...
663! **************************************************************************************************
664 SUBROUTINE dbcsr_matrixt_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col, skip_diag)
665 TYPE(dbcsr_type) :: matrix, vec_in, vec_out
666 REAL(kind=real_8) :: alpha, beta
667 TYPE(dbcsr_type) :: work_row, work_col
668 LOGICAL :: skip_diag
669
670 CHARACTER(LEN=*), PARAMETER :: routinen = 'dbcsr_matrixT_vector_mult'
671
672 INTEGER :: col, col_size, handle, handle1, ithread, &
673 mypcol, myprow, ncols, nrows, pcol, &
674 pcol_handle, prow, prow_handle, row, &
675 row_size
676 REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
677 REAL(kind=real_8), DIMENSION(:, :), POINTER :: data_d, vec_bl, vec_res
678 TYPE(dbcsr_distribution_type) :: dist
679 TYPE(dbcsr_iterator_type) :: iter
680 TYPE(fast_vec_access_type) :: fast_vec_col, fast_vec_row
681 TYPE(mp_comm_type) :: pcol_group, prow_group
682
683 CALL timeset(routinen, handle)
684 ithread = 0
685
686 ! Collect some data about the parallel environment. We will use them later to move the vector around
687 CALL dbcsr_get_info(matrix, distribution=dist)
688 CALL dbcsr_distribution_get(dist, prow_group=prow_handle, pcol_group=pcol_handle, myprow=myprow, mypcol=mypcol)
689
690 CALL prow_group%set_handle(prow_handle)
691 CALL pcol_group%set_handle(pcol_handle)
692
693 CALL create_fast_row_vec_access(work_row, fast_vec_row)
694 CALL create_fast_col_vec_access(work_col, fast_vec_col)
695
696 ! Set the work vector for the results to 0
697 CALL dbcsr_set(work_row, 0.0_real_8)
698
699 ! Transfer the correct parts of the input vector to the replicated vector on proc_col 0
700 CALL dbcsr_iterator_start(iter, vec_in)
701 DO WHILE (dbcsr_iterator_blocks_left(iter))
702 CALL dbcsr_iterator_next_block(iter, row, col, vec_bl, row_size=row_size, col_size=col_size)
703 prow = hash_table_get(fast_vec_col%hash_table, row)
704 fast_vec_col%blk_map(prow)%ptr(1:row_size, 1:col_size) = vec_bl(1:row_size, 1:col_size)
705 END DO
706 CALL dbcsr_iterator_stop(iter)
707 ! Replicate the data on all processore in the row
708 data_vec => dbcsr_get_data_p(work_col)
709 CALL prow_group%bcast(data_vec, 0)
710
711 ! Perform the local multiply. Here it is obvious why the vectors are replicated on the mpi rows and cols
712 CALL timeset(routinen//"local_mm", handle1)
713 CALL dbcsr_get_info(matrix=work_col, nfullcols_local=ncols)
714!$OMP PARALLEL DEFAULT(NONE) PRIVATE(row,col,iter,data_d,row_size,col_size,ithread,prow,pcol) &
715!$OMP SHARED(matrix,fast_vec_row,fast_vec_col,skip_diag,ncols)
716!$ ithread = omp_get_thread_num()
717 CALL dbcsr_iterator_start(iter, matrix, shared=.false.)
718 DO WHILE (dbcsr_iterator_blocks_left(iter))
719 CALL dbcsr_iterator_next_block(iter, row, col, data_d, row_size=row_size, col_size=col_size)
720 IF (skip_diag .AND. col == row) cycle
721 prow = hash_table_get(fast_vec_col%hash_table, row)
722 pcol = hash_table_get(fast_vec_row%hash_table, col)
723 IF (ASSOCIATED(fast_vec_row%blk_map(pcol)%ptr) .AND. &
724 ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
725 IF (fast_vec_row%blk_map(pcol)%assigned_thread .NE. ithread) cycle
726 fast_vec_row%blk_map(pcol)%ptr = fast_vec_row%blk_map(pcol)%ptr + &
727 matmul(transpose(fast_vec_col%blk_map(prow)%ptr), data_d)
728 ELSE
729 prow = hash_table_get(fast_vec_row%hash_table, row)
730 pcol = hash_table_get(fast_vec_col%hash_table, col)
731 IF (fast_vec_row%blk_map(prow)%assigned_thread .NE. ithread) cycle
732 fast_vec_row%blk_map(prow)%ptr = fast_vec_row%blk_map(prow)%ptr + &
733 matmul(transpose(fast_vec_col%blk_map(pcol)%ptr), transpose(data_d))
734 END IF
735 END DO
736 CALL dbcsr_iterator_stop(iter)
737!$OMP END PARALLEL
738
739 CALL timestop(handle1)
740
741 ! sum all the data within a processor column to obtain the replicated result
742 data_vec => dbcsr_get_data_p(work_row)
743 ! we use the replicated vector but the final answer is only summed to proc_col 0 for efficiency
744 CALL dbcsr_get_info(matrix=work_row, nfullrows_local=nrows, nfullcols_local=ncols)
745 CALL pcol_group%sum(data_vec(1:nrows*ncols))
746
747 ! Convert the result to a column wise distribution
748 CALL dbcsr_rep_row_to_rep_col_vec(work_col, work_row, fast_vec_row)
749
750 ! Create_the final vector by summing it to the result vector which lives on proc_col 0
751 CALL dbcsr_iterator_start(iter, vec_out)
752 DO WHILE (dbcsr_iterator_blocks_left(iter))
753 CALL dbcsr_iterator_next_block(iter, row, col, vec_res, row_size=row_size)
754 prow = hash_table_get(fast_vec_col%hash_table, row)
755 IF (ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
756 vec_res(:, :) = beta*vec_res(:, :) + alpha*fast_vec_col%blk_map(prow)%ptr(:, :)
757 ELSE
758 vec_res(:, :) = beta*vec_res(:, :)
759 END IF
760 END DO
761 CALL dbcsr_iterator_stop(iter)
762
763 CALL timestop(handle)
764
765 END SUBROUTINE dbcsr_matrixt_vector_mult
766
767! **************************************************************************************************
768!> \brief ...
769!> \param vec_in ...
770!> \param rep_col_vec ...
771!> \param rep_row_vec ...
772!> \param fast_vec_col ...
773! **************************************************************************************************
774 SUBROUTINE dbcsr_col_vec_to_rep_row(vec_in, rep_col_vec, rep_row_vec, fast_vec_col)
775 TYPE(dbcsr_type) :: vec_in, rep_col_vec, rep_row_vec
776 TYPE(fast_vec_access_type), INTENT(IN) :: fast_vec_col
777
778 CHARACTER(LEN=*), PARAMETER :: routinen = 'dbcsr_col_vec_to_rep_row'
779
780 INTEGER :: col, handle, mypcol, myprow, ncols, &
781 nrows, pcol_handle, prow_handle, row
782 INTEGER, DIMENSION(:), POINTER :: local_cols, row_dist
783 REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec, data_vec_rep
784 REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_row
785 TYPE(dbcsr_distribution_type) :: dist_in, dist_rep_col
786 TYPE(dbcsr_iterator_type) :: iter
787 TYPE(mp_comm_type) :: pcol_group, prow_group
788
789 CALL timeset(routinen, handle)
790
791 ! get information about the parallel environment
792 CALL dbcsr_get_info(vec_in, distribution=dist_in)
793 CALL dbcsr_distribution_get(dist_in, &
794 prow_group=prow_handle, &
795 pcol_group=pcol_handle, &
796 myprow=myprow, &
797 mypcol=mypcol)
798
799 CALL prow_group%set_handle(prow_handle)
800 CALL pcol_group%set_handle(pcol_handle)
801
802 ! Get the vector which tells us which blocks are local to which processor row in the col vec
803 CALL dbcsr_get_info(rep_col_vec, distribution=dist_rep_col)
804 CALL dbcsr_distribution_get(dist_rep_col, row_dist=row_dist)
805
806 ! Copy the local vector to the replicated on the first processor column (this is where vec_in lives)
807 CALL dbcsr_get_info(matrix=rep_col_vec, nfullrows_local=nrows, nfullcols_local=ncols)
808 data_vec_rep => dbcsr_get_data_p(rep_col_vec)
809 data_vec => dbcsr_get_data_p(vec_in)
810 IF (mypcol == 0) data_vec_rep(1:nrows*ncols) = data_vec(1:nrows*ncols)
811 ! Replicate the data along the row
812 CALL prow_group%bcast(data_vec_rep(1:nrows*ncols), 0)
813
814 ! Here it gets a bit tricky as we are dealing with two different parallel layouts:
815 ! The rep_col_vec contains all blocks local to the row distribution of the vector.
816 ! The rep_row_vec only needs the fraction which is local to the col distribution.
817 ! However in most cases this won't the complete set of block which can be obtained from col_vector p_row i
818 ! Anyway, as the blocks don't repeat in the col_vec, a different fraction of the row vec will be available
819 ! on every replica in the processor column, by summing along the column we end up with the complete vector everywhere
820 ! Hope this clarifies the idea
821 CALL dbcsr_set(rep_row_vec, 0.0_real_8)
822 CALL dbcsr_get_info(matrix=rep_row_vec, nfullrows_local=nrows, local_cols=local_cols, nfullcols_local=ncols)
823 CALL dbcsr_iterator_start(iter, rep_row_vec)
824 DO WHILE (dbcsr_iterator_blocks_left(iter))
825 CALL dbcsr_iterator_next_block(iter, row, col, vec_row)
826 IF (row_dist(col) == myprow) THEN
827 vec_row = transpose(fast_vec_col%blk_map(hash_table_get(fast_vec_col%hash_table, col))%ptr)
828 END IF
829 END DO
830 CALL dbcsr_iterator_stop(iter)
831 CALL dbcsr_get_info(matrix=rep_row_vec, nfullrows_local=nrows, nfullcols_local=ncols)
832 data_vec_rep => dbcsr_get_data_p(rep_row_vec)
833 CALL pcol_group%sum(data_vec_rep(1:ncols*nrows))
834
835 CALL timestop(handle)
836
837 END SUBROUTINE dbcsr_col_vec_to_rep_row
838
839! **************************************************************************************************
840!> \brief ...
841!> \param rep_col_vec ...
842!> \param rep_row_vec ...
843!> \param fast_vec_row ...
844!> \param fast_vec_col_add ...
845! **************************************************************************************************
846 SUBROUTINE dbcsr_rep_row_to_rep_col_vec(rep_col_vec, rep_row_vec, fast_vec_row, fast_vec_col_add)
847 TYPE(dbcsr_type) :: rep_col_vec, rep_row_vec
848 TYPE(fast_vec_access_type) :: fast_vec_row
849 TYPE(fast_vec_access_type), OPTIONAL :: fast_vec_col_add
850
851 CHARACTER(LEN=*), PARAMETER :: routinen = 'dbcsr_rep_row_to_rep_col_vec'
852
853 INTEGER :: col, handle, mypcol, myprow, ncols, &
854 nrows, prow_handle, row
855 INTEGER, DIMENSION(:), POINTER :: col_dist
856 REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec_rep
857 REAL(kind=real_8), DIMENSION(:, :), POINTER :: vec_col
858 TYPE(dbcsr_distribution_type) :: dist_rep_col, dist_rep_row
859 TYPE(dbcsr_iterator_type) :: iter
860 TYPE(mp_comm_type) :: prow_group
861
862 CALL timeset(routinen, handle)
863
864 ! get information about the parallel environment
865 CALL dbcsr_get_info(matrix=rep_col_vec, distribution=dist_rep_col)
866 CALL dbcsr_distribution_get(dist_rep_col, &
867 prow_group=prow_handle, &
868 myprow=myprow, &
869 mypcol=mypcol)
870
871 CALL prow_group%set_handle(prow_handle)
872
873 ! Get the vector which tells us which blocks are local to which processor col in the row vec
874 CALL dbcsr_get_info(matrix=rep_row_vec, distribution=dist_rep_row)
875 CALL dbcsr_distribution_get(dist_rep_row, col_dist=col_dist)
876
877 ! The same trick as described above with opposite direction
878 CALL dbcsr_set(rep_col_vec, 0.0_real_8)
879 CALL dbcsr_iterator_start(iter, rep_col_vec)
880 DO WHILE (dbcsr_iterator_blocks_left(iter))
881 CALL dbcsr_iterator_next_block(iter, row, col, vec_col)
882 IF (col_dist(row) == mypcol) THEN
883 vec_col = transpose(fast_vec_row%blk_map(hash_table_get(fast_vec_row%hash_table, row))%ptr)
884 END IF
885 ! this one is special and allows to add the elements of a not yet summed replicated
886 ! column vector as it appears in M*V(row_rep) as result. Save an parallel summation in the symmetric case
887 IF (PRESENT(fast_vec_col_add)) THEN
888 vec_col = vec_col + fast_vec_col_add%blk_map(hash_table_get(fast_vec_col_add%hash_table, row))%ptr(:, :)
889 END IF
890 END DO
891 CALL dbcsr_iterator_stop(iter)
892 CALL dbcsr_get_info(matrix=rep_col_vec, nfullrows_local=nrows, nfullcols_local=ncols)
893 data_vec_rep => dbcsr_get_data_p(rep_col_vec)
894 CALL prow_group%sum(data_vec_rep(1:nrows*ncols))
895
896 CALL timestop(handle)
897
898 END SUBROUTINE dbcsr_rep_row_to_rep_col_vec
899
900! **************************************************************************************************
901!> \brief ...
902!> \param matrix ...
903!> \param vec_in ...
904!> \param vec_out ...
905!> \param alpha ...
906!> \param beta ...
907!> \param work_row ...
908!> \param work_col ...
909! **************************************************************************************************
910 SUBROUTINE dbcsr_sym_matrix_vector_mult(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
911 TYPE(dbcsr_type) :: matrix, vec_in, vec_out
912 REAL(kind=real_8) :: alpha, beta
913 TYPE(dbcsr_type) :: work_row, work_col
914
915 CHARACTER(LEN=*), PARAMETER :: routinen = 'dbcsr_sym_matrix_vector_mult'
916
917 INTEGER :: col, handle, handle1, ithread, mypcol, &
918 myprow, ncols, nrows, pcol, &
919 pcol_handle, prow, row, rpcol, rprow, &
920 vec_dim
921 REAL(kind=real_8), DIMENSION(:), POINTER :: data_vec
922 REAL(kind=real_8), DIMENSION(:, :), POINTER :: data_d, vec_res
923 TYPE(dbcsr_distribution_type) :: dist
924 TYPE(dbcsr_iterator_type) :: iter
925 TYPE(dbcsr_type) :: result_col, result_row
926 TYPE(fast_vec_access_type) :: fast_vec_col, fast_vec_row, &
927 res_fast_vec_col, res_fast_vec_row
928 TYPE(mp_comm_type) :: pcol_group
929
930 CALL timeset(routinen, handle)
931 ithread = 0
932 ! We need some work matrices as we try to exploit operations on the replicated vectors which are duplicated otherwise
933 CALL dbcsr_get_info(matrix=vec_in, nfullcols_total=vec_dim)
934 ! This is a performance hack as the new creation of a replicated vector is a fair bit more expensive
935 CALL dbcsr_set(work_col, 0.0_real_8)
936 CALL dbcsr_copy(result_col, work_col)
937 CALL dbcsr_set(work_row, 0.0_real_8)
938 CALL dbcsr_copy(result_row, work_row)
939
940 ! Collect some data about the parallel environment. We will use them later to move the vector around
941 CALL dbcsr_get_info(matrix=matrix, distribution=dist)
942 CALL dbcsr_distribution_get(dist, pcol_group=pcol_handle, myprow=myprow, mypcol=mypcol)
943
944 CALL pcol_group%set_handle(pcol_handle)
945
946 CALL create_fast_row_vec_access(work_row, fast_vec_row)
947 CALL create_fast_col_vec_access(work_col, fast_vec_col)
948 CALL create_fast_row_vec_access(result_row, res_fast_vec_row)
949 CALL create_fast_col_vec_access(result_col, res_fast_vec_col)
950
951 ! Transfer the correct parts of the input vector to the correct locations so we can do a local multiply
952 CALL dbcsr_col_vec_to_rep_row(vec_in, work_col, work_row, fast_vec_col)
953
954 ! Probably I should rename the routine above as it delivers both the replicated row and column vector
955
956 ! Perform the local multiply. Here we exploit, that we have the blocks replicated on the mpi processes
957 ! It is important to note, that the input and result vector are distributed differently (row wise, col wise respectively)
958 CALL timeset(routinen//"_local_mm", handle1)
959
960 !------ perform the multiplication, we have to take car to take the correct blocks ----------
961
962!$OMP PARALLEL DEFAULT(NONE) PRIVATE(row,col,iter,data_d,ithread,pcol,prow,rpcol,rprow) &
963!$OMP SHARED(matrix,fast_vec_row,res_fast_vec_col,res_fast_vec_row,fast_vec_col)
964!$ ithread = omp_get_thread_num()
965 CALL dbcsr_iterator_start(iter, matrix, shared=.false.)
966 DO WHILE (dbcsr_iterator_blocks_left(iter))
967 CALL dbcsr_iterator_next_block(iter, row, col, data_d)
968 pcol = hash_table_get(fast_vec_row%hash_table, col)
969 rprow = hash_table_get(res_fast_vec_col%hash_table, row)
970 IF (ASSOCIATED(fast_vec_row%blk_map(pcol)%ptr) .AND. &
971 ASSOCIATED(res_fast_vec_col%blk_map(rprow)%ptr)) THEN
972 IF (res_fast_vec_col%blk_map(rprow)%assigned_thread .EQ. ithread) THEN
973 res_fast_vec_col%blk_map(rprow)%ptr = res_fast_vec_col%blk_map(rprow)%ptr + &
974 matmul(data_d, transpose(fast_vec_row%blk_map(pcol)%ptr))
975 END IF
976 prow = hash_table_get(fast_vec_col%hash_table, row)
977 rpcol = hash_table_get(res_fast_vec_row%hash_table, col)
978 IF (res_fast_vec_row%blk_map(rpcol)%assigned_thread .EQ. ithread .AND. row .NE. col) THEN
979 res_fast_vec_row%blk_map(rpcol)%ptr = res_fast_vec_row%blk_map(rpcol)%ptr + &
980 matmul(transpose(fast_vec_col%blk_map(prow)%ptr), data_d)
981 END IF
982 ELSE
983 rpcol = hash_table_get(res_fast_vec_col%hash_table, col)
984 prow = hash_table_get(fast_vec_row%hash_table, row)
985 IF (res_fast_vec_col%blk_map(rpcol)%assigned_thread .EQ. ithread) THEN
986 res_fast_vec_col%blk_map(rpcol)%ptr = res_fast_vec_col%blk_map(rpcol)%ptr + &
987 transpose(matmul(fast_vec_row%blk_map(prow)%ptr, data_d))
988 END IF
989 rprow = hash_table_get(res_fast_vec_row%hash_table, row)
990 pcol = hash_table_get(fast_vec_col%hash_table, col)
991 IF (res_fast_vec_row%blk_map(rprow)%assigned_thread .EQ. ithread .AND. row .NE. col) THEN
992 res_fast_vec_row%blk_map(rprow)%ptr = res_fast_vec_row%blk_map(rprow)%ptr + &
993 transpose(matmul(data_d, fast_vec_col%blk_map(pcol)%ptr))
994 END IF
995 END IF
996 END DO
997 CALL dbcsr_iterator_stop(iter)
998!$OMP END PARALLEL
999
1000 CALL timestop(handle1)
1001
1002 ! sum all the data within a processor column to obtain the replicated result from lower
1003 data_vec => dbcsr_get_data_p(result_row)
1004 CALL dbcsr_get_info(matrix=result_row, nfullrows_local=nrows, nfullcols_local=ncols)
1005
1006 CALL pcol_group%sum(data_vec(1:nrows*ncols))
1007
1008 ! Convert the results to a column wise distribution, this is a bit involved as the result_row is fully replicated
1009 ! While the result_col still has the partial results in parallel. The routine below takes care of that and saves a
1010 ! parallel summation. Of the res_row vectors are created only taking the appropriate element (0 otherwise) while the res_col
1011 ! parallel bits are locally added. The sum magically creates the correct vector
1012 CALL dbcsr_rep_row_to_rep_col_vec(work_col, result_row, res_fast_vec_row, res_fast_vec_col)
1013
1014 ! Create_the final vector by summing it to the result vector which lives on proc_col 0 lower
1015 CALL dbcsr_iterator_start(iter, vec_out)
1016 DO WHILE (dbcsr_iterator_blocks_left(iter))
1017 CALL dbcsr_iterator_next_block(iter, row, col, vec_res)
1018 prow = hash_table_get(fast_vec_col%hash_table, row)
1019 IF (ASSOCIATED(fast_vec_col%blk_map(prow)%ptr)) THEN
1020 vec_res(:, :) = beta*vec_res(:, :) + alpha*(fast_vec_col%blk_map(prow)%ptr(:, :))
1021 ELSE
1022 vec_res(:, :) = beta*vec_res(:, :)
1023 END IF
1024 END DO
1025 CALL dbcsr_iterator_stop(iter)
1026
1027 CALL release_fast_vec_access(fast_vec_row)
1028 CALL release_fast_vec_access(fast_vec_col)
1029 CALL release_fast_vec_access(res_fast_vec_row)
1030 CALL release_fast_vec_access(res_fast_vec_col)
1031
1032 CALL dbcsr_release(result_row); CALL dbcsr_release(result_col)
1033
1034 CALL timestop(handle)
1035
1036 END SUBROUTINE dbcsr_sym_matrix_vector_mult
1037
1038END MODULE arnoldi_vector
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
operations for skinny matrices/vectors expressed in dbcsr form
subroutine, public create_replicated_col_vec_from_matrix(dbcsr_vec, matrix, ncol)
creates a col vector like object whose blocks can be replicated along the processor row and has the s...
subroutine, public create_row_vec_from_matrix(dbcsr_vec, matrix, nrow)
creates a dbcsr row vector like object which lives on proc_row 0 and has the same row dist as the tem...
subroutine, public create_col_vec_from_matrix(dbcsr_vec, matrix, ncol)
creates a dbcsr col vector like object which lives on proc_col 0 and has the same row dist as the tem...
subroutine, public create_replicated_row_vec_from_matrix(dbcsr_vec, matrix, nrow)
creates a row vector like object whose blocks can be replicated along the processor col and has the s...
subroutine, public dbcsr_matrix_colvec_multiply(matrix, vec_in, vec_out, alpha, beta, work_row, work_col)
the real driver routine for the multiply, not all symmetries implemented yet
subroutine, public dbcsr_distribution_release(dist)
...
subroutine, public dbcsr_distribution_new(dist, template, group, pgrid, row_dist, col_dist, reuse_arrays)
...
subroutine, public dbcsr_iterator_next_block(iterator, row, column, block, block_number_argument_has_been_removed, row_size, col_size, row_offset, col_offset)
...
logical function, public dbcsr_iterator_blocks_left(iterator)
...
subroutine, public dbcsr_iterator_stop(iterator)
...
subroutine, public dbcsr_copy(matrix_b, matrix_a, name, keep_sparsity, keep_imaginary)
...
subroutine, public dbcsr_get_info(matrix, nblkrows_total, nblkcols_total, nfullrows_total, nfullcols_total, nblkrows_local, nblkcols_local, nfullrows_local, nfullcols_local, my_prow, my_pcol, local_rows, local_cols, proc_row_dist, proc_col_dist, row_blk_size, col_blk_size, row_blk_offset, col_blk_offset, distribution, name, matrix_type, group)
...
real(kind=dp) function, dimension(:), pointer, public dbcsr_get_data_p(matrix, lb, ub)
...
subroutine, public dbcsr_iterator_start(iterator, matrix, shared, dynamic, dynamic_byrows)
...
subroutine, public dbcsr_set(matrix, alpha)
...
subroutine, public dbcsr_release(matrix)
...
subroutine, public dbcsr_distribution_get(dist, row_dist, col_dist, nrows, ncols, has_threads, group, mynode, numnodes, nprows, npcols, myprow, mypcol, pgrid, subgroups_defined, prow_group, pcol_group)
...
subroutine, public dbcsr_reserve_all_blocks(matrix)
Reserves all blocks.
Defines the basic variable types.
Definition kinds.F:23
integer, parameter, public dp
Definition kinds.F:34
integer, parameter, public real_8
Definition kinds.F:41
Interface to the message passing library MPI.