27 USE iso_c_binding,
ONLY: c_double,&
41 #include "../base/base_uses.f90"
53 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'pw_gpu'
54 LOGICAL,
PARAMETER,
PRIVATE :: debug_this_module = .false.
65 SUBROUTINE pw_gpu_init_c()
BIND(C, name="pw_gpu_init")
66 END SUBROUTINE pw_gpu_init_c
70 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
84 SUBROUTINE pw_gpu_finalize_c()
BIND(C, name="pw_gpu_finalize")
85 END SUBROUTINE pw_gpu_finalize_c
89 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
90 CALL pw_gpu_finalize_c()
104 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw1
105 TYPE(pw_c1d_gs_type),
INTENT(INOUT) :: pw2
106 REAL(kind=
dp),
INTENT(IN) :: scale
108 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_r3dc1d_3d'
110 COMPLEX(KIND=dp),
POINTER :: ptr_pwout
111 INTEGER :: handle, l1, l2, l3, ngpts
112 INTEGER,
DIMENSION(:),
POINTER :: npts
113 INTEGER,
POINTER :: ptr_ghatmap
114 REAL(kind=
dp),
POINTER :: ptr_pwin
116 SUBROUTINE pw_gpu_cfffg_c(din, zout, ghatmap, npts, ngpts, scale)
BIND(C, name="pw_gpu_cfffg")
118 TYPE(c_ptr),
INTENT(IN),
VALUE :: din
119 TYPE(c_ptr),
VALUE :: zout
120 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
121 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
122 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: ngpts
123 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
125 END SUBROUTINE pw_gpu_cfffg_c
128 CALL timeset(routinen, handle)
130 ngpts =
SIZE(pw2%pw_grid%gsq)
131 l1 = lbound(pw1%array, 1)
132 l2 = lbound(pw1%array, 2)
133 l3 = lbound(pw1%array, 3)
134 npts => pw1%pw_grid%npts
137 ptr_pwin => pw1%array(l1, l2, l3)
138 ptr_pwout => pw2%array(1)
141 ptr_ghatmap => pw2%pw_grid%g_hatmap(1, 1)
144 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
145 CALL pw_gpu_cfffg_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, ngpts, scale)
148 cpabort(
"Compiled without pw offloading.")
151 CALL timestop(handle)
162 TYPE(pw_c1d_gs_type),
INTENT(IN) :: pw1
163 TYPE(pw_r3d_rs_type),
INTENT(INOUT) :: pw2
164 REAL(kind=
dp),
INTENT(IN) :: scale
166 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_c1dr3d_3d'
168 COMPLEX(KIND=dp),
POINTER :: ptr_pwin
169 INTEGER :: handle, l1, l2, l3, ngpts, nmaps
170 INTEGER,
DIMENSION(:),
POINTER :: npts
171 INTEGER,
POINTER :: ptr_ghatmap
172 REAL(kind=
dp),
POINTER :: ptr_pwout
174 SUBROUTINE pw_gpu_sfffc_c(zin, dout, ghatmap, npts, ngpts, nmaps, scale)
BIND(C, name="pw_gpu_sfffc")
176 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
177 TYPE(c_ptr),
VALUE :: dout
178 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
179 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
180 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: ngpts, nmaps
181 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
182 END SUBROUTINE pw_gpu_sfffc_c
185 CALL timeset(routinen, handle)
187 ngpts =
SIZE(pw1%pw_grid%gsq)
188 l1 = lbound(pw2%array, 1)
189 l2 = lbound(pw2%array, 2)
190 l3 = lbound(pw2%array, 3)
191 npts => pw1%pw_grid%npts
194 ptr_pwin => pw1%array(1)
195 ptr_pwout => pw2%array(l1, l2, l3)
198 nmaps =
SIZE(pw1%pw_grid%g_hatmap, 2)
199 ptr_ghatmap => pw1%pw_grid%g_hatmap(1, 1)
202 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
203 CALL pw_gpu_sfffc_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, ngpts, nmaps, scale)
206 cpabort(
"Compiled without pw offloading")
209 CALL timestop(handle)
220 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw1
221 TYPE(pw_c1d_gs_type),
INTENT(INOUT) :: pw2
222 REAL(kind=
dp),
INTENT(IN) :: scale
224 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_r3dc1d_3d_ps'
226 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: grays, pbuf, qbuf, rbuf, sbuf
227 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER :: tbuf
228 INTEGER :: g_pos, handle, lg, lmax, mg, mmax, mx2, &
229 mz2, n1, n2, ngpts, nmax, numtask, &
230 numtask_g, numtask_r, rp
231 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: p2p
232 INTEGER,
DIMENSION(2) :: r_dim, r_pos
233 INTEGER,
DIMENSION(:),
POINTER :: n, nloc, nyzray
234 INTEGER,
DIMENSION(:, :, :, :),
POINTER :: bo
237 TYPE(mp_cart_type) :: rs_group
238 TYPE(mp_comm_type) :: gs_group
240 CALL timeset(routinen, handle)
243 n => pw1%pw_grid%npts
244 nloc => pw1%pw_grid%npts_local
245 grays => pw1%pw_grid%grays
246 ngpts = nloc(1)*nloc(2)*nloc(3)
249 IF (pw1%pw_grid%para%ray_distribution)
THEN
250 gs_group = pw1%pw_grid%para%group
251 rs_group = pw1%pw_grid%para%rs_group
252 nyzray => pw1%pw_grid%para%nyzray
253 bo => pw1%pw_grid%para%bo
255 numtask_g = gs_group%num_pe
256 g_pos = gs_group%mepos
257 numtask_r = rs_group%num_pe
258 r_dim = rs_group%num_pe_cart
259 r_pos = rs_group%mepos_cart
260 IF (numtask_g /= numtask_r)
THEN
261 cpabort(
"Real space and G space groups are different.")
268 lmax = max(lg, (ngpts/mmax + 1))
270 ALLOCATE (p2p(0:numtask - 1))
272 CALL gs_group%rank_compare(rs_group, p2p)
275 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
276 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
277 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
278 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
279 nmax = max((2*n2)/numtask, 2)*mx2*mz2
280 nmax = max(nmax, n1*maxval(nyzray))
282 fft_scratch_size%nx = nloc(1)
283 fft_scratch_size%ny = nloc(2)
284 fft_scratch_size%nz = nloc(3)
285 fft_scratch_size%lmax = lmax
286 fft_scratch_size%mmax = mmax
287 fft_scratch_size%mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
288 fft_scratch_size%mx2 = mx2
289 fft_scratch_size%my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
290 fft_scratch_size%mz2 = mz2
291 fft_scratch_size%lg = lg
292 fft_scratch_size%mg = mg
293 fft_scratch_size%nbx = maxval(bo(2, 1, :, 2))
294 fft_scratch_size%nbz = maxval(bo(2, 3, :, 2))
295 fft_scratch_size%mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
296 fft_scratch_size%mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
297 fft_scratch_size%mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
298 fft_scratch_size%nmax = nmax
299 fft_scratch_size%nmray = maxval(nyzray)
300 fft_scratch_size%nyzray = nyzray(g_pos)
301 fft_scratch_size%gs_group = gs_group
302 fft_scratch_size%rs_group = rs_group
303 fft_scratch_size%g_pos = g_pos
304 fft_scratch_size%r_pos = r_pos
305 fft_scratch_size%r_dim = r_dim
306 fft_scratch_size%numtask = numtask
308 IF (r_dim(2) > 1)
THEN
314 cpabort(
"This processor distribution is not supported.")
316 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
319 qbuf => fft_scratch%p2buf
320 rbuf => fft_scratch%p3buf
321 pbuf => fft_scratch%p4buf
322 sbuf => fft_scratch%p5buf
325 CALL pw_gpu_cf(pw1, qbuf)
328 CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
334 CALL pw_gpu_f(rbuf, pbuf, +1, n(2), mx2*mz2)
337 CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
338 bo(:, :, :, 2), sbuf, fft_scratch)
341 CALL pw_gpu_fg(sbuf, pw2, scale)
352 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
355 tbuf => fft_scratch%tbuf
356 sbuf => fft_scratch%r1buf
359 CALL pw_gpu_cff(pw1, tbuf)
362 CALL yz_to_x(tbuf, gs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
363 bo(:, :, :, 2), sbuf, fft_scratch)
366 CALL pw_gpu_fg(sbuf, pw2, scale)
376 cpabort(
"Not implemented (no ray_distr.) in: pw_gpu_r3dc1d_3d_ps.")
381 CALL timestop(handle)
392 TYPE(pw_c1d_gs_type),
INTENT(IN) :: pw1
393 TYPE(pw_r3d_rs_type),
INTENT(INOUT) :: pw2
394 REAL(kind=
dp),
INTENT(IN) :: scale
396 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_c1dr3d_3d_ps'
398 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: grays, pbuf, qbuf, rbuf, sbuf
399 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER :: tbuf
400 INTEGER :: g_pos, handle, lg, lmax, mg, mmax, mx2, &
401 mz2, n1, n2, ngpts, nmax, numtask, &
402 numtask_g, numtask_r, rp
403 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: p2p
404 INTEGER,
DIMENSION(2) :: r_dim, r_pos
405 INTEGER,
DIMENSION(:),
POINTER :: n, nloc, nyzray
406 INTEGER,
DIMENSION(:, :, :, :),
POINTER :: bo
409 TYPE(mp_cart_type) :: rs_group
410 TYPE(mp_comm_type) :: gs_group
412 CALL timeset(routinen, handle)
415 n => pw1%pw_grid%npts
416 nloc => pw1%pw_grid%npts_local
417 grays => pw1%pw_grid%grays
418 ngpts = nloc(1)*nloc(2)*nloc(3)
421 IF (pw1%pw_grid%para%ray_distribution)
THEN
422 gs_group = pw1%pw_grid%para%group
423 rs_group = pw1%pw_grid%para%rs_group
424 nyzray => pw1%pw_grid%para%nyzray
425 bo => pw1%pw_grid%para%bo
427 numtask_g = gs_group%num_pe
428 g_pos = gs_group%mepos
429 numtask_r = rs_group%num_pe
430 r_dim = rs_group%num_pe_cart
431 r_pos = rs_group%mepos_cart
432 IF (numtask_g /= numtask_r)
THEN
433 cpabort(
"Real space and G space groups are different.")
440 lmax = max(lg, (ngpts/mmax + 1))
442 ALLOCATE (p2p(0:numtask - 1))
444 CALL gs_group%rank_compare(rs_group, p2p)
447 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
448 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
449 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
450 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
451 nmax = max((2*n2)/numtask, 2)*mx2*mz2
452 nmax = max(nmax, n1*maxval(nyzray))
454 fft_scratch_size%nx = nloc(1)
455 fft_scratch_size%ny = nloc(2)
456 fft_scratch_size%nz = nloc(3)
457 fft_scratch_size%lmax = lmax
458 fft_scratch_size%mmax = mmax
459 fft_scratch_size%mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
460 fft_scratch_size%mx2 = mx2
461 fft_scratch_size%my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
462 fft_scratch_size%mz2 = mz2
463 fft_scratch_size%lg = lg
464 fft_scratch_size%mg = mg
465 fft_scratch_size%nbx = maxval(bo(2, 1, :, 2))
466 fft_scratch_size%nbz = maxval(bo(2, 3, :, 2))
467 fft_scratch_size%mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
468 fft_scratch_size%mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
469 fft_scratch_size%mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
470 fft_scratch_size%nmax = nmax
471 fft_scratch_size%nmray = maxval(nyzray)
472 fft_scratch_size%nyzray = nyzray(g_pos)
473 fft_scratch_size%gs_group = gs_group
474 fft_scratch_size%rs_group = rs_group
475 fft_scratch_size%g_pos = g_pos
476 fft_scratch_size%r_pos = r_pos
477 fft_scratch_size%r_dim = r_dim
478 fft_scratch_size%numtask = numtask
480 IF (r_dim(2) > 1)
THEN
486 cpabort(
"This processor distribution is not supported.")
488 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
491 pbuf => fft_scratch%p7buf
492 qbuf => fft_scratch%p4buf
493 rbuf => fft_scratch%p3buf
494 sbuf => fft_scratch%p2buf
497 CALL pw_gpu_sf(pw1, pbuf, scale)
501 CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
502 bo(:, :, :, 2), qbuf, fft_scratch)
508 CALL pw_gpu_f(qbuf, rbuf, -1, n(2), mx2*mz2)
513 CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), sbuf, fft_scratch)
516 CALL pw_gpu_fc(sbuf, pw2)
527 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
530 sbuf => fft_scratch%r1buf
531 tbuf => fft_scratch%tbuf
534 CALL pw_gpu_sf(pw1, sbuf, scale)
538 CALL x_to_yz(sbuf, gs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
539 bo(:, :, :, 2), tbuf, fft_scratch)
542 CALL pw_gpu_ffc(tbuf, pw2)
552 cpabort(
"Not implemented (no ray_distr.) in: pw_gpu_c1dr3d_3d_ps.")
557 CALL timestop(handle)
566 SUBROUTINE pw_gpu_cff(pw1, pwbuf)
567 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw1
568 COMPLEX(KIND=dp),
DIMENSION(:, :, :), &
569 INTENT(INOUT),
TARGET :: pwbuf
571 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_cff'
573 COMPLEX(KIND=dp),
POINTER :: ptr_pwout
574 INTEGER :: handle, l1, l2, l3
575 INTEGER,
DIMENSION(:),
POINTER :: npts
576 REAL(kind=
dp),
POINTER :: ptr_pwin
578 SUBROUTINE pw_gpu_cff_c(din, zout, npts)
BIND(C, name="pw_gpu_cff")
580 TYPE(c_ptr),
INTENT(IN),
VALUE :: din
581 TYPE(c_ptr),
VALUE :: zout
582 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
583 END SUBROUTINE pw_gpu_cff_c
586 CALL timeset(routinen, handle)
589 npts => pw1%pw_grid%npts_local
590 l1 = lbound(pw1%array, 1)
591 l2 = lbound(pw1%array, 2)
592 l3 = lbound(pw1%array, 3)
595 ptr_pwin => pw1%array(l1, l2, l3)
596 ptr_pwout => pwbuf(1, 1, 1)
599 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
600 CALL pw_gpu_cff_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
602 cpabort(
"Compiled without pw offloading")
605 CALL timestop(handle)
606 END SUBROUTINE pw_gpu_cff
614 SUBROUTINE pw_gpu_ffc(pwbuf, pw2)
615 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
INTENT(IN), &
617 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw2
619 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_ffc'
621 COMPLEX(KIND=dp),
POINTER :: ptr_pwin
622 INTEGER :: handle, l1, l2, l3
623 INTEGER,
DIMENSION(:),
POINTER :: npts
624 REAL(kind=
dp),
POINTER :: ptr_pwout
626 SUBROUTINE pw_gpu_ffc_c(zin, dout, npts)
BIND(C, name="pw_gpu_ffc")
628 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
629 TYPE(c_ptr),
VALUE :: dout
630 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
631 END SUBROUTINE pw_gpu_ffc_c
634 CALL timeset(routinen, handle)
637 npts => pw2%pw_grid%npts_local
638 l1 = lbound(pw2%array, 1)
639 l2 = lbound(pw2%array, 2)
640 l3 = lbound(pw2%array, 3)
643 ptr_pwin => pwbuf(1, 1, 1)
644 ptr_pwout => pw2%array(l1, l2, l3)
647 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
648 CALL pw_gpu_ffc_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
650 cpabort(
"Compiled without pw offloading")
653 CALL timestop(handle)
654 END SUBROUTINE pw_gpu_ffc
662 SUBROUTINE pw_gpu_cf(pw1, pwbuf)
663 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw1
664 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT), &
667 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_cf'
669 COMPLEX(KIND=dp),
POINTER :: ptr_pwout
670 INTEGER :: handle, l1, l2, l3
671 INTEGER,
DIMENSION(:),
POINTER :: npts
672 REAL(kind=
dp),
POINTER :: ptr_pwin
674 SUBROUTINE pw_gpu_cf_c(din, zout, npts)
BIND(C, name="pw_gpu_cf")
676 TYPE(c_ptr),
INTENT(IN),
VALUE :: din
677 TYPE(c_ptr),
VALUE :: zout
678 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
679 END SUBROUTINE pw_gpu_cf_c
682 CALL timeset(routinen, handle)
685 npts => pw1%pw_grid%npts_local
686 l1 = lbound(pw1%array, 1)
687 l2 = lbound(pw1%array, 2)
688 l3 = lbound(pw1%array, 3)
691 ptr_pwin => pw1%array(l1, l2, l3)
692 ptr_pwout => pwbuf(1, 1)
695 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
696 CALL pw_gpu_cf_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
698 cpabort(
"Compiled without pw offloading")
700 CALL timestop(handle)
701 END SUBROUTINE pw_gpu_cf
709 SUBROUTINE pw_gpu_fc(pwbuf, pw2)
710 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(IN), &
712 TYPE(pw_r3d_rs_type),
INTENT(IN) :: pw2
714 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_fc'
716 COMPLEX(KIND=dp),
POINTER :: ptr_pwin
717 INTEGER :: handle, l1, l2, l3
718 INTEGER,
DIMENSION(:),
POINTER :: npts
719 REAL(kind=
dp),
POINTER :: ptr_pwout
721 SUBROUTINE pw_gpu_fc_c(zin, dout, npts)
BIND(C, name="pw_gpu_fc")
723 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
724 TYPE(c_ptr),
VALUE :: dout
725 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
726 END SUBROUTINE pw_gpu_fc_c
729 CALL timeset(routinen, handle)
731 npts => pw2%pw_grid%npts_local
732 l1 = lbound(pw2%array, 1)
733 l2 = lbound(pw2%array, 2)
734 l3 = lbound(pw2%array, 3)
737 ptr_pwin => pwbuf(1, 1)
738 ptr_pwout => pw2%array(l1, l2, l3)
741 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
742 CALL pw_gpu_fc_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
744 cpabort(
"Compiled without pw offloading")
747 CALL timestop(handle)
748 END SUBROUTINE pw_gpu_fc
759 SUBROUTINE pw_gpu_f(pwbuf1, pwbuf2, dir, n, m)
760 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(IN), &
762 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT), &
764 INTEGER,
INTENT(IN) :: dir, n, m
766 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_f'
768 COMPLEX(KIND=dp),
POINTER :: ptr_pwin, ptr_pwout
771 SUBROUTINE pw_gpu_f_c(zin, zout, dir, n, m)
BIND(C, name="pw_gpu_f")
773 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
774 TYPE(c_ptr),
VALUE :: zout
775 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: dir, n, m
776 END SUBROUTINE pw_gpu_f_c
779 CALL timeset(routinen, handle)
783 ptr_pwin => pwbuf1(1, 1)
784 ptr_pwout => pwbuf2(1, 1)
787 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
788 CALL pw_gpu_f_c(c_loc(ptr_pwin), c_loc(ptr_pwout), dir, n, m)
791 cpabort(
"Compiled without pw offloading")
795 CALL timestop(handle)
796 END SUBROUTINE pw_gpu_f
804 SUBROUTINE pw_gpu_fg(pwbuf, pw2, scale)
805 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(IN), &
807 TYPE(pw_c1d_gs_type),
INTENT(IN) :: pw2
808 REAL(kind=
dp),
INTENT(IN) :: scale
810 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_fg'
812 COMPLEX(KIND=dp),
POINTER :: ptr_pwin, ptr_pwout
813 INTEGER :: handle, mg, mmax, ngpts
814 INTEGER,
DIMENSION(:),
POINTER :: npts
815 INTEGER,
POINTER :: ptr_ghatmap
817 SUBROUTINE pw_gpu_fg_c(zin, zout, ghatmap, npts, mmax, ngpts, scale)
BIND(C, name="pw_gpu_fg")
819 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
820 TYPE(c_ptr),
VALUE :: zout
821 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
822 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
823 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: mmax, ngpts
824 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
826 END SUBROUTINE pw_gpu_fg_c
829 CALL timeset(routinen, handle)
831 ngpts =
SIZE(pw2%pw_grid%gsq)
832 npts => pw2%pw_grid%npts
834 IF ((npts(1) /= 0) .AND. (ngpts /= 0))
THEN
835 mg =
SIZE(pw2%pw_grid%grays, 2)
839 ptr_pwin => pwbuf(1, 1)
840 ptr_pwout => pw2%array(1)
843 ptr_ghatmap => pw2%pw_grid%g_hatmap(1, 1)
846 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
847 CALL pw_gpu_fg_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, mmax, ngpts, scale)
850 cpabort(
"Compiled without pw offloading")
854 CALL timestop(handle)
855 END SUBROUTINE pw_gpu_fg
864 SUBROUTINE pw_gpu_sf(pw1, pwbuf, scale)
865 TYPE(pw_c1d_gs_type),
INTENT(IN) :: pw1
866 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT), &
868 REAL(kind=
dp),
INTENT(IN) :: scale
870 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_sf'
872 COMPLEX(KIND=dp),
POINTER :: ptr_pwin, ptr_pwout
873 INTEGER :: handle, mg, mmax, ngpts, nmaps
874 INTEGER,
DIMENSION(:),
POINTER :: npts
875 INTEGER,
POINTER :: ptr_ghatmap
877 SUBROUTINE pw_gpu_sf_c(zin, zout, ghatmap, npts, mmax, ngpts, nmaps, scale)
BIND(C, name="pw_gpu_sf")
879 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
880 TYPE(c_ptr),
VALUE :: zout
881 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
882 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
883 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: mmax, ngpts, nmaps
884 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
886 END SUBROUTINE pw_gpu_sf_c
889 CALL timeset(routinen, handle)
891 ngpts =
SIZE(pw1%pw_grid%gsq)
892 npts => pw1%pw_grid%npts
894 IF ((npts(1) /= 0) .AND. (ngpts /= 0))
THEN
895 mg =
SIZE(pw1%pw_grid%grays, 2)
899 ptr_pwin => pw1%array(1)
900 ptr_pwout => pwbuf(1, 1)
903 nmaps =
SIZE(pw1%pw_grid%g_hatmap, 2)
904 ptr_ghatmap => pw1%pw_grid%g_hatmap(1, 1)
907 #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
908 CALL pw_gpu_sf_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, mmax, ngpts, nmaps, scale)
911 cpabort(
"Compiled without pw offloading")
915 CALL timestop(handle)
916 END SUBROUTINE pw_gpu_sf
Defines the basic variable types.
integer, parameter, public dp
Definition of mathematical constants and functions.
complex(kind=dp), parameter, public z_zero
Interface to the message passing library MPI.
subroutine, public pw_gpu_c1dr3d_3d(pw1, pw2, scale)
perform an scatter followed by a fft on the gpu
subroutine, public pw_gpu_c1dr3d_3d_ps(pw1, pw2, scale)
perform an parallel scatter followed by a fft on the gpu
subroutine, public pw_gpu_r3dc1d_3d(pw1, pw2, scale)
perform an fft followed by a gather on the gpu
subroutine, public pw_gpu_init()
Allocates resources on the gpu device for gpu fft acceleration.
subroutine, public pw_gpu_finalize()
Releases resources on the gpu device for gpu fft acceleration.
subroutine, public pw_gpu_r3dc1d_3d_ps(pw1, pw2, scale)
perform an parallel fft followed by a gather on the gpu
integer, parameter, public fullspace