27 USE iso_c_binding,
ONLY: c_double,&
40#include "../base/base_uses.f90"
52 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'pw_gpu'
53 LOGICAL,
PARAMETER,
PRIVATE :: debug_this_module = .false.
64 SUBROUTINE pw_gpu_init_c()
BIND(C, name="pw_gpu_init")
65 END SUBROUTINE pw_gpu_init_c
69#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
83 SUBROUTINE pw_gpu_finalize_c()
BIND(C, name="pw_gpu_finalize")
84 END SUBROUTINE pw_gpu_finalize_c
88#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
89 CALL pw_gpu_finalize_c()
105 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_r3dc1d_3d'
107 COMPLEX(KIND=dp),
POINTER :: ptr_pwout
108 INTEGER :: handle, l1, l2, l3, ngpts
109 INTEGER,
DIMENSION(:),
POINTER :: npts
110 INTEGER,
POINTER :: ptr_ghatmap
111 REAL(kind=
dp) :: scale
112 REAL(kind=
dp),
POINTER :: ptr_pwin
114 SUBROUTINE pw_gpu_cfffg_c(din, zout, ghatmap, npts, ngpts, scale)
BIND(C, name="pw_gpu_cfffg")
116 TYPE(c_ptr),
INTENT(IN),
VALUE :: din
117 TYPE(c_ptr),
VALUE :: zout
118 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
119 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
120 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: ngpts
121 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
123 END SUBROUTINE pw_gpu_cfffg_c
126 CALL timeset(routinen, handle)
128 scale = 1.0_dp/real(pw1%pw_grid%ngpts, kind=
dp)
130 ngpts =
SIZE(pw2%pw_grid%gsq)
131 l1 = lbound(pw1%array, 1)
132 l2 = lbound(pw1%array, 2)
133 l3 = lbound(pw1%array, 3)
134 npts => pw1%pw_grid%npts
137 ptr_pwin => pw1%array(l1, l2, l3)
138 ptr_pwout => pw2%array(1)
141 ptr_ghatmap => pw2%pw_grid%g_hatmap(1, 1)
144#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
145 CALL pw_gpu_cfffg_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, ngpts, scale)
147 cpabort(
"Compiled without pw offloading.")
150 CALL timestop(handle)
163 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_c1dr3d_3d'
165 COMPLEX(KIND=dp),
POINTER :: ptr_pwin
166 INTEGER :: handle, l1, l2, l3, ngpts, nmaps
167 INTEGER,
DIMENSION(:),
POINTER :: npts
168 INTEGER,
POINTER :: ptr_ghatmap
169 REAL(kind=
dp) :: scale
170 REAL(kind=
dp),
POINTER :: ptr_pwout
172 SUBROUTINE pw_gpu_sfffc_c(zin, dout, ghatmap, npts, ngpts, nmaps, scale)
BIND(C, name="pw_gpu_sfffc")
174 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
175 TYPE(c_ptr),
VALUE :: dout
176 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
177 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
178 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: ngpts, nmaps
179 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
180 END SUBROUTINE pw_gpu_sfffc_c
183 CALL timeset(routinen, handle)
187 ngpts =
SIZE(pw1%pw_grid%gsq)
188 l1 = lbound(pw2%array, 1)
189 l2 = lbound(pw2%array, 2)
190 l3 = lbound(pw2%array, 3)
191 npts => pw1%pw_grid%npts
194 ptr_pwin => pw1%array(1)
195 ptr_pwout => pw2%array(l1, l2, l3)
198 nmaps =
SIZE(pw1%pw_grid%g_hatmap, 2)
199 ptr_ghatmap => pw1%pw_grid%g_hatmap(1, 1)
202#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
203 CALL pw_gpu_sfffc_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, ngpts, nmaps, scale)
205 cpabort(
"Compiled without pw offloading")
208 CALL timestop(handle)
221 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_r3dc1d_3d_ps'
223 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: grays, pbuf, qbuf, rbuf, sbuf
224 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER :: tbuf
225 INTEGER :: g_pos, handle, lg, lmax, mg, mmax, mx2, &
226 mz2, n1, n2, ngpts, nmax, numtask, rp
227 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: p2p
228 INTEGER,
DIMENSION(2) :: r_dim, r_pos
229 INTEGER,
DIMENSION(:),
POINTER :: n, nloc, nyzray
230 INTEGER,
DIMENSION(:, :, :, :),
POINTER :: bo
231 REAL(kind=
dp) :: scale
236 CALL timeset(routinen, handle)
238 scale = 1.0_dp/real(pw1%pw_grid%ngpts, kind=
dp)
241 n => pw1%pw_grid%npts
242 nloc => pw1%pw_grid%npts_local
243 grays => pw1%pw_grid%grays
244 ngpts = nloc(1)*nloc(2)*nloc(3)
247 IF (pw1%pw_grid%para%ray_distribution)
THEN
248 rs_group = pw1%pw_grid%para%group
249 nyzray => pw1%pw_grid%para%nyzray
250 bo => pw1%pw_grid%para%bo
252 g_pos = rs_group%mepos
253 numtask = rs_group%num_pe
254 r_dim = rs_group%num_pe_cart
255 r_pos = rs_group%mepos_cart
260 lmax = max(lg, (ngpts/mmax + 1))
262 ALLOCATE (p2p(0:numtask - 1))
264 CALL rs_group%rank_compare(rs_group, p2p)
267 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
268 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
269 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
270 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
271 nmax = max((2*n2)/numtask, 2)*mx2*mz2
272 nmax = max(nmax, n1*maxval(nyzray))
274 fft_scratch_size%nx = nloc(1)
275 fft_scratch_size%ny = nloc(2)
276 fft_scratch_size%nz = nloc(3)
277 fft_scratch_size%lmax = lmax
278 fft_scratch_size%mmax = mmax
279 fft_scratch_size%mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
280 fft_scratch_size%mx2 = mx2
281 fft_scratch_size%my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
282 fft_scratch_size%mz2 = mz2
283 fft_scratch_size%lg = lg
284 fft_scratch_size%mg = mg
285 fft_scratch_size%nbx = maxval(bo(2, 1, :, 2))
286 fft_scratch_size%nbz = maxval(bo(2, 3, :, 2))
287 fft_scratch_size%mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
288 fft_scratch_size%mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
289 fft_scratch_size%mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
290 fft_scratch_size%nmax = nmax
291 fft_scratch_size%nmray = maxval(nyzray)
292 fft_scratch_size%nyzray = nyzray(g_pos)
293 fft_scratch_size%rs_group = rs_group
294 fft_scratch_size%g_pos = g_pos
295 fft_scratch_size%r_pos = r_pos
296 fft_scratch_size%r_dim = r_dim
297 fft_scratch_size%numtask = numtask
299 IF (r_dim(2) > 1)
THEN
305 cpabort(
"This processor distribution is not supported.")
307 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
310 qbuf => fft_scratch%p2buf
311 rbuf => fft_scratch%p3buf
312 pbuf => fft_scratch%p4buf
313 sbuf => fft_scratch%p5buf
316 CALL pw_gpu_cf(pw1, qbuf)
319 CALL cube_transpose_2(qbuf, bo(:, :, :, 1), bo(:, :, :, 2), rbuf, fft_scratch)
325 CALL pw_gpu_f(rbuf, pbuf, +1, n(2), mx2*mz2)
328 CALL xz_to_yz(pbuf, rs_group, r_dim, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
329 bo(:, :, :, 2), sbuf, fft_scratch)
332 CALL pw_gpu_fg(sbuf, pw2, scale)
343 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
346 tbuf => fft_scratch%tbuf
347 sbuf => fft_scratch%r1buf
350 CALL pw_gpu_cff(pw1, tbuf)
353 CALL yz_to_x(tbuf, rs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
354 bo(:, :, :, 2), sbuf, fft_scratch)
357 CALL pw_gpu_fg(sbuf, pw2, scale)
367 cpabort(
"Not implemented (no ray_distr.) in: pw_gpu_r3dc1d_3d_ps.")
370 CALL timestop(handle)
383 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_c1dr3d_3d_ps'
385 COMPLEX(KIND=dp),
DIMENSION(:, :),
POINTER :: grays, pbuf, qbuf, rbuf, sbuf
386 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
POINTER :: tbuf
387 INTEGER :: g_pos, handle, lg, lmax, mg, mmax, mx2, &
388 mz2, n1, n2, ngpts, nmax, numtask, rp
389 INTEGER,
ALLOCATABLE,
DIMENSION(:) :: p2p
390 INTEGER,
DIMENSION(2) :: r_dim, r_pos
391 INTEGER,
DIMENSION(:),
POINTER :: n, nloc, nyzray
392 INTEGER,
DIMENSION(:, :, :, :),
POINTER :: bo
393 REAL(kind=
dp) :: scale
398 CALL timeset(routinen, handle)
403 n => pw1%pw_grid%npts
404 nloc => pw1%pw_grid%npts_local
405 grays => pw1%pw_grid%grays
406 ngpts = nloc(1)*nloc(2)*nloc(3)
409 IF (pw1%pw_grid%para%ray_distribution)
THEN
410 rs_group = pw1%pw_grid%para%group
411 nyzray => pw1%pw_grid%para%nyzray
412 bo => pw1%pw_grid%para%bo
414 g_pos = rs_group%mepos
415 numtask = rs_group%num_pe
416 r_dim = rs_group%num_pe_cart
417 r_pos = rs_group%mepos_cart
422 lmax = max(lg, (ngpts/mmax + 1))
424 ALLOCATE (p2p(0:numtask - 1))
426 CALL rs_group%rank_compare(rs_group, p2p)
429 mx2 = bo(2, 1, rp, 2) - bo(1, 1, rp, 2) + 1
430 mz2 = bo(2, 3, rp, 2) - bo(1, 3, rp, 2) + 1
431 n1 = maxval(bo(2, 1, :, 1) - bo(1, 1, :, 1) + 1)
432 n2 = maxval(bo(2, 2, :, 1) - bo(1, 2, :, 1) + 1)
433 nmax = max((2*n2)/numtask, 2)*mx2*mz2
434 nmax = max(nmax, n1*maxval(nyzray))
436 fft_scratch_size%nx = nloc(1)
437 fft_scratch_size%ny = nloc(2)
438 fft_scratch_size%nz = nloc(3)
439 fft_scratch_size%lmax = lmax
440 fft_scratch_size%mmax = mmax
441 fft_scratch_size%mx1 = bo(2, 1, rp, 1) - bo(1, 1, rp, 1) + 1
442 fft_scratch_size%mx2 = mx2
443 fft_scratch_size%my1 = bo(2, 2, rp, 1) - bo(1, 2, rp, 1) + 1
444 fft_scratch_size%mz2 = mz2
445 fft_scratch_size%lg = lg
446 fft_scratch_size%mg = mg
447 fft_scratch_size%nbx = maxval(bo(2, 1, :, 2))
448 fft_scratch_size%nbz = maxval(bo(2, 3, :, 2))
449 fft_scratch_size%mcz1 = maxval(bo(2, 3, :, 1) - bo(1, 3, :, 1) + 1)
450 fft_scratch_size%mcx2 = maxval(bo(2, 1, :, 2) - bo(1, 1, :, 2) + 1)
451 fft_scratch_size%mcz2 = maxval(bo(2, 3, :, 2) - bo(1, 3, :, 2) + 1)
452 fft_scratch_size%nmax = nmax
453 fft_scratch_size%nmray = maxval(nyzray)
454 fft_scratch_size%nyzray = nyzray(g_pos)
455 fft_scratch_size%rs_group = rs_group
456 fft_scratch_size%g_pos = g_pos
457 fft_scratch_size%r_pos = r_pos
458 fft_scratch_size%r_dim = r_dim
459 fft_scratch_size%numtask = numtask
461 IF (r_dim(2) > 1)
THEN
467 cpabort(
"This processor distribution is not supported.")
469 CALL get_fft_scratch(fft_scratch, tf_type=300, n=n, fft_sizes=fft_scratch_size)
472 pbuf => fft_scratch%p7buf
473 qbuf => fft_scratch%p4buf
474 rbuf => fft_scratch%p3buf
475 sbuf => fft_scratch%p2buf
478 CALL pw_gpu_sf(pw1, pbuf, scale)
482 CALL yz_to_xz(pbuf, rs_group, r_dim, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
483 bo(:, :, :, 2), qbuf, fft_scratch)
489 CALL pw_gpu_f(qbuf, rbuf, -1, n(2), mx2*mz2)
494 CALL cube_transpose_1(rbuf, bo(:, :, :, 2), bo(:, :, :, 1), sbuf, fft_scratch)
497 CALL pw_gpu_fc(sbuf, pw2)
508 CALL get_fft_scratch(fft_scratch, tf_type=200, n=n, fft_sizes=fft_scratch_size)
511 sbuf => fft_scratch%r1buf
512 tbuf => fft_scratch%tbuf
515 CALL pw_gpu_sf(pw1, sbuf, scale)
519 CALL x_to_yz(sbuf, rs_group, g_pos, p2p, pw1%pw_grid%para%yzp, nyzray, &
520 bo(:, :, :, 2), tbuf, fft_scratch)
523 CALL pw_gpu_ffc(tbuf, pw2)
533 cpabort(
"Not implemented (no ray_distr.) in: pw_gpu_c1dr3d_3d_ps.")
536 CALL timestop(handle)
545 SUBROUTINE pw_gpu_cff(pw1, pwbuf)
547 COMPLEX(KIND=dp),
DIMENSION(:, :, :), &
548 INTENT(INOUT),
TARGET :: pwbuf
550 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_cff'
552 COMPLEX(KIND=dp),
POINTER :: ptr_pwout
553 INTEGER :: handle, l1, l2, l3
554 INTEGER,
DIMENSION(:),
POINTER :: npts
555 REAL(kind=
dp),
POINTER :: ptr_pwin
557 SUBROUTINE pw_gpu_cff_c(din, zout, npts)
BIND(C, name="pw_gpu_cff")
559 TYPE(c_ptr),
INTENT(IN),
VALUE :: din
560 TYPE(c_ptr),
VALUE :: zout
561 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
562 END SUBROUTINE pw_gpu_cff_c
565 CALL timeset(routinen, handle)
568 npts => pw1%pw_grid%npts_local
569 l1 = lbound(pw1%array, 1)
570 l2 = lbound(pw1%array, 2)
571 l3 = lbound(pw1%array, 3)
574 ptr_pwin => pw1%array(l1, l2, l3)
575 ptr_pwout => pwbuf(1, 1, 1)
578#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
579 CALL pw_gpu_cff_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
581 cpabort(
"Compiled without pw offloading")
584 CALL timestop(handle)
585 END SUBROUTINE pw_gpu_cff
593 SUBROUTINE pw_gpu_ffc(pwbuf, pw2)
594 COMPLEX(KIND=dp),
DIMENSION(:, :, :),
INTENT(IN), &
598 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_ffc'
600 COMPLEX(KIND=dp),
POINTER :: ptr_pwin
601 INTEGER :: handle, l1, l2, l3
602 INTEGER,
DIMENSION(:),
POINTER :: npts
603 REAL(kind=
dp),
POINTER :: ptr_pwout
605 SUBROUTINE pw_gpu_ffc_c(zin, dout, npts)
BIND(C, name="pw_gpu_ffc")
607 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
608 TYPE(c_ptr),
VALUE :: dout
609 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
610 END SUBROUTINE pw_gpu_ffc_c
613 CALL timeset(routinen, handle)
616 npts => pw2%pw_grid%npts_local
617 l1 = lbound(pw2%array, 1)
618 l2 = lbound(pw2%array, 2)
619 l3 = lbound(pw2%array, 3)
622 ptr_pwin => pwbuf(1, 1, 1)
623 ptr_pwout => pw2%array(l1, l2, l3)
626#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
627 CALL pw_gpu_ffc_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
629 cpabort(
"Compiled without pw offloading")
632 CALL timestop(handle)
633 END SUBROUTINE pw_gpu_ffc
641 SUBROUTINE pw_gpu_cf(pw1, pwbuf)
643 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT), &
646 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_cf'
648 COMPLEX(KIND=dp),
POINTER :: ptr_pwout
649 INTEGER :: handle, l1, l2, l3
650 INTEGER,
DIMENSION(:),
POINTER :: npts
651 REAL(kind=
dp),
POINTER :: ptr_pwin
653 SUBROUTINE pw_gpu_cf_c(din, zout, npts)
BIND(C, name="pw_gpu_cf")
655 TYPE(c_ptr),
INTENT(IN),
VALUE :: din
656 TYPE(c_ptr),
VALUE :: zout
657 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
658 END SUBROUTINE pw_gpu_cf_c
661 CALL timeset(routinen, handle)
664 npts => pw1%pw_grid%npts_local
665 l1 = lbound(pw1%array, 1)
666 l2 = lbound(pw1%array, 2)
667 l3 = lbound(pw1%array, 3)
670 ptr_pwin => pw1%array(l1, l2, l3)
671 ptr_pwout => pwbuf(1, 1)
674#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
675 CALL pw_gpu_cf_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
677 cpabort(
"Compiled without pw offloading")
679 CALL timestop(handle)
680 END SUBROUTINE pw_gpu_cf
688 SUBROUTINE pw_gpu_fc(pwbuf, pw2)
689 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(IN), &
693 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_fc'
695 COMPLEX(KIND=dp),
POINTER :: ptr_pwin
696 INTEGER :: handle, l1, l2, l3
697 INTEGER,
DIMENSION(:),
POINTER :: npts
698 REAL(kind=
dp),
POINTER :: ptr_pwout
700 SUBROUTINE pw_gpu_fc_c(zin, dout, npts)
BIND(C, name="pw_gpu_fc")
702 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
703 TYPE(c_ptr),
VALUE :: dout
704 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
705 END SUBROUTINE pw_gpu_fc_c
708 CALL timeset(routinen, handle)
710 npts => pw2%pw_grid%npts_local
711 l1 = lbound(pw2%array, 1)
712 l2 = lbound(pw2%array, 2)
713 l3 = lbound(pw2%array, 3)
716 ptr_pwin => pwbuf(1, 1)
717 ptr_pwout => pw2%array(l1, l2, l3)
720#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
721 CALL pw_gpu_fc_c(c_loc(ptr_pwin), c_loc(ptr_pwout), npts)
723 cpabort(
"Compiled without pw offloading")
726 CALL timestop(handle)
727 END SUBROUTINE pw_gpu_fc
738 SUBROUTINE pw_gpu_f(pwbuf1, pwbuf2, dir, n, m)
739 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(IN), &
741 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT), &
743 INTEGER,
INTENT(IN) :: dir, n, m
745 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_f'
747 COMPLEX(KIND=dp),
POINTER :: ptr_pwin, ptr_pwout
750 SUBROUTINE pw_gpu_f_c(zin, zout, dir, n, m)
BIND(C, name="pw_gpu_f")
752 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
753 TYPE(c_ptr),
VALUE :: zout
754 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: dir, n, m
755 END SUBROUTINE pw_gpu_f_c
758 CALL timeset(routinen, handle)
762 ptr_pwin => pwbuf1(1, 1)
763 ptr_pwout => pwbuf2(1, 1)
766#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
767 CALL pw_gpu_f_c(c_loc(ptr_pwin), c_loc(ptr_pwout), dir, n, m)
770 cpabort(
"Compiled without pw offloading")
774 CALL timestop(handle)
775 END SUBROUTINE pw_gpu_f
783 SUBROUTINE pw_gpu_fg(pwbuf, pw2, scale)
784 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(IN), &
787 REAL(kind=
dp),
INTENT(IN) :: scale
789 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_fg'
791 COMPLEX(KIND=dp),
POINTER :: ptr_pwin, ptr_pwout
792 INTEGER :: handle, mg, mmax, ngpts
793 INTEGER,
DIMENSION(:),
POINTER :: npts
794 INTEGER,
POINTER :: ptr_ghatmap
796 SUBROUTINE pw_gpu_fg_c(zin, zout, ghatmap, npts, mmax, ngpts, scale)
BIND(C, name="pw_gpu_fg")
798 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
799 TYPE(c_ptr),
VALUE :: zout
800 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
801 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
802 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: mmax, ngpts
803 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
805 END SUBROUTINE pw_gpu_fg_c
808 CALL timeset(routinen, handle)
810 ngpts =
SIZE(pw2%pw_grid%gsq)
811 npts => pw2%pw_grid%npts
813 IF ((npts(1) /= 0) .AND. (ngpts /= 0))
THEN
814 mg =
SIZE(pw2%pw_grid%grays, 2)
818 ptr_pwin => pwbuf(1, 1)
819 ptr_pwout => pw2%array(1)
822 ptr_ghatmap => pw2%pw_grid%g_hatmap(1, 1)
825#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
826 CALL pw_gpu_fg_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, mmax, ngpts, scale)
829 cpabort(
"Compiled without pw offloading")
833 CALL timestop(handle)
834 END SUBROUTINE pw_gpu_fg
843 SUBROUTINE pw_gpu_sf(pw1, pwbuf, scale)
845 COMPLEX(KIND=dp),
DIMENSION(:, :),
INTENT(INOUT), &
847 REAL(kind=
dp),
INTENT(IN) :: scale
849 CHARACTER(len=*),
PARAMETER :: routinen =
'pw_gpu_sf'
851 COMPLEX(KIND=dp),
POINTER :: ptr_pwin, ptr_pwout
852 INTEGER :: handle, mg, mmax, ngpts, nmaps
853 INTEGER,
DIMENSION(:),
POINTER :: npts
854 INTEGER,
POINTER :: ptr_ghatmap
856 SUBROUTINE pw_gpu_sf_c(zin, zout, ghatmap, npts, mmax, ngpts, nmaps, scale)
BIND(C, name="pw_gpu_sf")
858 TYPE(c_ptr),
INTENT(IN),
VALUE :: zin
859 TYPE(c_ptr),
VALUE :: zout
860 TYPE(c_ptr),
INTENT(IN),
VALUE :: ghatmap
861 INTEGER(KIND=C_INT),
DIMENSION(*),
INTENT(IN):: npts
862 INTEGER(KIND=C_INT),
INTENT(IN),
VALUE :: mmax, ngpts, nmaps
863 REAL(kind=c_double),
INTENT(IN),
VALUE :: scale
865 END SUBROUTINE pw_gpu_sf_c
868 CALL timeset(routinen, handle)
870 ngpts =
SIZE(pw1%pw_grid%gsq)
871 npts => pw1%pw_grid%npts
873 IF ((npts(1) /= 0) .AND. (ngpts /= 0))
THEN
874 mg =
SIZE(pw1%pw_grid%grays, 2)
878 ptr_pwin => pw1%array(1)
879 ptr_pwout => pwbuf(1, 1)
882 nmaps =
SIZE(pw1%pw_grid%g_hatmap, 2)
883 ptr_ghatmap => pw1%pw_grid%g_hatmap(1, 1)
886#if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_PW)
887 CALL pw_gpu_sf_c(c_loc(ptr_pwin), c_loc(ptr_pwout), c_loc(ptr_ghatmap), npts, mmax, ngpts, nmaps, scale)
890 cpabort(
"Compiled without pw offloading")
894 CALL timestop(handle)
895 END SUBROUTINE pw_gpu_sf
Defines the basic variable types.
integer, parameter, public dp
Definition of mathematical constants and functions.
complex(kind=dp), parameter, public z_zero
Interface to the message passing library MPI.
subroutine, public pw_gpu_c1dr3d_3d_ps(pw1, pw2)
perform an parallel scatter followed by a fft on the gpu
subroutine, public pw_gpu_c1dr3d_3d(pw1, pw2)
perform an scatter followed by a fft on the gpu
subroutine, public pw_gpu_init()
Allocates resources on the gpu device for gpu fft acceleration.
subroutine, public pw_gpu_r3dc1d_3d(pw1, pw2)
perform an fft followed by a gather on the gpu
subroutine, public pw_gpu_finalize()
Releases resources on the gpu device for gpu fft acceleration.
subroutine, public pw_gpu_r3dc1d_3d_ps(pw1, pw2)
perform an parallel fft followed by a gather on the gpu
integer, parameter, public fullspace