9 USE iso_c_binding,
ONLY: c_null_ptr, &
11#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
13 USE iso_c_binding,
ONLY: c_associated, &
15 USE spla,
ONLY: spla_pu_host, &
19 spla_op_conj_transpose, &
26 spla_ctx_set_op_threshold_gpu, &
33#include "./base/base_uses.f90"
39 CHARACTER(len=*),
PARAMETER,
PRIVATE :: moduleN =
'local_gemm_api'
44 INTEGER,
PARAMETER,
PUBLIC :: &
48 INTEGER,
PRIVATE :: do_dgemm = 1
51 TYPE(c_ptr) :: spla_context = c_null_ptr
53 PROCEDURE, pass(ctx), non_overridable :: create => local_gemm_create
54 PROCEDURE, pass(ctx), non_overridable :: destroy => local_gemm_destroy
55 PROCEDURE, pass(ctx), non_overridable :: set_op_threshold_gpu => local_gemm_set_op_threshold_gpu
56 PROCEDURE, pass(ctx), non_overridable :: gemm =>
local_gemm
79 alpha, A, lda, B, ldb, &
81 CHARACTER,
INTENT(in) :: opA
82 CHARACTER,
INTENT(in) :: opB
83 INTEGER,
INTENT(in) :: m
84 INTEGER,
INTENT(in) :: n
85 INTEGER,
INTENT(in) :: k
86 REAL(8),
INTENT(in) :: alpha
87#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
88 REAL(8),
DIMENSION(*),
INTENT(in),
TARGET :: A
90 REAL(8),
DIMENSION(:, :),
INTENT(in),
TARGET :: A
92 INTEGER,
INTENT(in) :: lda
93#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
94 REAL(8),
DIMENSION(*),
INTENT(in),
TARGET :: B
96 REAL(8),
DIMENSION(:, :),
INTENT(in),
TARGET :: B
99 INTEGER,
INTENT(in) :: ldb
100 REAL(8),
INTENT(in) :: beta
101#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
102 REAL(8),
DIMENSION(*),
INTENT(inout),
TARGET ::C
104 REAL(8),
DIMENSION(:, :),
INTENT(inout),
TARGET :: C
106 INTEGER,
INTENT(in) :: ldc
111#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
112 INTEGER :: spla_op_A, spla_op_B, spla_error
114 CHARACTER(LEN=*),
PARAMETER :: routineN =
'local_gemm'
115 CALL timeset(routinen, handle)
118#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
121 IF (opa ==
'N') spla_op_a = spla_op_none
122 IF (opa ==
'T') spla_op_a = spla_op_transpose
124 IF (opb ==
'N') spla_op_b = spla_op_none
125 IF (opb ==
'T') spla_op_b = spla_op_transpose
128 cpassert(is_contiguous(a))
129 cpassert(is_contiguous(b))
130 cpassert(is_contiguous(c))
134 spla_error = spla_dgemm(spla_op_a, spla_op_b, &
138 beta, c_loc(c), ldc, ctx%spla_context)
139 IF (spla_error /= spla_success) &
140 cpabort(
"spla_dgemm failed: "//
cp_to_string(spla_error))
143 CALL dgemm(opa, opb, m, n, k, alpha, &
145 b, ldb, beta, c, ldc)
146#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
151 CALL timestop(handle)
160 SUBROUTINE local_gemm_create(ctx, pu)
162 INTEGER,
INTENT(in) :: pu
164#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
167 IF (.NOT. c_associated(ctx%spla_context))
THEN
171 error_ = spla_ctx_create(ctx%spla_context, pu)
172 IF (error_ /= spla_success) &
173 cpabort(
"spla_ctx_create failed: "//
cp_to_string(error_))
175 ctx%spla_context = c_null_ptr
180 ctx%spla_context = c_null_ptr
182 END SUBROUTINE local_gemm_create
188 SUBROUTINE local_gemm_destroy(ctx)
191#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
197 error_ = spla_ctx_destroy(ctx%spla_context)
198 IF (error_ /= spla_success) &
199 cpabort(
"spla_ctx_destroy failed: "//
cp_to_string(error_))
202 ctx%spla_context = c_null_ptr
203 END SUBROUTINE local_gemm_destroy
210 SUBROUTINE local_gemm_set_op_threshold_gpu(ctx, opThresholdGPU)
212 INTEGER,
INTENT(in) :: opThresholdGPU
214#if defined(__SPLA) && defined(__OFFLOAD_GEMM)
218 error__ = spla_ctx_set_op_threshold_gpu(ctx%spla_context, opthresholdgpu)
221 mark_used(opthresholdgpu)
223 END SUBROUTINE local_gemm_set_op_threshold_gpu
230 INTEGER,
INTENT(IN) :: dgemm_library
232 do_dgemm = dgemm_library
static void dgemm(const char transa, const char transb, const int m, const int n, const int k, const double alpha, const double *a, const int lda, const double *b, const int ldb, const double beta, double *c, const int ldc)
Convenient wrapper to hide Fortran nature of dgemm_, swapping a and b.
various routines to log and control the output. The idea is that decisions about where to log should ...
subroutine, public local_gemm_set_library(dgemm_library)
...
integer, parameter, public local_gemm_pu_gpu
subroutine local_gemm(opa, opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, ctx)
...
integer, parameter, public local_gemm_pu_host
Fortran API for the offload package, which is written in C.
subroutine, public offload_activate_chosen_device()
Activates the device selected via offload_set_chosen_device()