231 static bool initialized =
false;
238 for (
int lx = 0; lx <= 18; lx++) {
239 for (
int ly = 0; ly <= 18 - lx; ly++) {
240 for (
int lz = 0; lz <= 18 - lx - ly; lz++) {
241 const int i = coset(lx, ly, lz);
242 coset_inv_host[
i] = {{lx, ly, lz}};
247 hipMemcpyToSymbol(
coset_inv, &coset_inv_host,
sizeof(coset_inv_host), 0,
248 hipMemcpyHostToDevice);
249 assert(error == hipSuccess);
252 int binomial_coef_host[19][19] = {
253 {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
254 {1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
255 {1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
256 {1, 3, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
257 {1, 4, 6, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
258 {1, 5, 10, 10, 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
259 {1, 6, 15, 20, 15, 6, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
260 {1, 7, 21, 35, 35, 21, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
261 {1, 8, 28, 56, 70, 56, 28, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
262 {1, 9, 36, 84, 126, 126, 84, 36, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
263 {1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1, 0, 0, 0, 0, 0, 0, 0, 0},
264 {1, 11, 55, 165, 330, 462, 462, 330, 165, 55, 11, 1, 0, 0, 0, 0, 0, 0, 0},
265 {1, 12, 66, 220, 495, 792, 924, 792, 495, 220, 66, 12, 1, 0, 0, 0, 0, 0,
267 {1, 13, 78, 286, 715, 1287, 1716, 1716, 1287, 715, 286, 78, 13, 1, 0, 0,
269 {1, 14, 91, 364, 1001, 2002, 3003, 3432, 3003, 2002, 1001, 364, 91, 14, 1,
271 {1, 15, 105, 455, 1365, 3003, 5005, 6435, 6435, 5005, 3003, 1365, 455,
272 105, 15, 1, 0, 0, 0},
273 {1, 16, 120, 560, 1820, 4368, 8008, 11440, 12870, 11440, 8008, 4368, 1820,
274 560, 120, 16, 1, 0, 0},
275 {1, 17, 136, 680, 2380, 6188, 12376, 19448, 24310, 24310, 19448, 12376,
276 6188, 2380, 680, 136, 17, 1, 0},
277 {1, 18, 153, 816, 3060, 8568, 18564, 31824, 43758, 48620, 43758, 31824,
278 18564, 8568, 3060, 816, 153, 18, 1}};
281 sizeof(binomial_coef_host), 0, hipMemcpyHostToDevice);
282 assert(error == hipSuccess);
393 const T radius,
const T *
const __restrict__ dh_,
394 const T *
const __restrict__ dh_inv_,
const T3 *__restrict__ rp,
395 T3 *__restrict__ roffset, int3 *__restrict__ cubecenter,
396 int3 *__restrict__ lb_cube, int3 *__restrict__ cube_size) {
405 cubecenter->x = std::floor(rp1.x);
406 cubecenter->y = std::floor(rp1.y);
407 cubecenter->z = std::floor(rp1.z);
424 T norm1, norm2, norm3;
425 norm1 = dh_[0] * dh_[0] + dh_[1] * dh_[1] + dh_[2] * dh_[2];
426 norm2 = dh_[3] * dh_[3] + dh_[4] * dh_[4] + dh_[5] * dh_[5];
427 norm3 = dh_[6] * dh_[6] + dh_[7] * dh_[7] + dh_[8] * dh_[8];
429 norm1 = std::sqrt(norm1);
430 norm2 = std::sqrt(norm2);
431 norm3 = std::sqrt(norm3);
433 const T disr_radius =
434 std::min(norm1, std::min(norm2, norm3)) *
436 (
int)ceil(radius / std::min(norm1, std::min(norm2, norm3))));
438 rp2.x = cubecenter->x;
439 rp2.y = cubecenter->y;
440 rp2.z = cubecenter->z;
456 lb_cube->x = std::ceil(-1e-8 - rp3.x);
457 lb_cube->y = std::ceil(-1e-8 - rp3.y);
458 lb_cube->z = std::ceil(-1e-8 - rp3.z);
471 cube_size->x = 2 - 2 * lb_cube->x;
472 cube_size->y = 2 - 2 * lb_cube->y;
473 cube_size->z = 2 - 2 * lb_cube->z;
478 lb_cube->x = INT_MAX;
480 lb_cube->y = INT_MAX;
482 lb_cube->z = INT_MAX;
485 for (
int i = -1;
i <= 1;
i++) {
486 for (
int j = -1; j <= 1; j++) {
487 for (
int k = -1; k <= 1; k++) {
489 r.x = rp->x + ((T)
i) * radius;
490 r.y = rp->y + ((T)j) * radius;
491 r.z = rp->z + ((T)k) * radius;
494 lb_cube->x = std::min(lb_cube->x, (
int)std::floor(roffset->x));
495 ub_cube.x = std::max(ub_cube.x, (
int)std::ceil(roffset->x));
497 lb_cube->y = std::min(lb_cube->y, (
int)std::floor(roffset->y));
498 ub_cube.y = std::max(ub_cube.y, (
int)std::ceil(roffset->y));
500 lb_cube->z = std::min(lb_cube->z, (
int)std::floor(roffset->z));
501 ub_cube.z = std::max(ub_cube.z, (
int)std::ceil(roffset->z));
506 cube_size->x = ub_cube.x - lb_cube->x;
507 cube_size->y = ub_cube.y - lb_cube->y;
508 cube_size->z = ub_cube.z - lb_cube->z;
512 roffset->x = cubecenter->x - rp1.x;
513 roffset->y = cubecenter->y - rp1.y;
514 roffset->z = cubecenter->z - rp1.z;
518 lb_cube->x -= cubecenter->x;
519 lb_cube->y -= cubecenter->y;
520 lb_cube->z -= cubecenter->z;