(git:0de0cc2)
dbm_shard.c
Go to the documentation of this file.
1 /*----------------------------------------------------------------------------*/
2 /* CP2K: A general program to perform molecular dynamics simulations */
3 /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */
4 /* */
5 /* SPDX-License-Identifier: BSD-3-Clause */
6 /*----------------------------------------------------------------------------*/
7 #include <assert.h>
8 #include <omp.h>
9 #include <stdbool.h>
10 #include <stddef.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include "dbm_hyperparams.h"
15 #include "dbm_shard.h"
16 
17 /*******************************************************************************
18  * \brief Internal routine for finding a power of two greater than given number.
19  * \author Ole Schuett
20  ******************************************************************************/
21 static int next_power2(const int start) {
22  int candidate = 2;
23  while (candidate < start) {
24  candidate *= 2;
25  }
26  return candidate;
27 }
28 
29 /*******************************************************************************
30  * \brief Internal routine for finding a prime greater equal than given number.
31  * \author Ole Schuett
32  ******************************************************************************/
33 static int next_prime(const int start) {
34  int candidate = start, divisor = 0;
35  while (divisor < candidate) {
36  for (divisor = 2; divisor < candidate; divisor++) {
37  if (candidate % divisor == 0) {
38  candidate++;
39  break;
40  }
41  }
42  }
43  return candidate;
44 }
45 
46 /*******************************************************************************
47  * \brief Internal routine for initializing a shard's hashtable.
48  * \author Ole Schuett
49  ******************************************************************************/
50 static void hashtable_init(dbm_shard_t *shard) {
51  // Choosing size as power of two allows to replace modulo with bitwise AND.
52  shard->hashtable_size =
54  shard->hashtable_mask = shard->hashtable_size - 1;
55  shard->hashtable_prime = next_prime(shard->hashtable_size);
56  shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
57 }
58 
59 /*******************************************************************************
60  * \brief Internal routine for initializing a shard.
61  * \author Ole Schuett
62  ******************************************************************************/
64  shard->nblocks = 0;
66  shard->blocks = malloc(shard->nblocks_allocated * sizeof(dbm_block_t));
67  hashtable_init(shard);
68  shard->data_size = 0;
69  shard->data_promised = 0;
71  shard->data = malloc(shard->data_allocated * sizeof(double));
72 
73  omp_init_lock(&shard->lock);
74 }
75 
76 /*******************************************************************************
77  * \brief Internal routine for copying content of shard_b into shard_a.
78  * \author Ole Schuett
79  ******************************************************************************/
80 void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
81  free(shard_a->blocks);
82  shard_a->nblocks = shard_b->nblocks;
83  shard_a->nblocks_allocated = shard_b->nblocks_allocated;
84  shard_a->blocks = malloc(shard_b->nblocks_allocated * sizeof(dbm_block_t));
85  memcpy(shard_a->blocks, shard_b->blocks,
86  shard_b->nblocks * sizeof(dbm_block_t));
87 
88  free(shard_a->hashtable);
89  shard_a->hashtable_size = shard_b->hashtable_size;
90  shard_a->hashtable_mask = shard_b->hashtable_mask;
91  shard_a->hashtable_prime = shard_b->hashtable_prime;
92  shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
93  memcpy(shard_a->hashtable, shard_b->hashtable,
94  shard_b->hashtable_size * sizeof(int));
95 
96  free(shard_a->data);
97  shard_a->data_allocated = shard_b->data_allocated;
98  shard_a->data = malloc(shard_b->data_allocated * sizeof(double));
99  shard_a->data_size = shard_b->data_size;
100  memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
101 }
102 
103 /*******************************************************************************
104  * \brief Internal routine for releasing a shard.
105  * \author Ole Schuett
106  ******************************************************************************/
108  free(shard->blocks);
109  free(shard->hashtable);
110  free(shard->data);
111  omp_destroy_lock(&shard->lock);
112 }
113 
114 /*******************************************************************************
115  * \brief Private hash function based on Cantor pairing function.
116  * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
117  * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
118  * Using unsigned int to return a positive number even after overflow.
119  * \author Ole Schuett
120  ******************************************************************************/
121 static inline unsigned int hash(const unsigned int row,
122  const unsigned int col) {
123  return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
124 }
125 
126 /*******************************************************************************
127  * \brief Private routine for inserting a block into a shard's hashtable.
128  * \author Ole Schuett
129  ******************************************************************************/
130 static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
131  assert(0 <= block_idx && block_idx < shard->nblocks);
132  const dbm_block_t *blk = &shard->blocks[block_idx];
133  const int row = blk->row, col = blk->col;
134  int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask;
135  while (true) {
136  if (shard->hashtable[slot] == 0) {
137  shard->hashtable[slot] = block_idx + 1; // 1-based because 0 means empty
138  return;
139  }
140  // linear probing
141  slot = (slot + 1) & shard->hashtable_mask;
142  }
143 }
144 
145 /*******************************************************************************
146  * \brief Internal routine for looking up a block from a shard.
147  * \author Ole Schuett
148  ******************************************************************************/
149 dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
150  const int col) {
151  int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask;
152  while (true) {
153  const int block_idx = shard->hashtable[slot] - 1; // 1-based, 0 means empty.
154  if (block_idx < 0) {
155  return NULL; // block not found
156  }
157  assert(0 <= block_idx && block_idx < shard->nblocks);
158  dbm_block_t *blk = &shard->blocks[block_idx];
159  if (blk->row == row && blk->col == col) {
160  return blk;
161  }
162  // linear probing
163  slot = (slot + 1) & shard->hashtable_mask;
164  }
165 }
166 
167 /*******************************************************************************
168  * \brief Internal routine for allocating the metadata of a new block.
169  * \author Ole Schuett
170  ******************************************************************************/
172  const int col, const int block_size) {
173  // Grow blocks array if necessary.
174  if (shard->nblocks_allocated < shard->nblocks + 1) {
175  shard->nblocks_allocated = ALLOCATION_FACTOR * (shard->nblocks + 1);
176  shard->blocks =
177  realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
178 
179  // rebuild hashtable
180  free(shard->hashtable);
181  hashtable_init(shard);
182  for (int i = 0; i < shard->nblocks; i++) {
183  hashtable_insert(shard, i);
184  }
185  }
186 
187  const int new_block_idx = shard->nblocks;
188  shard->nblocks++;
189  dbm_block_t *new_block = &shard->blocks[new_block_idx];
190  new_block->row = row;
191  new_block->col = col;
192  new_block->offset = shard->data_promised;
193  shard->data_promised += block_size;
194  // The data_size will be increase after the memory is allocated and zeroed.
195  hashtable_insert(shard, new_block_idx);
196  return new_block;
197 }
198 
199 /*******************************************************************************
200  * \brief Internal routine for allocating and zeroing any promised block's data.
201  * \author Ole Schuett
202  ******************************************************************************/
204 
205  // Reallocate data array if necessary.
206  if (shard->data_promised > shard->data_allocated) {
208  shard->data = realloc(shard->data, shard->data_allocated * sizeof(double));
209  }
210 
211  // Zero new blocks.
212  // The following memset is usually the first touch of the memory, which leads
213  // to frequent page faults. The executing thread determines the NUMA location
214  if (shard->data_promised > shard->data_size) {
215  const int tail = shard->data_promised - shard->data_size;
216  memset(&shard->data[shard->data_size], 0, tail * sizeof(double));
217  shard->data_size = shard->data_promised;
218  }
219 }
220 
221 /*******************************************************************************
222  * \brief Internal routine for getting block or promising a new one.
223  * \author Ole Schuett
224  ******************************************************************************/
226  const int col,
227  const int block_size) {
228  dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
229  if (existing_blk != NULL) {
230  return existing_blk;
231  } else {
232  return dbm_shard_promise_new_block(shard, row, col, block_size);
233  }
234 }
235 
236 /*******************************************************************************
237  * \brief Internal routine for getting block or allocating a new one.
238  * \author Ole Schuett
239  ******************************************************************************/
241  const int col,
242  const int block_size) {
243  dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
244  if (existing_blk != NULL) {
245  return existing_blk;
246  }
247 
248  // Create a new block.
249  dbm_block_t *new_blk =
250  dbm_shard_promise_new_block(shard, row, col, block_size);
252 
253  return new_blk;
254 }
255 
256 // EOF
static const float ALLOCATION_FACTOR
static const int INITIAL_DATA_ALLOCATED
static const int INITIAL_NBLOCKS_ALLOCATED
static const float HASHTABLE_FACTOR
dbm_block_t * dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for allocating the metadata of a new block.
Definition: dbm_shard.c:171
static void hashtable_insert(dbm_shard_t *shard, const int block_idx)
Private routine for inserting a block into a shard's hashtable.
Definition: dbm_shard.c:130
static void hashtable_init(dbm_shard_t *shard)
Internal routine for initializing a shard's hashtable.
Definition: dbm_shard.c:50
void dbm_shard_release(dbm_shard_t *shard)
Internal routine for releasing a shard.
Definition: dbm_shard.c:107
static int next_prime(const int start)
Internal routine for finding a prime greater equal than given number.
Definition: dbm_shard.c:33
dbm_block_t * dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or allocating a new one.
Definition: dbm_shard.c:240
static int next_power2(const int start)
Internal routine for finding a power of two greater than given number.
Definition: dbm_shard.c:21
dbm_block_t * dbm_shard_lookup(const dbm_shard_t *shard, const int row, const int col)
Internal routine for looking up a block from a shard.
Definition: dbm_shard.c:149
void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard)
Internal routine for allocating and zeroing any promised block's data.
Definition: dbm_shard.c:203
static unsigned int hash(const unsigned int row, const unsigned int col)
Private hash function based on Cantor pairing function. https://en.wikipedia.org/wiki/Pairing_functio...
Definition: dbm_shard.c:121
void dbm_shard_init(dbm_shard_t *shard)
Internal routine for initializing a shard.
Definition: dbm_shard.c:63
void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b)
Internal routine for copying content of shard_b into shard_a.
Definition: dbm_shard.c:80
dbm_block_t * dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, const int col, const int block_size)
Internal routine for getting block or promising a new one.
Definition: dbm_shard.c:225
static void const int const int i
Internal struct for storing a block's metadata.
Definition: dbm_shard.h:20
int offset
Definition: dbm_shard.h:23
Internal struct for storing a matrix shard.
Definition: dbm_shard.h:30
double * data
Definition: dbm_shard.h:44
int * hashtable
Definition: dbm_shard.h:38
omp_lock_t lock
Definition: dbm_shard.h:46
int nblocks_allocated
Definition: dbm_shard.h:32
dbm_block_t * blocks
Definition: dbm_shard.h:33
int nblocks
Definition: dbm_shard.h:31
int hashtable_prime
Definition: dbm_shard.h:37
int data_allocated
Definition: dbm_shard.h:42
int data_size
Definition: dbm_shard.h:43
int hashtable_size
Definition: dbm_shard.h:35
int data_promised
Definition: dbm_shard.h:40
int hashtable_mask
Definition: dbm_shard.h:36