28 dist->nshards = nshards;
31 dist->length = length;
32 dist->index2coord = malloc(length *
sizeof(
int));
33 assert(dist->index2coord != NULL);
34 memcpy(dist->index2coord, coords, length *
sizeof(
int));
37 int cart_dims[1], cart_periods[1], cart_coords[1];
39 assert(dist->nranks == cart_dims[0]);
40 assert(dist->my_rank == cart_coords[0]);
43 for (
int i = 0;
i < length;
i++) {
44 assert(0 <= coords[
i] && coords[
i] < dist->nranks);
45 if (coords[
i] == dist->my_rank) {
51 dist->local_indicies = malloc(dist->nlocals *
sizeof(
int));
52 assert(dist->local_indicies != NULL);
54 for (
int i = 0;
i < length;
i++) {
55 if (coords[
i] == dist->my_rank) {
56 dist->local_indicies[j++] =
i;
59 assert(j == dist->nlocals);
67 free(dist->index2coord);
68 free(dist->local_indicies);
78 const double target =
imax(nrows, 1) / (double)
imax(ncols, 1);
79 int best_nrow_shards = nshards;
80 double best_error = fabs(log(target / (
double)nshards));
82 for (
int nrow_shards = 1; nrow_shards <= nshards; nrow_shards++) {
83 const int ncol_shards = nshards / nrow_shards;
84 if (nrow_shards * ncol_shards != nshards) {
87 const double ratio = (double)nrow_shards / (
double)ncol_shards;
88 const double error = fabs(log(target / ratio));
89 if (error < best_error) {
91 best_nrow_shards = nrow_shards;
94 return best_nrow_shards;
102 const int nrows,
const int ncols,
103 const int row_dist[nrows],
104 const int col_dist[ncols]) {
105 assert(omp_get_num_threads() == 1);
113 const int row_dim_remains[2] = {1, 0};
116 const int col_dim_remains[2] = {0, 1};
121 const int ncol_shards = nshards / nrow_shards;
126 assert(*dist_out == NULL);
135 assert(dist->ref_count > 0);
144 assert(dist->ref_count > 0);
146 if (dist->ref_count == 0) {
158 const int **row_dist) {
159 assert(dist->ref_count > 0);
160 *nrows = dist->rows.length;
161 *row_dist = dist->rows.index2coord;
169 const int **col_dist) {
170 assert(dist->ref_count > 0);
171 *ncols = dist->cols.length;
172 *col_dist = dist->cols.index2coord;
180 const int row,
const int col) {
181 assert(dist->ref_count > 0);
182 assert(0 <= row && row < dist->rows.length);
183 assert(0 <= col && col < dist->cols.length);
184 int coords[2] = {dist->rows.index2coord[row], dist->cols.index2coord[col]};
int dbm_distribution_stored_coords(const dbm_distribution_t *dist, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
static void dbm_dist_1d_free(dbm_dist_1d_t *dist)
Private routine for releasing a one-dimensional distribution.
static int find_best_nrow_shards(const int nshards, const int nrows, const int ncols)
Private routine for finding the optimal number of shard rows.
static void dbm_dist_1d_new(dbm_dist_1d_t *dist, const int length, const int coords[length], const dbm_mpi_comm_t comm, const int nshards)
Private routine for creating a new one-dimensional distribution.
#define DBM_SHARDS_PER_THREAD
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard).
int dbm_mpi_comm_rank(const dbm_mpi_comm_t comm)
Wrapper around MPI_Comm_rank.
int dbm_mpi_cart_rank(const dbm_mpi_comm_t comm, const int coords[])
Wrapper around MPI_Cart_rank.
int dbm_mpi_comm_size(const dbm_mpi_comm_t comm)
Wrapper around MPI_Comm_size.
dbm_mpi_comm_t dbm_mpi_cart_sub(const dbm_mpi_comm_t comm, const int remain_dims[])
Wrapper around MPI_Cart_sub.
void dbm_mpi_comm_free(dbm_mpi_comm_t *comm)
Wrapper around MPI_Comm_free.
void dbm_mpi_cart_get(const dbm_mpi_comm_t comm, int maxdims, int dims[], int periods[], int coords[])
Wrapper around MPI_Cart_get.
dbm_mpi_comm_t dbm_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
static void const int const int i
Internal struct for storing a one-dimensional distribution.
Internal struct for storing a two-dimensional distribution.