27 dist->nshards = nshards;
30 dist->length = length;
31 dist->index2coord = malloc(length *
sizeof(
int));
32 assert(dist->index2coord != NULL || length == 0);
34 memcpy(dist->index2coord, coords, length *
sizeof(
int));
38 int cart_dims[1], cart_periods[1], cart_coords[1];
40 assert(dist->nranks == cart_dims[0]);
41 assert(dist->my_rank == cart_coords[0]);
44 for (
int i = 0;
i < length;
i++) {
45 assert(0 <= coords[
i] && coords[
i] < dist->nranks);
46 if (coords[
i] == dist->my_rank) {
52 dist->local_indicies = malloc(dist->nlocals *
sizeof(
int));
53 assert(dist->local_indicies != NULL || dist->nlocals == 0);
55 for (
int i = 0;
i < length;
i++) {
56 if (coords[
i] == dist->my_rank) {
57 dist->local_indicies[j++] =
i;
60 assert(j == dist->nlocals);
68 free(dist->index2coord);
69 free(dist->local_indicies);
79 const double target =
imax(nrows, 1) / (double)
imax(ncols, 1);
80 int best_nrow_shards = nshards;
81 double best_error = fabs(log(target / (
double)nshards));
83 for (
int nrow_shards = 1; nrow_shards <= nshards; nrow_shards++) {
84 const int ncol_shards = nshards / nrow_shards;
85 if (nrow_shards * ncol_shards != nshards) {
88 const double ratio = (double)nrow_shards / (
double)ncol_shards;
89 const double error = fabs(log(target / ratio));
90 if (error < best_error) {
92 best_nrow_shards = nrow_shards;
95 return best_nrow_shards;
103 const int nrows,
const int ncols,
104 const int row_dist[nrows],
105 const int col_dist[ncols]) {
106 assert(omp_get_num_threads() == 1);
114 const int row_dim_remains[2] = {1, 0};
117 const int col_dim_remains[2] = {0, 1};
122 const int ncol_shards = nshards / nrow_shards;
127 assert(*dist_out == NULL);
136 assert(dist->ref_count > 0);
145 assert(dist->ref_count > 0);
147 if (dist->ref_count == 0) {
159 const int **row_dist) {
160 assert(dist->ref_count > 0);
161 *nrows = dist->rows.length;
162 *row_dist = dist->rows.index2coord;
170 const int **col_dist) {
171 assert(dist->ref_count > 0);
172 *ncols = dist->cols.length;
173 *col_dist = dist->cols.index2coord;
181 const int row,
const int col) {
182 assert(dist->ref_count > 0);
183 assert(0 <= row && row < dist->rows.length);
184 assert(0 <= col && col < dist->cols.length);
185 int coords[2] = {dist->rows.index2coord[row], dist->cols.index2coord[col]};
int cp_mpi_comm_size(const cp_mpi_comm_t comm)
Wrapper around MPI_Comm_size.
int cp_mpi_cart_rank(const cp_mpi_comm_t comm, const int coords[])
Wrapper around MPI_Cart_rank.
void cp_mpi_cart_get(const cp_mpi_comm_t comm, int maxdims, int dims[], int periods[], int coords[])
Wrapper around MPI_Cart_get.
void cp_mpi_comm_free(cp_mpi_comm_t *comm)
Wrapper around MPI_Comm_free.
cp_mpi_comm_t cp_mpi_comm_f2c(const int fortran_comm)
Wrapper around MPI_Comm_f2c.
cp_mpi_comm_t cp_mpi_cart_sub(const cp_mpi_comm_t comm, const int remain_dims[])
Wrapper around MPI_Cart_sub.
int cp_mpi_comm_rank(const cp_mpi_comm_t comm)
Wrapper around MPI_Comm_rank.
int dbm_distribution_stored_coords(const dbm_distribution_t *dist, const int row, const int col)
Returns the MPI rank on which the given block should be stored.
static void dbm_dist_1d_free(dbm_dist_1d_t *dist)
Private routine for releasing a one-dimensional distribution.
static int find_best_nrow_shards(const int nshards, const int nrows, const int ncols)
Private routine for finding the optimal number of shard rows.
static void dbm_dist_1d_new(dbm_dist_1d_t *dist, const int length, const int coords[length], const cp_mpi_comm_t comm, const int nshards)
Private routine for creating a new one-dimensional distribution.
#define DBM_SHARDS_PER_THREAD
static int imax(int x, int y)
Returns the larger of two given integers (missing from the C standard).
static void const int const int i
Internal struct for storing a one-dimensional distribution.
Internal struct for storing a two-dimensional distribution.