H5CPP  v1.14.0
Modern C++ templates for HDF5 serial and parallel I/O
Loading...
Searching...
No Matches
throughput.cpp
// Copyright (c) 2018-2026 Steven Varga, Toronto, ON Canada
//
// Parallel HDF5 — write/read throughput benchmark across MPI ranks.
//
// Each rank writes (default) 80 MiB (≈ 84 MB) to its own row of a
// (world_size × nrows) chunked dataset, then reads it back. Each rank's MB/s
// is gathered on rank 0 via MPI_Gather, summed, and printed. Use this as the
// "what's my actual disk bandwidth across N processes" sanity check before
// profiling real code.
//
// REQUIRES: HDF5 built with --enable-parallel (HDF5_IS_PARALLEL=ON). On a
// node-local filesystem the achievable per-rank throughput plateaus at the
// disk's sequential write limit divided by world_size; on Lustre/GPFS it
// scales with the number of OSTs/stripes.
//
// RUN: mpirun -n <N> ./examples-mpi-throughput
#include <mpi.h>
#include <h5cpp/all>
#include <chrono>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <vector>
int main(int argc, char** argv) {
MPI_Init(&argc, &argv);
int rank = 0, world_size = 0;
MPI_Comm comm = MPI_COMM_WORLD;
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &world_size);
// 80 MB per rank — adjust nchunk if the test should run smaller.
// The chunk_size matches nrows to keep each rank's slab a single chunk.
constexpr std::size_t nchunk = 1024 * 1024;
constexpr std::size_t nrows = 10 * nchunk;
const std::size_t vbytes = nrows * sizeof(double);
// ── WRITE ──────────────────────────────────────────────────────────────
{
std::vector<double> v(nrows, double(rank + 2));
auto fd = h5::create("throughput.h5", H5F_ACC_TRUNC, h5::default_fcpl,
h5::mpiio({comm, MPI_INFO_NULL}));
// alloc_time_early forces space allocation at create-time so the
// write benchmark below measures pure I/O, not metadata + alloc.
h5::ds_t ds = h5::create<double>(fd, "dataset",
h5::max_dims{static_cast<hsize_t>(world_size), nrows},
h5::chunk{1, nchunk} | h5::alloc_time_early);
h5::write(ds, v,
h5::current_dims{static_cast<hsize_t>(world_size), nrows},
h5::offset{static_cast<hsize_t>(rank), 0},
h5::count{1, nrows},
const double seconds = std::chrono::duration<double>(t1 - t0).count();
const double MB_per_s = (double(vbytes) / 1e6) / seconds;
std::vector<double> rates(world_size);
MPI_Gather(&MB_per_s, 1, MPI_DOUBLE, rates.data(), 1, MPI_DOUBLE, 0, comm);
if (rank == 0) {
double total = std::accumulate(rates.begin(), rates.end(), 0.0);
std::cout << "WRITE: " << total << " MB/s aggregate ("
<< world_size << " ranks)\n";
}
}
// ── READ ───────────────────────────────────────────────────────────────
{
auto fd = h5::open("throughput.h5", H5F_ACC_RDWR,
h5::mpiio({comm, MPI_INFO_NULL}));
auto ds = h5::open(fd, "/dataset");
h5::read(ds, v.data(),
h5::offset{static_cast<hsize_t>(rank), 0},
h5::count{1, nrows},
const double seconds = std::chrono::duration<double>(t1 - t0).count();
const double MB_per_s = (double(vbytes) / 1e6) / seconds;
std::vector<double> rates(world_size);
MPI_Gather(&MB_per_s, 1, MPI_DOUBLE, rates.data(), 1, MPI_DOUBLE, 0, comm);
if (rank == 0) {
double total = std::accumulate(rates.begin(), rates.end(), 0.0);
std::cout << "READ: " << total << " MB/s aggregate ("
<< world_size << " ranks)\n";
}
}
MPI_Barrier(comm);
MPI_Finalize();
return 0;
}
T accumulate(T... args)
T begin(T... args)
T data(T... args)
T end(T... args)
h5::at_t create(const hid_t &parent, const std::string &path, args_t &&... args)
Create a new attribute of element type T on a parent HDF5 object.
Definition H5Acreate.hpp:100
T aread(const hid_t &ds, const std::string &name, const h5::acpl_t &acpl=h5::default_acpl)
Read an attribute by name and return its value as type T.
Definition H5Aread.hpp:76
h5::at_t open(const hid_t &parent, const std::string &path, const h5::acpl_t &acpl=h5::default_acpl)
Open an existing attribute by name on a parent HDF5 object.
Definition H5Aopen.hpp:56
std::enable_if_t<!std::is_same_v< T, char ** >, void > read(const h5::ds_t &ds, T *ptr, args_t &&... args)
Read elements from an open HDF5 dataset into caller-allocated memory.
Definition H5Dread.hpp:56
h5::gr_t write(const LOC &parent, const std::string &path, const T &src)
Write a sparse matrix or vector as a CSC group.
Definition H5Dsparse.hpp:185