Skip to content
attributes

attributes.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <cstdint>

#include "struct.h"
#include <h5cpp/core>
    // generated file must be sandwiched between core and io 
    // to satisfy template dependencies in <h5cpp/io>  
    #include "generated.h"
#include <h5cpp/io>
#include <cstddef>
#include "utils.hpp"

int main(){
    arma::mat M = arma::zeros(5,6);

    arma::mat matrix = arma::zeros(3,4); for(int i=0; i<matrix.n_elem; i++ ) matrix[i] = i;
    std::vector<sn::example::Record> vector = h5::utils::get_test_data<sn::example::Record>(40);
    sn::example::Record& record = vector[3];
    // set to use the latest file format version to able to use large size attributes
    h5::fd_t fd = h5::create("001.h5", H5F_ACC_TRUNC, h5::default_fcpl,
        h5::libver_bounds({H5F_LIBVER_V18, H5F_LIBVER_V18}) );
    h5::ds_t ds = h5::write(fd,"directory/dataset", M);
    {
        /*
        (gr_t | ds_t | dt_t = fd["/root/some/path"]) = some object;
        ds_t ds = ... ;

        gr["data set"] = some object;
        ds["attribute"] = some attribute;
        h5::ds_t obj = fd["/some/path"]["attribute"];
        //h5::gr_t obj = fd["/some/path"];*/
    }

    {
        ds["att_01"] = 42 ;
        ds["att_02"] = {1.,2.,3.,4.};
        ds["att_03"] = {'1','2','3','4'};
        ds["att_04"] = {"alpha", "beta","gamma","..."};

        ds["att_05"] = "const char[N]";
        ds["att_06"] = u8"const char[N]áééé";
        ds["att_07"] = std::string( "std::string");

        ds["att_08"] = record; // pod/compound datatype
        ds["att_09"] = vector; // vector of pod/compound type
        ds["att_10"] = matrix; // linear algebra object
    }

    /* supported types:
     * T := integral | std::string | const char[] | POD struct*
     * accept := T | std::vector<T> | linalg 
     * * pod struct requires h5cpp compiler or manual labour
     */
    { // create + write
        h5::awrite(ds,"att_21", 42 );
        h5::awrite(ds,"att_22", {1.,3.,4.,5.} );
        h5::awrite(ds,"att_23", {'1','3','4','5'} );
        h5::awrite(ds,"att_24", {"alpha", "beta","gamma","..."} );

        h5::awrite(ds,"att_25", "const char[N]");
        h5::awrite(ds,"att_26", u8"const char[N]áééé");
        h5::awrite(ds,"att_27", std::string( "std::string") );

        h5::awrite(ds,"att_28", record ); // pod/compound datatype
        h5::awrite(ds,"att_29", vector ); // vector of pod/compound type
        h5::awrite(ds,"att_30", matrix ); // linear algebra object
    }
    {//directory
        h5::gr_t gr{H5Gopen(fd,"/directory", H5P_DEFAULT)};
        h5::awrite(gr,"att_21", 42 );
        h5::awrite(gr,"att_22", {1.,3.,4.,5.} );
        h5::awrite(gr,"att_23", {'1','3','4','5'} );
        h5::awrite(gr,"att_24", {"alpha", "beta","gamma","..."} );

        h5::awrite(gr,"att_25", "const char[N]");
        h5::awrite(gr,"att_26", u8"const char[N]áééé");
        h5::awrite(gr,"att_27", std::string( "std::string") );

        h5::awrite(gr,"att_28", record ); // pod/compound datatype
        h5::awrite(gr,"att_29", vector ); // vector of pod/compound type
        h5::awrite(gr,"att_30", matrix ); // linear algebra object
    }
    { // open + write -> attribute size must not change
        arma::mat att_01 = arma::ones(3,4);
        h5::awrite(ds,"att_01", att_01 );
        //TODO: attribute with different dimension
        // 1.) remove previous attribute
        // 2.) create new attribute
        // 3.) write attribute
        h5::awrite(ds,"att_02", {1.,2.,3.,4.,5.,6.} );
        // trigger runtime error and then handle it
        h5::mute();
        try{
            h5::awrite(ds,"att_02", {"one","two","..."} );
        } catch ( const h5::error::io::attribute::any& err ){
            std::cerr <<"INTENTIONAL ERROR: " <<err.what() << std::endl;
        }
        h5::unmute();
    }
    { // reading back attribute is always single shot, no partial IO 
        int a = h5::aread<int>(ds,"att_01");
        arma::mat att_10 = h5::aread<arma::mat>(ds,"att_10");
        std::cerr << att_10 <<"\n";
    }
    { // reading back attribute is always single shot, no partial IO 
        // vector of ints
        std::cout << "att_01 : "; int att_01 = h5::aread<int>(ds,"att_01"); std::cout << att_01 << "\n";
        // vector of doubles
        std::cout << "att_02 : "; auto att_02 = h5::aread<std::vector<double>>(ds,"att_02");
        std::cout << att_02 << "\n";
        // vector of ints
        std::cout << "att_03 : "; auto att_03 = h5::aread<std::vector<int>>(ds,"att_03");
        std::cout << att_03 << "\n";
        // vector of strings, NOTE: charaters or initializer lists are not yet supported
        std::cout << "att_04 : "; auto att_04 = h5::aread<std::vector<std::string>>(ds,"att_04");
        std::cout<< att_04 <<"\n";
        // std::string 
        std::cout << "att_07 : "; std::string att_07 = h5::aread<std::string>(ds,"att_07");
        std::cout << att_07  <<"\n";
        // POD type rank 0
        std::cout << "att_08 : "; sn::example::Record att_08 = h5::aread<sn::example::Record>(ds,"att_08");
        std::cout<< att_08.idx <<"\n";
        // POD type rank 1
        std::cout << "att_09 : "; auto att_09 = h5::aread<std::vector<sn::example::Record>>(ds,"att_09");
        for(auto v: att_09) std::cout << v.idx <<","; std::cout <<std::endl;
        // linear algebra object
        std::cout << "att_10 : "; auto att_10 = h5::aread<arma::mat>(ds,"att_10"); std::cout << att_10 << "\n";
    }
}

generated.h

/* Copyright (c) 2018 vargaconsulting, Toronto,ON Canada
 *     Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#ifndef H5CPP_GUARD_aioPh
#define H5CPP_GUARD_aioPh

namespace h5{
    //template specialization of sn::example::Record to create HDF5 COMPOUND type
    //
    // Builds the type bottom-up: array and compound types for the nested members
    // are created first, inserted into their parents, and finally all intermediate
    // handles are closed. Only the outermost compound handle (ct_02) is returned.
    template<> hid_t inline register_struct<sn::example::Record>(){
        // at_00: float[7], used for sn::example::Record::field_02
        hsize_t at_00_[] ={7};            hid_t at_00 = H5Tarray_create(H5T_NATIVE_FLOAT,1,at_00_);
        // at_01: double[3], used for sn::other::Record::field_02
        hsize_t at_01_[] ={3};            hid_t at_01 = H5Tarray_create(H5T_NATIVE_DOUBLE,1,at_01_);

        // ct_00: compound type mirroring sn::typecheck::Record, one member per field
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        H5Tinsert(ct_00, "_char",   HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",  HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",  HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort", HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",    HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",   HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",   HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",  HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",  HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong", HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",  HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double", HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",    HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",   HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);
        // at_02: ct_00[4], used for sn::other::Record::field_03
        hsize_t at_02_[] ={4};            hid_t at_02 = H5Tarray_create(ct_00,1,at_02_);

        // ct_01: compound type mirroring sn::other::Record
        hid_t ct_01 = H5Tcreate(H5T_COMPOUND, sizeof (sn::other::Record));
        H5Tinsert(ct_01, "idx", HOFFSET(sn::other::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "aa",  HOFFSET(sn::other::Record,aa),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "field_02",    HOFFSET(sn::other::Record,field_02),at_01);
        H5Tinsert(ct_01, "field_03",    HOFFSET(sn::other::Record,field_03),at_02);
        // at_03: ct_01[5], shared by field_03 and field_04 of sn::example::Record
        hsize_t at_03_[] ={5};            hid_t at_03 = H5Tarray_create(ct_01,1,at_03_);
        // at_04 / at_05: ct_01[8] nested in an array of 3, i.e. field_05[3][8]
        hsize_t at_04_[] ={8};            hid_t at_04 = H5Tarray_create(ct_01,1,at_04_);
        hsize_t at_05_[] ={3};            hid_t at_05 = H5Tarray_create(at_04,1,at_05_);

        // ct_02: the compound type for sn::example::Record itself
        hid_t ct_02 = H5Tcreate(H5T_COMPOUND, sizeof (sn::example::Record));
        H5Tinsert(ct_02, "idx", HOFFSET(sn::example::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_02, "field_02",    HOFFSET(sn::example::Record,field_02),at_00);
        H5Tinsert(ct_02, "field_03",    HOFFSET(sn::example::Record,field_03),at_03);
        H5Tinsert(ct_02, "field_04",    HOFFSET(sn::example::Record,field_04),at_03);
        H5Tinsert(ct_02, "field_05",    HOFFSET(sn::example::Record,field_05),at_05);

        //closing all hid_t allocations to prevent resource leakage
        H5Tclose(at_00); H5Tclose(at_01); H5Tclose(ct_00); H5Tclose(at_02); H5Tclose(ct_01);
        H5Tclose(at_03); H5Tclose(at_04); H5Tclose(at_05); 

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_02' must be closed: H5Tclose(ct_02);
        return ct_02;
    };
}
H5CPP_REGISTER_STRUCT(sn::example::Record);

#endif

struct.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#ifndef  H5TEST_STRUCT_01 
#define  H5TEST_STRUCT_01

/* typedef is allowed */
typedef unsigned long long MyUInt;

// NOTE: not_supported_yet::Container uses std::string and std::vector; no include
// for them is visible in this header, so they must be included before it.
namespace sn {
    namespace typecheck {
        // One member for each fundamental type with a direct mapping to HDF5.
        struct Record {
            char               _char;
            unsigned char      _uchar;
            short              _short;
            unsigned short     _ushort;
            int                _int;
            unsigned int       _uint;
            long               _long;
            unsigned long      _ulong;
            long long          _llong;
            unsigned long long _ullong;
            float              _float;
            double             _double;
            long double        _ldouble;
            bool               _bool;
            // wide characters are not supported in HDF5
            // wchar_t _wchar; char16_t _wchar16; char32_t _wchar32;
        };
    }
    namespace other {
        // POD struct living in a nested namespace
        struct Record {
            MyUInt            idx;         // typedef'd type
            MyUInt            aa;          // typedef'd type
            double            field_02[3]; // fixed-size array
            typecheck::Record field_03[4]; // fixed-size array of another POD struct
        };
    }
    namespace example {
        // POD struct living in a nested namespace
        struct Record {
            MyUInt        idx;            // typedef'd type
            float         field_02[7];    // fixed-size array
            other::Record field_03[5];    // embedded Record
            other::Record field_04[5];    // same type as field_03: must be optimized out
            other::Record field_05[3][8]; // array of arrays
        };
    }
    namespace not_supported_yet {
        // NON POD: not supported in phase 1
        // C++ Class -> PODstruct -> persistence[ HDF5 | ??? ] -> PODstruct -> C++ Class
        struct Container {
            double                       idx;
            std::string                  field_05; // C++ object, makes the struct non-POD
            std::vector<example::Record> field_02; // ditto
        };
    }
    /* BEGIN IGNORED STRUCT */
    // Not referenced by any h5::read | h5::write | h5::create call,
    // hence the h5cpp compiler should ignore it.
    struct IgnoredRecord {
        long  idx;
        float field_0n;
    };
    /* END IGNORED STRUCTS */
}
#endif

utils.hpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include "struct.h"
#include <h5cpp/H5misc.hpp>

namespace h5 { namespace utils {
    /**
     * Specialization of get_test_data for sn::example::Record.
     *
     * @param n number of records to generate
     * @return vector of n records; element i has idx == i, all other members
     *         are left value-initialized by the vector constructor
     */
    template <> inline  std::vector<sn::example::Record> get_test_data( size_t n ){
        std::vector<sn::example::Record> vec (n);
        // index type matches n: no signed/unsigned comparison, no int overflow
        for(size_t i=0; i<n; ++i )
            vec[i].idx = i;
        return vec;
    }

}}

README.md

## Attributes[^1]

An HDF5 attribute is a small metadata object describing the nature and/or intended usage of a primary data object. A primary data object may be a dataset `h5::ds_t`, group `h5::gr_t`, or committed datatype `h5::dt_t`, and in this documentation will be referred to as `parent` or `location`.

Attributes are assumed to be very small as data objects go, so storing them as standard HDF5 datasets would be quite inefficient. HDF5 attributes are therefore managed through a special attributes interface, H5A, which is designed to easily attach attributes to primary data objects as small datasets containing metadata information and to minimize storage requirements.

Consider, as examples of the simplest case, a set of laboratory readings taken under known temperature and pressure conditions of 18.0 degrees celsius and 0.5 atmospheres, respectively. The temperature and pressure stored as attributes of the dataset could be described as the following name/value pairs:

* temp=18.0
* pressure=0.5

**example:**

```cpp
#include <h5cpp/all>
...
auto ds = h5::open("my-container.h5","my-dataset");
ds["temperature"] = 18.0;
ds["pressure"] = 0.5;
...
```

While HDF5 attributes are not standard HDF5 datasets, they have much in common:

  • An attribute has a user-defined dataspace and the included metadata has a user-assigned datatype
  • Metadata can be of any valid HDF5 datatype
  • Attributes are addressed by name

But there are some very important differences:

  • There is no provision for special storage such as compression or chunking
  • There is no partial I/O or sub-setting capability for attribute data
  • Attributes cannot be shared
  • Attributes cannot have attributes
  • Being small, an attribute is stored in the object header of the object it describes and is thus attached directly to that object

Objects

pod             := arbitrary plain old datatype - consecutive memory region:
                   automatic compiler assisted reflection or manual handling
integral        := [ unsigned | signed ] [int_8 | int_16 | int_32 | int_64 | float | double ] 
string          := std::string | char*
scalar          := integral | pod_struct | string
vector          := std::vector<scalar>
initializer_list:= std::initializer_list<scalar>{}
linalg          := armadillo | eigen | ... 

T ::= scalar | vector | initializer_list | linalg

IO Templates

Single Objects

parent ::= h5::gr_t | h5::ds_t | h5::dt_t | h5::at_t;

[open]
h5::at_t h5::aopen(parent, const std::string& name [, const & acpl] );

[create]
h5::at_t acreate<T>( parent, const std::string& name 
    [, const h5::current_dims{...} ] [, const h5::acpl_t& acpl]);

[read]
T aread( parent, const std::string& name [, const h5::acpl_t& acpl]) const;

[write]
void awrite( parent, const std::string &name, const T& obj  [, const h5::acpl_t& acpl]);

Multiple Objects

Passing std::tuple<std::string 0, T0 V_0, std::string 1, T1 V1, ..., std::string n, Tn Vn> a sequence of name - value pairs, a meta template breaks up the call into a sequence of awrite calls at compile time.

void awrite( parent, const std::string &name, const std::tuple<T...>& objects  [, const h5::acpl_t& acpl]);
This mechanism comes in handy when dealing with many attributes. Typing out IO operators one by one is a boring, repetitive process that kills the flow; bundling multiple calls into a single operation with a std::tuple<...> type is a natural fit:

...
h5::gr_t gr = h5::gcreate(fd, "my-group" );

arma::mat matrix = arma::zeros(3,4); 
std::vector<sn::example::Record> vector = h5::utils::get_test_data<sn::example::Record>(40);
sn::example::Record& record = vector[0];

h5::awrite(gr, std::make_tuple(
    "temperature", 18.0,
    "pressure",    0.5,
    "matrix",      matrix, // linear algebra object
    "vector",      vector, // std::vector of POD type
    "pod struct",  record  // single POD type
    ));

Examples:

The examples are to demonstrate how to use HDF5 attribute with various object types

  • writing attributes to dataset with ds_t['attribute_name'] = attribute;
  • using: h5::awrite, h5::aread, h5::create
  • compiler assisted reflection with h5cpp code transformation tool
  • bundling objects for single shot write with std::tuple<T...>
**Makefile**
```make linenums="1"
#  _____________________________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _____________________________________________________________________________

apps = attributes
CXXFLAGS =  -std=c++17 
LIBS =  -lhdf5  -lz -ldl -lm
INCLUDES = -I/usr/local/include -I/usr/include

# build the example, then run it
test: $(apps)
    @./attributes
    #h5dump -pH 001.h5

# generic compile rule; $(INCLUDES) already carries its own -I prefixes, so it
# must not be preceded by another '-'. SRC_DIR is not set in this file.
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $^

all: $(apps)

# compiler assisted introspection will scan TU translation unit, which is the 
# collection of files you're compiling into an object, generates desired output
# from topologically ordered dependency graph. 
# currently POD structs, arrays and integral types are supported, in arbitrary deep
# embedding
#
# with -Dmyfile.h specifies the generated output, which must be sandwiched between
# <h5cpp/core> and <h5cpp/io>
generated.h: struct.h
    h5cpp  attributes.cpp --  $(CXXFLAGS) -Dgenerated.h

attributes.o : attributes.cpp generated.h 
    $(CXX) -o attributes.o  $(CXXFLAGS) -c attributes.cpp

attributes: attributes.o
    $(CXX) $^ $(LIBS) -o $@ 

clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean
    @$(RM) generated.h

# none of these targets produces a file of the same name
.PHONY: test all clean dist-clean
basics

basics.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <h5cpp/all>
#include <cstddef>
/*

basics.cpp:15:74: error: no matching function for call to 
‘h5::impl::prop_t<
    h5::impl::detail::hid_t<h5::impl::fapl_t, H5Pclose, true, true, true, false>, 
    h5::impl::default_fapl, 
    h5::impl::capi_t<long int, H5F_libver_t, H5F_libver_t>, 
    H5Pset_libver_bounds>
    ::prop_t(H5F_libver_t, H5F_libver_t)’
*/

// Walk-through of h5cpp basics: type descriptors, property lists chained with
// operator|, error handling, and file / dataset creation. Each braced scope is
// an independent example; files 001.h5 .. 004.h5 are created along the way.
int main(){
    // data type mapped from C/C++ type system to HDF5 through class templates
    // h5::dt_t<T> hdf5_type_descriptor; 
    {
        // default ctor selects H5T_NATIVE_INT through template partial specialization
        // then obtains an hid_t HDF5 type descriptor through H5copy( H5T_NATIVE_INT )
        // through template inheritance its behaviour matches the rest of the descriptors
        // RAII closes resource when leaving scope 
        h5::dt_t<int> my_int_type;
        // depending on conversion policy type id may be implicitly or explicitly cast 
        // to CAPI style HDF5 ID
        hid_t capi_style_id = static_cast<hid_t>( my_int_type );
        // for types not yet defined you need to register them with the following macro call:
        // H5CPP_REGISTER_TYPE(C_COMPOUND_TYPE, HDF5_COMPOUND_TYPE )
        // which is a template specialization for the given type, see H5Tall.hpp for details
        //
        H5CPP_CHECK_EQ(  H5Tequal( capi_style_id, H5T_NATIVE_INT),
                std::runtime_error, "HDF5 type system failure!!! " )

        // types have names at compile + runtime
        std::cout << h5::name<int>::value << std::endl; // prints out type information
        std::cout << my_int_type << std::endl; // prints out type information
    }
    { // property lists can be combined with operator| and operator|=
        h5::dcpl_t dcpl0 = h5::chunk{12} | h5::gzip{2};
        h5::dcpl_t dcpl1 = h5::chunk{12} | h5::gzip{2};
        h5::dcpl_t dcpl = dcpl0 | dcpl1;
        dcpl0 |= dcpl1;
        //H5CPP_CHECK_EQ( sizeof(dcpl) == sizeof(hid_t), 
        //      std::runtime_error, "HDF5 hid_t are not binary equivalent!!!" )
    }
    // error handling
    {
        h5::mute(); // to avoid confusion mute CAPI error messages
        try {
            h5::dcpl_t dcpl_0 = h5::gzip{79798}; // invalid argument
        } catch ( const h5::error::any& err ){
            std::cerr << "THIS ERROR is on PURPOSE: " << err.what() <<std::endl;
        }
        h5::unmute();
    }

    { // property lists can be daisy chained with | operator
        h5::fcpl_t fcpl = h5::file_space_page_size{4096} | h5::userblock{512};
        h5::fapl_t fapl = h5::fclose_degree_weak | h5::stdio;
        auto some_prop = h5::libver_bounds({H5F_LIBVER_LATEST, H5F_LIBVER_LATEST});
        h5::page_buffer_size{{1024,0,0}};
        h5::dcpl_t dcpl = h5::chunk{2,3} | h5::fill_value<short>{42} | h5::fletcher32 | h5::shuffle | h5::nbit | h5::gzip{9};
        h5::lcpl_t lcpl = h5::create_path | h5::utf8;
        // and come with sensible default setting: h5::default_xxxl where xxx ::= hdf5 property name 
        // h5::dapl_t dapl = h5::default_dapl; // compiler error, default values are not assignable, instead create your own as seen above
        // h5::lcpl_t lcpl = h5::create_path | h5::utf8; 
    }
    { // all resources follow RAII idiom / managed
        h5::fd_t fd = h5::create("001.h5", H5F_ACC_TRUNC);  // h5::fd_t is a managed resource, will call H5Fclose upon leaving code block
        hid_t ref = static_cast<hid_t>( fd );               // static cast to hid_t is always allowed, ref must be treated as managed reference, 
                                                            // must not call CAPI H5Fclose( ref )  on it. This explicit or implicit conversion 
                                                            // is to support CAPI interop.   
    }
    { // file create example: 
        // flags := H5F_ACC_TRUNC | H5F_ACC_EXCL either to truncate or open file exclusively
        // you may pass CAPI property list descriptors daisy chained with '|' operator 
        auto fd = h5::create("002.h5", H5F_ACC_TRUNC,
                h5::file_space_page_size{4096} | h5::userblock{512} );  // file creation properties
                //,h5::fclose_degree_weak | h5::fapl_core{2048,1} );     // file access properties
        // or the c++11 wrapped smart pointer equivalent h5::AP_DEFAULT
        h5::create("003.h5", H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT );
        // the file is closed when h5::create returns, convenient if we don't proceed with
        // creating a dataset
    }

    { // dataset create: 
        auto fd = h5::create("004.h5", H5F_ACC_TRUNC );
        auto ds_0 = h5::create<short>(fd,"/type/short/tree_0", 
                h5::current_dims{10,20}, h5::max_dims{10,H5S_UNLIMITED},
                h5::create_path | h5::utf8, // optional lcpl with this default settings**
                h5::chunk{2,3} | h5::fill_value<short>{42} | h5::fletcher32 | h5::shuffle | h5::nbit | h5::gzip{9}, // optional dcpl
                h5::default_dapl ); // optional dapl
        //** lcpl controls how path (or hdf5 name: links) created, `h5::create_path` makes sure that sub paths are created  
        h5::dcpl_t dcpl = h5::chunk{2,3} | h5::fill_value<short>{42} | h5::fletcher32 | h5::shuffle | h5::nbit | h5::gzip{2};
        // same as above, default values implicit, dcpl explicit
        auto ds_1 = h5::create<short>(fd,"/type/short/tree_1", h5::current_dims{10,20}, h5::max_dims{10,H5S_UNLIMITED}, dcpl);
        // same as above, default values explicit
        auto ds_2 = h5::create<short>(fd,"/type/short/tree_2", h5::current_dims{10,20}, h5::max_dims{10,H5S_UNLIMITED},
                h5::default_lcpl, dcpl, h5::default_dapl);
        // if only max_dims specified, the current dims is set to max_dims or zero if the dimension is H5S_UNLIMITED
        // making it suitable storage for packet table, compression not specified implies no compression
        // gzip{0} == lowest level of compression!!!
        auto ds_3 = h5::create<short>(fd,"/type/short/max_dims", h5::max_dims{10,H5S_UNLIMITED}, // [10 X 0]  
              h5::chunk{10,1}  );
    }

}

Makefile

#  _____________________________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _____________________________________________________________________________

apps = basics

CXXFLAGS = -std=c++17 -Wall
LIBS =  -lhdf5  -lz -ldl -lm

# build the example, then run it
test: $(apps)
    @./basics
    #h5dump -pH 004.h5

# generic compile rule; INCLUDES and SRC_DIR are not set in this file. The
# former '-$(INCLUDES)' left a bare '-' on the command line when INCLUDES is empty.
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $^

all: $(apps)

basics: basics.o
    $(CXX) $^ $(LIBS) -o $@

clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean

# none of these targets produces a file of the same name
.PHONY: test all clean dist-clean

compound

struct.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <cstdint>
#include "struct.h"
#include <h5cpp/core>
    // generated file must be sandwiched between core and io 
    // to satisfy template dependencies in <h5cpp/io>  
    #include "generated.h"
#include <h5cpp/io>
#include "utils.hpp"

// Dataset geometry for the chunked example: NROWS x NCOLS elements, chunked
// along rows of CHUNK_SIZE. Bodies are parenthesized so the macros expand
// safely inside larger expressions (e.g. 100 / NROWS).
#define CHUNK_SIZE 5
#define NROWS (4*CHUNK_SIZE)
#define NCOLS (1*CHUNK_SIZE)

/**
 * Compound datatype example: creates "example.h5", stores an armadillo matrix,
 * creates datasets of POD struct types, writes a vector of records with several
 * property combinations, then reads one dataset back and prints the idx fields.
 */
int main(){
    // RAII will close the resource, no need for H5Fclose( any_longer );
    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);

    {// LINALG:=[armadillo|eigen3|blaze|blitz|it++|dlib|ublas] supported
        arma::imat M(NROWS,NCOLS);              // define a linalg object
        h5::write(fd, "/linalg/armadillo",M);   // save it somewhere, partial and full read|write and append supported
    }
    {// create a Matrix of STRUCT with chunked and GZIP compressed properties ready for partial read|write
     // up to 7 dimensions/extents are supported
        h5::create<sn::example::Record>(fd, "/orm/chunked_2D", 
                h5::current_dims{NROWS,NCOLS}, h5::chunk{1,CHUNK_SIZE} | h5::gzip{8} );
        //FIXME: defaults to unit chunk, which may not be the best setting, yet chunking is required for unlimited
        // should we have some plausible value: 1024 instead?
        h5::create<sn::typecheck::Record>(fd, "/orm/typecheck", h5::max_dims{H5S_UNLIMITED} );
    }

    { // creates + writes entire object tree
        std::vector<sn::example::Record> vec = h5::utils::get_test_data<sn::example::Record>(20);
        h5::write(fd, "orm/partial/vector one_shot", vec );
        // dimensions and other properties are specified as additional arguments
        h5::write(fd, "orm/partial/vector custom_dims", vec,
            h5::max_dims{H5S_UNLIMITED}, h5::gzip{9} | h5::chunk{20} );
        // you don't need to remember order, compiler will do it for you without runtime penalty:
         h5::write(fd, "orm/partial/vector custom_dims different_order", vec,
            h5::chunk{20} | h5::gzip{9}, 
            h5::max_dims{H5S_UNLIMITED}, 
            // how much to move from current location to next, stride[i] >= block[i] must hold
            // gap = stride[i] - block[i]
            h5::stride{6}, h5::block{4}, 
            h5::current_dims{100}, h5::offset{2});
    }

    { // read entire dataset back
        using T = std::vector<sn::example::Record>;
        auto data = h5::read<T>(fd,"/orm/partial/vector one_shot");
        std::cerr <<"reading back data previously written:\n\t";
        // iterate by const reference: each Record is a large struct, avoid copying it
        for( const auto& r : data )
            std::cerr << r.idx <<" ";
        std::cerr << std::endl;
    }
}

generated.h

/* Copyright (c) 2018 vargaconsulting, Toronto,ON Canada
 *     Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#ifndef H5CPP_GUARD_ZbnVm
#define H5CPP_GUARD_ZbnVm

namespace h5{
    //template specialization of sn::example::Record to create HDF5 COMPOUND type
    //
    // Builds the type bottom-up: array and compound types for the nested members
    // are created first, inserted into their parents, and finally all intermediate
    // handles are closed. Only the outermost compound handle (ct_02) is returned.
    template<> hid_t inline register_struct<sn::example::Record>(){
        // at_00: float[7], used for sn::example::Record::field_02
        hsize_t at_00_[] ={7};            hid_t at_00 = H5Tarray_create(H5T_NATIVE_FLOAT,1,at_00_);
        // at_01: double[3], used for sn::other::Record::field_02
        hsize_t at_01_[] ={3};            hid_t at_01 = H5Tarray_create(H5T_NATIVE_DOUBLE,1,at_01_);

        // ct_00: compound type mirroring sn::typecheck::Record, one member per field
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        H5Tinsert(ct_00, "_char",   HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",  HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",  HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort", HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",    HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",   HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",   HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",  HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",  HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong", HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",  HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double", HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",    HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",   HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);
        // at_02: ct_00[4], used for sn::other::Record::field_03
        hsize_t at_02_[] ={4};            hid_t at_02 = H5Tarray_create(ct_00,1,at_02_);

        // ct_01: compound type mirroring sn::other::Record
        hid_t ct_01 = H5Tcreate(H5T_COMPOUND, sizeof (sn::other::Record));
        H5Tinsert(ct_01, "idx", HOFFSET(sn::other::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "aa",  HOFFSET(sn::other::Record,aa),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "field_02",    HOFFSET(sn::other::Record,field_02),at_01);
        H5Tinsert(ct_01, "field_03",    HOFFSET(sn::other::Record,field_03),at_02);
        // at_03: ct_01[5], shared by field_03 and field_04 of sn::example::Record
        hsize_t at_03_[] ={5};            hid_t at_03 = H5Tarray_create(ct_01,1,at_03_);
        // at_04 / at_05: ct_01[8] nested in an array of 3, i.e. field_05[3][8]
        hsize_t at_04_[] ={8};            hid_t at_04 = H5Tarray_create(ct_01,1,at_04_);
        hsize_t at_05_[] ={3};            hid_t at_05 = H5Tarray_create(at_04,1,at_05_);

        // ct_02: the compound type for sn::example::Record itself
        hid_t ct_02 = H5Tcreate(H5T_COMPOUND, sizeof (sn::example::Record));
        H5Tinsert(ct_02, "idx", HOFFSET(sn::example::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_02, "field_02",    HOFFSET(sn::example::Record,field_02),at_00);
        H5Tinsert(ct_02, "field_03",    HOFFSET(sn::example::Record,field_03),at_03);
        H5Tinsert(ct_02, "field_04",    HOFFSET(sn::example::Record,field_04),at_03);
        H5Tinsert(ct_02, "field_05",    HOFFSET(sn::example::Record,field_05),at_05);

        //closing all hid_t allocations to prevent resource leakage
        H5Tclose(at_00); H5Tclose(at_01); H5Tclose(ct_00); H5Tclose(at_02); H5Tclose(ct_01);
        H5Tclose(at_03); H5Tclose(at_04); H5Tclose(at_05); 

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_02' must be closed: H5Tclose(ct_02);
        return ct_02;
    };
}
H5CPP_REGISTER_STRUCT(sn::example::Record);

namespace h5{
    //template specialization of sn::typecheck::Record to create HDF5 COMPOUND type
    //
    // The struct contains only fundamental types, so a single compound type with
    // one native-typed member per field is created and returned; there are no
    // intermediate handles to close.
    template<> hid_t inline register_struct<sn::typecheck::Record>(){

        // compound sized like the C++ struct; members placed at their in-memory offsets
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        H5Tinsert(ct_00, "_char",   HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",  HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",  HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort", HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",    HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",   HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",   HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",  HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",  HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong", HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",  HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double", HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",    HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",   HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_00' must be closed: H5Tclose(ct_00);
        return ct_00;
    };
}
H5CPP_REGISTER_STRUCT(sn::typecheck::Record);

#endif

struct.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#ifndef  H5TEST_STRUCT_01 
#define  H5TEST_STRUCT_01

// sn::not_supported_yet::Container below uses std::string and std::vector; include
// them here so the header compiles no matter what the including file pulled in first.
#include <string>
#include <vector>

/* typedef is allowed */
typedef unsigned long long int MyUInt;

// Sample record types used by the examples: plain-old-data structs in nested
// namespaces, embedded in each other and in fixed-size arrays.
namespace sn {
    namespace typecheck {
        struct Record { /*the types with direct mapping to HDF5*/
            char  _char; unsigned char _uchar; short _short; unsigned short _ushort; int _int; unsigned int _uint;
            long _long; unsigned long _ulong; long long int _llong; unsigned long long _ullong;
            float _float; double _double; long double _ldouble;
            bool _bool;
            // wide characters are not supported in HDF5
            // wchar_t _wchar; char16_t _wchar16; char32_t _wchar32;
        };
    }
    namespace other {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            MyUInt                     aa; // typedef type 
            double            field_02[3]; // const array mapped 
            typecheck::Record field_03[4]; // array of embedded POD structs
        };
    }
    namespace example {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            float             field_02[7]; // const array mapped 
            sn::other::Record field_03[5]; // embedded Record
            sn::other::Record field_04[5]; // must be optimized out, same as previous
            other::Record  field_05[3][8]; // array of arrays 
        };
    }
    namespace not_supported_yet {
        // NON POD: not supported in phase 1
        // C++ Class -> PODstruct -> persistence[ HDF5 | ??? ] -> PODstruct -> C++ Class 
        struct Container {
            double                            idx; // 
            std::string                  field_05; // c++ object makes it non-POD
            std::vector<example::Record> field_02; // ditto
        };
    }
    /* BEGIN IGNORED STRUCT */
    // these structs are not referenced with h5::read|h5::write|h5::create operators
    // hence compiler should ignore them.
    struct IgnoredRecord {
        signed long int   idx;
        float        field_0n;
    };
    /* END IGNORED STRUCTS */
}
#endif

utils.hpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include "struct.h"
#include <h5cpp/H5misc.hpp>

namespace h5::utils {
    // Explicit specializations of the get_test_data test-data generator.
    // NOTE(review): the primary template is not visible here; presumably it is
    // declared by the h5cpp header included above — confirm.

    // Returns n value-initialized records whose `idx` field holds the element's position.
    template <> inline  std::vector<sn::example::Record> get_test_data( size_t n ){
        std::vector<sn::example::Record> vec (n);
        // size_t index: matches the type of n, so no signed/unsigned comparison
        // and no int overflow for large n
        for(size_t i=0; i<n; i++ )
            vec[i].idx = i;
        return vec;
    }
    // Returns the sequence 0, 1, ..., n-1.
    template <> inline  std::vector<int> get_test_data( size_t n ){
        std::vector<int> vec (n);
        for(size_t i=0; i<n; i++ )
            vec[i] = static_cast<int>(i);
        return vec;
    }
}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
CXXFLAGS =  -std=c++17
LIBS =  -lhdf5  -lz -ldl -lm
INCLUDES = -I/usr/local/include -I/usr/include

# default target: build the example and run it
test: struct
	./struct

# Compiler-assisted introspection scans the translation unit (TU), i.e. the
# collection of files compiled into one object, and generates the desired output
# from the topologically ordered dependency graph.
# Currently POD structs, arrays and integral types are supported, embedded to
# arbitrary depth.
#
# -D<file> names the generated output (here generated.h), which must be included
# sandwiched between <h5cpp/core> and <h5cpp/io>.
# struct.cpp is listed as a prerequisite because it is the file h5cpp scans.
generated.h: struct.cpp struct.h
	h5cpp  struct.cpp -- $(CXXFLAGS) $(INCLUDES) -Dgenerated.h

struct.o : struct.cpp generated.h
	$(CXX) $(INCLUDES) -o struct.o  $(CXXFLAGS) -c struct.cpp

struct: struct.o
	$(CXX) $^ $(LIBS) -o $@

# $(apps) is not defined in this Makefile and expands to nothing here
clean:
	@$(RM) *.o *.h5 $(apps)  struct

dist-clean: clean
	@$(RM) generated.h

# none of these targets produce a file of the same name
.PHONY: test clean dist-clean

container

container.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <armadillo>
#include <h5cpp/all>
#include <cstddef>

// Name of the HDF5 container shared by every step of this example.
constexpr auto filename = "001.h5";

// Walks through the dataset API: create-then-write, one-shot write, write into a
// selection of a larger file space, and reading datasets back as armadillo objects.
int main() {

    // H5F_ACC_TRUNC: start from an empty container on every run
    auto fd = h5::create(filename, H5F_ACC_TRUNC);
    { // CREATE - WRITE
        arma::mat M(2,3); M.ones();                         // create a matrix
        // dataset of `short` with explicit current/maximum dimensions; chunking,
        // fill value and gzip are combined into a single property list with `|`
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        // place M inside the dataset using the given offset and stride
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }
    {
        arma::vec V( {1.,2.,3.,4.,5.,6.,7.,8.});    // create a vector
        // simple one shot write that computes current dimensions and saves matrix
        // NOTE(review): these calls address the container by file name while `fd`
        // from above is still open — confirm this is the intended usage
        h5::write( filename, "one shot create write",  V);
        // what if you want to position a matrix inside a higher dimension with some added complexity?  
        h5::write( filename, "arma vec inside matrix",  V // object contains 'count' and rank being written
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}            // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)  
            ,h5::count{1,1}
            ,h5::stride{3,5}
            ,h5::block{2,4}
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension 
                             // is used as unit chunk
        );
    }
    { // CREATE - READ: a new dataset named "dataset" is created here and read straight back
      // (it is not the dataset written in the first step)
      // note that data is only data, can be reshaped, cast to any format and content be modified through filtering 
        h5::ds_t ds = h5::create<float>(fd,"dataset", h5::current_dims{3,2}, h5::fill_value<float>(NAN));  // create dataset, default to NaN-s
        // `ds` is not used again; the read below addresses the dataset by name
        auto M  = h5::read<arma::mat>( fd,"dataset" );                 // read data back as matrix
        M.print();
    }
    { // READ: 
        arma::mat M = h5::read<arma::mat>(filename,"create then write"); // read entire dataset back with a single read
        M.print();
    }



}

README.md

## File[^1]
<div id="object" style="float: right">
    ![Object](../pix/FF-IH_FileObject.gif)
</div>



HDF5 files are organized in a hierarchical structure, with two primary structures: groups and datasets.

* HDF5 group: a grouping structure containing instances of zero or more groups or datasets, together with supporting metadata.
* HDF5 dataset: a multidimensional array of data elements, together with supporting metadata.


Working with groups and group members is similar in many ways to working with directories and files in UNIX. As with UNIX directories and files, objects in an HDF5 file are often described by giving their full (or absolute) path names.
<div id="group" style="float: left">
    ![File](../pix/FF-IH_FileGroup.gif)
</div>

`/` signifies the root group. `/foo` signifies a member of the root group called foo. `/foo/zoo` signifies a member of the group foo, which in turn is a member of the root group. Any HDF5 group or dataset may have an associated attribute list. An HDF5 attribute is a user-defined HDF5 structure that provides extra information about an HDF5 object.

For those who are interested, this section takes a look at the low-level elements of the file as the file is written to disk (or other storage media) and the relation of those low-level elements to the higher level elements with which users typically are more familiar. The HDF5 API generally exposes only the high-level elements to the user; the low-level elements are often hidden. The rest of this Introduction does not assume an understanding of this material.

The format of an HDF5 file on disk encompasses several key ideas of the HDF4 and AIO file formats as well as addressing some shortcomings therein. The new format is more self-describing than the HDF4 format and is more uniformly applied to data objects in the file.


[^1]: Adapted from the HDF5 C API documentation.

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
# NOTE(review): the only source shown next to this Makefile is container.cpp, while
# the rules below build `datasets` — confirm the intended application name.
apps = datasets

# directory holding the sources; may be overridden on the command line
SRC_DIR ?= .

CXXFLAGS =  -std=c++17 -Wno-deprecated
LIBS =  -lhdf5  -lz -ldl -lm

# default target: build the example and run it
test: $(apps)
	@./datasets

# $(INCLUDES) is expected to carry complete -I flags (it is empty by default);
# $< compiles only the source file, not every prerequisite
%.o : $(SRC_DIR)/%.cpp
	$(CXX) $(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $<

all: $(apps)

datasets: datasets.o
	$(CXX) $^ $(LIBS) -o $@

clean:
	@$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean

# none of these targets produce a file of the same name
.PHONY: test all clean dist-clean

csv

csv2hdf5.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include "csv.h"
// data structure include file: `struct.h` must precede 'generated.h' as the latter contains dependencies
// from previous
#include "struct.h"

#include <h5cpp/core>      // has handle + type descriptors
// sandwiched: `h5cpp/io` depends on `generated.h`, which in turn needs `h5cpp/core`
    #include "generated.h" // uses type descriptors
#include <h5cpp/io>        // uses generated.h + core 

// Converts selected columns of "input.csv" into an extendable, compressed HDF5
// dataset in "output.h5", appending one record per CSV row through a packet table.
int main(){

    // create HDF5 container
    h5::fd_t fd = h5::create("output.h5",H5F_ACC_TRUNC);
    // create dataset   
    // chunk size is unrealistically small, usually you would set this such that ~= 1MB or an ethernet jumbo frame size
    h5::ds_t ds = h5::create<input_t>(fd,  "simple approach/dataset.csv",
                 h5::max_dims{H5S_UNLIMITED}, h5::chunk{10} | h5::gzip{9} );
    // `h5::ds_t` handle is seamlessly cast to `h5::pt_t` packet table handle, this could have been done in single step
    // but we need `h5::ds_t` handle to add attributes
    h5::pt_t pt = ds;
    // attributes may be added to `h5::ds_t` handle
    ds["data set"] = "monroe-county-crash-data2003-to-2015.csv";
    // NOTE(review): the attribute name below is spelled "cvs parser" (probably meant
    // "csv"); it is part of the written file, so it is left untouched here
    ds["cvs parser"] = "https://github.com/ben-strasser/fast-cpp-csv-parser"; // thank you!

    constexpr unsigned N_COLS = 5;
    io::CSVReader<N_COLS> in("input.csv"); // only 5 columns are read; a row may contain more columns than that
    // io::ignore_extra_column: columns not named here are skipped
    in.read_header(io::ignore_extra_column, "Master Record Number", "Hour", "Reported_Location","Latitude","Longitude");
    input_t row;                           // buffer to read line by line
    char* ptr;      // indirection, as `read_row` doesn't take array directly
    while(in.read_row(row.MasterRecordNumber, row.Hour, ptr, row.Latitude, row.Longitude)){
        // NOTE(review): strncpy writes no terminating NUL when the source is
        // STR_ARRAY_SIZE characters or longer — confirm that ReportedLocation is
        // stored/consumed as a fixed-size field rather than as a C string
        strncpy(row.ReportedLocation, ptr, STR_ARRAY_SIZE); // defined in struct.h
        h5::append(pt, row);
        std::cout << std::string(ptr) << "\n";
    }
    // RAII closes all allocated resources
}

csv.h

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
// Copyright: (2012-2015) Ben Strasser <code@ben-strasser.net>
// License: BSD-3
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
//    this list of conditions and the following disclaimer.
//
//2. Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
//
//3. Neither the name of the copyright holder nor the names of its contributors
//   may be used to endorse or promote products derived from this software
//   without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef CSV_H
#define CSV_H

#include <vector>
#include <string>
#include <cstring>
#include <algorithm>
#include <utility>
#include <cstdio>
#include <exception>
#ifndef CSV_IO_NO_THREAD
#include <mutex>
#include <thread>
#include <condition_variable>
#endif
#include <memory>
#include <cassert>
#include <cerrno>
#include <istream>

namespace io{
        ////////////////////////////////////////////////////////////////////////////
        //                                 LineReader                             //
        ////////////////////////////////////////////////////////////////////////////

        namespace error{
                // Root of the exception hierarchy. The message is not stored ahead of
                // time: every call to what() asks the concrete exception to render it
                // into error_message_buffer and then returns that buffer.
                struct base : std::exception{
                        // Renders the human-readable message into error_message_buffer.
                        virtual void format_error_message()const = 0;

                        const char*what()const noexcept override{
                                format_error_message();
                                return error_message_buffer;
                        }

                        // mutable so the const member functions above can fill it in
                        mutable char error_message_buffer[512];
                };

                // Longest file name (without the terminating NUL) kept inside an exception.
                const int max_file_name_length = 255;

                // Mixin: carries the (possibly truncated) name of the file involved.
                struct with_file_name{
                        // starts out as an all-zero buffer, i.e. the empty string
                        with_file_name() : file_name() {}

                        // Copies the name, truncating it to max_file_name_length
                        // characters; a null pointer yields the empty string.
                        void set_file_name(const char*file_name){
                                if(file_name == nullptr){
                                        this->file_name[0] = '\0';
                                        return;
                                }
                                const std::size_t capacity = sizeof(this->file_name);
                                std::strncpy(this->file_name, file_name, capacity);
                                // strncpy does not terminate on truncation
                                this->file_name[capacity-1] = '\0';
                        }

                        char file_name[max_file_name_length+1];
                };

                // Mixin: carries a line number; -1 until set_file_line() is called.
                struct with_file_line{
                        with_file_line() : file_line(-1) {}

                        void set_file_line(int file_line){
                                this->file_line = file_line;
                        }

                        int file_line;
                };

                // Mixin: carries an errno value; 0 until set_errno() is called.
                struct with_errno{
                        with_errno() : errno_value(0) {}

                        void set_errno(int errno_value){
                                this->errno_value = errno_value;
                        }

                        int errno_value;
                };

                // A file could not be opened; the reason is appended when an errno
                // value was recorded.
                struct can_not_open_file :
                        base,
                        with_file_name,
                        with_errno{
                        void format_error_message()const override{
                                if(errno_value == 0){
                                        std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                                                "Can not open file \"%s\"."
                                                , file_name);
                                        return;
                                }
                                std::snprintf(error_message_buffer, sizeof(error_message_buffer),
                                        "Can not open file \"%s\" because \"%s\"."
                                        , file_name, std::strerror(errno_value));
                        }
                };

                // A line of the named file is longer than the reader supports.
                struct line_length_limit_exceeded :
                        base,
                        with_file_name,
                        with_file_line{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer, sizeof(error_message_buffer),
                                        "Line number %d in file \"%s\" exceeds the maximum length of 2^24-1.",
                                        file_line, file_name);
                        }
                };
        }

        // Abstract source of raw bytes for the readers.
        class ByteSourceBase{
        public:
                virtual ~ByteSourceBase() = default;

                // Copies up to `size` bytes into `buffer` and returns how many were
                // actually delivered.
                virtual int read(char*buffer, int size) = 0;
        };

        namespace detail{

                // Byte source backed by a C stdio stream. The object takes ownership
                // of the FILE* it is given and closes it on destruction.
                class OwningStdIOByteSourceBase : public ByteSourceBase{
                public:
                        explicit OwningStdIOByteSourceBase(FILE*file):file(file){
                                // Tell the std library that we want to do the buffering ourself.
                                std::setvbuf(file, nullptr, _IONBF, 0);
                        }

                        // The destructor closes the stream, so a copy would close the
                        // same FILE* twice: copying is disabled.
                        OwningStdIOByteSourceBase(const OwningStdIOByteSourceBase&) = delete;
                        OwningStdIOByteSourceBase&operator=(const OwningStdIOByteSourceBase&) = delete;

                        // Reads up to `size` bytes into `buffer`; returns the number of
                        // bytes fread delivered.
                        int read(char*buffer, int size){
                                return static_cast<int>(std::fread(buffer, 1, size, file));
                        }

                        ~OwningStdIOByteSourceBase(){
                                std::fclose(file);
                        }

                private:
                        FILE*file;
                };

                // Byte source reading from a std::istream that it only references:
                // the stream must outlive this object.
                class NonOwningIStreamByteSource : public ByteSourceBase{
                public:
                        explicit NonOwningIStreamByteSource(std::istream&in):stream(in){}

                        // Unformatted read of up to `size` bytes; returns the number of
                        // bytes the stream reports as extracted.
                        int read(char*buffer, int size){
                                return stream.read(buffer, size).gcount();
                        }

                        ~NonOwningIStreamByteSource(){}

                private:
                       std::istream&stream;
                };

                // Byte source serving an in-memory character range it does not own.
                class NonOwningStringByteSource : public ByteSourceBase{
                public:
                        NonOwningStringByteSource(const char*str, long long size):cursor(str), bytes_left(size){}

                        // Hands out the next chunk: at most desired_byte_count bytes, fewer
                        // when less than that remains. Returns the number of bytes copied.
                        int read(char*buffer, int desired_byte_count){
                                const int chunk = (bytes_left < desired_byte_count)
                                        ? static_cast<int>(bytes_left)
                                        : desired_byte_count;
                                std::memcpy(buffer, cursor, chunk);
                                cursor += chunk;
                                bytes_left -= chunk;
                                return chunk;
                        }

                        ~NonOwningStringByteSource(){}

                private:
                        const char*cursor;      // next byte to hand out
                        long long bytes_left;   // bytes not yet handed out
                };

                #ifndef CSV_IO_NO_THREAD
                // Performs reads from a ByteSourceBase on a dedicated worker thread.
                // A read is requested with start_read() and collected with finish_read();
                // all shared state below is accessed under `lock`.
                class AsynchronousReader{
                public:
                        // Takes ownership of the byte source and starts the worker thread.
                        // The worker's lambda captures by reference, so it operates on the
                        // members of this very object.
                        void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
                                std::unique_lock<std::mutex>guard(lock);
                                byte_source = std::move(arg_byte_source);
                                desired_byte_count = -1;        // -1: no read requested
                                termination_requested = false;
                                worker = std::thread(
                                        [&]{
                                                std::unique_lock<std::mutex>guard(lock);
                                                try{
                                                        for(;;){
                                                                // sleep until a read is requested or the
                                                                // destructor asks the worker to stop
                                                                read_requested_condition.wait(
                                                                        guard, 
                                                                        [&]{
                                                                                return desired_byte_count != -1 || termination_requested;
                                                                        }
                                                                );
                                                                if(termination_requested)
                                                                        return;

                                                                // the read itself runs with `lock` held
                                                                read_byte_count = byte_source->read(buffer, desired_byte_count);
                                                                desired_byte_count = -1;   // request consumed
                                                                // a read of 0 bytes ends the worker loop; the
                                                                // waiter is woken by the notify after the try block
                                                                if(read_byte_count == 0)
                                                                        break;
                                                                read_finished_condition.notify_one();
                                                        }
                                                }catch(...){
                                                        // hand the exception over to finish_read()
                                                        read_error = std::current_exception();
                                                }
                                                read_finished_condition.notify_one();
                                        }
                                );
                        }

                        // True once init() has installed a non-null byte source.
                        bool is_valid()const{
                                return byte_source != nullptr;
                        }

                        // Requests a read of up to arg_desired_byte_count bytes into
                        // arg_buffer and wakes the worker; returns without waiting.
                        // NOTE(review): once the worker has left its loop (0-byte read or
                        // exception) nothing serves a new request — callers presumably
                        // stop issuing reads after that; confirm against LineReader.
                        void start_read(char*arg_buffer, int arg_desired_byte_count){
                                std::unique_lock<std::mutex>guard(lock);
                                buffer = arg_buffer;
                                desired_byte_count = arg_desired_byte_count;
                                read_byte_count = -1;   // -1: result not available yet
                                read_requested_condition.notify_one();
                        }

                        // Blocks until the requested read has produced a result or failed.
                        // Returns the number of bytes read; rethrows an exception captured
                        // on the worker thread.
                        int finish_read(){
                                std::unique_lock<std::mutex>guard(lock);
                                read_finished_condition.wait(
                                        guard, 
                                        [&]{
                                                return read_byte_count != -1 || read_error;
                                        }
                                );
                                if(read_error)
                                        std::rethrow_exception(read_error);
                                else
                                        return read_byte_count;
                        }

                        // Stops and joins the worker, but only when init() was called
                        // (byte_source is non-null), i.e. when a worker was started.
                        ~AsynchronousReader(){
                                if(byte_source != nullptr){
                                        {
                                                std::unique_lock<std::mutex>guard(lock);
                                                termination_requested = true;
                                        }
                                        read_requested_condition.notify_one();
                                        worker.join();
                                }
                        }

                private:           
                        std::unique_ptr<ByteSourceBase>byte_source;

                        std::thread worker;

                        // the plain members below get their first value in init() /
                        // start_read(); nothing initializes them at construction
                        bool termination_requested;
                        std::exception_ptr read_error;
                        char*buffer;
                        int desired_byte_count;         // -1 while no read is requested
                        int read_byte_count;            // -1 while the result is pending

                        std::mutex lock;
                        std::condition_variable read_finished_condition;
                        std::condition_variable read_requested_condition;  
                };
                #endif

                // Threadless counterpart of the asynchronous reader: same four-call
                // interface, but the read is carried out inside finish_read().
                class SynchronousReader{
                public:
                        // Takes ownership of the byte source that later reads are served from.
                        void init(std::unique_ptr<ByteSourceBase>arg_byte_source){
                                byte_source = std::move(arg_byte_source);
                        }

                        // True once init() has installed a non-null byte source.
                        bool is_valid()const{
                                return static_cast<bool>(byte_source);
                        }

                        // Only records the request; no I/O happens here.
                        void start_read(char*arg_buffer, int arg_desired_byte_count){
                                desired_byte_count = arg_desired_byte_count;
                                buffer = arg_buffer;
                        }

                        // Performs the recorded read on the calling thread and returns
                        // the byte count reported by the source.
                        int finish_read(){
                                const int delivered = byte_source->read(buffer, desired_byte_count);
                                return delivered;
                        }
                private:
                        std::unique_ptr<ByteSourceBase>byte_source;
                        char*buffer;
                        int desired_byte_count;
                };
        }

        class LineReader{
        private:
                static const int block_len = 1<<20;
                std::unique_ptr<char[]>buffer; // must be constructed before (and thus destructed after) the reader!
                #ifdef CSV_IO_NO_THREAD
                detail::SynchronousReader reader;
                #else
                detail::AsynchronousReader reader;
                #endif
                int data_begin;
                int data_end;

                char file_name[error::max_file_name_length+1];
                unsigned file_line;

                static std::unique_ptr<ByteSourceBase> open_file(const char*file_name){
                        // We open the file in binary mode as it makes no difference under *nix
                        // and under Windows we handle \r\n newlines ourself.
                        FILE*file = std::fopen(file_name, "rb");
                        if(file == 0){
                                int x = errno; // store errno as soon as possible, doing it after constructor call can fail.
                                error::can_not_open_file err;
                                err.set_errno(x);
                                err.set_file_name(file_name);
                                throw err;
                        }
                        return std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file));
                }

                void init(std::unique_ptr<ByteSourceBase>byte_source){
                        file_line = 0;

                        buffer = std::unique_ptr<char[]>(new char[3*block_len]);
                        data_begin = 0;
                        data_end = byte_source->read(buffer.get(), 2*block_len);

                        // Ignore UTF-8 BOM
                        if(data_end >= 3 && buffer[0] == '\xEF' && buffer[1] == '\xBB' && buffer[2] == '\xBF')
                                data_begin = 3;

                        if(data_end == 2*block_len){
                                reader.init(std::move(byte_source));
                                reader.start_read(buffer.get() + 2*block_len, block_len);
                        }
                }

        public:
                LineReader() = delete;
                LineReader(const LineReader&) = delete;
                LineReader&operator=(const LineReader&) = delete;

                explicit LineReader(const char*file_name){
                        set_file_name(file_name);
                        init(open_file(file_name));
                }

                explicit LineReader(const std::string&file_name){
                        set_file_name(file_name.c_str());
                        init(open_file(file_name.c_str()));
                }

                LineReader(const char*file_name, std::unique_ptr<ByteSourceBase>byte_source){
                        set_file_name(file_name);
                        init(std::move(byte_source));
                }

                LineReader(const std::string&file_name, std::unique_ptr<ByteSourceBase>byte_source){
                        set_file_name(file_name.c_str());
                        init(std::move(byte_source));
                }

                LineReader(const char*file_name, const char*data_begin, const char*data_end){
                        set_file_name(file_name);
                        init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
                }

                LineReader(const std::string&file_name, const char*data_begin, const char*data_end){
                        set_file_name(file_name.c_str());
                        init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningStringByteSource(data_begin, data_end-data_begin)));
                }

                LineReader(const char*file_name, FILE*file){
                        set_file_name(file_name);
                        init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
                }

                LineReader(const std::string&file_name, FILE*file){
                        set_file_name(file_name.c_str());
                        init(std::unique_ptr<ByteSourceBase>(new detail::OwningStdIOByteSourceBase(file)));
                }

                LineReader(const char*file_name, std::istream&in){
                        set_file_name(file_name);
                        init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
                }

                LineReader(const std::string&file_name, std::istream&in){
                        set_file_name(file_name.c_str());
                        init(std::unique_ptr<ByteSourceBase>(new detail::NonOwningIStreamByteSource(in)));
                }

                void set_file_name(const std::string&file_name){
                        set_file_name(file_name.c_str());
                }

                void set_file_name(const char*file_name){
                        if(file_name != nullptr){
                                strncpy(this->file_name, file_name, sizeof(this->file_name));
                                this->file_name[sizeof(this->file_name)-1] = '\0';
                        }else{
                                this->file_name[0] = '\0';
                        }
                }

                const char*get_truncated_file_name()const{
                        return file_name;
                }

                void set_file_line(unsigned file_line){
                        this->file_line = file_line;
                }

                unsigned get_file_line()const{
                        return file_line;
                }

                char*next_line(){
                        if(data_begin == data_end)
                                return nullptr;

                        ++file_line;

                        assert(data_begin < data_end);
                        assert(data_end <= block_len*2);

                        if(data_begin >= block_len){
                                std::memcpy(buffer.get(), buffer.get()+block_len, block_len);
                                data_begin -= block_len;
                                data_end -= block_len;
                                if(reader.is_valid())
                                {
                                        data_end += reader.finish_read();
                                        std::memcpy(buffer.get()+block_len, buffer.get()+2*block_len, block_len);
                                        reader.start_read(buffer.get() + 2*block_len, block_len);
                                }
                        }

                        int line_end = data_begin;
                        while(buffer[line_end] != '\n' && line_end != data_end){
                                ++line_end;
                        }

                        if(line_end - data_begin + 1 > block_len){
                                error::line_length_limit_exceeded err;
                                err.set_file_name(file_name);
                                err.set_file_line(file_line);
                                throw err;
                        }

                        if(buffer[line_end] == '\n' && line_end != data_end){
                                buffer[line_end] = '\0';
                        }else{
                                // some files are missing the newline at the end of the
                                // last line
                                ++data_end;
                                buffer[line_end] = '\0';
                        }

                        // handle windows \r\n-line breaks
                        if(line_end != data_begin && buffer[line_end-1] == '\r')
                                buffer[line_end-1] = '\0';

                        char*ret = buffer.get() + data_begin;
                        data_begin = line_end+1;
                        return ret;
                }
        };


        ////////////////////////////////////////////////////////////////////////////
        //                                 CSV                                    //
        ////////////////////////////////////////////////////////////////////////////

        namespace error{
                // Maximum number of characters of a column name kept in an error.
                const int max_column_name_length = 63;

                // Mixin that stores a (possibly truncated) column name for error messages.
                struct with_column_name{
                        char column_name[max_column_name_length+1];

                        // Starts out with an all-zero, i.e. empty, name.
                        with_column_name(){
                                std::memset(column_name, 0, sizeof(column_name));
                        }

                        // Copies at most max_column_name_length characters of the
                        // given name; a null pointer stores the empty string.
                        void set_column_name(const char*column_name){
                                if(column_name == nullptr){
                                        this->column_name[0] = '\0';
                                        return;
                                }
                                std::strncpy(this->column_name, column_name, max_column_name_length);
                                this->column_name[max_column_name_length] = '\0';
                        }
                };


                // Maximum number of characters of a cell's content kept in an error.
                const int max_column_content_length = 63;

                // Mixin that stores the (possibly truncated) content of the
                // offending cell for error messages.
                struct with_column_content{
                        char column_content[max_column_content_length+1];

                        // Starts out with an all-zero, i.e. empty, content.
                        with_column_content(){
                                std::memset(column_content, 0, sizeof(column_content));
                        }

                        // Copies at most max_column_content_length characters of the
                        // given content; a null pointer stores the empty string.
                        void set_column_content(const char*column_content){
                                if(column_content == nullptr){
                                        this->column_content[0] = '\0';
                                        return;
                                }
                                std::strncpy(this->column_content, column_content, max_column_content_length);
                                this->column_content[max_column_content_length] = '\0';
                        }
                };


                // Raised by detail::parse_header_line for a header column that
                // matches none of the requested names while extra columns are
                // not being ignored.
                struct extra_column_in_header : base, with_file_name, with_column_name{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(Extra column "%s" in header of file "%s".)",
                                        column_name,
                                        file_name
                                );
                        }
                };

                // Raised by detail::parse_header_line when a requested column
                // does not appear in the header and missing columns are not
                // being ignored.
                struct missing_column_in_header : base, with_file_name, with_column_name{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(Missing column "%s" in header of file "%s".)",
                                        column_name,
                                        file_name
                                );
                        }
                };

                // Raised by detail::parse_header_line when a requested column
                // name occurs more than once in the header.
                struct duplicated_column_in_header : base, with_file_name, with_column_name{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(Duplicated column "%s" in header of file "%s".)",
                                        column_name,
                                        file_name
                                );
                        }
                };

                // Error whose message reports that a file has no header.
                struct header_missing : base, with_file_name{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        "Header missing in file \"%s\".",
                                        file_name
                                );
                        }
                };

                // Raised by detail::parse_line when a line ends before every
                // expected column has been read.
                struct too_few_columns : base, with_file_name, with_file_line{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        "Too few columns in line %d in file \"%s\".",
                                        file_line,
                                        file_name
                                );
                        }
                };

                // Raised by detail::parse_line when text is left over after
                // every expected column has been read.
                struct too_many_columns : base, with_file_name, with_file_line{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        "Too many columns in line %d in file \"%s\".",
                                        file_line,
                                        file_name
                                );
                        }
                };

                // Raised by double_quote_escape when a quoted section is still
                // open at the end of the line.
                struct escaped_string_not_closed : base, with_file_name, with_file_line{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        "Escaped string was not closed in line %d in file \"%s\".",
                                        file_line,
                                        file_name
                                );
                        }
                };

                // Error whose message reports a cell that had to hold a
                // non-negative integer but did not.
                struct integer_must_be_positive : base, with_file_name, with_file_line, with_column_name, with_column_content{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(The integer "%s" must be positive or 0 in column "%s" in file "%s" in line "%d".)",
                                        column_content,
                                        column_name,
                                        file_name,
                                        file_line
                                );
                        }
                };

                // Raised by the integer parsers when a character other than a
                // decimal digit is encountered.
                struct no_digit : base, with_file_name, with_file_line, with_column_name, with_column_content{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(The integer "%s" contains an invalid digit in column "%s" in file "%s" in line "%d".)",
                                        column_content,
                                        column_name,
                                        file_name,
                                        file_line
                                );
                        }
                };

                // Raised by throw_on_overflow::on_overflow.
                struct integer_overflow : base, with_file_name, with_file_line, with_column_name, with_column_content{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(The integer "%s" overflows in column "%s" in file "%s" in line "%d".)",
                                        column_content,
                                        column_name,
                                        file_name,
                                        file_line
                                );
                        }
                };

                // Raised by throw_on_overflow::on_underflow.
                struct integer_underflow : base, with_file_name, with_file_line, with_column_name, with_column_content{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(The integer "%s" underflows in column "%s" in file "%s" in line "%d".)",
                                        column_content,
                                        column_name,
                                        file_name,
                                        file_line
                                );
                        }
                };

                // Raised when a cell parsed into a char is empty or holds more
                // than one character.
                struct invalid_single_character : base, with_file_name, with_file_line, with_column_name, with_column_content{
                        void format_error_message()const override{
                                std::snprintf(
                                        error_message_buffer,
                                        sizeof(error_message_buffer),
                                        R"(The content "%s" of column "%s" in file "%s" in line "%d" is not a single character.)",
                                        column_content,
                                        column_name,
                                        file_name,
                                        file_line
                                );
                        }
                };
        }

        // Bit flags describing how header columns that do not line up with the
        // requested columns are treated. detail::parse_header_line tests them
        // with bitwise AND.
        using ignore_column = unsigned int;
        // No flag set.
        static const ignore_column ignore_no_column = 0;
        // When set, header columns matching no requested name are skipped
        // instead of raising error::extra_column_in_header.
        static const ignore_column ignore_extra_column = 1;
        // When set, requested columns absent from the header do not raise
        // error::missing_column_in_header.
        static const ignore_column ignore_missing_column = 2;

        // Trim policy: strips every character listed in trim_char_list from
        // both ends of a column.
        template<char ... trim_char_list>
        struct trim_chars{
        private:
                // End of the recursion: no candidate is left to compare against.
                constexpr static bool is_trim_char(char){
                        return false;
                }

                // True if c equals candidate or any of the remaining candidates.
                template<class ...RemainingChars>
                constexpr static bool is_trim_char(char c, char candidate, RemainingChars...remaining){
                        return (c == candidate) ? true : is_trim_char(c, remaining...);
                }

        public:
                // Narrows [str_begin, str_end) so that it neither starts nor ends
                // with a trim character and writes a NUL terminator at the new end.
                static void trim(char*&str_begin, char*&str_end){
                        char*first = str_begin;
                        char*last = str_end;

                        for(; first != last && is_trim_char(*first, trim_char_list...); ++first){}
                        for(; first != last && is_trim_char(*(last-1), trim_char_list...); --last){}

                        *last = '\0';
                        str_begin = first;
                        str_end = last;
                }
        };


        // Comment policy that never treats a line as a comment.
        struct no_comment{
                static bool is_comment(const char* /*line*/){
                        return false;
                }
        };

        // Comment policy: a line is a comment when its first character is one
        // of comment_start_char_list.
        template<char ... comment_start_char_list>
        struct single_line_comment{
        private:
                // End of the recursion: no candidate is left to compare against.
                constexpr static bool is_comment_start_char(char){
                        return false;
                }

                // True if c equals candidate or any of the remaining candidates.
                template<class ...RemainingChars>
                constexpr static bool is_comment_start_char(char c, char candidate, RemainingChars...remaining){
                        return (c == candidate) ? true : is_comment_start_char(c, remaining...);
                }

        public:
                static bool is_comment(const char*line){
                        const char first = *line;
                        return is_comment_start_char(first, comment_start_char_list...);
                }
        };

        // Comment policy: a line is a comment when it is empty or consists
        // only of spaces and tabs.
        struct empty_line_comment{
                static bool is_comment(const char*line){
                        const char*cursor = line;
                        // Skip leading blanks; the line is "empty" exactly when
                        // nothing but the terminator follows them.
                        while(*cursor == ' ' || *cursor == '\t')
                                ++cursor;
                        return *cursor == '\0';
                }
        };

        template<char ... comment_start_char_list>
        struct single_and_empty_line_comment{
                static bool is_comment(const char*line){
                        return single_line_comment<comment_start_char_list...>::is_comment(line) || empty_line_comment::is_comment(line);
                }
        };

        // Quote policy without any quoting: a column ends at the next
        // separator or at the end of the line.
        template<char sep>
        struct no_quote_escape{
                // Returns a pointer to the first separator or NUL terminator at
                // or after col_begin.
                static const char*find_next_column_end(const char*col_begin){
                        const char*cursor = col_begin;
                        for(;;){
                                const char c = *cursor;
                                if(c == sep || c == '\0')
                                        return cursor;
                                ++cursor;
                        }
                }

                // Nothing is escaped, so the column is left untouched.
                static void unescape(char*&, char*&){}
        };

        // Quote policy in which separators inside a quoted section do not end
        // the column and a doubled quote inside a quoted column stands for one
        // literal quote character.
        template<char sep, char quote>
        struct double_quote_escape{
                // Returns a pointer to the separator or NUL terminator that ends
                // the column starting at col_begin, skipping quoted sections.
                // Throws error::escaped_string_not_closed if the line ends inside
                // a quoted section.
                static const char*find_next_column_end(const char*col_begin){
                        const char*pos = col_begin;
                        while(*pos != sep && *pos != '\0'){
                                if(*pos != quote){
                                        ++pos;
                                        continue;
                                }
                                // Consume one quoted section; a quote right after the
                                // closing quote opens the next section immediately.
                                do{
                                        ++pos; // step over the opening quote
                                        for(; *pos != quote; ++pos){
                                                if(*pos == '\0')
                                                        throw error::escaped_string_not_closed();
                                        }
                                        ++pos; // step over the closing quote
                                }while(*pos == quote);
                        }
                        return pos;
                }

                // If [col_begin, col_end) is wrapped in quotes, drops the outer
                // quotes, collapses doubled quotes in place and NUL-terminates the
                // shortened column. Otherwise the column is left untouched.
                static void unescape(char*&col_begin, char*&col_end){
                        if(col_end - col_begin < 2)
                                return;
                        if(*col_begin != quote || *(col_end-1) != quote)
                                return;

                        ++col_begin;
                        --col_end;

                        char*write = col_begin;
                        char*read = col_begin;
                        while(read != col_end){
                                const bool doubled = *read == quote && (read+1) != col_end && *(read+1) == quote;
                                if(doubled)
                                        ++read; // keep only the second quote of the pair
                                *write = *read;
                                ++write;
                                ++read;
                        }
                        col_end = write;
                        *col_end = '\0';
                }
        };

        struct throw_on_overflow{
                template<class T>
                static void on_overflow(T&){
                        throw error::integer_overflow();
                }

                template<class T>
                static void on_underflow(T&){
                        throw error::integer_underflow();
                }
        };

        // Overflow policy that does nothing: the value is left as it was when
        // the overflow or underflow was detected.
        struct ignore_overflow{
                template<class T>
                static void on_overflow(T& /*value*/){
                }

                template<class T>
                static void on_underflow(T& /*value*/){
                }
        };

        // Overflow policy that saturates: overflow stores
        // std::numeric_limits<T>::max(), underflow stores
        // std::numeric_limits<T>::min().
        struct set_to_max_on_overflow{
                template<class T>
                static void on_overflow(T&x){
                        const T upper = std::numeric_limits<T>::max();
                        x = upper;
                }

                template<class T>
                static void on_underflow(T&x){
                        const T lower = std::numeric_limits<T>::min();
                        x = lower;
                }
        };


        namespace detail{
                // Cuts the first column off `line`.
                //
                // On return [col_begin, col_end) is the column, NUL-terminated in
                // place. `line` is advanced to the text after the separator, or
                // set to nullptr when the column was the last one of the line.
                template<class quote_policy>
                void chop_next_column(
                        char*&line, char*&col_begin, char*&col_end
                ){
                        assert(line != nullptr);

                        col_begin = line;

                        // The policy works on const pointers; re-derive a mutable
                        // pointer from the offset it found.
                        const char*const found_end = quote_policy::find_next_column_end(col_begin);
                        col_end = col_begin + (found_end - col_begin);

                        const bool is_last_column = (*col_end == '\0');
                        if(!is_last_column){
                                *col_end = '\0';
                                line = col_end + 1;
                        }else{
                                line = nullptr;
                        }
                }

                // Splits `line` into columns and stores the start of every wanted
                // column in sorted_col.
                //
                // col_order has one entry per column of the file: the index in
                // sorted_col the column goes to, or -1 if the column is skipped.
                // Throws too_few_columns if the line ends early and
                // too_many_columns if text remains after the last expected column.
                template<class trim_policy, class quote_policy>
                void parse_line(
                        char*line,
                        char**sorted_col,
                        const std::vector<int>&col_order
                ){
                        typedef std::vector<int>::const_iterator order_iterator;
                        for(order_iterator it = col_order.begin(); it != col_order.end(); ++it){
                                if(line == nullptr)
                                        throw ::io::error::too_few_columns();

                                char*col_begin;
                                char*col_end;
                                chop_next_column<quote_policy>(line, col_begin, col_end);

                                const int target = *it;
                                if(target == -1)
                                        continue; // column is present in the file but not wanted

                                trim_policy::trim(col_begin, col_end);
                                quote_policy::unescape(col_begin, col_end);
                                sorted_col[target] = col_begin;
                        }

                        if(line != nullptr)
                                throw ::io::error::too_many_columns();
                }

                // Reads the header line and records in col_order, for every
                // column of the file, the index of the matching entry of
                // col_name or -1 for an ignored extra column.
                //
                // Throws duplicated_column_in_header if a requested name occurs
                // twice, extra_column_in_header for an unrequested column unless
                // ignore_extra_column is set, and missing_column_in_header for a
                // requested name that is absent unless ignore_missing_column is
                // set.
                template<unsigned column_count, class trim_policy, class quote_policy>
                void parse_header_line(
                        char*line,
                        std::vector<int>&col_order,
                        const std::string*col_name,
                        ignore_column ignore_policy
                ){
                        col_order.clear();

                        bool found[column_count];
                        for(unsigned i=0; i<column_count; ++i)
                                found[i] = false;

                        while(line != nullptr){
                                char*col_begin;
                                char*col_end;
                                chop_next_column<quote_policy>(line, col_begin, col_end);

                                trim_policy::trim(col_begin, col_end);
                                quote_policy::unescape(col_begin, col_end);

                                // Look the header cell up among the requested names.
                                bool matched = false;
                                for(unsigned i=0; i<column_count && !matched; ++i){
                                        if(!(col_begin == col_name[i]))
                                                continue;
                                        if(found[i]){
                                                error::duplicated_column_in_header err;
                                                err.set_column_name(col_begin);
                                                throw err;
                                        }
                                        found[i] = true;
                                        col_order.push_back(i);
                                        matched = true;
                                }
                                if(matched)
                                        continue;

                                // The file has a column nobody asked for.
                                if(!(ignore_policy & ::io::ignore_extra_column)){
                                        error::extra_column_in_header err;
                                        err.set_column_name(col_begin);
                                        throw err;
                                }
                                col_order.push_back(-1);
                        }

                        if(ignore_policy & ::io::ignore_missing_column)
                                return;

                        for(unsigned i=0; i<column_count; ++i){
                                if(found[i])
                                        continue;
                                error::missing_column_in_header err;
                                err.set_column_name(col_name[i].c_str());
                                throw err;
                        }
                }

                // Parses a column that must consist of exactly one character.
                // Throws error::invalid_single_character for an empty column or a
                // column with more than one character; in the latter case x has
                // already received the first character.
                template<class overflow_policy>
                void parse(char*col, char &x){
                        const char first = col[0];
                        if(first == '\0')
                                throw error::invalid_single_character();
                        x = first;
                        if(col[1] != '\0')
                                throw error::invalid_single_character();
                }

                // Copies the NUL-terminated column text into x.
                template<class overflow_policy>
                void parse(char*col, std::string&x){
                        x.assign(col);
                }

                // Stores a read-only pointer to the column text in x; nothing is
                // copied, so x points at the same memory as col.
                template<class overflow_policy>
                void parse(
                        char*col,
                        const char*&x
                ){
                        x = col;
                }

                // Stores a mutable pointer to the column text in x; nothing is
                // copied, so x points at the same memory as col.
                template<class overflow_policy>
                void parse(
                        char*col,
                        char*&x
                ){
                        x = col;
                }

                // Parses a sequence of decimal digits into x.
                //
                // An empty string yields 0. A non-digit character throws
                // error::no_digit. If appending a digit would exceed
                // std::numeric_limits<T>::max(), overflow_policy::on_overflow(x)
                // is called and parsing stops.
                template<class overflow_policy, class T>
                void parse_unsigned_integer(const char*col, T&x){
                        x = 0;
                        for(const char*cursor = col; *cursor != '\0'; ++cursor){
                                const char c = *cursor;
                                if(c < '0' || '9' < c)
                                        throw error::no_digit();

                                const T digit = c - '0';
                                // 10*x+digit fits exactly when x <= (max-digit)/10.
                                if(x > (std::numeric_limits<T>::max()-digit)/10){
                                        overflow_policy::on_overflow(x);
                                        return;
                                }
                                x = 10*x+digit;
                        }
                }

                // parse() overloads for the unsigned integer types; each one
                // forwards to parse_unsigned_integer.
                template<class overflow_policy>
                void parse(char*col, unsigned char &x){
                        parse_unsigned_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, unsigned short &x){
                        parse_unsigned_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, unsigned int &x){
                        parse_unsigned_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, unsigned long &x){
                        parse_unsigned_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, unsigned long long &x){
                        parse_unsigned_integer<overflow_policy>(col, x);
                }

                // Parses an optionally signed sequence of decimal digits into x.
                //
                // Without a leading '-' the text (after an optional '+') is
                // handed to parse_unsigned_integer. With a leading '-' the value
                // is accumulated downwards; a non-digit throws error::no_digit,
                // and if subtracting a digit would drop below
                // std::numeric_limits<T>::min(), overflow_policy::on_underflow(x)
                // is called and parsing stops.
                template<class overflow_policy, class T>
                void parse_signed_integer(const char*col, T&x){
                        if(*col != '-'){
                                if(*col == '+')
                                        ++col;
                                parse_unsigned_integer<overflow_policy>(col, x);
                                return;
                        }

                        x = 0;
                        for(++col; *col != '\0'; ++col){
                                const char c = *col;
                                if(c < '0' || '9' < c)
                                        throw error::no_digit();

                                const T digit = c - '0';
                                // 10*x-digit fits exactly when x >= (min+digit)/10.
                                if(x < (std::numeric_limits<T>::min()+digit)/10){
                                        overflow_policy::on_underflow(x);
                                        return;
                                }
                                x = 10*x-digit;
                        }
                }

                // parse() overloads for the signed integer types; each one
                // forwards to parse_signed_integer.
                template<class overflow_policy>
                void parse(char*col, signed char &x){
                        parse_signed_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, signed short &x){
                        parse_signed_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, signed int &x){
                        parse_signed_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, signed long &x){
                        parse_signed_integer<overflow_policy>(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, signed long long &x){
                        parse_signed_integer<overflow_policy>(col, x);
                }

                // Parses the null-terminated text in col as a decimal floating
                // point number and stores the result in x.
                //
                // Accepted shape: an optional leading '+' or '-', a run of
                // decimal digits, an optional fraction introduced by '.' or ',',
                // and an optional exponent introduced by 'e' or 'E'. The digit
                // runs may be empty, so "" yields 0.
                //
                // Throws error::no_digit when there is no exponent and the
                // character following the mantissa is not the terminating '\0'.
                // When an exponent is present its text is handed to
                // parse_signed_integer, which is responsible for validating the
                // remaining characters.
                template<class T>
                void parse_float(const char*col, T&x){
                        bool is_neg = false;
                        if(*col == '-'){
                                is_neg = true;
                                ++col;
                        }else if(*col == '+')
                                ++col;

                        // Integer part: accumulate digit by digit.
                        x = 0;
                        while('0' <= *col && *col <= '9'){
                                int y = *col - '0';
                                x *= 10;
                                x += y;
                                ++col;
                        }

                        // Fractional part: pos is the weight of the current digit
                        // (0.1, 0.01, ...).
                        if(*col == '.'|| *col == ','){
                                ++col;
                                T pos = 1;
                                while('0' <= *col && *col <= '9'){
                                        pos /= 10;
                                        int y = *col - '0';
                                        ++col;
                                        x += y*pos;
                                }
                        }

                        if(*col == 'e' || *col == 'E'){
                                ++col;
                                int e = 0;

                                parse_signed_integer<set_to_max_on_overflow>(col, e);

                                if(e != 0){
                                        T base;
                                        if(e < 0){
                                                base = T(0.1);
                                                // The most negative int has no positive
                                                // counterpart, so "-e" would overflow
                                                // (undefined behaviour) and the loop below
                                                // could then never reach 1. This value can
                                                // presumably be produced when the exponent
                                                // underflows and the overflow policy
                                                // saturates it (policy is defined outside
                                                // this function). Saturate instead; the
                                                // result underflows towards zero either way.
                                                if(e == std::numeric_limits<int>::min())
                                                        e = std::numeric_limits<int>::max();
                                                else
                                                        e = -e;
                                        }else{
                                                base = T(10);
                                        }

                                        // Exponentiation by squaring: multiply x by
                                        // base^e using O(log e) multiplications.
                                        while(e != 1){
                                                if((e & 1) == 0){
                                                        base = base*base;
                                                        e >>= 1;
                                                }else{
                                                        x *= base;
                                                        --e;
                                                }
                                        }
                                        x *= base;
                                }
                        }else{
                                // No exponent: anything other than the terminator
                                // after the mantissa is an error.
                                if(*col != '\0')
                                        throw error::no_digit();
                        }

                        if(is_neg)
                                x = -x;
                }

                // parse() overloads for the built-in floating point types.
                // overflow_policy is accepted for interface symmetry with the
                // integer overloads but is not passed on to parse_float.
                template<class overflow_policy>
                void parse(char*col, float&x){
                        parse_float(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, double&x){
                        parse_float(col, x);
                }

                template<class overflow_policy>
                void parse(char*col, long double&x){
                        parse_float(col, x);
                }

                // Catch-all parse() overload: instantiating it for any type T
                // stops compilation with an explanatory message.
                template<class overflow_policy, class T>
                void parse(char*col, T&x){
                        // Touch both parameters so that no "unused parameter"
                        // warning is emitted.
                        static_cast<void>(col);
                        static_cast<void>(x);
                        // The condition has to depend on T: GCC evaluates a plain
                        // "false" as soon as it reads the template, whereas
                        // "sizeof(T)!=sizeof(T)" is only evaluated when the
                        // template is actually instantiated.
                        static_assert(sizeof(T)!=sizeof(T),
                                "Can not parse this type. Only buildin integrals, floats, char, char*, const char* and std::string are supported");
                }

        }

        // Reads a delimited text file row by row and converts the selected
        // columns into caller-supplied variables.
        //
        // Template parameters:
        //   column_count    - number of columns handed to read_row().
        //   trim_policy     - forwarded to detail::parse_line / parse_header_line.
        //   quote_policy    - forwarded to detail::parse_line / parse_header_line.
        //   overflow_policy - forwarded to detail::parse for numeric conversions.
        //   comment_policy  - comment_policy::is_comment(line) decides which
        //                     lines are skipped.
        //
        // The reader is neither copy-constructible nor copy-assignable.
        template<unsigned column_count,
                class trim_policy = trim_chars<' ', '\t'>,
                class quote_policy = no_quote_escape<','>,
                class overflow_policy = throw_on_overflow,
                class comment_policy = no_comment
        >
        class CSVReader{
        private:
                // Underlying line source.
                LineReader in;

                // Per-column pointers into the current line, filled by
                // detail::parse_line; a null entry makes parse_helper leave the
                // corresponding output variable untouched.
                char*row[column_count];
                std::string column_names[column_count];

                // Column ordering handed to detail::parse_line; identity by
                // default, rewritten by detail::parse_header_line.
                std::vector<int>col_order;

                // Stores the given names into column_names, one per recursion
                // step, in the order they were passed.
                template<class ...ColNames>
                void set_column_names(std::string s, ColNames...cols){
                        column_names[column_count-sizeof...(ColNames)-1] = std::move(s);
                        set_column_names(std::forward<ColNames>(cols)...);
                }

                // Recursion terminator.
                void set_column_names(){}


        public:
                CSVReader() = delete;
                CSVReader(const CSVReader&) = delete;
                // Explicitly deleted (it was previously declared but never
                // defined) so that accidental copy assignment is diagnosed at
                // compile time rather than at link time.
                CSVReader&operator=(const CSVReader&) = delete;

                // Forwards all arguments to the LineReader constructor and
                // initialises the column order to the identity and the column
                // names to "col1", "col2", ...
                template<class ...Args>
                explicit CSVReader(Args&&...args):in(std::forward<Args>(args)...){
                        std::fill(row, row+column_count, nullptr);
                        col_order.resize(column_count);
                        for(unsigned i=0; i<column_count; ++i)
                                col_order[i] = i;
                        for(unsigned i=1; i<=column_count; ++i)
                                column_names[i-1] = "col"+std::to_string(i);
                }

                // Returns the next raw line from the underlying LineReader
                // without any column splitting.
                char*next_line(){
                        return in.next_line();
                }

                // Records the expected column names, skips comment lines, and
                // passes the first non-comment line to detail::parse_header_line
                // together with ignore_policy. Exactly column_count names must be
                // given.
                // Throws error::header_missing when the input ends before a
                // header line is found. Exceptions derived from
                // error::with_file_name are annotated with the file name and
                // rethrown.
                template<class ...ColNames>
                void read_header(ignore_column ignore_policy, ColNames...cols){
                        static_assert(sizeof...(ColNames)>=column_count, "not enough column names specified");
                        static_assert(sizeof...(ColNames)<=column_count, "too many column names specified");
                        try{
                                set_column_names(std::forward<ColNames>(cols)...);

                                char*line;
                                do{
                                        line = in.next_line();
                                        if(!line)
                                                throw error::header_missing();
                                }while(comment_policy::is_comment(line));

                                detail::parse_header_line
                                        <column_count, trim_policy, quote_policy>
                                        (line, col_order, column_names, ignore_policy);
                        }catch(error::with_file_name&err){
                                err.set_file_name(in.get_truncated_file_name());
                                throw;
                        }
                }

                // Sets the column names without reading a header line from the
                // input, and resets the column order to the identity. Exactly
                // column_count names must be given.
                template<class ...ColNames>
                void set_header(ColNames...cols){
                        static_assert(sizeof...(ColNames)>=column_count,
                                "not enough column names specified");
                        static_assert(sizeof...(ColNames)<=column_count,
                                "too many column names specified");
                        set_column_names(std::forward<ColNames>(cols)...);
                        std::fill(row, row+column_count, nullptr);
                        col_order.resize(column_count);
                        for(unsigned i=0; i<column_count; ++i)
                                col_order[i] = i;
                }

                // Returns true when name is one of the configured column names
                // and its index occurs in col_order.
                bool has_column(const std::string&name) const {
                        return col_order.end() != std::find(
                                col_order.begin(), col_order.end(),
                                        std::find(std::begin(column_names), std::end(column_names), name)
                                - std::begin(column_names));
                }

                // The accessors below delegate directly to the LineReader.
                void set_file_name(const std::string&file_name){
                        in.set_file_name(file_name);
                }

                void set_file_name(const char*file_name){
                        in.set_file_name(file_name);
                }

                const char*get_truncated_file_name()const{
                        return in.get_truncated_file_name();
                }

                void set_file_line(unsigned file_line){
                        in.set_file_line(file_line);
                }

                unsigned get_file_line()const{
                        return in.get_file_line();
                }

        private:
                // Recursion terminator.
                void parse_helper(std::size_t){}

                // Converts row[r] into t (when row[r] is non-null) and recurses
                // on the remaining output variables. Parse errors are annotated
                // with the offending column content and column name before
                // being rethrown.
                template<class T, class ...ColType>
                void parse_helper(std::size_t r, T&t, ColType&...cols){
                        if(row[r]){
                                try{
                                        try{
                                                ::io::detail::parse<overflow_policy>(row[r], t);
                                        }catch(error::with_column_content&err){
                                                err.set_column_content(row[r]);
                                                throw;
                                        }
                                }catch(error::with_column_name&err){
                                        err.set_column_name(column_names[r].c_str());
                                        throw;
                                }
                        }
                        parse_helper(r+1, cols...);
                }


        public:
                // Reads the next non-comment line, splits it with
                // detail::parse_line and converts each column into the matching
                // output variable. Exactly column_count variables must be given.
                // Returns false when the input is exhausted, true otherwise.
                // Exceptions are annotated with the file name and line number
                // (where the exception type supports it) and rethrown.
                template<class ...ColType>
                bool read_row(ColType& ...cols){
                        static_assert(sizeof...(ColType)>=column_count,
                                "not enough columns specified");
                        static_assert(sizeof...(ColType)<=column_count,
                                "too many columns specified");
                        try{
                                try{

                                        char*line;
                                        do{
                                                line = in.next_line();
                                                if(!line)
                                                        return false;
                                        }while(comment_policy::is_comment(line));

                                        detail::parse_line<trim_policy, quote_policy>
                                                (line, row, col_order);

                                        parse_helper(0, cols...);
                                }catch(error::with_file_name&err){
                                        err.set_file_name(in.get_truncated_file_name());
                                        throw;
                                }
                        }catch(error::with_file_line&err){
                                err.set_file_line(in.get_file_line());
                                throw;
                        }

                        return true;
                }
        };
}
#endif

generated.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#ifndef H5CPP_GUARD_GCBLl
#define H5CPP_GUARD_GCBLl

namespace h5{
    // Specialization of register_struct for input_t: builds the HDF5 COMPOUND
    // datatype whose members mirror the fields of input_t and returns its
    // handle.
    template<> inline hid_t register_struct<input_t>(){
        // one-dimensional array type of 20 native chars for ReportedLocation
        hsize_t location_dims[] = {20};
        hid_t location_type = H5Tarray_create(H5T_NATIVE_CHAR, 1, location_dims);

        // compound type sized like the C++ struct; each member is placed at the
        // offset of the corresponding field
        hid_t compound_type = H5Tcreate(H5T_COMPOUND, sizeof(input_t));
        H5Tinsert(compound_type, "MasterRecordNumber", HOFFSET(input_t, MasterRecordNumber), H5T_NATIVE_LONG);
        H5Tinsert(compound_type, "Hour", HOFFSET(input_t, Hour), H5T_NATIVE_UINT);
        H5Tinsert(compound_type, "Latitude", HOFFSET(input_t, Latitude), H5T_NATIVE_DOUBLE);
        H5Tinsert(compound_type, "Longitude", HOFFSET(input_t, Longitude), H5T_NATIVE_DOUBLE);
        H5Tinsert(compound_type, "ReportedLocation", HOFFSET(input_t, ReportedLocation), location_type);

        // release the helper array type so that its handle does not leak
        H5Tclose(location_type);

        // When this code is used standalone rather than through the h5cpp
        // framework, the caller must close the returned handle with H5Tclose.
        return compound_type;
    }
}
H5CPP_REGISTER_STRUCT(input_t);

#endif

struct.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#ifndef  CSV2H5_H 
#define  CSV2H5_H

// Capacity, in chars, of the fixed-size text field of input_t.
constexpr int STR_ARRAY_SIZE{20};

// Plain-old-data record holding the subset of CSV columns that is kept.
// Field order and types define the in-memory layout, so they must not be
// rearranged independently of the matching HDF5 compound type.
struct input_t {
    long MasterRecordNumber;                  // "Master Record Number" column
    unsigned int Hour;                        // "Hour" column (sample values: 0, 900, 1500)
    double Latitude;                          // "Latitude" column
    double Longitude;                         // "Longitude" column
    char ReportedLocation[STR_ARRAY_SIZE];    // "Reported_Location" column, fixed-size buffer
};
#endif

input.csv

Master Record Number,Year,Month,Day,Weekend?,Hour,Collision Type,Injury Type,Primary Factor,Reported_Location,Latitude,Longitude
902363382,2015,1,5,Weekday,0,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,1ST & FESS,39.15920668,-86.52587356
902364268,2015,1,6,Weekday,1500,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,2ND & COLLEGE,39.16144,-86.534848
902364412,2015,1,6,Weekend,2300,2-Car,Non-incapacitating,DISREGARD SIGNAL/REG SIGN,BASSWOOD & BLOOMFIELD,39.14978027,-86.56889006
902364551,2015,1,7,Weekend,900,2-Car,Non-incapacitating,FAILURE TO YIELD RIGHT OF WAY,GATES & JACOBS,39.165655,-86.57595635
902364615,2015,1,7,Weekend,1100,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,W 3RD,39.164848,-86.57962482
902364664,2015,1,6,Weekday,1800,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,BURKS & WALNUT,39.12666969,-86.53136998
902364682,2015,1,6,Weekday,1200,2-Car,No injury/unknown,DRIVER DISTRACTED - EXPLAIN IN NARRATIVE,SOUTH CURRY PIKE LOT 71,39.150825,-86.584899
902364683,2015,1,6,Weekday,1400,1-Car,Incapacitating,ENGINE FAILURE OR DEFECTIVE,NORTH LOUDEN RD,39.19927216,-86.63702393
902364714,2015,1,7,Weekend,1400,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,LIBERTY & W 3RD,39.16461021,-86.57913007
902364756,2015,1,7,Weekend,1600,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,PATTERSON & W 3RD,39.16344009,-86.55128002
902364761,2015,1,7,Weekend,1500,2-Car,No injury/unknown,UNSAFE BACKING,S LIBERTY,39.145264,-86.577616
902364764,2015,1,7,Weekend,1600,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,E WINSLOW & S HIGHLAND,39.136048,-86.518496
902364822,2015,1,7,Weekend,1700,1-Car,No injury/unknown,UNSAFE BACKING,E KIRKWOOD,39.16664,-86.53070106
902365038,2015,1,7,Weekend,1800,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,HEATHERWOOD & LEONARD SPRINGS,39.12678972,-86.58258006
902365063,2015,1,1,Weekend,1300,3+ Cars,Incapacitating,DISREGARD SIGNAL/REG SIGN,ROGERS & W PATTERSON,39.15379018,-86.53875008
902365100,2015,1,1,Weekend,1500,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,SOUTH WALNUT & SOUTH WALNUT STREET,39.107344,-86.5264
902365193,2015,1,1,Weekend,1800,1-Car,No injury/unknown,ROADWAY SURFACE CONDITION,17TH & MONROE,39.179248,-86.547104
902365241,2015,1,1,Weekend,1800,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,ARLINGTON & MONROE,39.179248,-86.547104
902365255,2015,1,1,Weekend,1600,2-Car,No injury/unknown,,NORTH GRANT,39.17830276,-86.52934265
902365284,2015,1,1,Weekend,1600,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,SOUTH LIBERTY,39.145264,-86.577616
902365371,2015,1,1,Weekend,1800,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,DILLMAN & SR37S,39.09571236,-86.54595204
902365494,2015,1,1,Weekend,1200,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,RHORER & S WALNUT,39.12144,-86.526496
902365759,2015,1,2,Weekday,700,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,446 & SR46E,39.16692661,-86.4572165
902365763,2015,1,1,Weekend,700,1-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,CHAPEL HILL & SR446S,39.02843027,-86.40780689
902365865,2015,1,2,Weekday,700,1-Car,No injury/unknown,ANIMAL/OBJECT IN ROADWAY,3320 E RHORER & SNODDY,39.121584,-86.489744
902366338,2015,1,2,Weekday,1300,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,3RD & KINGSTON,39.16424,-86.49312041
902366648,2015,1,2,Weekday,1700,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,EAST 10TH & NORTH UNION,39.17093258,-86.509391
902366662,2015,1,1,Weekday,2100,2-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,SOUTH CLARIZZ,39.15263367,-86.49272919
902366676,2015,1,2,Weekday,1700,2-Car,Non-incapacitating,FOLLOWING TOO CLOSELY,COUNTRY CLUB & ROGERS,39.13608,-86.538096
902366706,2015,1,1,Weekday,2000,1-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,W SR 45,39.125312,-86.610496
902366710,2015,1,2,Weekday,1900,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,E GETTYS CREEK & E KERR CREK,39.17187028,-86.4167107
902366720,2015,1,2,Weekday,1700,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,3RD & LIBERTY,39.16461021,-86.57913007
902366726,2015,1,2,Weekday,2100,1-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,SOUTH ROGERS & WEST GORDON PIKE,39.121648,-86.539088
902366785,2015,1,7,Weekend,2000,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,INGRAM & SR37,38.992016,-86.537248
902367011,2015,1,3,Weekday,700,2-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,1025 W 7TH & N OAK,39.168656,-86.5468
902367259,2015,1,3,Weekday,1200,2-Car,Non-incapacitating,SPEED TOO FAST FOR WEATHER CONDITIONS,DUNN & EAST 17TH,39.17900187,-86.52832631
902367552,2015,1,3,Weekday,1500,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,10TH & WOODLAWN,39.171632,-86.523536
902367554,2015,1,2,Weekday,2100,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,GARDNER & GARRISON CHAPEL,39.134144,-86.642864
902367655,2015,1,7,Weekend,1800,Pedestrian,Incapacitating,PEDESTRIAN ACTION,COOLIDGE & ROCKPORT,39.14192595,-86.54314176
902367680,2015,1,3,Weekday,1900,1-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,HEADLEY & MATLOCK,39.19115236,-86.51175974
902367699,2015,1,2,Weekday,1500,2-Car,No injury/unknown,UNSAFE BACKING,WEST ROLL,39.14728113,-86.58089831
902367715,2015,1,2,Weekday,1500,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,SR46 & UNIONVALLEY,39.215152,-86.5928
902367716,2015,1,3,Weekday,1900,2-Car,No injury/unknown,IMPROPER TURNING,DEER PARK & SR46W,39.21210074,-86.58736299
902367789,2015,1,4,Weekday,0,1-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,DUNN & OLD STATE RD 37,39.208048,-86.528704
902367842,2015,1,4,Weekday,400,1-Car,Incapacitating,RAN OFF ROAD RIGHT,BREEDEN & SR45W,39.0881573,-86.66901213
902367995,2015,1,3,Weekday,1000,2-Car,No injury/unknown,UNSAFE BACKING,RAPPEL,39.18768311,-86.54618835
902368658,2015,1,4,Weekday,1700,2-Car,No injury/unknown,UNSAFE LANE MOVEMENT,FESS & THIRD,39.16427777,-86.52588752
902368672,2015,1,4,Weekday,1800,2-Car,No injury/unknown,UNSAFE BACKING,S COLLEGE MALL,39.15412578,-86.49786768
902368697,2015,1,3,Weekday,1800,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,SR37S & VICTOR,39.105552,-86.551104
902368762,2015,1,4,Weekday,1600,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,EAST KERR CREEK & SR46E,39.16664,-86.462496
902368763,2015,1,4,Weekday,1500,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,SR37N & WAYPORT,39.263024,-86.52264
902368908,2015,1,5,Weekday,700,1-Car,No injury/unknown,IMPROPER TURNING,RHORER & S OLD ST RD 37,39.12141994,-86.53134987
902368964,2015,1,5,Weekday,600,2-Car,Non-incapacitating,IMPROPER TURNING,S COLLEGE & W 4TH,39.165664,-86.5348
902369025,2015,1,5,Weekday,900,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,E 3RD & SR46E,39.16424,-86.473296
902369035,2015,1,5,Weekday,900,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,E 1ST & S EASTSIDE,39.159248,-86.51256
902369036,2015,1,5,Weekday,900,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,E 8TH & N WALNUT,39.169552,-86.533552
902369165,2015,1,5,Weekday,800,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,JORDAN & LAW,39.173552,-86.515488
902369233,2015,1,7,Weekend,0,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,EAST 3RD & PETE ELLIS,39.16424,-86.495088
902369506,2015,1,5,Weekday,1400,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,FOURTH & SR46W,0,0
902369518,2015,1,4,Weekday,2200,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,S ROCKPORT & VICTOR,39.053744,-86.606304
902369547,2015,1,5,Weekday,1800,Cyclist,Incapacitating,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,ATWATER & HIGH,39.16311481,-86.50846175
902369585,2015,1,5,Weekday,2000,2-Car,No injury/unknown,LEFT OF CENTER,CHAFIN CHAPEL & W STATE ROAD 46,39.25962636,-86.65763204
902369787,2015,1,6,Weekday,700,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,RHORER & S WALNUT ST,39.12144,-86.526496
902369864,2015,1,6,Weekday,800,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,100 BLK W TAPP & S WALNUT,0,0
902369870,2015,1,6,Weekday,800,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,SO PATTERSON & W 3RD,39.16344009,-86.55127137
902369933,2015,1,1,Weekend,1300,2-Car,No injury/unknown,IMPROPER LANE USAGE,CURRY & SR46W,39.15835953,-86.58264923
902370000,2015,1,5,Weekday,1300,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,2ND & PATTERSON,39.16074984,-86.54850997
902370079,2015,1,6,Weekday,700,2-Car,No injury/unknown,LEFT OF CENTER,SR48 & STONE CHASE,39.16475122,-86.59523284
902370118,2015,1,6,Weekday,1200,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,HENDERSON & HILLSIDE,39.15063632,-86.52720688
902370264,2015,1,6,Weekday,1300,2-Car,Non-incapacitating,FOLLOWING TOO CLOSELY,THIRD,39.164752,-86.573104
902370468,2015,1,6,Weekday,1500,2-Car,Non-incapacitating,FOLLOWING TOO CLOSELY,PATTERSON & THIRD,39.16344009,-86.55127137
902370473,2015,1,6,Weekday,1500,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,E ATWATER & S HIGHLAND,39.16305989,-86.51773028
902370571,2015,1,6,Weekday,1500,2-Car,No injury/unknown,RAN OFF ROAD RIGHT,MOORES CREEK & SNOODY,39.12134014,-86.48974361
902370734,2015,1,4,Weekday,0,2-Car,Incapacitating,FAILURE TO YIELD RIGHT OF WAY,ACUFF & SR37N,39.20824,-86.554704
902370735,2015,1,4,Weekday,800,1-Car,No injury/unknown,ROADWAY SURFACE CONDITION,SR37S,0,0
902370848,2015,1,7,Weekend,1100,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,BURKS & WALNUT,39.15836229,-86.53136998
902370987,2015,1,6,Weekend,2100,1-Car,No injury/unknown,IMPROPER TURNING,MITCHELL & THIRD,39.16427137,-86.51420373
902371028,2015,1,1,Weekend,1800,2-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,BETHEL,39.21258773,-86.46348219
902371029,2015,1,7,Weekend,1600,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,CURRY & GIFFORD,39.15648401,-86.58270442
902371055,2015,1,7,Weekend,1500,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,3RD & GRANT,39.16464,-86.529728
902371076,2015,1,6,Weekend,2000,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,RUSSELL & TIMBERCREST,39.193408,-86.48584
902371085,2015,1,7,Weekend,1700,2-Car,No injury/unknown,IMPROPER TURNING,S COLLEGE,39.16115717,-86.53485253
902371197,2015,1,1,Weekend,100,2-Car,No injury/unknown,UNSAFE LANE MOVEMENT,7TH & N ROGERS,39.168704,-86.538576
902371285,2015,1,1,Weekend,1000,2-Car,No injury/unknown,IMPROPER LANE USAGE,7TH & WALNUT,39.16864,-86.533568
902371535,2015,1,1,Weekday,2000,2-Car,No injury/unknown,UNSAFE LANE MOVEMENT,S LIBERTY & SR45W,39.14547579,-86.57703014
902371565,2015,1,1,Weekend,1800,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,S CLARIZZ,39.15263367,-86.49272918
902371659,2015,1,2,Weekday,200,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,E SAMPLE & N OLD STATE 37,39.25864,-86.5012
902371677,2015,1,1,Weekend,1200,1-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,WEST 11TH,39.17344,-86.52237094
902371877,2015,1,2,Weekday,600,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,BLOOMFIELD & SR37,39.148144,-86.572992
902371937,2015,1,2,Weekday,600,2-Car,No injury/unknown,ROADWAY SURFACE CONDITION,WEST 2ND,39.16025543,-86.54006195
902372354,2015,1,2,Weekday,700,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,TYLER LANE,39.1316,-86.576944
902372412,2015,1,2,Weekday,1400,3+ Cars,Incapacitating,FOLLOWING TOO CLOSELY,BLOOMFIELD & RANSOM,39.15944,-86.555296
902372560,2015,1,2,Weekday,1500,1-Car,No injury/unknown,ANIMAL/OBJECT IN ROADWAY,S WEIMER & W TAPP,39.136512,-86.562016
902372716,2015,1,3,Weekday,300,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,CHANDLER & RENEE,39.23488,-86.616736
902372776,2015,1,3,Weekday,700,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,E 10TH & N WASHINGTON,39.171728,-86.5324
902372873,2015,1,1,Weekend,1100,1-Car,Non-incapacitating,ANIMAL/OBJECT IN ROADWAY,DELAP,39.250778,-86.615867
902372874,2015,1,2,Weekday,800,1-Car,No injury/unknown,OVERCORRECTING/OVERSTEERING,SHUFFLE CREEK & SR45E,39.22993318,-86.4157437
902372994,2015,1,3,Weekday,900,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,SR37 & SR45W,39.187344,-86.5552
902373153,2015,1,3,Weekday,1100,Pedestrian,Non-incapacitating,PEDESTRIAN ACTION,6TH & INDIANA,39.167744,-86.526928
902373279,2015,1,3,Weekday,1300,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,S WALNUT,39.13432546,-86.52733091
902373402,2015,1,3,Weekday,1700,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,ROGERS & W 2ND,39.16152,-86.538592
902373415,2015,1,3,Weekday,1800,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,RAMP 15 SB SR 37 TO SR 45 WEST & SR45W,0,0
902373417,2015,1,3,Weekday,1700,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,3RD & SR37,39.164752,-86.573104
902373450,2015,1,3,Weekday,1800,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,9TH & COLLEGE,39.170544,-86.534768
902373562,2015,1,3,Weekday,2300,Pedestrian,Incapacitating,FAILURE TO YIELD RIGHT OF WAY,THIRD & WOODLAWN,39.16426449,-86.5226336
902373707,2015,1,4,Weekday,800,2-Car,No injury/unknown,LEFT OF CENTER,S CURRY,39.142048,-86.582592
902373722,2015,1,3,Weekday,1200,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,S ROGERS & W 4TH,39.165696,-86.538592
902373724,2015,1,4,Weekday,900,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,PARK & WILSON,39.151552,-86.523664
902373769,2015,1,4,Weekday,1100,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,COLLEGE & KIRKWOOD,39.16718927,-86.53479128
902373795,2015,1,4,Weekday,1200,2-Car,Non-incapacitating,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,S HIGHLAND & WINSLOW FARM,39.136048,-86.518496
902374157,2015,1,3,Weekday,1800,3+ Cars,No injury/unknown,FOLLOWING TOO CLOSELY,SR37S & VICTOR,39.105552,-86.551104
902374167,2015,1,4,Weekday,2000,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,10TH & WOODLAWN,39.17108301,-86.52351178
902374296,2015,1,5,Weekday,700,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,3RD & WALNUT,39.16464,-86.5336
902374303,2015,1,3,Weekday,1100,1-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,3RD,39.16424,-86.48599111
902374407,2015,1,5,Weekday,700,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,S WALNUT,39.142555,-86.531238
902374495,2015,1,5,Weekday,1200,2-Car,No injury/unknown,UNSAFE BACKING,3RD & LIBERTY,39.16461021,-86.57913006
902374528,2015,1,5,Weekday,900,1-Car,No injury/unknown,UNSAFE BACKING,2418 E GOLDIN & E GOLDIN,39.128067,-86.50872634
902374541,2015,1,5,Weekday,0,2-Car,No injury/unknown,UNSAFE BACKING,NORTH FEE,39.17658828,-86.51891313
902374549,2015,1,5,Weekday,800,1-Car,No injury/unknown,UNSAFE LANE MOVEMENT,FOREST & KIRKWOOD,39.16608943,-86.5196257
902374663,2015,1,5,Weekday,1500,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,N WALNUT & SR46E,39.186368,-86.5344
902374711,2015,1,5,Weekday,1600,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,S HARMONY & SR45,39.111024,-86.638304
902374763,2015,1,5,Weekday,1700,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,COLLEGE MALL & EASTLAND PLAZA,0,0
902374770,2015,1,4,Weekday,2200,2-Car,No injury/unknown,IMPROPER TURNING,WEST 2ND,39.16144,-86.52709845
902374772,2015,1,5,Weekday,1400,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,SOUTHERN & WALNUT,39.14984,-86.533392
902375012,2015,1,5,Weekday,1400,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,BETHAL & OLD SR 37,0,0
902375077,2015,1,6,Weekday,1000,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,SR37N & VERNAL,39.17635934,-86.56266121
902375104,2015,1,5,Weekday,1100,2-Car,No injury/unknown,UNSAFE BACKING,S FRANKLIN & W 3RD,39.164688,-86.5712
902375106,2015,1,5,Weekday,1200,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,COLLEGE MALL & SR46E,39.16424,-86.4984
902375511,2015,1,5,Weekday,1600,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,GRIMES & WASHINGTON,39.15344,-86.5324
902375556,2015,1,6,Weekday,1700,2-Car,No injury/unknown,IMPROPER TURNING,17TH & JORDAN,39.178848,-86.511088
902375560,2015,1,6,Weekday,1600,2-Car,No injury/unknown,UNSAFE BACKING,LAW,39.17302832,-86.51339918
902375586,2015,1,6,Weekday,1700,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,S GATES & WEST 3RD,39.16459009,-86.57619975
902375614,2015,1,6,Weekend,1900,2-Car,No injury/unknown,,US45W,39.125312,-86.610496
902375751,2015,1,7,Weekend,200,Pedestrian,Non-incapacitating,FAILURE TO YIELD RIGHT OF WAY,DUNN & KIRKWOOD,39.16664,-86.528192
902375764,2015,1,6,Weekday,1200,3+ Cars,No injury/unknown,DISREGARD SIGNAL/REG SIGN,KIMBLE & W 3RD,39.16464,-86.568
902375793,2015,1,7,Weekend,100,2-Car,No injury/unknown,IMPROPER LANE USAGE,HOPEWELL & W 8TH,39.16968,-86.552192
902375812,2015,1,7,Weekend,700,Bus,Incapacitating,BRAKE FAILURE OR DEFECTIVE,17TH & COLLEGE,39.17896,-86.53472
902375825,2015,1,7,Weekend,900,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,INDIANA & THIRTEENTH,39.17524746,-86.52701896
902375837,2015,1,7,Weekend,900,2-Car,No injury/unknown,IMPROPER LANE USAGE,JORDAN & THIRD,39.16427532,-86.51640856
902375867,2015,1,7,Weekend,1200,2-Car,No injury/unknown,IMPROPER TURNING,JORDAN & THIRD,39.16427532,-86.51640856
902375880,2015,1,6,Weekday,1600,2-Car,No injury/unknown,IMPROPER TURNING,W 3RD,39.228944,-86.627504
902375899,2015,1,6,Weekday,1800,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,4TH & WASHINGTON,39.165648,-86.5324
902375928,2015,1,6,Weekday,1800,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,KOONTZ & W DUVALL,39.079248,-86.619792
902375946,2015,1,7,Weekend,1500,2-Car,No injury/unknown,UNSAFE BACKING,E 10TH,39.17168,-86.527024
902376025,2015,1,7,Weekend,1700,1-Car,No injury/unknown,IMPROPER TURNING,INDIANA & SEVENTH,39.16851475,-86.52695381
902376043,2015,1,7,Weekend,1600,Moped/Motorcycle,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,E GRAMERCY PARK & N HINKLE,39.21040021,-86.50542967
902376047,2015,1,7,Weekend,1800,2-Car,Incapacitating,FAILURE TO YIELD RIGHT OF WAY,NORTH KINSER & SR46E,39.186544,-86.537888
902376062,2015,1,4,Weekday,1800,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,SR37S & WEST WAYSIDE,39.10921097,-86.53382873
902376063,2015,1,7,Weekend,1500,2-Car,No injury/unknown,UNSAFE BACKING,S COLLEGE MALL,39.175152,-86.534752
902376064,2015,1,7,Weekend,1500,3+ Cars,No injury/unknown,UNSAFE LANE MOVEMENT,S WALNUT & W COUNTRY CLUB,0,0
902376066,2015,1,7,Weekend,1600,2-Car,Non-incapacitating,FAILURE TO YIELD RIGHT OF WAY,E 3RD & OVERHILL,39.164272,-86.501488
902376079,2015,1,5,Weekday,1500,1-Car,No injury/unknown,ANIMAL/OBJECT IN ROADWAY,NORTH TEXAS RIDGE,39.31562042,-86.66162872
902376080,2015,1,6,Weekday,1600,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,EAST SMITHVILLE,39.071056,-86.5356
902376081,2015,1,6,Weekend,1900,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,SOUTH BREEDEN & SR45W,39.0873344,-86.67042215
902376207,2015,1,7,Weekend,1100,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,GATES & SR45W,39.16820908,-86.57639312
902376248,2015,1,1,Weekend,800,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,CAVE ROAD & W AIRPORT ROAD,39.13688,-86.628768
902376250,2015,1,1,Weekend,900,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,MILLER & S WALNUT,39.146944,-86.532544
902376282,2015,1,1,Weekend,1200,2-Car,No injury/unknown,BRAKE FAILURE OR DEFECTIVE,2101 S LIBERTY & W 2ND,0,0
902376326,2015,1,7,Weekend,2100,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,N MARY & W REEVES,39.231312,-86.636704
902376382,2015,1,1,Weekend,1400,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,E 3RD & S JEFFERSON,39.164304,-86.506992
902376405,2015,1,6,Weekday,700,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,BURMA & SR37,39.31831096,-86.51055935
902376417,2015,1,1,Weekend,800,1-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,GETTYS CREEK & SR46,39.16351024,-86.41786097
902376427,2015,1,1,Weekend,900,1-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,GETTYS CREEK & SR46,39.16351024,-86.41786097
902376433,2015,1,1,Weekend,900,1-Car,No injury/unknown,SPEED TOO FAST FOR WEATHER CONDITIONS,GETTYS CREEK & SR46,39.16351024,-86.41786097
902376467,2015,1,1,Weekday,2200,1-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,S MONON,39.1506691,-86.53471374
902376541,2015,1,2,Weekday,200,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,E 14TH,39.17600327,-86.53063151
902376554,2015,1,2,Weekday,200,2-Car,No injury/unknown,UNSAFE LANE MOVEMENT,JORDAN & THIRD,39.16427532,-86.51640856
902376562,2015,1,1,Weekend,0,1-Car,No injury/unknown,ANIMAL/OBJECT IN ROADWAY,S ROCKPORT & W MAY,39.093344,-86.5868
902376565,2015,1,6,Weekend,2200,1-Car,Non-incapacitating,RAN OFF ROAD RIGHT,ANNE & STRAIN RIDGE,39.04934026,-86.51109208
902376571,2015,1,7,Weekend,2300,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,E WINDEMERE DR,39.164032,-86.570272
902376691,2015,1,2,Weekday,300,3+ Cars,No injury/unknown,UNSAFE SPEED,E 13TH & INDIANA,39.175504,-86.527104
902376750,2015,1,2,Weekday,1000,1-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,WEST 2ND,39.16122743,-86.51884571
902376844,2015,1,5,Weekday,700,1-Car,No injury/unknown,RAN OFF ROAD RIGHT,W BEECH & W WOODLAND,39.208544,-86.591504
902376914,2015,1,2,Weekday,1500,2-Car,Non-incapacitating,FAILURE TO YIELD RIGHT OF WAY,W SR 45,39.2512,-86.652504
902376951,2015,1,2,Weekday,1700,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,SR37S & W VERNAL,39.17662048,-86.56252288
902376993,2015,1,2,Weekday,1600,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,BLOOMFIELD & LANDMARK,39.15993981,-86.55382007
902377011,2015,1,3,Weekday,1800,2-Car,No injury/unknown,DISREGARD SIGNAL/REG SIGN,SR37S & VICTOR,39.105552,-86.551104
902377048,2015,1,3,Weekday,0,2-Car,No injury/unknown,DRIVER DISTRACTED - EXPLAIN IN NARRATIVE,OLD STATE ROAD 37 & ORCHARD,39.11584,-86.5312
902377198,2015,1,1,Weekend,900,1-Car,Incapacitating,SPEED TOO FAST FOR WEATHER CONDITIONS,FRIENDSHIP & SR46,39.15149145,-86.40317983
902377242,2015,1,1,Weekend,1300,2-Car,No injury/unknown,OTHER (DRIVER) - EXPLAIN IN NARRATIVE,SO KINGSTON,39.16742706,-86.49314117
902384082,2015,1,6,Weekday,1000,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,7TH & GRANT,39.16864,-86.529696
902384099,2015,1,7,Weekend,1500,2-Car,No injury/unknown,UNSAFE LANE MOVEMENT,BLOOMFIELD & CORY,39.157248,-86.559792
902384137,2015,1,7,Weekend,1800,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,KIRKWOOD & MADISON,39.166656,-86.537296
902384175,2015,1,7,Weekend,1900,2-Car,No injury/unknown,UNSAFE BACKING,S WALNUT,39.14705763,-86.53257665
902384258,2015,1,7,Weekend,2300,1-Car,No injury/unknown,LEFT OF CENTER,SR37S & WAYSIDE,39.10921097,-86.53382873
902384269,2015,1,6,Weekday,1300,2-Car,No injury/unknown,IMPROPER LANE USAGE,VERNAL,39.176544,-86.5624
902384360,2015,2,1,Weekend,100,1-Car,Non-incapacitating,RAN OFF ROAD RIGHT,JEFFERSON & TENTH,39.17145984,-86.50680459
902384470,2015,1,7,Weekend,1700,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,9TH & COLLEGE,39.170544,-86.534768
902384482,2015,2,1,Weekend,1100,2-Car,No injury/unknown,UNSAFE BACKING,NORTH WALNUT,39.19396833,-86.53335146
902384628,2015,1,7,Weekend,1900,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,EAST RHORER & SOUTH WALNUT STREET,39.12144,-86.526496
902384641,2015,2,1,Weekend,1500,2-Car,Incapacitating,IMPROPER TURNING,ATWATER & JORDAN,39.163168,-86.516384
902384754,2015,2,1,Weekend,1800,1-Car,No injury/unknown,IMPROPER TURNING,SR45E,39.21936967,-86.44920774
902384755,2015,1,7,Weekend,1800,1-Car,Incapacitating,RAN OFF ROAD RIGHT,HARRODSBURG & W POPCORN,38.85405138,-86.54503986
902384833,2015,1,7,Weekend,300,1-Car,No injury/unknown,DRIVER ASLEEP OR FATIGUED,E CHAMBERS & SR37,39.30651541,-86.51481628
902385040,2015,2,2,Weekday,900,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,1425 S CURRY & GIFFORD,39.156896,-86.582704
902385118,2015,2,2,Weekday,1000,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,BUICK CADILLAC & COLLEGE MALL,39.15816,-86.497792
902385244,2015,2,2,Weekday,1100,3+ Cars,No injury/unknown,FOLLOWING TOO CLOSELY,SARE & SOUTH MEADOW,0,0
902385272,2015,2,2,Weekday,900,2-Car,No injury/unknown,FAILURE TO YIELD RIGHT OF WAY,E COVENANTER & S CLARIZZ,39.15462032,-86.49176401
902385416,2015,2,2,Weekday,1200,2-Car,No injury/unknown,FOLLOWING TOO CLOSELY,11TH & VERNAL,39.17323818,-86.55512325

README.md

1
2
3
4
5
6
7
8
# CSV to HDF5 

Public domain CSV example file obtained from [this link](https://data.bloomington.in.gov/dataset/117733fb-31cb-480a-8b30-fbf425a690cd/resource/8673744e-53f2-42d1-9d05-4e412bd55c94/download/monroe-county-crash-data2003-to-2015.csv)
 The CSV library is [Fast C++ CSV Parser](https://github.com/ben-strasser/fast-cpp-csv-parser)

# C++/C representation

An arbitrary POD struct can be represented in HDF5 format; one easy representation of strings is a character array. An alternative -- often better performing -- representation would be to factor out the strings from the numerical data, then save them in separate datasets.

```cpp
#ifndef CSV2H5_H
#define CSV2H5_H

/*define C++ representation as POD struct*/
struct input_t {
    long MasterRecordNumber;
    unsigned int Hour;
    double Latitude;
    double Longitude;
    char ReportedLocation[20]; // character arrays are supported
};

#endif
```

Reading the CSV is rather easy thanks to [Fast C++ CSV Parser](https://github.com/ben-strasser/fast-cpp-csv-parser); a single header file, `csv.h`, is attached to the project. It is not only fast and simple, but also elegantly allows you to read only a subset of the columns, whose number is given by the template argument `N_COLS`:
```cpp
io::CSVReader<N_COLS> in("input.csv"); // number of cols may be less, than total columns in a row, we're to read only 5
in.read_header(io::ignore_extra_column, "Master Record Number", "Hour", "Reported_Location","Latitude","Longitude");
[...]
while(in.read_row(row.MasterRecordNumber, row.Hour, ptr, row.Latitude, row.Longitude)){
    [...]
```
The HDF5 part is matching in simplicity:
```cpp
h5::fd_t fd = h5::create("output.h5",H5F_ACC_TRUNC);
h5::pt_t pt = h5::create<input_t>(fd, "monroe-county-crash-data2003-to-2015.csv",
    h5::max_dims{H5S_UNLIMITED}, h5::chunk{1024} | h5::gzip{9} ); // compression, chunked, unlimited size
[...]
while(...){
    h5::append(pt, row); // append operator uses internal buffers to cache and convert row insertions to block/chunk operations
}
[...]
```
The translation unit (TU) is scanned with the LLVM-based `h5cpp` compiler, and the necessary HDF5-specific type descriptors are produced:

```cpp
#ifndef H5CPP_GUARD_mzMuQ
#define H5CPP_GUARD_mzMuQ

namespace h5{
    //template specialization of input_t to create HDF5 COMPOUND type
    template<> hid_t inline register_struct<input_t>(){
        //hsize_t at_00_[] ={20}; hid_t at_00 = H5Tarray_create(H5T_STRING,20,at_00_);
        hid_t at_00 = H5Tcopy (H5T_C_S1); H5Tset_size(at_00, 20);
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (input_t));
        H5Tinsert(ct_00, "MasterRecordNumber", HOFFSET(input_t,MasterRecordNumber),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "Hour", HOFFSET(input_t,Hour),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "Latitude", HOFFSET(input_t,Latitude),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "Longitude", HOFFSET(input_t,Longitude),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "ReportedLocation", HOFFSET(input_t,ReportedLocation),at_00);

        //closing all hid_t allocations to prevent resource leakage
        H5Tclose(at_00);

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_00' must be closed: H5Tclose(ct_00);
        return ct_00;
    };
}
H5CPP_REGISTER_STRUCT(input_t);

#endif
```

The entire project can be [downloaded from this link](https://github.com/steven-varga/HDFGroup-mailinglist/tree/master/csv-2020-03-03) but for completeness here is the source file:
/* Copyright (c) 2020 vargaconsulting, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include "csv.h"

// data structure include file: `struct.h` must precede `generated.h` as the latter
// depends on the former
#include "struct.h"

#include <h5cpp/core>      // has handle + type descriptors

// sandwiched: as `h5cpp/io` depends on `generated.h` which needs `h5cpp/core`
#include "generated.h"     // uses type descriptors

#include <h5cpp/io>        // uses generated.h + core

int main(){

// create HDF5 container
h5::fd_t fd = h5::create("output.h5",H5F_ACC_TRUNC);
// create dataset   
// chunk size is unrealistically small, usually you would set this such that ~= 1MB or an ethernet jumbo frame size
h5::ds_t ds = h5::create<input_t>(fd,  "simple approach/dataset.csv",
             h5::max_dims{H5S_UNLIMITED}, h5::chunk{10} | h5::gzip{9} );
// `h5::ds_t` handle is seamlessly cast to `h5::pt_t` packet table handle, this could have been done in single step
// but we need `h5::ds_t` handle to add attributes
h5::pt_t pt = ds;
// attributes may be added to `h5::ds_t` handle
ds["data set"] = "monroe-county-crash-data2003-to-2015.csv";
ds["cvs parser"] = "https://github.com/ben-strasser/fast-cpp-csv-parser"; // thank you!

constexpr unsigned N_COLS = 5;
io::CSVReader<N_COLS> in("input.csv"); // number of cols may be less, than total columns in a row, we're to read only 5
in.read_header(io::ignore_extra_column, "Master Record Number", "Hour", "Reported_Location","Latitude","Longitude");
input_t row;                           // buffer to read line by line
char* ptr;      // indirection, as `read_row` doesn't take array directly
while(in.read_row(row.MasterRecordNumber, row.Hour, ptr, row.Latitude, row.Longitude)){
    strncpy(row.ReportedLocation, ptr, STR_ARRAY_SIZE); // defined in struct.h
    h5::append(pt, row);
    std::cout << std::string(ptr) << "\n";
}
// RAII closes all allocated resources

}

the output of `h5dump -pH output.h5`:

    HDF5 "output.h5" {
    GROUP "/" {
       GROUP "simple approach" {
          DATASET "dataset.csv" {
             DATATYPE H5T_COMPOUND {
                H5T_STD_I64LE "MasterRecordNumber";
                H5T_STD_U32LE "Hour";
                H5T_IEEE_F64LE "Latitude";
                H5T_IEEE_F64LE "Longitude";
                H5T_ARRAY { [20] H5T_STD_I8LE } "ReportedLocation";
             }
             DATASPACE SIMPLE { ( 199 ) / ( H5S_UNLIMITED ) }
             STORAGE_LAYOUT {
                CHUNKED ( 10 )
                SIZE 7347 (1.517:1 COMPRESSION)
             }
             FILTERS {
                COMPRESSION DEFLATE { LEVEL 9 }
             }
             FILLVALUE {
                FILL_TIME H5D_FILL_TIME_IFSET
                VALUE H5D_FILL_VALUE_DEFAULT
             }
             ALLOCATION_TIME {
                H5D_ALLOC_TIME_INCR
             }
             ATTRIBUTE "cvs parser" {
                DATATYPE H5T_STRING {
                   STRSIZE H5T_VARIABLE;
                   STRPAD H5T_STR_NULLTERM;
                   CSET H5T_CSET_UTF8;
                   CTYPE H5T_C_S1;
                }
                DATASPACE SCALAR
             }
             ATTRIBUTE "data set" {
                DATATYPE H5T_STRING {
                   STRSIZE H5T_VARIABLE;
                   STRPAD H5T_STR_NULLTERM;
                   CSET H5T_CSET_UTF8;
                   CTYPE H5T_C_S1;
                }
                DATASPACE SCALAR
             }
          }
       }
    }
    }


Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

# compiler flags, link libraries and include search path used by the rules below
CXXFLAGS =  -std=c++17
LIBS =  -lhdf5  -lz -ldl -lm -lpthread
INCLUDES = -I/usr/local/include -I/usr/include -I./

# first (default) target: build the converter, then run it
test: csv2hdf5
    ./csv2hdf5

# run the h5cpp tool over the translation unit to produce generated.h
# NOTE(review): `-Dgenerated.h` presumably names the output header for h5cpp -- confirm against the h5cpp documentation
generated.h: csv2hdf5.cpp
    h5cpp csv2hdf5.cpp -- $(CXXFLAGS) $(INCLUDES) -Dgenerated.h

# the object file depends on generated.h, so the header is (re)generated before compiling
csv2hdf5.o : csv2hdf5.cpp generated.h 
    $(CXX) $(INCLUDES) -o csv2hdf5.o  $(CXXFLAGS) -c csv2hdf5.cpp

# link the executable from its prerequisites ($^) against the libraries in LIBS
csv2hdf5: csv2hdf5.o
    $(CXX) $^ $(LIBS) -o $@ 

# remove build products and produced HDF5 files
clean:
    @$(RM) *.o *.h5 csv2hdf5

# additionally remove the generated header
dist-clean: clean
    @$(RM) generated.h
# NOTE(review): only `test` is declared phony; `clean` and `dist-clean` are not
.PHONY: test

custom_pipeline

pipeline.cpp

1
2
3
4
5
6
7
8
#include "h5cpp/all"

// Minimal pipeline example: writes one hundred doubles (all 2.0) as "dataset"
// into a freshly created "test.h5", passing h5::chunk{2} and h5::high_throughput.
int main() {
  std::vector<double> payload(100, 2.);
  auto file = h5::create("test.h5", H5F_ACC_TRUNC);
  //FIXME: reference count problems...
  h5::write(file, "dataset", payload, h5::chunk{2}, h5::high_throughput);
}

Makefile

#  _____________________________________________________________________________
#  Copyright (c) 2018-2021 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _____________________________________________________________________________

apps = pipeline

CXXFLAGS =  -std=c++17 -Wno-deprecated
LIBS =  -lhdf5  -lz -ldl -lm

# first (default) target: build the example, then run it
test: $(apps)
    @./pipeline
    #h5dump -pH 004.h5

# explicit compile rule; INCLUDES and SRC_DIR are not set in this file and may be
# supplied by the caller. The stray '-' that used to precede $(INCLUDES) was removed:
# it expanded to a bare '-' (or '--I...') argument whenever this rule was used.
# NOTE(review): with SRC_DIR unset the prerequisite pattern is '/%.cpp' -- confirm
# whether '%.cpp' was intended.
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $^

all: $(apps)

# link the executable from its object file
pipeline: pipeline.o
    $(CXX) $^ $(LIBS) -o $@

# remove build products and produced HDF5/profile files
clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean

# none of these targets produce a file of the same name
.PHONY: test all clean dist-clean

datasets

datasets.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <h5cpp/all>
#include <cstddef>

constexpr auto filename = "001.h5";

// Walks through the dataset API on the file named by `filename`: create-then-write
// with a selection, a one-shot write, create-then-read, and a whole-dataset read.
int main() {

    auto fd = h5::create(filename, H5F_ACC_TRUNC);
    { // CREATE - WRITE
        arma::mat M(2,3); M.ones();                         // create a 2x3 matrix filled with ones
        // dataset of `short` elements: current dims 10x20, second dimension unlimited,
        // chunk 2x3, fill value 3, gzip{9}
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        // write M into the dataset through a selection: offset {2,2}, stride {1,3}
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }
    {
        arma::vec V( {1.,2.,3.,4.,5.,6.,7.,8.});    // create a vector
        // simple one shot write that computes current dimensions and saves the vector
        // NOTE(review): this overload takes the file name while `fd` for the same file is still open -- confirm this is intended
        h5::write( filename, "one shot create write",  V);
        // what if you want to position a matrix inside a higher dimension with some added complexity?

        /* FIXME: hyperblock selection
        h5::write( filename, "arma vec inside matrix",  V // object contains 'count' and rank being written
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}            // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)  
            ,h5::count{1,1}
            ,h5::stride{3,5}
            ,h5::block{2,4}
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension 
                             // is used as unit chunk
        );*/
    }
    { // CREATE - READ: create a new 3x2 `float` dataset named "dataset" and read it straight back
      // note that data is only data: it can be reshaped, cast to any format, and its content modified through filtering
        h5::ds_t ds = h5::create<float>(fd,"dataset", h5::current_dims{3,2}, h5::fill_value<float>(NAN));  // create dataset, default to NaN-s
        auto M  = h5::read<arma::mat>( fd,"dataset" );                 // read data back as matrix
        M.print();
    }
    { // READ: the dataset written in the first block, addressed by file name
        arma::mat M = h5::read<arma::mat>(filename,"create then write"); // read entire dataset back with a single read
        M.print();
    }



}

README.md

## Datasets[^1]
A dataset is stored in a file in two parts: a header and a data array.

The header contains information that is needed to interpret the array portion of the dataset, as well as metadata (or pointers to metadata) that describes or annotates the dataset. Header information includes the name of the object, its dimensionality, its number-type, information about how the data itself is stored on disk, and other information used by the library to speed up access to the dataset or maintain the file's integrity.

There are four essential classes of information in any header: name, datatype, dataspace, and storage layout:

* **Name** A dataset name is a sequence of alphanumeric ASCII characters.
* **Datatype** HDF5 allows one to define many different kinds of datatypes. There are two categories of datatypes:
    * atomic datatype: currently only **NATIVE** byte order supported on H5CPP
    * compound datatypes: see h5cpp compiler assisted reflection

* **Dataspace** A dataset dataspace describes the dimensionality of the dataset. The dimensions of a dataset can be fixed (unchanging), or they may be  unlimited, which means that they are extendible (i.e. they can grow larger). 

* **Storage layout** The HDF5 format makes it possible to store data in a variety of ways. The default storage layout format is contiguous, meaning that data is stored in the same linear way that it is organized in memory. Two other storage layout formats are currently defined for HDF5: compact, and chunked.
    * **Chunked storage** involves dividing the dataset into equal-sized "chunks" that are stored separately. Chunking **has three important benefits**.
        1. It makes it possible to achieve good performance when accessing subsets of the datasets, even when the subset to be chosen is orthogonal to the normal storage order of the dataset.
        2. It makes it possible to compress large datasets and still achieve good performance when accessing subsets of the dataset.
        3. It makes it possible efficiently to extend the dimensions of a dataset in any direction.
    * **Compact storage** is used when the amount of data is small and can be stored directly in the object header. It is **NOT SUPPORTED by H5CPP** directly.

### Dataspace
Properties of a dataspace consist of the rank (number of dimensions) of the data array, the actual sizes of the dimensions of the array, and the maximum sizes of the dimensions of the array. For a fixed-dimension dataset, the actual size is the same as the maximum size of a dimension. When a dimension is unlimited, the maximum size is set to the value `H5S_UNLIMITED`.
A dataspace can also describe portions of a dataset, making it possible to do partial I/O operations on selections. Selection is supported by the dataspace interface (H5S). Given an n-dimensional dataset, there are currently four ways to do partial selection:

* Select a logically contiguous n-dimensional hyperslab.
* Select a non-contiguous hyperslab consisting of elements or blocks of elements (hyperslabs) that are equally spaced.

List to describe dimensions of a dataset:

* `h5::current_dims{i,j,k,..}` - actual dimension `i,j,k \in {1 - max}`
* `h5::max_dims{...}` - maximum dimension, use `H5S_UNLIMITED` for infinite 
* `h5::chunk{...}` - define block size; a clever blocking arrangement increases throughput

List how to select from datasets for read or write:

* `h5::offset{...}` - start coordinates of data selection
* `h5::stride{...}` - every `n` considered 
* `h5::block{...}` - every `m` block is considered
* `h5::count{...}` - the amount of data 

**Note:** `h5::stride`, `h5::block` and scatter-gather operations do not work when `h5::high_throughput` is set, for performance reasons.






[^1]: Lifted from HDF5 CAPI documentation

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

apps = datasets

CXXFLAGS =  -std=c++17 -Wno-deprecated
LIBS =  -lhdf5  -lz -ldl -lm

# first (default) target: build the example, then run it
test: $(apps)
    @./datasets

# explicit compile rule; INCLUDES and SRC_DIR are not set in this file and may be
# supplied by the caller. The stray '-' that used to precede $(INCLUDES) was removed:
# it expanded to a bare '-' (or '--I...') argument whenever this rule was used.
# NOTE(review): with SRC_DIR unset the prerequisite pattern is '/%.cpp' -- confirm
# whether '%.cpp' was intended.
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $^

all: $(apps)

# link the executable from its object file
datasets: datasets.o
    $(CXX) $^ $(LIBS) -o $@

# remove build products and produced HDF5/profile files
clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean

# none of these targets produce a file of the same name
.PHONY: test all clean dist-clean

datatypes

n-bit.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <cstdint>
#include <iostream>
#include <limits>
#include <random>
#include <algorithm>
#include <armadillo>
#include <Eigen/Dense> 
#include <h5cpp/core> // include this before custom type definition
    #include "n-bit.hpp"
#include <h5cpp/io> // IO operators become aware of your custom type

namespace ei {
    // shorthand for a dynamically sized, row-major Eigen matrix with element type `Scalar`
    template <typename Scalar>
    using Matrix = Eigen::Matrix<Scalar, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
}

// Stores randomly generated bs::n_bit values with the h5::nbit filter in three ways
// (armadillo, eigen, STL) and reads them back for comparison.
// Whenever a filter is used a chunk must be set as well; contiguous layout does not work.
int main(){
    namespace bs = bitstring;

    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);
    // prints out type info, eases on debugging
    std::cout << h5::dt_t<bs::n_bit>() << std::endl;

    // random source producing integers in [0,3]
    std::random_device rd;
    std::mt19937 random_int(rd());
    std::uniform_int_distribution<> sample(0, 3);

    {
    // method 1: use C++ conversion CTOR to convert from fundamental type to your custom one
    // notice that armadillo can only hold arithmetic types, so the buffer has to be cast by force;
    // also, armadillo is column major, hence the coordinates are swapped (12x8 matrix, {8,12,1} dataset)
        arma::Mat<unsigned char> M(12,8);
        std::generate(M.begin(), M.end(), [&](){ return sample(random_int);} );

        // NOTE(review): `ds` is not used below; the write and read address the dataset through `fd` and its path
        h5::ds_t ds = h5::create<bs::n_bit>(fd, "arma",
               h5::current_dims{8,12,1}, h5::max_dims{8,12,H5S_UNLIMITED}, h5::chunk{4,3,1} | h5::nbit);
        // force conversion for zero copy: 
        h5::write<bs::n_bit>(fd,"arma", (bs::n_bit*)M.memptr(), h5::offset{0,0,0}, h5::count{8,12,1});

        // read the same selection back into a second matrix, then print what was read followed by the original
        arma::Mat<unsigned char> data(12,8);
        h5::read<bs::n_bit>(fd, "arma", (bs::n_bit*)data.memptr(), h5::offset{0,0,0}, h5::count{8,12,1});
        data.print();
        std::cout <<"\n\n";
        M.print();
    }
    {
    // method 2: eigen allows native type handling
        ei::Matrix<bs::n_bit> M(12,8);
        for(int i=0; i<12; i++) for( int j=0; j<8; j++)
            M(i,j) = static_cast<bs::n_bit>( sample(random_int));

        h5::ds_t ds = h5::create<bs::n_bit>(fd, "eigen", // chunk must be used with nbit
               h5::current_dims{12,8}, h5::max_dims{12,H5S_UNLIMITED}, h5::chunk{3,4} | h5::nbit);
        h5::write<bs::n_bit>(ds, h5::impl::data(M), h5::count{12,8});

        // read back into a second matrix; print the read data first, then the original
        ei::Matrix<bs::n_bit> data(12,8);
        h5::read(fd, "eigen", data, h5::offset{0,0});
        std::cout << data << std::endl <<std::endl << M <<std::endl;
    }
    { //method 3, use STL 
        std::vector<bs::n_bit> V(12*8);
        std::generate(V.begin(), V.end(), [&](){
            return static_cast<bs::n_bit>(sample(random_int));
        });

        h5::ds_t ds = h5::create<bs::n_bit>(fd, "stl", // chunk must be used with nbit
               h5::current_dims{12,8}, h5::max_dims{12,H5S_UNLIMITED}, h5::chunk{3,4} | h5::nbit);
        // from typed memory pointer to different shape
        h5::write<bs::n_bit>(ds, V.data(), h5::count{12,8}); // single shot write

        auto data = h5::read<std::vector<bs::n_bit>>(fd, "stl");
        // both loops index `data` with V's size -- assumes the read returned at least V.size() elements, TODO confirm
        for( int i=0; i<V.size(); i++ )
            std::cout << static_cast<unsigned int>( data[i] ) << " ";
        std::cout << "\n\ncomputing difference ||saved - read|| expecting norm to be zero:\n";
        for( int i=0; i<V.size(); i++ )
            std::cout << abs(V[i].value - data[i].value) <<" ";
    }
}

n-bit.hpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

// do your thing, usually this would be in a separate header file
namespace bitstring {
    // Single-byte wrapper that gives the C++ template mechanism a distinct type to
    // dispatch on; it adds no storage beyond the wrapped byte.
    struct n_bit {
        unsigned char value;

        n_bit() = default;
        // implicit on purpose: lets fundamental values convert to n_bit
        n_bit( unsigned char value_ ) : value(value_) {}

        // explicit conversions back to fundamental types, e.g.
        // auto v = static_cast<unsigned char>( n_bit_object );
        explicit operator unsigned char() const { return value; }
        explicit operator unsigned int() const { return static_cast<unsigned int>(value); }
    };
}

// BEGIN H5CPP SPECIFIC CUSTOM TYPE DEFINITION
namespace h5::impl::detail {
    // Specialization of the H5CPP datatype handle for bitstring::n_bit; its default
    // constructor builds the HDF5 datatype used for this custom type.
    template <> struct hid_t<bitstring::n_bit, H5Tclose,true,true, hdf5::type> : public dt_p<bitstring::n_bit> {
        using parent = dt_p<bitstring::n_bit>;  // h5cpp needs the following typedefs
        using dt_p<bitstring::n_bit>::hid_t;    // inheriting ctor
        //using parent::hid_t; --> this style of inheriting ctor will not work with clang :(
        using hidtype = bitstring::n_bit;

        // the datatype is a copy of H5T_NATIVE_UCHAR whose precision is then set to 2
        // (this type is not opaque, unlike the two_bit example)
        hid_t() : parent( H5Tcopy( H5T_NATIVE_UCHAR) ) {
            H5Tset_precision(handle, 2);
            // NOTE(review): `id` is never read afterwards; presumably kept for a side effect of the copy -- confirm before removing
            hid_t id = static_cast<hid_t>( *this );
        }
    };
}
namespace h5 {
    // textual name H5CPP associates with bitstring::n_bit
    template <>
    struct name<bitstring::n_bit> {
        static constexpr const char* value = "bitstring::n_bit";
    };
}
// END H5CPP SPECIFIC TYPE DEFINITION


// SERVICE ROUTINES
// Streams an n_bit as its numeric value.
// The stored byte is widened to unsigned int first: streaming an unsigned char
// directly would emit it as a raw character (a control character for small values),
// and the matrix overload in this header prints elements numerically as well.
// `inline` because the definition lives in a header and may be included by several
// translation units.
inline std::ostream& operator<<(std::ostream& os, const bitstring::n_bit& data){
    os << static_cast<unsigned int>(data.value);
    return os;
}

// Prints a dynamically sized, row-major Eigen matrix of n_bit values: one matrix
// row per output line, each element as its numeric value followed by a space.
// Loop indices use Eigen::Index so they have the same signed type as rows()/cols();
// `inline` because the definition lives in a header and may be included by several
// translation units.
inline std::ostream& operator<<(std::ostream& os, const
     Eigen::Matrix<bitstring::n_bit, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>& M){
    for( Eigen::Index i=0; i<M.rows(); i++){
        for( Eigen::Index j=0; j<M.cols(); j++) os << static_cast<unsigned int>( M(i,j)) <<" ";
        os<<std::endl;
    }
    return os;
}

two-bit.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <cstdint>
#include <iostream>
#include <limits>
#include <random>
#include <h5cpp/core> // include this before custom type definition
    #include "two-bit.hpp"
#include <h5cpp/io> // IO operators become aware of your custom type

int main(){
    namespace nm = bitstring;

    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);
    // prints out type info, eases on debugging
    std::cout << h5::dt_t<nm::two_bit>() << std::endl;

    std::vector<nm::two_bit> vec = {0xff,0x0f,0xf0,0x00,0b0001'1011};

    /* H5CPP operators are aware of your dataype, will do the right thing
     */
    h5::write(fd,"data", vec); // single shot write
    auto data = h5::read<std::vector<nm::two_bit>>(fd, "data");

    for( int i=0; i<vec.size(); i++ )
        std::cout << "[" << i << ": " << vec[i] << " "  <<"]";
    std::cout << "\n\ncomputing difference ||saved - read|| expecting norm to be zero:\n";
    for( int i=0; i<vec.size(); i++ )
        std::cout << abs(vec[i].value - data[i].value) <<" ";
}

two-bit.hpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
// do your thing, usually this would be in a separate header file
namespace bitstring {
    // One byte treated as a block of four 2-bit fields. The wrapper only gives the
    // C++ template mechanism a distinct type; efficient two-bit arithmetic and any
    // in-memory storage scheme are left to the user.
    struct two_bit {
        unsigned char value;

        two_bit() = default;
        two_bit( unsigned char value_ ) : value(value_) {}

        // Returns the 2-bit field number `idx_`, where 0 is the least significant pair.
        // Throws std::runtime_error("out of bound") for any index outside 0..3.
        unsigned operator[]( int idx_ ) const {
            if( idx_ < 0 || idx_ > 3 )
                throw std::runtime_error("out of bound");
            return (value >> (2 * idx_)) & 0b11u;
        }
    };
}

// BEGIN H5CPP SPECIFIC CUSTOM TYPE DEFINITION
namespace h5::impl::detail {
    // Specialization of the H5CPP datatype handle for bitstring::two_bit; its default
    // constructor builds the HDF5 datatype used for this custom type.
    template <> struct hid_t<bitstring::two_bit, H5Tclose,true,true, hdf5::type> : public dt_p<bitstring::two_bit> {
        using parent = dt_p<bitstring::two_bit>;  // h5cpp needs the following typedefs
        using dt_p<bitstring::two_bit>::hid_t; // inheriting ctor
        //using parent::hid_t; --> this style of inheriting ctor will not work with clang :(
        using hidtype = bitstring::two_bit;

        // an opaque type does not care about byte order; since a single byte is used
        // here, byte order is not relevant anyway
        hid_t() : parent( H5Tcreate( H5T_OPAQUE, 1) ) { // 1 == single byte, i would pack it into 64 bit though
            // tag the opaque type with the C++ type name
            H5Tset_tag(handle, "bitstring::two_bit");
            // NOTE(review): `id` is never read afterwards; presumably kept for a side effect of the copy -- confirm before removing
            hid_t id = static_cast<hid_t>( *this );
        }
    };
}
namespace h5 {
    // textual name H5CPP associates with bitstring::two_bit
    template <>
    struct name<bitstring::two_bit> {
        static constexpr const char* value = "bitstring::two_bit";
    };
}
// END H5CPP SPECIFIC TYPE DEFINITION


// SERVICE ROUTINES
// Streams the four 2-bit fields of a two_bit, most significant pair first,
// separated by single spaces.
// `inline` because the definition lives in a header and may be included by several
// translation units.
inline std::ostream& operator<<(std::ostream& os, const bitstring::two_bit& val){
    os << val[3] << " " << val[2] << " " << val[1] <<" " << val[0];
    return os;
}

README.md

1
2
3
# Custom datatype support

The examples outline how to interact with H5CPP and inherently HDF5 type system.

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

# compiler flags, link libraries and include search path (including eigen3) used by the rules below
CXXFLAGS =  -std=c++17
LIBS =  -lhdf5  -lz -ldl -lm 
INCLUDES = -I/usr/local/include -I/usr/include -I/usr/include/eigen3 -I./
# first (default) target: build both examples
# NOTE(review): only ./n-bit is run; two-bit is built but not executed -- confirm this is intended
test: two-bit n-bit
    ./n-bit

# compile and link the two-bit example
two-bit.o : two-bit.cpp 
    $(CXX) $(INCLUDES) -o two-bit.o  $(CXXFLAGS) -c two-bit.cpp

two-bit: two-bit.o
    $(CXX) $^ $(LIBS) -o $@ 

# compile and link the n-bit example
n-bit.o : n-bit.cpp 
    $(CXX) $(INCLUDES) -o n-bit.o  $(CXXFLAGS) -c n-bit.cpp

n-bit: n-bit.o
    $(CXX) $^ $(LIBS) -o $@ 

# remove build products and produced HDF5 files
clean:
    @$(RM) *.o *.h5  n-bit two-bit

dist-clean: clean
# NOTE(review): only `test` is declared phony; `clean` and `dist-clean` are not
.PHONY: test

groups

groups.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <h5cpp/all>
#include <cstddef>


// Demonstrates group handling in "001.h5": creating groups (with intermediate
// path components), catching group-creation exceptions, and attaching several
// attributes to a group with a single call.
int main() {

    auto fd = h5::create("001.h5", H5F_ACC_TRUNC);
    {   // create intermediate groups
        // NOTICE: const static h5::lcpl_t lcpl = h5::char_encoding{H5T_CSET_UTF8} | h5::create_intermediate_group{1};
        // the default h5::default_lcpl will create the intermediate path and the encoding is set to utf8
        h5::gr_t gr = h5::gcreate(fd, "my-group/sub/path");

        h5::lcpl_t lcpl = h5::create_intermediate_group{1};
        h5::gcreate(fd, "/mygroup", lcpl); // passing lcpl type explicitly
    }
    { // exceptions
        h5::mute(); // mute CAPI error handler
        // catching the specific exception
        try { // this group already exists, will throw `h5::error::io::group::create` exception
            h5::gcreate(fd, "/mygroup", h5::dont_create_path);
        }catch (  const h5::error::io::group::create& e){
            std::cout << e.what() <<"\n";
        }

        // catching all exceptions through the umbrella type `h5::error::any`
        try { // this group already exists, will throw `h5::error::io::group::create` exception
            h5::gcreate(fd, "/mygroup", h5::dont_create_path);
        }catch (  const h5::error::any& e){
            std::cout << e.what() <<"\n";
        }
        h5::unmute(); // re-enable CAPI error handling
    }
    { // opening a group and adding attributes
        auto gr = h5::gopen(fd, "/mygroup");
        std::initializer_list list = {1,2,3,4,5};
        // the tuple alternates attribute name and attribute value
        h5::awrite(gr, std::make_tuple(
            "temperature", 42.0,
            "unit", "C",
            "vector of ints", std::vector<int>({1,2,3,4,5}),
            "initializer list", list,
            "strings", std::initializer_list({"first", "second", "third","..."})
        ));
    }
}

README.md

## Groups[^1]

An HDF5 group is a structure containing zero or more HDF5 objects. A group has two parts:

1. A group header, which contains a group name and a list of group attributes.
2. A group symbol table, which is a list of the HDF5 objects that belong to the group.

```cpp
parent ::= fd_t | gr_t;

[open]
h5::gr_t gopen(parent, const std::string& path [, const h5::gapl_t& gapl]);

[create]
h5::gr_t  gcreate(const L& parent, const std::string& path
    [, h5::lcpl_t lcpl] [, h5::gcpl_t gcpl] [, h5::gapl_t gapl]);

Example: adding attributes to a group

auto gr = h5::gopen(fd, "/mygroup");
std::initializer_list list = {1,2,3,4,5};
h5::awrite(gr, std::make_tuple(
    "temperature", 42.0,
    "unit", "C",
    "vector of ints", std::vector<int>({1,2,3,4,5}),
    "initializer list", list,
    "strings", std::initializer_list({"first", "second", "third","..."})
));

Examples:

The examples are to demonstrate how to use HDF5 groups as well as how to add attributes to them

  • creating groups with an h5::fd_t file handle and h5::gr_t group handle as parents
  • error handling:
    • catching specific h5::error::io::group::create
    • using umbrella exception h5::error::any
  • how to open a group and add complex set of attributes with a single call
**Makefile**
```make linenums="1"
#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
apps = groups

CXXFLAGS =  -std=c++17 -Wno-deprecated
LIBS =  -lhdf5  -lz -ldl -lm

# first (default) target: build the example, then run it
test: $(apps)
    @./groups

# explicit compile rule; INCLUDES and SRC_DIR are not set in this file and may be
# supplied by the caller. The stray '-' that used to precede $(INCLUDES) was removed:
# it expanded to a bare '-' (or '--I...') argument whenever this rule was used.
# NOTE(review): with SRC_DIR unset the prerequisite pattern is '/%.cpp' -- confirm
# whether '%.cpp' was intended.
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $^

all: $(apps)

# link the executable from its object file
groups: groups.o
    $(CXX) $^ $(LIBS) -o $@

# remove build products and produced HDF5/profile files
clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean
# none of these targets produce a file of the same name
.PHONY: test all clean dist-clean
kita

kita.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#define H5CPP_HAVE_KITA

#include <mpi.h>
#include <h5cpp/all>
#include <chrono>
#include <vector>
#include <algorithm>

#pragma GCC diagnostic ignored "-Wnarrowing"
// armadillo
//./configure -DCMAKE_INSTALL_PREFIX=/usr/local -DDETECT_HDF5=OFF
// Times one write and one read per MPI rank and prints, on rank 0, the sum of the
// per-rank throughputs in MB/s.
// The write phase opens its container with the h5::kita file access property list.
// NOTE(review): the read phase opens "collective.h5" / "/dataset" through MPI-IO, not the
// container written in the first phase -- this looks carried over from the MPI throughput
// example; confirm which file is meant to be read.
int main(int argc, char** argv) {
    int size, rank, name_len;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(NULL, NULL);
    MPI_Info info;
    MPI_Comm comm  = MPI_COMM_WORLD;

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &name_len);

    MPI_Info_create(&info);
    int nchunk = 1024*1024;
    int nrows  = 10*nchunk; // 10 Mi doubles: roughly 80MB per rank with 8-byte doubles

    // every rank writes into its own dataset: dataset_<rank>
    char data_path[200];
    snprintf(data_path, sizeof(data_path), "dataset_%i", rank); // bounded, unlike sprintf

//static_assert(false, "please update your credentials in `.hsds` matching with your kita lab settings... ");
RVinit();

    { // CREATE - WRITE
        std::vector<double> v(nrows);
        std::fill(std::begin(v), std::end(v), rank + 2 );
        size_t vsize =v.size() * sizeof(double);
        h5::fapl_t fapl = h5::kita{};
        //static_assert(false, "please update PATH to your KITA account PATH");
        // only rank 0 creates (truncates) the container; the handle is closed at the end of the block
        if( rank == 0 ){
            auto fd = h5::create("/home/steven/kita-io-test.h5", H5F_ACC_TRUNC,
                h5::fcpl, fapl );
        }
        // all ranks wait until the container exists before opening it
        MPI_Barrier(MPI_COMM_WORLD);
        auto fd = h5::open("/home/steven/kita-io-test.h5",H5F_ACC_RDWR, fapl);
        h5::ds_t ds = h5::create<double>(fd,data_path,
                h5::max_dims{size,nrows}, h5::chunk{1,nchunk} | h5::alloc_time_early );

        // ACTUAL WRITE MEASUREMENT
        std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
            h5::write( ds, v, h5::current_dims{nrows,size},
                h5::offset{rank,0}, h5::count{1,nrows} );
        std::chrono::system_clock::time_point stop = std::chrono::system_clock::now();
        // elapsed time in seconds, throughput in MB/s (1 MB = 1e6 bytes)
        double running_time = 1e-6 * std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
        double MB_sec =  (vsize / 1e6) / running_time;

        // COLLECTING RESULTS: 
        std::vector<double> throughput(size);
        MPI_Gather(&MB_sec, 1, MPI_DOUBLE, throughput.data(), 1, MPI_DOUBLE, 0, comm);
        if( rank == 0)
            std::cout << "\nWRITE: " <<
                // the seed must be a double: an int seed (0) would truncate every partial sum
                std::accumulate(throughput.begin(), throughput.end(), 0.0) <<" MB/s" <<std::endl;

    }

    { // READ
        std::vector<double> v(nrows);
        size_t vsize =v.size() * sizeof(double);
        auto fd = h5::open("collective.h5", H5F_ACC_RDWR,  h5::mpiio({MPI_COMM_WORLD, info}));
        auto ds = h5::open(fd, "/dataset");

        // make a copy of dxpl, so we can query if collective IO was successful
        h5::dxpl_t dxpl = h5::collective;

        // ACTUAL READ MEASUREMENT
        std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
            h5::read(ds, v.data(),  h5::offset{rank,0}, h5::count{1,nrows}, dxpl);
        std::chrono::system_clock::time_point stop = std::chrono::system_clock::now();

        double running_time = 1e-6 * std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
        double MB_sec =  (vsize / 1e6) / running_time;

        // COLLECTING RESULTS: 
        std::vector<double> throughput(size);
        MPI_Gather(&MB_sec, 1, MPI_DOUBLE, throughput.data(), 1, MPI_DOUBLE, 0, comm);
        if( rank == 0)
            std::cout << "\nREAD: " <<
                std::accumulate(throughput.begin(), throughput.end(), 0.0) <<" MB/s" <<std::endl;
        // query collective io state:
        //std::cout << dxpl <<"\n";
    }
RVterm();

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Info_free(&info);
    MPI_Finalize();
}

README.md

## MPI examples

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
# Programs built by this Makefile; compiled with the MPI C++ compiler wrapper.
apps = kita
CXX=mpic++
CXXFLAGS =  -std=c++17 -Wno-deprecated
# Link line: restvol, curl and yajl in addition to the HDF5 libraries.
LIBS = -lrestvol -lcurl -lyajl -lhdf5  -lz -ldl -lm

# Build the example, then run it directly (not through an MPI launcher).
test: $(apps)
    @./kita

# Compile an object file from a source located under $(SRC_DIR).
# NOTE(review): neither SRC_DIR nor INCLUDES is defined in this Makefile --
# presumably they come from the environment or the command line; confirm.
%.o : $(SRC_DIR)/%.cpp 
    $(CXX)   -$(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $^

all: $(apps)

# Link the example from its object file.
kita: kita.o
    $(CXX) $^ $(LIBS) -o $@

# Remove objects, profiler output, generated HDF5 files and the binaries.
clean:
    @$(RM) *.o *.prof *.h5 $(apps)

# dist-clean currently does nothing beyond clean.
dist-clean: clean

# Only `test` is declared phony here.
.PHONY: test

linalg

arma.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <h5cpp/all>


int main(){
    { // CREATE - WRITE
        arma::mat M(2,3); M.ones();                         // create a matrix
        h5::fd_t fd = h5::create("arma.h5",H5F_ACC_TRUNC);  // and a file
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }
    {
        arma::vec V( {1.,2.,3.,4.,5.,6.,7.,8.});    // create a vector
        // simple one shot write that computes current dimensions and saves matrix
        h5::write( "arma.h5", "one shot create write",  V);
        // what if you want to position a matrix inside a higher dimension with some added complexity?  
        h5::write( "arma.h5", "arma vec inside matrix",  V // object contains 'count' and rank being written
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}            // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)  
            ,h5::count{1,1}
            ,h5::stride{3,5}
            ,h5::block{2,4}
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension 
                             // is used as unit chunk
        );
    }
    { // CREATE - READ: we're reading back the dataset created in the very first step
      // note that data is only data, can be reshaped, cast to any format and content be modified through filtering 
        auto fd = h5::open("arma.h5", H5F_ACC_RDWR,           // you can have multiple fd open with H5F_ACC_RDONLY, but single write
                h5::fclose_degree_strong | h5::sec2);          // and able to set various properties  
        h5::ds_t ds = h5::create<float>(fd,"dataset", h5::current_dims{3,2}, h5::fill_value<float>(NAN));  // create dataset, default to NaN-s
        auto M  = h5::read<arma::mat>( fd,"dataset" );                 // read data back as matrix
        M.print();
    }
    { // READ: 
        arma::mat M = h5::read<arma::mat>("arma.h5","create then write"); // read entire dataset back with a single read
        M.print();
    }
}

blaze.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <iostream>
#include <blaze/Math.h>
#include <h5cpp/all>

using namespace std;
// 

template<class T> using Matrix = blaze::DynamicMatrix<T,blaze::rowMajor>;
template<class T> using Colvec = blaze::DynamicVector<T,blaze::columnVector>;
/* only DYNAMIC [MATRIX|VECTOR] are supported */

int main(){
    { // CREATE - WRITE
        Matrix<short> M(2,3);                                       // create a matrix
        h5::fd_t fd = h5::create("linalg.h5",H5F_ACC_TRUNC);    // and a file
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }

    {
        Colvec<float> V( {1.,2.,3.,4.,5.,6.,7.,8.});                              // create a vector
        // simple one shot write that computes current dimensions and saves matrix
        h5::write( "linalg.h5", "one shot create write",  V);
        // what if you want to position a matrix inside a higher dimension with some added complexity?  
        h5::write( "linalg.h5", "vector inside matrix",  V // object contains 'count' and rank being written
            ,h5::count{1,1}           // encodes rank and shape: 2x4 block
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}          // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)  
            ,h5::stride{3,5}          //
            ,h5::block{2,4}           // 
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension 
                             // is used as unit chunk
        );
    }
    { // CREATE - READ: we're reading back the dataset created in the very first step
      // note that data is only data, can be reshaped, cast to any format and content be modified through filtering 
        auto fd = h5::open("linalg.h5", H5F_ACC_RDWR,           // you can have multiple fd open with H5F_ACC_RDONLY, but single write
                h5::fclose_degree_strong | h5::sec2);          // and able to set various properties  
    }
    { // READ: 
        Matrix<short> M = h5::read<Matrix<short>>("linalg.h5","create then write"); // read entire dataset back with a single read
    }
}

blitz.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <iostream>
#include <blitz/array.h>
#include <h5cpp/all>

using namespace std;
// 

template<class T> using Colvec = blitz::Array<T,1>;
template<class T> using Matrix = blitz::Array<T,2>;
template<class T> using Cube   = blitz::Array<T,3>;

int main(){
    { // CREATE - WRITE
        Matrix<short> M(2,3);                                       // create a matrix
        h5::fd_t fd = h5::create("linalg.h5",H5F_ACC_TRUNC);    // and a file
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }

    {
        Colvec<float> V(4);                               // create a vector
        // simple one shot write that computes current dimensions and saves matrix
        h5::write( "linalg.h5", "one shot create write",  V);
        // what if you want to position a matrix inside a higher dimension with some added complexity?  
        h5::write( "linalg.h5", "vector inside matrix",  V // object contains 'count' and rank being written
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}            // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)  
            ,h5::stride{4,4}, h5::block{3,3}
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension 
                             // is used as unit chunk
        );
    }
    { // CREATE - READ: we're reading back the dataset created in the very first step
      // note that data is only data, can be reshaped, cast to any format and content be modified through filtering 
        auto fd = h5::open("linalg.h5", H5F_ACC_RDWR,           // you can have multiple fd open with H5F_ACC_RDONLY, but single write
                h5::fclose_degree_strong | h5::sec2);          // and able to set various properties  
    }
    { // READ: 
        Matrix<short> M = h5::read<Matrix<short>>("linalg.h5","create then write"); // read entire dataset back with a single read
    }
}

dlib.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <iostream>
#include <dlib/matrix.h>
#include <h5cpp/all>

using namespace std;

template<class T> using Matrix = dlib::matrix<T>;
/* DLIB fails from stock install 2018 july 11
* /usr/include/dlib/matrix/matrix.h:1608:38: error: ISO C++17 does not allow dynamic exception specifications
*/
int main(){
    { // CREATE - WRITE
        Matrix<short> M(2,3);                                       // create a matrix
        h5::fd_t fd = h5::create("linalg.h5",H5F_ACC_TRUNC);    // and a file
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }

    { // CREATE - READ: we're reading back the dataset created in the very first step
      // note that data is only data, can be reshaped, cast to any format and content be modified through filtering 
        auto fd = h5::open("linalg.h5", H5F_ACC_RDWR,           // you can have multiple fd open with H5F_ACC_RDONLY, but single write
                h5::fclose_degree_strong | h5::sec2);          // and able to set various properties  
    }
    { // READ: 
        Matrix<short> M = h5::read<Matrix<short>>("linalg.h5","create then write"); // read entire dataset back with a single read
    }
}

eigen3.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <iostream>
#include <Eigen/Dense> // must include Eigen before <h5cpp/core>
#include <h5cpp/all>

using namespace std;
// EIGEN3 templates are unusually complex, let's use our template definitions
template<class T> using Matrix   = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::ColMajor>;
template<class T> using Colvec   = Eigen::Matrix<T, Eigen::Dynamic, 1, Eigen::ColMajor>;

template <class T> using ArrayX1D = Eigen::Array<T, Eigen::Dynamic, 1, Eigen::ColMajor>;
template <class T> using ArrayX3D = Eigen::Array<T, Eigen::Dynamic, 3, Eigen::RowMajor>;



// only Eigen::Dynamic [ARRAY|MATRIX|VECTOR] types are supported
// in other words, Eigen::Matrix<T,S,S> with S a compile-time size will not work; copy such statically allocated objects into a Dynamic (heap memory) structure first

// Eigen3 example: create/write, one-shot writes, reading back, and writing Eigen::Array
// objects to "linalg.h5".
int main(){
    { // CREATE - WRITE
        Matrix<short> M(2,3);                                       // create a matrix
        h5::fd_t fd = h5::create("linalg.h5",H5F_ACC_TRUNC);    // and a file
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        // partial write: selection starts at {2,2} and uses stride {1,3}
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }

    {
        Colvec<float> V(8);                               // create a vector with 8 elements
        // simple one shot write that computes current dimensions and saves the vector
        h5::write( "linalg.h5", "one shot create write",  V);
        // what if you want to position a vector inside a higher dimension with some added complexity?
        h5::write( "linalg.h5", "arma vec inside matrix",  V // object contains 'count' and rank being written
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}            // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)
            // count * block must select exactly as many elements as V holds: 1*2 x 1*4 = 8
            // (block{3,3} would select 9 elements from an 8-element buffer)
            ,h5::count{1,1}, h5::stride{3,5}, h5::block{2,4}
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension
                             // is used as unit chunk
        );
    }

    { // OPEN: reopen the container read-write with explicit file access properties
        auto fd = h5::open("linalg.h5", H5F_ACC_RDWR,           // you can have multiple fd open with H5F_ACC_RDONLY, but single write
                h5::fclose_degree_strong | h5::sec2);          // and able to set various properties
    }
    { // READ:
        Matrix<short> M = h5::read<Matrix<short>>("linalg.h5","create then write"); // read entire dataset back with a single read
        std::cout << M << std::endl;
    }

    { // Eigen::Array objects: fully dynamic, and dynamic rows with a compile time column count
        auto fd = h5::open("linalg.h5", H5F_ACC_RDWR);
        ArrayX1D<double> x1d = ArrayX1D<double>::Zero(10);
        h5::write(fd, "/x1d",  x1d);                        // no trailing space in the dataset name
        ArrayX3D<float> x3d = ArrayX3D<float>::Zero(10,3); // second dimension (3 columns) is fixed at compile time
        h5::write(fd, "/x3d",  x3d);
    }
}

itpp.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <iostream>
#include <itpp/itbase.h>
#include <h5cpp/all>

using namespace std;
// 
template<class T> using Matrix = itpp::Mat<T>;
template<class T> using Colvec = itpp::Vec<T>;

int main(){
    { // CREATE - WRITE
        Matrix<short> M(2,3);                                       // create a matrix
        h5::fd_t fd = h5::create("linalg.h5",H5F_ACC_TRUNC);    // and a file
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }

    {
        Colvec<float> V(8);                               // create a vector
        // simple one shot write that computes current dimensions and saves matrix
        h5::write( "linalg.h5", "one shot create write",  V);
        // what if you want to position a matrix inside a higher dimension with some added complexity?  
        h5::write( "linalg.h5", "arma vec inside matrix",  V // object contains 'count' and rank being written
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}            // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)  
            ,h5::count{1,1}, h5::stride{3,5}, h5::block{2,4}
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension 
                             // is used as unit chunk
        );
    }
    { // CREATE - READ: we're reading back the dataset created in the very first step
      // note that data is only data, can be reshaped, cast to any format and content be modified through filtering 
        auto fd = h5::open("linalg.h5", H5F_ACC_RDWR,           // you can have multiple fd open with H5F_ACC_RDONLY, but single write
                h5::fclose_degree_strong | h5::sec2);          // and able to set various properties  
    }
    { // READ: 
        Matrix<short> M = h5::read<Matrix<short>>("linalg.h5","create then write"); // read entire dataset back with a single read
    }
}

ublas.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <iostream>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/vector.hpp>
#include <h5cpp/all>

#include <boost/numeric/ublas/io.hpp>

using namespace std;
// 
template<class T> using Matrix = boost::numeric::ublas::matrix<T>;
template<class T> using Colvec = boost::numeric::ublas::vector<T>;


int main(){
    { // CREATE - WRITE
        Matrix<short> M(2,3);                                       // create a matrix
        h5::fd_t fd = h5::create("linalg.h5",H5F_ACC_TRUNC);    // and a file
        h5::ds_t ds = h5::create<short>(fd,"create then write"
                ,h5::current_dims{10,20}
                ,h5::max_dims{10,H5S_UNLIMITED}
                ,h5::chunk{2,3} | h5::fill_value<short>{3} |  h5::gzip{9}
        );
        h5::write( ds,  M, h5::offset{2,2}, h5::stride{1,3}  );
    }

    {
        Colvec<float> V(8);                               // create a vector
        // simple one shot write that computes current dimensions and saves matrix
        h5::write( "linalg.h5", "one shot create write",  V);
        // what if you want to position a matrix inside a higher dimension with some added complexity?  
        h5::write( "linalg.h5", "vector inside matrix",  V // object contains 'count' and rank being written
            ,h5::current_dims{40,50}  // control file_space directly where you want to place vector
            ,h5::offset{5,0}            // when no explicit current dimension given current dimension := offset .+ object_dim .* stride (hadamard product)  
            ,h5::count{1,1}, h5::stride{3,5}, h5::block{2,4}
            ,h5::max_dims{40,H5S_UNLIMITED}  // wouldn't it be nice to have unlimited dimension? if no explicit chunk is set, then the object dimension 
                             // is used as unit chunk
        );
    }
    { // CREATE - READ: we're reading back the dataset created in the very first step
      // note that data is only data, can be reshaped, cast to any format and content be modified through filtering 
        auto fd = h5::open("linalg.h5", H5F_ACC_RDWR,           // you can have multiple fd open with H5F_ACC_RDONLY, but single write
                h5::fclose_degree_strong | h5::sec2);          // and able to set various properties  
    }
    { // READ: 
        Matrix<short> M = h5::read<Matrix<short>>("linalg.h5","create then write"); // read entire dataset back with a single read
    }
}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
#
# Programs built by this Makefile, one per linear algebra library.
apps = arma eigen3 blaze itpp ublas dlib

CXXFLAGS =  -std=c++17 -Wno-deprecated  -Wall
LIBS =  -lhdf5 -lz -ldl -lm
# Location of the Eigen3 headers, used by the eigen3 rule below.
INCLUDES=/usr/include/eigen3

# Build every example, then run them one after the other, stopping at the first failure.
# A bare $(foreach ...) expands to ONE command line, which would run only the first
# program and pass the remaining names to it as arguments.
test: $(apps)
    for app in $(apps); do ./$$app || exit 1; done

all: $(apps)

arma: arma.cpp
    $(CXX) $^ $(LIBS) $(CXXFLAGS)  -o $@

blaze: blaze.cpp
    $(CXX) $^ $(LIBS) $(CXXFLAGS)  -o $@

itpp: itpp.cpp
    $(CXX) $^ $(LIBS) $(CXXFLAGS)  -litpp -o $@

eigen3: eigen3.cpp
    $(CXX) $(CXXFLAGS)  -I$(INCLUDES) $^ $(LIBS)  -o $@

ublas: ublas.cpp
    $(CXX) $(CXXFLAGS) $^ $(LIBS)  -o $@

dlib: dlib.cpp
    $(CXX) $(CXXFLAGS)  $^ $(LIBS) -ldlib  -o $@

# Remove objects, profiler output, generated HDF5 files and the binaries.
clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean
# None of these targets names a file to be produced.
.PHONY: test all clean dist-clean

mpi

collective.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <mpi.h>
#include <h5cpp/all>
#include <chrono>
#include <vector>
#include <algorithm>

#pragma GCC diagnostic ignored "-Wnarrowing"
// armadillo
//./configure -DCMAKE_INSTALL_PREFIX=/usr/local -DDETECT_HDF5=OFF
// Collective MPI-IO example: every rank writes one column of a shared dataset in
// "collective.h5", then reads its own column back twice (one-shot and into a buffer).
int main(int argc, char** argv) {

    // usual MPI boilerplate
    int nprocs, rank, name_len;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(NULL, NULL);
    MPI_Info info  = MPI_INFO_NULL;
    MPI_Comm comm  = MPI_COMM_WORLD;

    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &name_len);

    int nrows = 10;
    { // CREATE - WRITE
        // each rank contributes `nrows` values, all equal to rank + 2
        std::vector<double> column(nrows);
        std::fill(std::begin(column), std::end(column), rank + 2 );
        // create the container with the MPI-IO driver: communicator and hints are passed as usual
        auto file = h5::create("collective.h5", H5F_ACC_TRUNC,
                h5::fcpl,
                h5::mpiio({MPI_COMM_WORLD, info})
        );
        // A single write request; property lists, chunk and size descriptors may be
        // interchanged and/or omitted. The data transfer property `h5::collective`
        // requests collective IO.
        h5::write( file, "dataset", column,
                h5::chunk{nrows,1}, h5::current_dims{nrows,nprocs}, h5::offset{0,rank}, h5::count{nrows,1},
                h5::collective );
        // handles are closed when leaving the code block
    }

    { // READ
        // open the container with MPI-IO enabled
        auto file = h5::open("collective.h5", H5F_ACC_RDWR,  h5::mpiio({MPI_COMM_WORLD, info}));
        // single shot read: the result object is allocated inside `read`;
        // convenient, but suboptimal inside loops
        auto received = h5::read<std::vector<double>>(file, "dataset", h5::offset{0,rank}, h5::count{nrows,1}, h5::collective);
        std::cout << "rank: " << rank <<" data: ";
        for( double value : received )
            std::cout << value << " ";
        std::cout <<" ";

        // for high performance loops pre-allocate the buffer instead; see the documentation
        // for the supported linear algebra, STL and raw memory objects
        std::vector<double> scratch(nrows);

        // open the dataset once, outside of any loop
        auto dataset = h5::open(file, "dataset");
        // read straight into the pre-allocated memory
        h5::read(dataset, scratch.data(),  h5::offset{0,rank}, h5::count{nrows,1}, h5::collective);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
}

independent.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <mpi.h>
#include <h5cpp/all>
#include <chrono>
#include <vector>
#include <algorithm>

#pragma GCC diagnostic ignored "-Wnarrowing"
// armadillo
//./configure -DCMAKE_INSTALL_PREFIX=/usr/local -DDETECT_HDF5=OFF
// Independent MPI-IO example: every rank writes one column of a shared dataset in
// "collective.h5" and reads its own column back, using the `h5::independent` data
// transfer property for every transfer.
int main(int argc, char** argv) {

    // usual boilerplate
    int size, rank, name_len;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(NULL, NULL);
    MPI_Info info  = MPI_INFO_NULL;
    MPI_Comm comm  = MPI_COMM_WORLD;

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &name_len);

    int nrows = 10;
    { // CREATE - WRITE
        std::vector<double> v(nrows);
        std::fill(std::begin(v), std::end(v), rank + 2 );
        // open file with MPIIO
        auto fd = h5::create("collective.h5", H5F_ACC_TRUNC,
                h5::fcpl, 
                h5::mpiio({MPI_COMM_WORLD, info}) // pass the communicator and hints as usual
        );
        // single write request is expanded to chunked write at compile time, setting up 
        // required arguments. 
        // Passed property lists, chunk and size descriptors may be interchanged, and or omitted
        h5::write( fd, "dataset", v,
                h5::chunk{nrows,1}, h5::current_dims{nrows,size}, h5::offset{0,rank}, h5::count{nrows,1},
                h5::independent ); // this requests `independent IO` for the transfer
        // RAII will close all descriptors when leaving code block
    }

    { // READ
        // open container with MPIIO enabled
        auto fd = h5::open("collective.h5", H5F_ACC_RDWR,  h5::mpiio({MPI_COMM_WORLD, info}));
        // this is a single shot read, all memory reservations are inside the `read` operator, convenient
        // but suboptimal for loops. 
        // uses h5::independent like the write above and the buffered read below
        auto data = h5::read<std::vector<double>>(fd, "dataset", h5::offset{0,rank}, h5::count{nrows,1}, h5::independent);
        std::cout << "rank: " << rank <<" data: ";
        for( auto v : data)
            std::cout << v << " ";
        std::cout <<" ";

        // for high performance loops constructs please use:
        std::vector<double> buffer(nrows); // pre-allocate buffer, see documentation for variety of
        // linear algebra, the STL or raw memory objects

        // make sure to open dataset outside of the loop
        auto ds = h5::open(fd, "dataset");
        // this is as efficient as it gets
        h5::read(ds, buffer.data(),  h5::offset{0,rank}, h5::count{nrows,1}, h5::independent);
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
}

throughput.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <mpi.h>
#include <h5cpp/all>
#include <chrono>
#include <vector>
#include <algorithm>

#pragma GCC diagnostic ignored "-Wnarrowing"
// armadillo
//./configure -DCMAKE_INSTALL_PREFIX=/usr/local -DDETECT_HDF5=OFF
// Times one collective write and one collective read per MPI rank against
// "collective.h5" and prints, on rank 0, the sum of the per-rank throughputs in MB/s.
int main(int argc, char** argv) {
    int size, rank, name_len;
    char processor_name[MPI_MAX_PROCESSOR_NAME];
    MPI_Init(NULL, NULL);
    MPI_Info info;
    MPI_Comm comm  = MPI_COMM_WORLD;

    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Get_processor_name(processor_name, &name_len);

    MPI_Info_create(&info);
    //MPI_Info_set(info, "fs_pvfs2_stripe_size", "6");
    //MPI_Info_set(info, "fs_pvfs2_stripe_width", "1024");


    int nchunk = 1024*1024;
    int nrows  = 10*nchunk; // 10 Mi doubles: roughly 80MB per rank with 8-byte doubles

    { // CREATE - WRITE
        std::vector<double> v(nrows);
        std::fill(std::begin(v), std::end(v), rank + 2 );
        size_t vsize =v.size() * sizeof(double);

        auto fd = h5::create("collective.h5", H5F_ACC_TRUNC,
                h5::fcpl, 
                h5::mpiio({MPI_COMM_WORLD, info}) // pass the communicator and hints as usual
        );
        h5::ds_t ds = h5::create<double>(fd,"dataset"
                ,h5::max_dims{size,nrows}, h5::chunk{1,nchunk} | h5::alloc_time_early );

        // make a copy of dxpl, so we can query if collective IO was successful
        h5::dxpl_t dxpl = h5::collective;

        // ACTUAL WRITE MEASUREMENT
        std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
            h5::write( ds, v, h5::current_dims{nrows,size},
                h5::offset{rank,0}, h5::count{1,nrows}, dxpl );
        std::chrono::system_clock::time_point stop = std::chrono::system_clock::now();
        // elapsed time in seconds, throughput in MB/s (1 MB = 1e6 bytes)
        double running_time = 1e-6 * std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
        double MB_sec =  (vsize / 1e6) / running_time;

        // COLLECTING RESULTS: 
        std::vector<double> throughput(size);
        MPI_Gather(&MB_sec, 1, MPI_DOUBLE, throughput.data(), 1, MPI_DOUBLE, 0, comm);
        if( rank == 0)
            std::cout << "\nWRITE: " <<
                // the seed must be a double: an int seed (0) would truncate every partial sum
                std::accumulate(throughput.begin(), throughput.end(), 0.0) <<" MB/s" <<std::endl;

        // query collective io state:
        //std::cout << dxpl <<"\n";
    }
    { // READ
        std::vector<double> v(nrows);
        size_t vsize =v.size() * sizeof(double);
        auto fd = h5::open("collective.h5", H5F_ACC_RDWR,  h5::mpiio({MPI_COMM_WORLD, info}));
        auto ds = h5::open(fd, "/dataset");

        // make a copy of dxpl, so we can query if collective IO was successful
        h5::dxpl_t dxpl = h5::collective;

        // ACTUAL READ MEASUREMENT
        std::chrono::system_clock::time_point start = std::chrono::system_clock::now();
            h5::read(ds, v.data(),  h5::offset{rank,0}, h5::count{1,nrows}, dxpl);
        std::chrono::system_clock::time_point stop = std::chrono::system_clock::now();

        double running_time = 1e-6 * std::chrono::duration_cast<std::chrono::microseconds>(stop - start).count();
        double MB_sec =  (vsize / 1e6) / running_time;

        // COLLECTING RESULTS: 
        std::vector<double> throughput(size);
        MPI_Gather(&MB_sec, 1, MPI_DOUBLE, throughput.data(), 1, MPI_DOUBLE, 0, comm);
        if( rank == 0)
            std::cout << "\nREAD: " <<
                std::accumulate(throughput.begin(), throughput.end(), 0.0) <<" MB/s" <<std::endl;
        // query collective io state:
        //std::cout << dxpl <<"\n";
    }

    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Info_free(&info);
    MPI_Finalize();
}

README.md

## MPI examples

multi-tu

main.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <cstdint>
// this TU only needs to know about h5::fd_t, so no `sandwiching` of a
// generated header between <h5cpp/core> and <h5cpp/io> is needed
#include <h5cpp/all>



// Forward declarations of the routines defined in the other translation units
// (test_01/test_02 in tu-01.cpp, test_03/test_04 in tu-02.cpp); in a real
// project these would preferably be factored out into a shared header.

void test_01( const h5::fd_t& fd );
void test_02( const h5::fd_t& fd );
void test_03( const h5::fd_t& fd );
void test_04( const h5::fd_t& fd );


int main(){


    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);

    test_01( fd );
    test_02( fd );
    test_03( fd );
    test_04( fd );
}

tu-01.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <cstdint>
#include "struct.h"
#include <h5cpp/core>
    // generated file must be sandwiched between core and io 
    // to satisfy template dependencies in <h5cpp/io>  
    #include "tu-01.h"
#include <h5cpp/io>
#include "utils.hpp"

// Dataset geometry shared by the routines in this translation unit.
// The expansions are parenthesized so each macro behaves as a single value
// inside a larger expression (`100 / NROWS` must divide by 20, not compute 100/4*5).
#define CHUNK_SIZE 5
#define NROWS (4*CHUNK_SIZE)
#define NCOLS (1*CHUNK_SIZE)

// Stores an armadillo integer matrix of NROWS x NCOLS under "/linalg/armadillo".
// LINALG := [armadillo|eigen3|blaze|blitz|it++|dlib|ublas] are supported.
// fd: descriptor of the container the dataset is written into.
void test_01( const h5::fd_t& fd ){

    // zero-fill explicitly: the size-only constructor is not guaranteed to
    // initialise the elements, and the whole matrix is persisted below
    arma::imat M(NROWS,NCOLS, arma::fill::zeros);   // define a linalg object
    h5::write(fd, "/linalg/armadillo",M);           // save it somewhere, partial and full read|write and append supported
}

// Creates two datasets with compound (POD struct) element types; only h5::create is
// called here, no h5::write.
// fd: descriptor of the container the datasets are created in.
void test_02( const h5::fd_t& fd ){// create a Matrix of STRUCT with chunked and GZIP compressed properties ready for partial read|write

    // up to 7 dimensions/extents are supported
    // NROWS x NCOLS dataset of sn::example::Record, with h5::chunk{1,CHUNK_SIZE} and h5::gzip{8} properties
    h5::create<sn::example::Record>(fd, "/orm/chunked_2D", 
        h5::current_dims{NROWS,NCOLS}, h5::chunk{1,CHUNK_SIZE} | h5::gzip{8} );
    // dataset of sn::typecheck::Record whose only dimension has an unlimited maximum extent
    h5::create<sn::typecheck::Record>(fd, "/orm/typecheck", h5::max_dims{H5S_UNLIMITED} );
}

tu-01.h

/* Copyright (c) 2018 vargaconsulting, Toronto,ON Canada
 *     Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#ifndef H5CPP_GUARD_lfZuB
#define H5CPP_GUARD_lfZuB

namespace h5{
    //template specialization of sn::example::Record to create HDF5 COMPOUND type
    // Builds the compound datatype bottom-up: array types for the plain array members,
    // then compound types for the nested sn::typecheck::Record and sn::other::Record,
    // and finally the compound type for sn::example::Record itself, which is returned.
    template<> hid_t inline register_struct<sn::example::Record>(){
        // 1-D array types: 7 floats (example::Record::field_02) and 3 doubles (other::Record::field_02)
        hsize_t at_00_[] ={7};            hid_t at_00 = H5Tarray_create(H5T_NATIVE_FLOAT,1,at_00_);
        hsize_t at_01_[] ={3};            hid_t at_01 = H5Tarray_create(H5T_NATIVE_DOUBLE,1,at_01_);

        // compound type mirroring sn::typecheck::Record: one member per field, placed at its struct offset
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        H5Tinsert(ct_00, "_char",	HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",	HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",	HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort",	HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",	HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",	HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",	HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",	HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",	HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong",	HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",	HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double",	HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",	HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",	HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);
        // array of 4 typecheck records, used for other::Record::field_03
        hsize_t at_02_[] ={4};            hid_t at_02 = H5Tarray_create(ct_00,1,at_02_);

        // compound type mirroring sn::other::Record
        hid_t ct_01 = H5Tcreate(H5T_COMPOUND, sizeof (sn::other::Record));
        H5Tinsert(ct_01, "idx",	HOFFSET(sn::other::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "aa",	HOFFSET(sn::other::Record,aa),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "field_02",	HOFFSET(sn::other::Record,field_02),at_01);
        H5Tinsert(ct_01, "field_03",	HOFFSET(sn::other::Record,field_03),at_02);
        // at_03: 5 x other::Record, shared by field_03 and field_04 below;
        // at_04/at_05: 8 x other::Record, then 3 of those, i.e. the [3][8] member field_05
        hsize_t at_03_[] ={5};            hid_t at_03 = H5Tarray_create(ct_01,1,at_03_);
        hsize_t at_04_[] ={8};            hid_t at_04 = H5Tarray_create(ct_01,1,at_04_);
        hsize_t at_05_[] ={3};            hid_t at_05 = H5Tarray_create(at_04,1,at_05_);

        // outermost compound type: sn::example::Record
        hid_t ct_02 = H5Tcreate(H5T_COMPOUND, sizeof (sn::example::Record));
        H5Tinsert(ct_02, "idx",	HOFFSET(sn::example::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_02, "field_02",	HOFFSET(sn::example::Record,field_02),at_00);
        H5Tinsert(ct_02, "field_03",	HOFFSET(sn::example::Record,field_03),at_03);
        H5Tinsert(ct_02, "field_04",	HOFFSET(sn::example::Record,field_04),at_03);
        H5Tinsert(ct_02, "field_05",	HOFFSET(sn::example::Record,field_05),at_05);

        //closing all hid_t allocations to prevent resource leakage
        // (every intermediate handle created above is closed; only ct_02 stays open)
        H5Tclose(at_00); H5Tclose(at_01); H5Tclose(ct_00); H5Tclose(at_02); H5Tclose(ct_01);
        H5Tclose(at_03); H5Tclose(at_04); H5Tclose(at_05); 

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_02' must be closed: H5Tclose(ct_02);
        return ct_02;
    };
}
H5CPP_REGISTER_STRUCT(sn::example::Record);

namespace h5{
    //template specialization of sn::typecheck::Record to create HDF5 COMPOUND type
    // Creates a compound datatype sized like sn::typecheck::Record and inserts one member
    // per struct field at that field's offset; the handle of the new type is returned.
    template<> hid_t inline register_struct<sn::typecheck::Record>(){

        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        // each member name matches the C++ field name and is paired with an H5T_NATIVE_* type
        H5Tinsert(ct_00, "_char",	HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",	HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",	HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort",	HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",	HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",	HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",	HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",	HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",	HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong",	HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",	HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double",	HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",	HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",	HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_00' must be closed: H5Tclose(ct_00);
        return ct_00;
    };
}
H5CPP_REGISTER_STRUCT(sn::typecheck::Record);

#endif

tu-02.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <cstdint>
#include "struct.h"
#include <h5cpp/core>
    // generated file must be sandwiched between core and io 
    // to satisfy template dependencies in <h5cpp/io>  
    #include "tu-02.h"
    // multiple inclusion is on purpose to test 
    // include guards in generated file: tu_02.h 
    #include "tu-02.h"
#include <h5cpp/io>
#include "utils.hpp"

// Dataset geometry constants for this translation unit.
// The expansions are parenthesized so each macro behaves as a single value
// inside a larger expression (`100 / NROWS` must divide by 20, not compute 100/4*5).
#define CHUNK_SIZE 5
#define NROWS (4*CHUNK_SIZE)
#define NCOLS (1*CHUNK_SIZE)


// Writes the same vector of 20 sn::example::Record elements to three different paths,
// each time with a different set of optional arguments.
// fd: descriptor of the container the datasets are written into.
void test_03( const h5::fd_t& fd ){ // creates + writes entire object tree

    std::vector<sn::example::Record> vec = h5::utils::get_test_data<sn::example::Record>(20);
    // simplest form: only the path and the object are given
    h5::write(fd, "orm/partial/vector one_shot", vec );
    // dimensions and other properties specified as additional arguments
    h5::write(fd, "orm/partial/vector custom_dims", vec,
        h5::max_dims{H5S_UNLIMITED}, h5::gzip{9} | h5::chunk{20} );
    // you don't need to remember order, compiler will do it for you without runtime penalty:
    // same properties as above in a different order, plus an h5::offset{3} argument
    h5::write(fd, "orm/partial/vector custom_dims different_order", vec,
        h5::chunk{20} | h5::gzip{9}, 
        h5::max_dims{H5S_UNLIMITED}, h5::offset{3} );
}

// Reads "/orm/partial/vector one_shot" back into a std::vector<sn::example::Record>
// and prints the `idx` field of every record to stderr.
// fd: descriptor of the container to read from.
void test_04( const h5::fd_t& fd ){ // read entire dataset back

    using T = std::vector<sn::example::Record>;
    std::cerr<< "reading data: \n";
    auto data = h5::read<T>(fd,"/orm/partial/vector one_shot");
    std::cerr <<"reading back data previously written:\n\t";
    // iterate by const reference: a Record embeds several arrays of nested structs,
    // so copying one per iteration only to print `idx` is wasted work
    for( const auto& r:data )
        std::cerr << r.idx <<" ";

    std::cerr << std::endl;
}

tu-02.h

/* Copyright (c) 2018 vargaconsulting, Toronto,ON Canada
 *     Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#ifndef H5CPP_GUARD_tERYg
#define H5CPP_GUARD_tERYg

namespace h5{
    //template specialization of sn::example::Record to create HDF5 COMPOUND type
    // Builds the compound datatype bottom-up: array types for the plain array members,
    // then compound types for the nested sn::typecheck::Record and sn::other::Record,
    // and finally the compound type for sn::example::Record itself, which is returned.
    template<> hid_t inline register_struct<sn::example::Record>(){
        // 1-D array types: 7 floats (example::Record::field_02) and 3 doubles (other::Record::field_02)
        hsize_t at_00_[] ={7};            hid_t at_00 = H5Tarray_create(H5T_NATIVE_FLOAT,1,at_00_);
        hsize_t at_01_[] ={3};            hid_t at_01 = H5Tarray_create(H5T_NATIVE_DOUBLE,1,at_01_);

        // compound type mirroring sn::typecheck::Record: one member per field, placed at its struct offset
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        H5Tinsert(ct_00, "_char",	HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",	HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",	HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort",	HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",	HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",	HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",	HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",	HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",	HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong",	HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",	HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double",	HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",	HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",	HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);
        // array of 4 typecheck records, used for other::Record::field_03
        hsize_t at_02_[] ={4};            hid_t at_02 = H5Tarray_create(ct_00,1,at_02_);

        // compound type mirroring sn::other::Record
        hid_t ct_01 = H5Tcreate(H5T_COMPOUND, sizeof (sn::other::Record));
        H5Tinsert(ct_01, "idx",	HOFFSET(sn::other::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "aa",	HOFFSET(sn::other::Record,aa),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "field_02",	HOFFSET(sn::other::Record,field_02),at_01);
        H5Tinsert(ct_01, "field_03",	HOFFSET(sn::other::Record,field_03),at_02);
        // at_03: 5 x other::Record, shared by field_03 and field_04 below;
        // at_04/at_05: 8 x other::Record, then 3 of those, i.e. the [3][8] member field_05
        hsize_t at_03_[] ={5};            hid_t at_03 = H5Tarray_create(ct_01,1,at_03_);
        hsize_t at_04_[] ={8};            hid_t at_04 = H5Tarray_create(ct_01,1,at_04_);
        hsize_t at_05_[] ={3};            hid_t at_05 = H5Tarray_create(at_04,1,at_05_);

        // outermost compound type: sn::example::Record
        hid_t ct_02 = H5Tcreate(H5T_COMPOUND, sizeof (sn::example::Record));
        H5Tinsert(ct_02, "idx",	HOFFSET(sn::example::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_02, "field_02",	HOFFSET(sn::example::Record,field_02),at_00);
        H5Tinsert(ct_02, "field_03",	HOFFSET(sn::example::Record,field_03),at_03);
        H5Tinsert(ct_02, "field_04",	HOFFSET(sn::example::Record,field_04),at_03);
        H5Tinsert(ct_02, "field_05",	HOFFSET(sn::example::Record,field_05),at_05);

        //closing all hid_t allocations to prevent resource leakage
        // (every intermediate handle created above is closed; only ct_02 stays open)
        H5Tclose(at_00); H5Tclose(at_01); H5Tclose(ct_00); H5Tclose(at_02); H5Tclose(ct_01);
        H5Tclose(at_03); H5Tclose(at_04); H5Tclose(at_05); 

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_02' must be closed: H5Tclose(ct_02);
        return ct_02;
    };
}
H5CPP_REGISTER_STRUCT(sn::example::Record);

#endif

struct.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#ifndef  H5TEST_STRUCT_01 
#define  H5TEST_STRUCT_01

// std::string and std::vector are members of sn::not_supported_yet::Container below;
// include them here so the header does not depend on what the includer pulled in first
#include <string>
#include <vector>

/* typedef is allowed */
typedef unsigned long long int MyUInt;

// Example record types used throughout the h5cpp samples.
namespace sn {
    namespace typecheck {
        struct Record { /*the types with direct mapping to HDF5*/
            char  _char; unsigned char _uchar; short _short; unsigned short _ushort; int _int; unsigned int _uint;
            long _long; unsigned long _ulong; long long int _llong; unsigned long long _ullong;
            float _float; double _double; long double _ldouble;
            bool _bool;
            // wide characters are not supported in HDF5
            // wchar_t _wchar; char16_t _wchar16; char32_t _wchar32;
        };
    }
    namespace other {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            MyUInt                     aa; // typedef type 
            double            field_02[3]; // const array mapped 
            typecheck::Record field_03[4]; // array of nested POD structs
        };
    }
    namespace example {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            float             field_02[7]; // const array mapped 
            sn::other::Record field_03[5]; // embedded Record
            sn::other::Record field_04[5]; // must be optimized out, same as previous
            other::Record  field_05[3][8]; // array of arrays 
        };
    }
    namespace not_supported_yet {
        // NON POD: not supported in phase 1
        // C++ Class -> PODstruct -> persistence[ HDF5 | ??? ] -> PODstruct -> C++ Class 
        struct Container {
            double                            idx; // 
            std::string                  field_05; // c++ object makes it non-POD
            std::vector<example::Record> field_02; // ditto
        };
    }
    /* BEGIN IGNORED STRUCT */
    // these structs are not referenced with h5::read|h5::write|h5::create operators
    // hence compiler should ignore them.
    struct IgnoredRecord {
        signed long int   idx;
        float        field_0n;
    };
    /* END IGNORED STRUCTS */
}
#endif

utils.hpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include "struct.h"
#include <h5cpp/H5misc.hpp>

namespace h5 { namespace utils {
    // template specialization for sn::example::Record:
    // returns a vector of `n` records whose `idx` field equals the record's position;
    // no other field is assigned here.
    template <> inline  std::vector<sn::example::Record> get_test_data( size_t n ){
        std::vector<sn::example::Record> vec (n);
        // the index has the same type as `n`: no signed/unsigned comparison and
        // no overflow of the loop variable for counts above INT_MAX
        for(size_t i=0; i<n; i++ )
            vec[i].idx = i;
        return vec;
    }
}}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
#
# you must include the path to LLVM version specific files  conveniently installed:
# -I/usr/local/h5cpp-llvm
# language standard, used for every compile and forwarded to h5cpp after `--`
CXXFLAGS =  -std=c++17
# libraries for the final link of `app`
LIBS =  -lhdf5  -lz -ldl -lm
# header search paths for the h5cpp and compile rules
INCLUDES = -I/usr/local/include -I/usr/include

# first rule in the file, hence the default goal: build `app` and run it
test: app
    ./app

# compiler assisted introspection will scan TU translation unit, which is the 
# collection of files you're compiling into an object, generates desired output
# from topologically ordered dependency graph. 
# currently POD structs, arrays and integral types are supported, in arbitrary deep
# embedding
#
# with -Dmyfile.h specifies the generated output, which must be sandwiched between
# <h5cpp/core> and <h5cpp/io>

# invoke the `h5cpp` compiler separately for each translation unit (TU)
# to produce its tu-0?.h header file, which must be included exactly once in that TU.
# the generated header depends on the scanned source as well as on the struct definitions
tu-01.h: tu-01.cpp struct.h
    h5cpp  tu-01.cpp -- $(CXXFLAGS) $(INCLUDES) -Dtu-01.h
tu-02.h: tu-02.cpp struct.h
    h5cpp  tu-02.cpp -- $(CXXFLAGS) $(INCLUDES)  -Dtu-02.h

# for simplicity let's use explicit rules to compile TU-s into object files
tu-01.o : tu-01.cpp tu-01.h 
    $(CXX) $(INCLUDES) -o tu-01.o  $(CXXFLAGS) -c tu-01.cpp

tu-02.o : tu-02.cpp tu-02.h 
    $(CXX) $(INCLUDES) -o tu-02.o  $(CXXFLAGS) -c tu-02.cpp

# main.cpp includes <h5cpp/all>, so it gets the same include paths as the other TU-s
main.o : main.cpp 
    $(CXX) $(INCLUDES) -o main.o  $(CXXFLAGS) -c main.cpp

app: tu-01.o tu-02.o main.o 
    $(CXX) $^ $(LIBS) -o $@ 

clean:
    @$(RM) *.o *.h5  app

dist-clean: clean
    @$(RM) tu-01.h tu-02.h

# none of these targets produce a file of the same name
.PHONY: test clean dist-clean

optimized

optimized.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <h5cpp/all>


/* EXAMPLE:
 * demonstrates how to factor out arguments for in-loop or lean + mean in-subroutine data operations.
 * With C++17 most of these operations happen at compile time and are eliminated if not used. Arguments
 * are stored on the stack and have minimal size, often none.
 * By not specifying optional arguments you let the compiler eliminate entire branches at compile time,
 * resulting in a highly optimized final binary: the way an expert would write the code using the HDF5 C API.
 */

// Creates a chunked, compressed dataset and updates it column by column, first with
// temporaries for offset (SUGGESTED) and then with offset/count factored out (EXTREME).
int main(){
    // have a file descriptor ready
    h5::fd_t fd = h5::create("optimized.h5",H5F_ACC_TRUNC);

    // create or use existing linalg objects, the memory is reserved on heap and 
    // usually is a big chunk
    arma::mat M(10,1); M.zeros();
    // create or open the dataset within the HDF5 container, with the right properties set  
    h5::ds_t ds = h5::create<short>(fd,"huge dataset"
                ,h5::current_dims{10,100}           // the actual size of the dataspace created inside HDF5
                ,h5::max_dims{10,H5S_UNLIMITED}     // if it can grow: use `append` or H5CAPI calls 
                ,h5::chunk{10,10}                   // compression and partial IO requires chunk-ing, which is 
                                                    // reading data by small blocks at a time with internal caching mechanism
                                                    // for handling edges and frequent read | write to same region
                                                    //
                | h5::gzip{9}                       // compression comes at cost
                | h5::fill_value<short>{0} );


    // SUGGESTED: Notice that `count` is not specified, but created on 
    // that stack along `offset`. These operations have minimal impact if any. 
    for( hsize_t i=0; i < 4; i ++){
        h5::read( ds,  M, h5::offset{0,0} );
        // element access is M(row,col); `M[0,0]` would apply the comma operator
        // and degrade to the linear index M[0]
        M(0,0) = i;     // your science thing, using 80% of in-core available memory
        // this is where you swap out 
        h5::write( ds,  M, h5::offset{0,i} );
    }


    /* EXTREME:
     * h5::offset, h5::count, ... are space optimized (H5CPP_MAX_RANK + 1) * sizeof(hsize_t) small objects 
     * placed on the local stack (not on the heap) suitable for pass by value, or reference. In the following
     * section offset and count are factored out from the high performance loop to demonstrate how it can 
     * be done, but in most cases this is an **overkill** and not the suggested way. 
     * Use profiler to identify hot spots. See `profile` directory for examples.
     * */
    h5::offset offset{0,0};  // must have the proper rank: this is rank 2
    h5::count  count{10,1};  // describe the memory region of `M` matrix

    // ready for optimized loop:
    for( hsize_t i=10; i < 15; i ++){
        offset[1] = i;  // set new coordinates
        // row 1, column 0: the former `M[1,0]` evaluated to M[0] and touched the wrong element
        M(1,0) = i;     // do your science thing
        h5::write( ds,  M, offset, count );
    }

}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
apps = optimized
CXXFLAGS =  -std=c++17 -Wno-deprecated
LIBS =  -lhdf5  -lz -ldl -lm
# sources live next to this Makefile unless SRC_DIR is overridden on the command line
SRC_DIR ?= .

test: $(apps)
    @./$(apps)

# compile a single translation unit; $< is the .cpp prerequisite,
# INCLUDES is optional and expands to nothing when unset
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) -o $@  $(CPPFLAGS) $(CXXFLAGS) -c $<

all: $(apps)

optimized: optimized.o
    $(CXX) $^ $(LIBS) -o $@

clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean

# none of these targets produce a file of the same name
.PHONY: test all clean dist-clean

packet-table

packettable.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#define ARMA_DONT_USE_WRAPPER
#include <armadillo>
#include <Eigen/Dense> // must include Eigen before <h5cpp/core>

#include <cstdint>
#include "struct.h"
#include <h5cpp/core>
    // generated file must be sandwiched between core and io 
    // to satisfy template dependencies in <h5cpp/io>  
    #include "generated.h"
#include <h5cpp/io>
#include "utils.hpp"

template<class T> using Matrix   = Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;

int main(){

    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);

    // SCALAR: integral
    // The motivation behind this example to allow 2D frames be recorded into a stream
    // 3x5 is the frame or image size, with 2 planes. 
    try { // centrally used error handling
        std::vector<int> stream(83);
        std::iota(std::begin(stream), std::end(stream), 1);
        // the leading dimension is extended once chunk is full, chunk is filled in row major order
        // zero copy writes directly to chunk buffer then pushed through filter chain if specified
        // works up to H5CPP_MAX_RANK default to 7
        // last chunk if partial filled with h5::fill_value<T>( some_value )  
        h5::pt_t pt = h5::create<int>(fd, "stream of integral 01",
                 h5::max_dims{H5S_UNLIMITED,3,5}, h5::chunk{2,3,5} | h5::gzip{9} | h5::fill_value<int>(3) );
        for( auto record : stream )
            h5::append(pt, record);
        //auto M = h5::read<arma::mat>(fd,"stream of integral" );
    } catch ( const h5::error::any& e ){
        std::cerr << "ERROR:" << e.what();
    }

    try { // centrally used error handling
        std::vector<int> stream(83);
        std::iota(std::begin(stream), std::end(stream), 1);
        // the leading dimension is extended once chunk is full, chunk is filled in row major order
        // zero copy writes directly to chunk buffer then pushed through filter chain if specified
        // works up to H5CPP_MAX_RANK default to 7
        // last chunk if partial filled with h5::fill_value<T>( some_value )  
        h5::pt_t pt = h5::create<int>(fd, "stream of integral 02",
                                      h5::max_dims{H5S_UNLIMITED}, h5::chunk{6} | h5::gzip{9} | h5::fill_value<int>(3) );
        for( auto record : stream )
            h5::append(pt, record);
    } catch ( const h5::error::any& e ){
        std::cerr << "ERROR:" << e.what();
    }


    // SCALAR: pod 
    try { //
        std::vector<sn::example::Record> stream = h5::utils::get_test_data<sn::example::Record>(127);

        // implicit conversion from h5::ds_t to h5::pt_t makes it a breeze to create
        // packet_table from h5::open | h5::create calls,
        // The newly created h5::pt_t  stateful container caches the incoming data until
        // bucket filled. IO operations are at h5::chunk boundaries
        // or when resource is released. Last partial chunk handled as expected.
        //
        // compiler assisted introspection generates boilerplate, developer 
        // can focus on the idea, leaving boring details to machines 
        h5::pt_t pt = h5::create<sn::example::Record>(fd, "stream of struct",
                 h5::max_dims{H5S_UNLIMITED,7}, h5::chunk{4,7} | h5::gzip{9} );
        for( auto record : stream )
            h5::append(pt, record);
    } catch ( const h5::error::any& e ){
        std::cerr << "ERROR:" << e.what();
    }

    {   // packet table for a collection of matrices modelling a HD resolution of gray scale images
        size_t nrows = 2, ncols=256, nframes=100;
        h5::pt_t pt = h5::create<double>(fd, "stream of matrices",
                h5::max_dims{H5S_UNLIMITED,nrows,ncols}, h5::chunk{1,nrows,ncols} );
        Matrix<double> M(nrows,ncols);
        int k=0;
        for( int i=0; i<nrows; i++) for(int j=0; j<ncols; j++) M(i,j) = ++k;
        // actual code, you may insert arbitrary number of frames: nrows x ncols
        for( int i = 0; i < nframes; i++)
            h5::append( pt, M);
    }

    return 0;
}

packet_batches.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include <armadillo>
#include <chrono>
#include <vector>
#include <functional>
#include <h5cpp/all>



// Stopwatch helper: runs `callback` once and prints the elapsed wall time in
// microseconds to stdout. The figure includes the std::function call overhead,
// so treat it as an illustration rather than a precise benchmark.
void timer( std::function<void()> callback ){

    namespace chr = std::chrono;
    using clock_type = chr::high_resolution_clock;

    const clock_type::time_point t0 = clock_type::now();
    callback();
    const clock_type::time_point t1 = clock_type::now();

    const chr::microseconds elapsed = chr::duration_cast<chr::microseconds>(t1 - t0);
    std::cout << elapsed.count() << " [us]" << std::endl;
}


int main() {

    h5::fd_t fd = h5::create("pt.h5", H5F_ACC_TRUNC);
    h5::pt_t pt = h5::create<double>(fd, "vectors",
                                    h5::max_dims{H5S_UNLIMITED, 128*1024},
                                    h5::chunk{1, 128*1024});
    {   // VARIANT 1: vector<objects<elements>> save element wise, no penalty as you write directly to buffer

        // I see what you're doing here, while semantically correct
        // please observe that std::vector<arma::colvec> is NOT RUGGED hence is a 
        // matrix with colvector bases, a common construct to 'pack' a set
        // of BLAS level 1 into a single level 2 or 3 operation
        // in any event lets have this 'your way' then below 
        std::vector<arma::colvec> v(8);  // a batch of 8 chunks
        for (size_t i = 0; i < v.size(); ++i)
            v[i] = arma::randu<arma::colvec>(128*1024) + i; // let's add 'i' to color rows

        // c++ is not a bore :) let's capture 'h5::pt_t pt' by reference
        timer( [&]() -> void {
            for( size_t i = 0; i < v.size(); i++)
                for (size_t j = 0; j < v[i].n_elem; ++j) h5::append(pt, v[i][j]);
        });
    }
    {   // VARIANT 2: set of non-rugged vector is a matrix with column vec bases
        arma::mat M(8,128*1024);  // a batch of 8 chunks
        for (size_t i = 0; i < M.n_rows; ++i)
            M(i,arma::span::all) = arma::randu<arma::rowvec>(128*1024)+i;

        timer( [&]() -> void {
            arma::rowvec v(128*1024); // reserve buffer
            for( size_t i = 0; i < M.n_rows; i++){
                h5::append(pt,
                // arma::subview_row<T> should be cast to arma::rowvec: zero copy 
                        static_cast<arma::rowvec>( M(i,arma::span::all) ));
            }
        });
    }
    {   // VARIANT 3: append/write 'chunk size' data block directly from raw memory buffer
        arma::mat M(8,128*1024);  // a batch of 8 chunks
        for (size_t i = 0; i < M.n_rows; ++i)
            M(i,arma::span::all) = arma::randu<arma::rowvec>(128*1024) + i;
        // this is an expensive operation, will not work with huge matrices
        arma::mat T = M.t();
        // data directly written to file from passed pointer, best performance
        timer( [&]() -> void {
            for (size_t i = 0; i < T.n_cols; ++i)
                h5::append(pt, &T(0,i) );
        });
    }
    /*  DOESN't WORK FOR NOW: in the next update will add functionality to 
     *  chunk/block input data directly from object's buffer. The code is there
     *  just not 'enabled' 
     *
    {   // VARIANT 3: set of non-rugged vector is a matrix with column vec bases
        // the buffer is 'v' variable, and h5::append knows that no copy is needed
        // instead it uses the object memory address directly instead of internal buffer

        // CAVEAT: the row/col order is not correctly handled as of 2019 spring, work in progress
        // to make 'transpose' efficient and seamless. Please check this with Julia/R/Matlab 
        // if reads it in the right order
        arma::mat v(8,128*1024);  // a batch of 8 chunks
        for (size_t i = 0; i < v.n_rows; ++i)
            v(i,arma::span::all) = arma::randu<arma::rowvec>(128*1024) + 1;

        // since we buffered data acquisition h5::append is reduced to a single op 
        timer( [&]() -> void {
            //h5::append(pt, v);
        });
    }
    */
    return 0;
}

generated.h

/* Copyright (c) 2018 vargaconsulting, Toronto,ON Canada
 *     Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#ifndef H5CPP_GUARD_irGVX
#define H5CPP_GUARD_irGVX

namespace h5{
    //template specialization of sn::example::Record to create HDF5 COMPOUND type
    // Builds the compound datatype bottom-up: array types for the plain array members,
    // then compound types for the nested sn::typecheck::Record and sn::other::Record,
    // and finally the compound type for sn::example::Record itself, which is returned.
    template<> hid_t inline register_struct<sn::example::Record>(){
        // 1-D array types: 7 floats (example::Record::field_02) and 3 doubles (other::Record::field_02)
        hsize_t at_00_[] ={7};            hid_t at_00 = H5Tarray_create(H5T_NATIVE_FLOAT,1,at_00_);
        hsize_t at_01_[] ={3};            hid_t at_01 = H5Tarray_create(H5T_NATIVE_DOUBLE,1,at_01_);

        // compound type mirroring sn::typecheck::Record: one member per field, placed at its struct offset
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        H5Tinsert(ct_00, "_char",	HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",	HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",	HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort",	HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",	HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",	HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",	HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",	HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",	HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong",	HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",	HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double",	HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",	HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",	HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);
        // array of 4 typecheck records, used for other::Record::field_03
        hsize_t at_02_[] ={4};            hid_t at_02 = H5Tarray_create(ct_00,1,at_02_);

        // compound type mirroring sn::other::Record
        hid_t ct_01 = H5Tcreate(H5T_COMPOUND, sizeof (sn::other::Record));
        H5Tinsert(ct_01, "idx",	HOFFSET(sn::other::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "aa",	HOFFSET(sn::other::Record,aa),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "field_02",	HOFFSET(sn::other::Record,field_02),at_01);
        H5Tinsert(ct_01, "field_03",	HOFFSET(sn::other::Record,field_03),at_02);
        // at_03: 5 x other::Record, shared by field_03 and field_04 below;
        // at_04/at_05: 8 x other::Record, then 3 of those, i.e. the [3][8] member field_05
        hsize_t at_03_[] ={5};            hid_t at_03 = H5Tarray_create(ct_01,1,at_03_);
        hsize_t at_04_[] ={8};            hid_t at_04 = H5Tarray_create(ct_01,1,at_04_);
        hsize_t at_05_[] ={3};            hid_t at_05 = H5Tarray_create(at_04,1,at_05_);

        // outermost compound type: sn::example::Record
        hid_t ct_02 = H5Tcreate(H5T_COMPOUND, sizeof (sn::example::Record));
        H5Tinsert(ct_02, "idx",	HOFFSET(sn::example::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_02, "field_02",	HOFFSET(sn::example::Record,field_02),at_00);
        H5Tinsert(ct_02, "field_03",	HOFFSET(sn::example::Record,field_03),at_03);
        H5Tinsert(ct_02, "field_04",	HOFFSET(sn::example::Record,field_04),at_03);
        H5Tinsert(ct_02, "field_05",	HOFFSET(sn::example::Record,field_05),at_05);

        //closing all hid_t allocations to prevent resource leakage
        // (every intermediate handle created above is closed; only ct_02 stays open)
        H5Tclose(at_00); H5Tclose(at_01); H5Tclose(ct_00); H5Tclose(at_02); H5Tclose(ct_01);
        H5Tclose(at_03); H5Tclose(at_04); H5Tclose(at_05); 

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_02' must be closed: H5Tclose(ct_02);
        return ct_02;
    };
}
H5CPP_REGISTER_STRUCT(sn::example::Record);

#endif

struct.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#ifndef  H5TEST_STRUCT_01 
#define  H5TEST_STRUCT_01

// Keep this header self-contained: the non-POD `Container` example below uses
// std::string and std::vector, so include them here instead of relying on the
// include order of whichever translation unit pulls this file in.
#include <string>
#include <vector>

/* typedef is allowed */
typedef unsigned long long int MyUInt;

namespace sn {
    namespace typecheck {
        struct Record { /*the types with direct mapping to HDF5*/
            char  _char; unsigned char _uchar; short _short; unsigned short _ushort; int _int; unsigned int _uint;
            long _long; unsigned long _ulong; long long int _llong; unsigned long long _ullong;
            float _float; double _double; long double _ldouble;
            bool _bool;
            // wide characters are not supported in HDF5
            // wchar_t _wchar; char16_t _wchar16; char32_t _wchar32;
        };
    }
    namespace other {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            MyUInt                     aa; // typedef type 
            double            field_02[3]; // const array mapped 
            typecheck::Record field_03[4]; // array of embedded POD records
        };
    }
    namespace example {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            float             field_02[7]; // const array mapped 
            sn::other::Record field_03[5]; // embedded Record
            sn::other::Record field_04[5]; // must be optimized out, same as previous
            other::Record  field_05[3][8]; // array of arrays 
        };
    }
    namespace not_supported_yet {
        // NON POD: not supported in phase 1
        // C++ Class -> PODstruct -> persistence[ HDF5 | ??? ] -> PODstruct -> C++ Class 
        struct Container {
            double                            idx; // 
            std::string                  field_05; // c++ object makes it non-POD
            std::vector<example::Record> field_02; // ditto
        };
    }
    /* BEGIN IGNORED STRUCT */
    // these structs are not referenced with h5::read|h5::write|h5::create operators
    // hence compiler should ignore them.
    struct IgnoredRecord {
        signed long int   idx;
        float        field_0n;
    };
    /* END IGNORED STRUCTS */
}
#endif

utils.hpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#include "struct.h"
#include <h5cpp/H5misc.hpp>

namespace h5 { namespace utils {
    // Specialization of get_test_data for sn::example::Record: returns `n`
    // records whose `idx` field holds the element position (0 .. n-1); the
    // remaining fields keep the value-initialized state set by the vector.
    template <> inline  std::vector<sn::example::Record> get_test_data( size_t n ){
        std::vector<sn::example::Record> vec (n);
        // size_t index: no signed/unsigned comparison against `n`
        for(size_t i=0; i<n; i++ )
            vec[i].idx = i;
        return vec;
    }

}}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

CXXFLAGS =  -std=c++17  -I/usr/local/include -I/usr/include/eigen3 
LIBS =  -lhdf5 -lhdf5_hl  -lz -ldl -lm -lblas -llapack
# binaries produced by this Makefile; removed by `clean`
apps = packettable packet_batches

test: packettable
    ./packettable
    h5dump -pH example.h5

# compiler assisted introspection will scan TU translation unit, which is the 
# collection of files you're compiling into an object, generates desired output
# from topologically ordered dependency graph. 
# currently POD structs, arrays and integral types are supported, in arbitrary deep
# embedding
#
# -Dmyfile.h specifies the generated output, which must be sandwiched between
# <h5cpp/core> and <h5cpp/io>
#
# packettable.cpp is the file handed to h5cpp, so it is a prerequisite too:
# editing it must regenerate the descriptors.
generated.h: struct.h packettable.cpp
    h5cpp  packettable.cpp -- $(CXXFLAGS)  -Dgenerated.h

# the eigen3 include path already comes from CXXFLAGS
packettable.o : packettable.cpp generated.h 
    $(CXX) -o packettable.o  $(CXXFLAGS) -c packettable.cpp

packettable: packettable.o
    $(CXX) $^ $(LIBS) -o $@ 

packet_batches.o : packet_batches.cpp 
    $(CXX) -o packet_batches.o  $(CXXFLAGS) -c packet_batches.cpp

packet_batches: packet_batches.o
    $(CXX) $^ $(LIBS) -larmadillo -o $@ 


clean:
    @$(RM) *.o *.h5 $(apps)

dist-clean: clean
    @$(RM) generated.h
# none of these targets name a real file
.PHONY: test clean dist-clean

raw_memory

raw.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <h5cpp/all>
#include <cstddef>
#define values 0,1,2,3,4,5,6,7,8,9

// Walks through raw-memory I/O with h5cpp: several ways of creating datasets,
// then writing from and reading into plain C arrays / pointers.
int main(){
    h5::fd_t fd = h5::create("raw.h5",H5F_ACC_TRUNC);

    float  af[] = {values};
    double ad[] = {values};
    // Zero-initialised automatic storage replaces the former calloc/free pair:
    // there is no allocation that can fail and nothing to release on any exit path.
    double buffer[10] = {};
    double* ptr = buffer;

    { // dataset create: 
        auto ds_0 = h5::create<short>(fd,"/type/short with inline dcpl", 
                h5::current_dims{10,20}, h5::max_dims{10,H5S_UNLIMITED},
                h5::create_path | h5::utf8, // optional lcpl with this default settings**
                h5::chunk{2,3} | h5::fill_value<short>{42} | h5::fletcher32 | h5::shuffle | h5::nbit | h5::gzip{9}, // optional dcpl
                h5::default_dapl ); // optional dapl
        //** lcpl controls how path (or hdf5 name: links) created, `h5::create_path` makes sure that sub paths are created 
        // h5::default_lcpl and h5::lcpl are predefined conveniently such that h5::default_lcpl ::= h5::create_path | h5::utf8

        h5::dcpl_t dcpl = h5::chunk{2,3} | h5::fill_value<short>{42} | h5::fletcher32 | h5::shuffle | h5::nbit | h5::gzip{9};
        // same as above, default values implicit, dcpl explicit
        auto ds_1 = h5::create<short>(fd,"/type/short explicit dcpl", h5::current_dims{10,20}, h5::max_dims{10,H5S_UNLIMITED}, dcpl);
        // same as above, default values explicit
        auto ds_2 = h5::create<short>(fd,"/type/short default dcpl lcpl", h5::current_dims{10,20}, h5::max_dims{10,H5S_UNLIMITED},
                h5::default_lcpl, dcpl, h5::default_dapl);
        // if only max_dims specified, the current dims is set to max_dims or zero if the dimension is H5S_UNLIMITED
        // making it suitable storage for packet table 
        auto ds_3 = h5::create<short>(fd,"/type/short max_dims", h5::max_dims{10,H5S_UNLIMITED}, // [10 X 0]  
              h5::chunk{10,1} | h5::gzip{9} );
        // expandable dataset with compression and chunksize explicitly set
        h5::create<std::string>(fd,"/types/string with chunk and compression", h5::max_dims{H5S_UNLIMITED}, h5::chunk{10} | h5::gzip{9} );
    }
    {   // create + write from 1D memory location into a 2D file space 
        h5::write<double>(fd,"dataset", ad, h5::count{1,10});
    }

    {   // read back to memory location: 2D file space -> 1D mem space from specified offset
        h5::read<double>(fd,"dataset", ptr, h5::count{1,8}, h5::offset{0,2} );

        // all 10 slots are printed; the slots not covered by the 1x8 request
        // above still hold their initial zero
        std::cout<<"valued read: ";
            for(int i=0; i<10; i++) std::cout<< ptr[i] <<" ";
        std::cout<<"\n";

       //if you want to change memory offset, manipulate the passed pointer
        h5::read<double>(fd,"dataset", ptr+4, h5::count{1,3} );
    }


}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
apps = raw
CXXFLAGS =  -std=c++17
LIBS =  -lhdf5  -lz -ldl -lm
# directory searched by the pattern rule below; defaults to the current one
SRC_DIR ?= .

test: $(apps)
    @./raw
    h5dump -d dataset raw.h5

# $(INCLUDES) is passed verbatim: the former `-$(INCLUDES)` produced a stray
# `-` (or `--I...`) argument; $< compiles only the source, not every prerequisite
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@

all: $(apps)

raw: raw.o
    $(CXX) $^ $(LIBS) -o $@

clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean
# none of these targets name a real file
.PHONY: test all clean dist-clean

reference

reference.cpp

#include <armadillo>
#include <vector>
#include <h5cpp/all>

// Region-reference walkthrough: creates the float dataset "01", stores
// references to rectangular regions of it in the datasets "single reference"
// and "index", then writes through and reads back through those references.
int main(){
    h5::fd_t fd = h5::create("ref.h5", H5F_ACC_TRUNC);
    {
        // 10x20 float dataset, chunked 2x2, every element pre-filled with 1
        h5::ds_t ds = h5::create<float>(fd,"01",  
            h5::current_dims{10,20}, h5::chunk{2,2} | h5::fill_value<float>{1} );

        h5::reference_t ref = h5::reference(fd, "01", h5::offset{2,2}, h5::count{4,4});
        h5::write(fd, "single reference", ref);
        /* you can factor out `count` this way :  h5::count count{2,2};  */ 
        std::vector<h5::reference_t> idx {
            // The HDF5 C API requires fd + dataset name instead of an hid_t to the dataset
            h5::reference(fd, "01", h5::offset{2,2}, h5::count{4,4}),
            h5::reference(fd, "01", h5::offset{4,8}, h5::count{1,1}),
            h5::reference(fd, "01", h5::offset{6,12}, h5::count{3,3}),
            h5::reference(fd, "01", h5::offset{8,16}, h5::count{2,1})
        };
        // dataset shape can be controlled with dimensions, in this case it is 2x2
        // and is not related to the selected regions!!! 
        // data type is H5R_DATASET_REGION when dataspace is provided, otherwise OBJECT
        h5::write(fd, "index", idx, h5::current_dims{2,2}, h5::max_dims{H5S_UNLIMITED, 2});
    }
    { // we are going to update the regions referenced by the set of region-references 
      // stored in "index"
        h5::ds_t ds = h5::open(fd, "index");
        // class template argument deduction: this is std::vector<int>, 50 elements of 9
        // NOTE(review): the referenced dataset "01" is float while this buffer is int;
        // presumably the library/HDF5 converts on write -- confirm
        std::vector color(50, 9);
        // read the stored references, then write `color` through each of them
        for(auto& ref: h5::read<std::vector<h5::reference_t>>(ds))
            h5::exp::write(ds, ref, color.data());
    }

    { // we are reading back data from the regions, now they all must be 'color' value '9'
        h5::ds_t ds = h5::open(fd, "index");
        // read each referenced region into its own matrix and print it
        for(auto& ref: h5::read<std::vector<h5::reference_t>>(ds)){
            arma::fmat mat = h5::exp::read<arma::fmat>(ds, ref);
            std::cout << mat << "\n";
        }
    }
    { // for verification: print the whole "01" dataset
        std::cout << h5::read<arma::fmat>(fd, "01") << "\n\n";

    }

}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2021 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

CXXFLAGS =  -std=c++17
LIBS =  -lhdf5  -lz -ldl -lm
INCLUDES = -I/usr/local/include -I/usr/include
# binaries produced by this Makefile; removed by `clean`
apps = reference

test: reference
    ./reference
    h5dump ref.h5

reference.o : reference.cpp
    $(CXX) $(INCLUDES) -o reference.o  $(CXXFLAGS) -c reference.cpp

reference: reference.o
    $(CXX) $^ $(LIBS) -o $@ 

clean:
    @$(RM) *.o *.h5 $(apps)

dist-clean: clean
# none of these targets name a real file
.PHONY: test clean dist-clean

sparse

arma.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <h5cpp/all>
#include <random>


int main(){

    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);

    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<> dist(40, 45);

    arma::SpMat<int>M(20,40);
    for(int i = 0, j=dist(gen); i<M.n_elem; i += j)
        M[i] = j, j = dist(gen);
    //std::cout << M;
    std::cout <<"size: " << M.n_rows << "x" << M.n_cols 
        <<" state: " << M.vec_state // 0 matrix, 1 colvec, 2 rowvec
        <<" non zeros: " <<  M.n_nonzero
        << " fill rate: " <<  (double)M.n_nonzero / (double)M.n_cols *  (double)M.n_rows   <<"\n";

    auto extra_attributes = std::make_tuple(
        "author", "Steven Varga",  "company","vargaconsulting",  "date", "2019-oct-17");


    /* single IO op to write a sparse matrix
     */
    h5::gr_t gr = h5::write(fd, "sparse-multi-file.plain", M );
    h5::awrite(gr,extra_attributes);

    /* compression and sub setting is supported, although the interpretation is delegated to software writer
     * generally it is suggested to use single IO calls instead of chunked access
     * */
    h5::write(fd, "sparse-multi-file.gzip", M, h5::chunk{254} | h5::gzip{9}, h5::offset{1024});
    arma::mat K(4,4);

    auto spmat = h5::read<arma::sp_mat>(gr);
    std::cout<<"is dense: " << h5::exp::linalg::is_dense<decltype(K)>::value <<"\n";
    std::cout<<"is continuous: " << h5::exp::is_contigious<decltype(K)>::value <<"\n";
    std::cout<<"rank: " << h5::exp::rank<int>::value << "\n";
    std::cout<<"rank: " << h5::exp::rank<std::vector<int>>::value << "\n";

    //std::cout << values <<"\n";
}

README.md

# Sparse Matrix[^1]
<div id="object" style="float: right">
    ![Object](../pix/sparse-csr.svg)
</div>

The fill rate of a matrix is the ratio between its non-zero and zero elements. If the latter significantly outweigh the former, we speak of sparse matrices. Depending on the sparsity pattern, some storage formats are more efficient than others. Nevertheless, a sparse matrix is an object of multiple fields, as opposed to a single contiguous memory location with a homogeneous type.

Netlib considers the following [sparse storage formats][109]:

|description                             | `h5::dapl_t`        |
|--------------------|:----------------------------------------|
|[Compressed Sparse Row][110]            | `h5::sparse::csr`   |
|[Compressed Sparse Column][111]         | `h5::sparse::csc`   |
|[Block Compressed Sparse Storage][112]  | `h5::sparse::bcrs`  |
|[Compressed Diagonal Storage][113]      | `h5::sparse::cds`   |
|[Jagged Diagonal Storage][114]          | `h5::sparse::jds`   |
|[Skyline Storage][115]                  | `h5::sparse::ss`    |




## Multi Dataset Storage Format



## Single Dataset Storage Format
TODO: write code and documentation


# Interop With Other Systems
## Python
[Alex Wolf discusses HDF5][1] and sparse matrix formats; neither [h5py][11] nor [pytables][10] supports sparse matrices directly.
### PyTables
has no direct support to save / load sparse matrices

```python
# Load a CSC sparse matrix with PyTables: the three CSC arrays and the shape
# are stored as separate nodes under the 'matrix' group.
# `filename` is expected to be defined by the caller.
import scipy.sparse as sp_sparse
import tables

with tables.open_file(filename, 'r') as f:
    mat_group = f.get_node(f.root, 'matrix')
    # read each component node fully into memory
    data = getattr(mat_group, 'data').read()
    indices = getattr(mat_group, 'indices').read()
    indptr = getattr(mat_group, 'indptr').read()
    shape = getattr(mat_group, 'shape').read()
    # reassemble the matrix from its (data, indices, indptr) triplet
    matrix = sp_sparse.csc_matrix((data, indices, indptr), shape=shape)

```

### H5PY


### h5sparse


## Julia

Object
SparseArrays uses the Compressed Sparse Column format, and the official JLD format can save and load sparse matrices. Less fortunate is how the datasets are organized within the HDF5 container: the actual data is placed under the `_refs` directory. The screenshot shows the sparse matrices A and B saved from Julia, alongside a Python h5sparse file for comparison. On the bright side, the Julia HDF5 package is full-featured, making it possible to exchange sparse matrices with H5PY.

```julia
using JLD, SparseArrays

A = sprand(Float64, 10,20, 0.1)
B = sprand(Float64, 10,20, 0.1)
@save "interop.h5" "data-01/A" A "data-02/B" B 
```

## R

# Bio Informatics

## Loompy

is an efficient file format for large omics datasets. Loom files contain a main matrix, optional additional layers, a variable number of row and column annotations, and sparse graph objects. Under the hood, Loom files are HDF5 and can be opened from many programming languages, including Python, R, C, C++, Java, MATLAB, Mathematica, and Julia.

## 10x Genomics

The top level of the file contains a single HDF5 group, called matrix, and metadata stored as HDF5 attributes. Within the matrix group are datasets containing the dimensions of the matrix, the matrix entries, as well as the features and cell-barcodes associated with the matrix rows and columns, respectively. format


| Column   | Type   | Description |
|----------|--------|-------------|
| barcodes | string | Barcode sequences and their corresponding GEM wells (e.g. AAACGGGCAGCTCGAC-1) |
| data     | uint32 | Nonzero UMI counts in column-major order |
| indices  | uint32 | Zero-based row index of corresponding element in data |
| indptr   | uint32 | Zero-based index into data / indices of the start of each column, i.e., the data corresponding to each barcode sequence |
| shape    | uint64 | Tuple of (# rows, # columns) indicating the matrix dimensions |

**Makefile**
```make linenums="1"
#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________
#
apps = arma

# the example needs C++17; the flags are now actually passed to the compiler
CXXFLAGS = -std=c++17 -Wno-deprecated  -Wall
LIBS =  -lhdf5 -lz -ldl -lm

test: clean $(apps)
    @./arma

all: $(apps)

# single-step compile and link straight from the source file
arma: arma.cpp
    $(CXX) $(CXXFLAGS) $^ $(LIBS) -o $@

clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean
# none of these targets name a real file
.PHONY: test all clean dist-clean
stl

vector.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */


/** @example vector.cpp
 * A description of the example file, causes the example file to show up in 
 * Examples */
#include <vector>
#include <string>
#include <algorithm>

#include <cstdint>
#include "struct.h"
#include <h5cpp/core>
    // compound type descriptor must be sandwiched between core and io 
    // to satisfy template dependencies in <h5cpp/io>  
    #include "generated.h"
#include <h5cpp/io>
#include "utils.hpp"

// Dimension constants. The derived values are parenthesized so they expand
// safely inside larger expressions (e.g. `x / NROWS`).
#define CHUNK_SIZE 5
#define NROWS (4*CHUNK_SIZE)
#define NCOLS (1*CHUNK_SIZE)

// STL walkthrough: writes std::vector<double> and std::vector of a compound
// (POD struct) type to "example.h5" in several ways, then reads one back.
int main(){
    // RAII will close the resource, no need for H5Fclose( any_longer );
    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);
    {
        std::vector<double> v(10, 1.0);
        h5::write(fd,"stl/vector/full.dat", v); // simplest example

        //An elaborate example to demonstrate how to use H5CPP when you know the details, but no time/budget
        //to code it. The performance must be on par with the best C implementation -- if not: shoot an email and I fix it
        h5::create<double>(fd,"stl/vector/partial.dat",
                // arguments can be written any order without loss of performance thanks to compile time parsing
                h5::current_dims{21,10},h5::max_dims{H5S_UNLIMITED,10}, h5::chunk{1,10} | h5::gzip{9} );

        // partial write: the 10 elements of `v` go into the 2x5 block of the
        // existing dataset that starts at row 2, column 3
        h5::write(fd,"stl/vector/partial.dat",  v, h5::offset{2,3}, h5::count{2,5});
    }

    { // creates + writes entire POD STRUCT tree
        std::vector<sn::example::Record> vec = h5::utils::get_test_data<sn::example::Record>(20);
        h5::write(fd, "orm/partial/vector one_shot", vec );
        // dimensions and other properties specified as additional arguments
        h5::write(fd, "orm/partial/vector custom_dims", vec,
            h5::max_dims{H5S_UNLIMITED}, h5::gzip{9} | h5::chunk{20} );
        // you don't need to remember order, compiler will do it for you without runtime penalty:
        h5::write(fd, "orm/partial/vector custom_dims different_order", vec,
            h5::chunk{20} | h5::gzip{9}, 
            h5::block{2}, h5::max_dims{H5S_UNLIMITED}, h5::stride{4}, h5::current_dims{100}, h5::offset{3} );
    }
    { // read entire dataset back
        using T = std::vector<sn::example::Record>;
        auto data = h5::read<T>(fd,"/orm/partial/vector one_shot");
        std::cerr <<"reading back data previously written:\n\t";
        // bind by const reference: each Record embeds several arrays of
        // nested records, so a per-iteration copy is pure overhead
        for( const auto& r : data )
            std::cerr << r.idx <<" ";
        std::cerr << std::endl;
    }
}

generated.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#ifndef H5CPP_GUARD_iBJBS
#define H5CPP_GUARD_iBJBS

namespace h5{
    // Template specialization of register_struct for sn::example::Record: builds
    // the HDF5 COMPOUND datatype describing the struct. Nested types are created
    // innermost-first (sn::typecheck::Record -> sn::other::Record ->
    // sn::example::Record) because each outer type refers to the handles of the
    // inner ones. Returns the handle of the outermost compound type (ct_02).
    template<> hid_t inline register_struct<sn::example::Record>(){
        // 1-D array types over native scalars: float[7] and double[3]
        hsize_t at_00_[] ={7};            hid_t at_00 = H5Tarray_create(H5T_NATIVE_FLOAT,1,at_00_);
        hsize_t at_01_[] ={3};            hid_t at_01 = H5Tarray_create(H5T_NATIVE_DOUBLE,1,at_01_);

        // ct_00: compound type for sn::typecheck::Record, one member per field
        hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
        H5Tinsert(ct_00, "_char",   HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
        H5Tinsert(ct_00, "_uchar",  HOFFSET(sn::typecheck::Record,_uchar),H5T_NATIVE_UCHAR);
        H5Tinsert(ct_00, "_short",  HOFFSET(sn::typecheck::Record,_short),H5T_NATIVE_SHORT);
        H5Tinsert(ct_00, "_ushort", HOFFSET(sn::typecheck::Record,_ushort),H5T_NATIVE_USHORT);
        H5Tinsert(ct_00, "_int",    HOFFSET(sn::typecheck::Record,_int),H5T_NATIVE_INT);
        H5Tinsert(ct_00, "_uint",   HOFFSET(sn::typecheck::Record,_uint),H5T_NATIVE_UINT);
        H5Tinsert(ct_00, "_long",   HOFFSET(sn::typecheck::Record,_long),H5T_NATIVE_LONG);
        H5Tinsert(ct_00, "_ulong",  HOFFSET(sn::typecheck::Record,_ulong),H5T_NATIVE_ULONG);
        H5Tinsert(ct_00, "_llong",  HOFFSET(sn::typecheck::Record,_llong),H5T_NATIVE_LLONG);
        H5Tinsert(ct_00, "_ullong", HOFFSET(sn::typecheck::Record,_ullong),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_00, "_float",  HOFFSET(sn::typecheck::Record,_float),H5T_NATIVE_FLOAT);
        H5Tinsert(ct_00, "_double", HOFFSET(sn::typecheck::Record,_double),H5T_NATIVE_DOUBLE);
        H5Tinsert(ct_00, "_ldouble",    HOFFSET(sn::typecheck::Record,_ldouble),H5T_NATIVE_LDOUBLE);
        H5Tinsert(ct_00, "_bool",   HOFFSET(sn::typecheck::Record,_bool),H5T_NATIVE_HBOOL);
        // array of 4 typecheck records, used for sn::other::Record::field_03
        hsize_t at_02_[] ={4};            hid_t at_02 = H5Tarray_create(ct_00,1,at_02_);

        // ct_01: compound type for sn::other::Record
        hid_t ct_01 = H5Tcreate(H5T_COMPOUND, sizeof (sn::other::Record));
        H5Tinsert(ct_01, "idx", HOFFSET(sn::other::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "aa",  HOFFSET(sn::other::Record,aa),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_01, "field_02",    HOFFSET(sn::other::Record,field_02),at_01);
        H5Tinsert(ct_01, "field_03",    HOFFSET(sn::other::Record,field_03),at_02);
        // at_03: ct_01[5]; at_05: an array of 3 elements, each an array of 8 ct_01
        // (at_04), i.e. the [3][8] member field_05
        hsize_t at_03_[] ={5};            hid_t at_03 = H5Tarray_create(ct_01,1,at_03_);
        hsize_t at_04_[] ={8};            hid_t at_04 = H5Tarray_create(ct_01,1,at_04_);
        hsize_t at_05_[] ={3};            hid_t at_05 = H5Tarray_create(at_04,1,at_05_);

        // ct_02: compound type for sn::example::Record; field_03 and field_04
        // share the same array type at_03
        hid_t ct_02 = H5Tcreate(H5T_COMPOUND, sizeof (sn::example::Record));
        H5Tinsert(ct_02, "idx", HOFFSET(sn::example::Record,idx),H5T_NATIVE_ULLONG);
        H5Tinsert(ct_02, "field_02",    HOFFSET(sn::example::Record,field_02),at_00);
        H5Tinsert(ct_02, "field_03",    HOFFSET(sn::example::Record,field_03),at_03);
        H5Tinsert(ct_02, "field_04",    HOFFSET(sn::example::Record,field_04),at_03);
        H5Tinsert(ct_02, "field_05",    HOFFSET(sn::example::Record,field_05),at_05);

        //closing all intermediate hid_t allocations to prevent resource leakage;
        //only ct_02 stays open
        H5Tclose(at_00); H5Tclose(at_01); H5Tclose(ct_00); H5Tclose(at_02); H5Tclose(ct_01);
        H5Tclose(at_03); H5Tclose(at_04); H5Tclose(at_05); 

        //if not used with h5cpp framework, but as a standalone code generator then
        //the returned 'hid_t ct_02' must be closed: H5Tclose(ct_02);
        return ct_02;
    };
}
H5CPP_REGISTER_STRUCT(sn::example::Record);

#endif

struct.h

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */

#ifndef  H5TEST_STRUCT_01 
#define  H5TEST_STRUCT_01

// Keep this header self-contained: `Container` below uses both std::string
// and std::vector, so both headers are included here.
#include <string>
#include <vector>

/* typedef is allowed */
typedef unsigned long long int MyUInt;

namespace sn {
    namespace typecheck {
        struct Record { /*the types with direct mapping to HDF5*/
            char  _char; unsigned char _uchar; short _short; unsigned short _ushort; int _int; unsigned int _uint;
            long _long; unsigned long _ulong; long long int _llong; unsigned long long _ullong;
            float _float; double _double; long double _ldouble;
            bool _bool;
            // wide characters are not supported in HDF5
            // wchar_t _wchar; char16_t _wchar16; char32_t _wchar32;
        };
    }
    namespace other {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            MyUInt                     aa; // typedef type 
            double            field_02[3]; // const array mapped 
            typecheck::Record field_03[4]; // array of embedded POD records
        };
    }
    namespace example {
        struct Record {                    // POD struct with nested namespace
            MyUInt                    idx; // typedef type 
            float             field_02[7]; // const array mapped 
            sn::other::Record field_03[5]; // embedded Record
            sn::other::Record field_04[5]; // must be optimized out, same as previous
            other::Record  field_05[3][8]; // array of arrays 
        };
    }
    namespace not_supported_yet {
        // NON POD: not supported in phase 1
        // C++ Class -> PODstruct -> persistence[ HDF5 | ??? ] -> PODstruct -> C++ Class 
        struct Container {
            double                            idx; // 
            std::string                  field_05; // c++ object makes it non-POD
            std::vector<example::Record> field_02; // ditto
        };
    }
    /* BEGIN IGNORED STRUCT */
    // these structs are not referenced with h5::read|h5::write|h5::create operators
    // hence compiler should ignore them.
    struct IgnoredRecord {
        signed long int   idx;
        float        field_0n;
    };
    /* END IGNORED STRUCTS */
}
#endif

utils.hpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include "struct.h"
#include <h5cpp/H5misc.hpp>

namespace h5 { namespace utils {
    // Specialization of get_test_data for sn::example::Record: returns `n`
    // records whose `idx` field holds the element position (0 .. n-1); the
    // remaining fields keep the value-initialized state set by the vector.
    template <> inline  std::vector<sn::example::Record> get_test_data( size_t n ){
        std::vector<sn::example::Record> vec (n);
        // size_t index: no signed/unsigned comparison against `n`
        for(size_t i=0; i<n; i++ )
            vec[i].idx = i;
        return vec;
    }

}}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

CXXFLAGS =  -std=c++17  -I/usr/local/include
LIBS =  -lhdf5  -lz -ldl -lm

apps = vector

test: vector
    ./vector
# STL container examples uses generated.h from `../compound` example, where h5cpp compiler 
# automatically produces this file, containing the type descriptors. 
#

# vector.cpp is the file handed to h5cpp, so it is a prerequisite too:
# editing it must regenerate the descriptors.
generated.h: struct.h vector.cpp
    h5cpp  vector.cpp --  $(CXXFLAGS) -Dgenerated.h

# INCLUDES is not set in this Makefile; it is an optional hook for extra -I flags
vector.o : vector.cpp generated.h 
    $(CXX) $(INCLUDES) -o vector.o  $(CXXFLAGS) -c vector.cpp

vector: vector.o
    $(CXX) $^ $(LIBS) -o $@

clean:
    @$(RM) *.o *.h5 $(apps)

dist-clean: clean
    @$(RM) generated.h

# none of these targets name a real file
.PHONY: test clean dist-clean

string

string.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <vector>
#include <string>

#include <h5cpp/all>
// USE: h5dump example.h5 
// some recent version of hdfview fails with variable length strings on my platform
// Writes a vector of strings to "example.h5", then reads it back twice:
// once in full and once as a strided partial selection.
int main(){
    // RAII will close the resource, no need for H5Fclose( any_longer );
    h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);
    {
        std::vector<std::string> vec = h5::utils::get_test_data<std::string>(20);
        h5::write(fd, "/strings.txt", vec);
    }
    {
        std::cout << "\nsingle shot entire dataset:\n";
        auto vec = h5::read<std::vector<std::string>>(fd, "strings.txt");
        // const reference: printing does not need a copy of each string
        for( const auto& i : vec )
            std::cout << i <<"\n";
    }

    {
        std::cout << "\npartial IO start = 3, stride/every=2, count=5 :\n";
        std::cout << "will read in 5 strings, every second starting from third position:\n";
        // offset is zero-based, so offset{2} selects the third element
        auto vec = h5::read<std::vector<std::string>>(fd, "strings.txt", h5::offset{2}, h5::count{5}, h5::stride{2});
        for( const auto& i : vec )
            std::cout << i <<"\n";
    }
}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

CXXFLAGS =  -std=c++17  -I/usr/include
LIBS =  -lhdf5  -lz -ldl -lm

apps = string
# directory searched by the pattern rule below; defaults to the current one
SRC_DIR ?= .

test: string
    ./string


# $(INCLUDES) is passed verbatim: the former `-$(INCLUDES)` produced a stray
# `-` (or `--I...`) argument; $< compiles only the source, not every prerequisite
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@

string: string.o
    $(CXX) $^ $(LIBS) -o $@

clean:
    @$(RM) *.o *.h5 $(apps)

dist-clean: clean
# none of these targets name a real file
.PHONY: test clean dist-clean

transform

transform.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <h5cpp/all>


// Type-conversion exception callback, installed below through h5::type_conv_cb.
// see: https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetTypeConvCb
// Every argument is ignored; the callback unconditionally returns
// H5T_CONV_HANDLED, i.e. it reports each conversion exception as handled.
H5T_conv_ret_t handler( H5T_conv_except_t except_type, hid_t src_id, hid_t dst_id, void *src_buf, void *dst_buf, void *op_data){
    return H5T_CONV_HANDLED;
}

// Writes a 4x7 matrix of ones through a data-transform expression, then reads
// it back as a float matrix through a second transform and prints it.
int main(){
    { // CREATE - WRITE
        h5::fd_t fd = h5::create("arma.h5",H5F_ACC_TRUNC);
        arma::mat M(4,7); M.ones();             // create a matrix

        h5::write(fd,"transform",  M,
        // a data transform describes a linear transformation as a string;
        // the operators {*,/,+,-} are allowed on the variable x:
                h5::data_transform{"2*x+5"} );
    }
    { // READ back: the transfer properties combine a conversion handler (called
      // when the internal type conversion raises an exception) with a data
      // transform expression applied on read
        auto m = h5::read<arma::fmat>("arma.h5","transform", 
             h5::type_conv_cb{ {handler, nullptr} } | h5::data_transform{"x/2 + 1"});
        m.print();
    }

}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

apps = transform
CXXFLAGS =  -std=c++17 
LIBS =  -lhdf5 -lz -ldl -lm
# directory searched by the pattern rule below; defaults to the current one
SRC_DIR ?= .
test: $(apps)
    @./transform

# $(INCLUDES) is passed verbatim: the former `-$(INCLUDES)` produced a stray
# `-` (or `--I...`) argument; $< compiles only the source, not every prerequisite
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@

all: $(apps)

transform: transform.o
    $(CXX) $^ $(LIBS) -o $@

clean:
    @$(RM) *.o *.prof *.h5 $(apps)

dist-clean: clean
# none of these targets name a real file
.PHONY: test all clean dist-clean

utf

utf.cpp

/*
 * Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
 * Author: Varga, Steven <steven@vargaconsulting.ca>
 */
#include <armadillo>
#include <cstdint>
#include <h5cpp/all>
#include <cstddef>


void test_case(const h5::fd_t fd, const std::string& path ){
    arma::mat M = arma::ones(5,6);
    h5::ds_t ds = h5::write(fd, path, M);
    ds[path.data()] = path ;
}

// Exercises UTF-8 in file names, dataset paths and attribute names: creates
// one dataset per greeting, then reopens the file and reads one of them back.
int main(){
    {
        h5::fd_t fd = h5::create("こんにちは世界.h5", H5F_ACC_TRUNC, h5::default_fcpl,
                            h5::libver_bounds({H5F_LIBVER_V18, H5F_LIBVER_V18}) );
        std::string utf8 [] = {
            "hello world", "مرحبا بالعالم", "Բարեւ աշխարհ", "Здравей свят","Прывітанне Сусвет","မင်္ဂလာပါကမ္ဘာလောက","你好,世界",
            "Γειά σου Κόσμε","હેલ્લો વિશ્વ","Helló Világ","こんにちは世界","안녕 세상","سلام دنیا","העלא וועלט"};
        // const reference: test_case only reads the name, no per-iteration copy
        for( const auto& name : utf8 )
            test_case(fd, name);
    }
    {
        try {
            h5::fd_t fd = h5::open("こんにちは世界.h5", H5F_ACC_RDWR);
            auto m = h5::read<arma::mat>(fd, "مرحبا بالعالم" );
        } catch( const h5::error::any& err ){
            // terminate the message so it does not run into later output
            std::cerr << "ERROR:" << err.what() << "\n";
        }
    }
}

Makefile

#  _________________________________________________________
#  Copyright (c) 2018-2020 Steven Varga, Toronto,ON Canada
#  Author: Varga, Steven <steven@vargaconsulting.ca>
#  _________________________________________________________

apps = utf
CXXFLAGS =  -std=c++17
LIBS =  -lhdf5  -lz -ldl -lm
INCLUDES = -I/usr/local/include -I/usr/include
# directory searched by the pattern rule below; defaults to the current one
SRC_DIR ?= .

test: $(apps)
    @./utf

# $(INCLUDES) is passed verbatim: the former `-$(INCLUDES)` expanded to the
# invalid `--I/usr/local/include ...`; $< compiles only the source file
%.o : $(SRC_DIR)/%.cpp 
    $(CXX) $(INCLUDES) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@

all: $(apps)

utf: utf.o
    $(CXX) $^ $(LIBS) -o $@ 

clean:
    @$(RM) *.o *.prof *.h5 $(apps) generated.h

dist-clean: clean
# none of these targets name a real file
.PHONY: test all clean dist-clean


  1. Material based on Netlib Documentation