% Conference paper introducing Elastic Data Organization (EDO); IEEE Cluster 2011.
% Acronyms are brace-protected so sentence-casing styles keep them uppercase, and
% booktitle carries no leading "In" because bibliography styles supply that word.
@inproceedings{cluster2011-edo,
    author    = {Tian, Yuan and Klasky, Scott and Abbasi, Hasan and Lofstead, Jay and Grout, Ray and Podhorszki, Norbert and Liu, Qing and Wang, Yandong and Yu, Weikuan},
    title     = {{EDO}: Improving Read Performance for Scientific Applications Through Elastic Data Organization},
    booktitle = {Proceedings of {IEEE} Cluster 2011},
    year      = {2011},
    abstract  = {Large scale scientific applications are often bottlenecked due to the writing of checkpoint-restart data. Much work has been focused on improving their write performance. With the mounting needs of scientific discovery from these datasets, it is also important to provide good read performance for many common access patterns, which requires effective data organization. To address this issue, we introduce Elastic Data Organization (EDO), which can transparently enable different data organization strategies for scientific applications. Through its flexible data ordering algorithms, EDO harmonizes different access patterns with the underlying file system. Two levels of data ordering are introduced in EDO. One works at the level of data groups (a.k.a process groups). It uses Hilbert Space Filling Curves (SFC) to balance the distribution of data groups across storage targets. Another governs the ordering of data elements within a data group. It divides a data group into subchunks and strikes a good balance between the size of subchunks and the number of seek operations. Our experimental results demonstrate that EDO is able to achieve balanced data distribution across all dimensions and improve the read performance of multidimensional datasets in scientific applications.},
}
