% SmartBlock workshop paper (IPDPSW 2017).
% The numeric citation key is kept unchanged so existing \cite{7965188} references still resolve.
% Given names are stored as initials because that is all the original record provided;
% TODO: expand to full given names once verified against the publication.
@inproceedings{7965188,
  author    = {Champsaur, A. and Lofstead, J. and Dayal, J. and Wolf, M. and Eisenhauer, G. and Widener, P. and Gavrilovska, A.},
  title     = {{SmartBlock}: An Approach to Standardizing In Situ Workflow Components},
  booktitle = {2017 {IEEE} International Parallel and Distributed Processing Symposium Workshops ({IPDPSW})},
  year      = {2017},
  month     = may,
  pages     = {1301--1308},
  doi       = {10.1109/IPDPSW.2017.149},
  keywords  = {data analysis;digital simulation;parallel processing;scientific information systems;IO bandwidth;SmartBlock;analytic processes;data analytics;data-driven scientific discovery;end-to-end workflows construction;extreme-scale computing;extreme-scale systems;in situ workflow components;modular approach;multistep scientific workflows;parallel file systems;reusable components;run-time analytic techniques;simulation data;workflow execution;Analytical models;Complexity theory;Data analysis;Data models;Electronic mail;Engines;Tools;hpc;in situ;pipeline;scientific workflows},
  abstract  = {Multi-step scientific workflows have become prominent and powerful tools of data-driven scientific discovery. Run-time analytic techniques are now commonly used to mitigate the performance effects of using parallel file systems as staging areas during workflow execution. However, workflow construction and deployment for extreme-scale computing is still largely an ad hoc process with uneven support from existing tools. In this paper, we present SMARTBLOCK, an approach to designing generic, reusable components for end-to-end construction of workflows. Specifically, we demonstrate that a small set of SMARTBLOCK generic components can be reused to build a diverse set of workflows, using examples based on actual analytic processes with three well-known scientific codes. Our evaluation shows promising scaling properties as well as negligible overheads for using a modular approach over a custom, ``all-in-one'' solution. As extreme-scale systems incorporate data analytics on simulation data as it is generated at rates that far outstrip available I/O bandwidth, tools such as SMARTBLOCK will become increasingly valuable for defining and deploying flexible, efficient workflows.},
}