@article{MaloneKJKM:ML2018, title = {Empirical Hardness of Finding Optimal Bayesian Network Structures: Algorithm Selection and Runtime Prediction}, author = {Brandon Malone and Kustaa Kangas and Matti J\"arvisalo and Mikko Koivisto and Petri Myllym\"aki}, journal = {Machine Learning}, volume = {18}, number = {3--4}, pages = {319--336}, year = {2018}, } Abstract: Various algorithms have been proposed for finding a Bayesian network structure that is guaranteed to maximize a given scoring function. As the state-of-the-art algorithms rely on adaptive search strategies, such as branch-and-bound and integer linear programming techniques, the time requirements of the algorithms are not well characterized by simple functions of the instance size. Furthermore, no single algorithm dominates the others in speed. Given a problem instance, it is thus a priori unclear which algorithm will perform best and how fast it will solve the instance. We show that for a given algorithm the hardness of a problem instance can be efficiently predicted based on a collection of non-trivial features which go beyond the basic parameters of instance size. Specifically, we train and test a statistical model on empirical data, based on the largest evaluation of state-of-the-art exact algorithms to date. We demonstrate that we can predict the running times to a reasonable degree of accuracy, and effectively select algorithms that perform well in terms of running times on a particular instance. Moreover, we also show how the results can be utilized in building an algorithm portfolio that combines several individual algorithms in an almost optimal manner.