001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.random;
019
020 import java.io.IOException;
021 import java.io.File;
022 import java.net.URL;
023 import java.util.List;
024
025 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
026 import org.apache.commons.math.stat.descriptive.SummaryStatistics;
027
028 /**
029 * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html">
030 * empirical probability distribution</a> -- a probability distribution derived
031 * from observed data without making any assumptions about the functional form
032 * of the population distribution that the data come from.<p>
033 * Implementations of this interface maintain data structures, called
034 * <i>distribution digests</i>, that describe empirical distributions and
035 * support the following operations: <ul>
036 * <li>loading the distribution from a file of observed data values</li>
037 * <li>dividing the input data into "bin ranges" and reporting bin frequency
038 * counts (data for histogram)</li>
039 * <li>reporting univariate statistics describing the full set of data values
040 * as well as the observations within each bin</li>
041 * <li>generating random values from the distribution</li>
042 * </ul>
043 * Applications can use <code>EmpiricalDistribution</code> implementations to
044 * build grouped frequency histograms representing the input data or to
045 * generate random values "like" those in the input file -- i.e., the values
046 * generated will follow the distribution of the values in the file.</p>
047 *
048 * @version $Revision: 817128 $ $Date: 2009-09-20 21:30:53 -0400 (Sun, 20 Sep 2009) $
049 */
050 public interface EmpiricalDistribution {
051
052 /**
053 * Computes the empirical distribution from the provided
054 * array of numbers.
055 *
056 * @param dataArray the data array
057 */
058 void load(double[] dataArray);
059
060 /**
061 * Computes the empirical distribution from the input file.
062 *
063 * @param file the input file
064 * @throws IOException if an IO error occurs
065 */
066 void load(File file) throws IOException;
067
068 /**
069 * Computes the empirical distribution using data read from a URL.
070 *
071 * @param url url of the input file
072 * @throws IOException if an IO error occurs
073 */
074 void load(URL url) throws IOException;
075
076 /**
077 * Generates a random value from this distribution.
078 * <strong>Preconditions:</strong><ul>
079 * <li>the distribution must be loaded before invoking this method</li></ul>
080 * @return the random value.
081 *
082 * @throws IllegalStateException if the distribution has not been loaded
083 */
084 double getNextValue() throws IllegalStateException;
085
086
087 /**
088 * Returns a
089 * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary}
090 * describing this distribution.
091 * <strong>Preconditions:</strong><ul>
092 * <li>the distribution must be loaded before invoking this method</li>
093 * </ul>
094 *
095 * @return the sample statistics
096 * @throws IllegalStateException if the distribution has not been loaded
097 */
098 StatisticalSummary getSampleStats() throws IllegalStateException;
099
100 /**
101 * Property indicating whether or not the distribution has been loaded.
102 *
103 * @return true if the distribution has been loaded
104 */
105 boolean isLoaded();
106
107 /**
108 * Returns the number of bins.
109 *
110 * @return the number of bins
111 */
112 int getBinCount();
113
114 /**
115 * Returns a list of
116 * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics}
117 * containing statistics describing the values in each of the bins. The
118 * List is indexed on the bin number.
119 *
120 * @return List of bin statistics
121 */
122 List<SummaryStatistics> getBinStats();
123
124 /**
125 * Returns the array of upper bounds for the bins. Bins are: <br/>
126 * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],...,
127 * (upperBounds[binCount-2], upperBounds[binCount-1] = max].
128 *
129 * @return array of bin upper bounds
130 */
131 double[] getUpperBounds();
132
133 }