001 /*
002 * Java Genetic Algorithm Library (jenetics-1.5.0).
003 * Copyright (c) 2007-2013 Franz Wilhelmstötter
004 *
005 * Licensed under the Apache License, Version 2.0 (the "License");
006 * you may not use this file except in compliance with the License.
007 * You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 *
017 * Author:
018 * Franz Wilhelmstötter (franz.wilhelmstoetter@gmx.at)
019 */
020 package org.jenetics.stat;
021
022 import static java.lang.Math.max;
023 import static java.lang.Math.round;
024 import static java.lang.String.format;
025 import static java.util.Objects.requireNonNull;
026 import static org.jenetics.util.arrays.forEach;
027 import static org.jenetics.util.functions.DoubleToFloat64;
028 import static org.jenetics.util.functions.LongToInteger64;
029 import static org.jenetics.util.math.statistics.sum;
030 import static org.jenetics.util.object.NonNull;
031 import static org.jenetics.util.object.eq;
032 import static org.jenetics.util.object.hashCodeOf;
033
034 import java.io.IOException;
035 import java.util.Arrays;
036 import java.util.Comparator;
037
038 import org.jscience.mathematics.number.Float64;
039 import org.jscience.mathematics.number.Integer64;
040
041 import org.jenetics.util.Function;
042 import org.jenetics.util.MappedAccumulator;
043 import org.jenetics.util.arrays;
044
045 /**
046 * To create an <i>Histogram Accumulator</i> you have to define the <i>class
047 * border</i> which define the histogram classes. A value is part of the
048 * <i>i</i><sup>th</sup> histogram array element:
049 * <p>
050 * <img
051 * src="doc-files/histogram-class.gif"
052 * alt="i=\left\{\begin{matrix} 0 & when & v < c_0 \\
053 * len(c) & when & v \geq c_{len(c)-1} \\
054 * j & when & c_j< v \leq c_{j-1} \\ \end{matrix}\right."
055 * />
056 * </p>
057 *
058 * Example:
059 * <pre>
060 * Separators: 0 1 2 3 4 5 6 7 8 9
061 * -------+----+----+----+----+----+----+----+----+----+------
062 * Frequencies: 20 | 12 | 14 | 17 | 12 | 11 | 13 | 11 | 10 | 19 | 18
063 * -------+----+----+----+----+----+----+----+----+----+------
064 * Histogram index: 0 1 2 3 4 5 6 7 8 9 10
065 * </pre>
066 * <p/>
067 * <strong>Note that this implementation is not synchronized.</strong> If
068 * multiple threads access this object concurrently, and at least one of the
069 * threads modifies it, it must be synchronized externally.
070 *
071 * @author <a href="mailto:franz.wilhelmstoetter@gmx.at">Franz Wilhelmstötter</a>
072 * @since 1.0
073 * @version 1.0 — <em>$Date: 2013-12-08 $</em>
074 */
075 public class Histogram<C> extends MappedAccumulator<C> {
076
077 private final C[] _separators;
078 private final Comparator<C> _comparator;
079 private final long[] _histogram;
080
081 /**
082 * Create a new Histogram with the given class separators. The number of
083 * classes is {@code separators.length + 1}. A valid histogram consists of
084 * at least two classes (with one separator).
085 *
086 * @see #valueOf(Comparable...)
087 *
088 * @param comparator the comparator for the separators.
089 * @param separators the class separators.
090 * @throws NullPointerException if the classes of one of its elements
091 * or the {@code comparator} is {@code null}.
092 * @throws IllegalArgumentException if the given separators array is empty.
093 */
094 @SafeVarargs
095 public Histogram(final Comparator<C> comparator, final C... separators) {
096 _separators = check(separators);
097 _comparator = requireNonNull(comparator, "Comparator");
098 _histogram = new long[separators.length + 1];
099
100 Arrays.sort(_separators, _comparator);
101 Arrays.fill(_histogram, 0L);
102 }
103
104 @SafeVarargs
105 private Histogram(
106 final long[] histogram,
107 final Comparator<C> comparator,
108 final C... separators
109 ) {
110 _histogram = histogram;
111 _comparator = comparator;
112 _separators = separators;
113 }
114
115 @SuppressWarnings("unchecked")
116 private static <C> C[] check(final C... classes) {
117 forEach(classes, NonNull);
118 if (classes.length == 0) {
119 throw new IllegalArgumentException("Given classes array is empty.");
120 }
121
122 return classes;
123 }
124
125 @Override
126 public void accumulate(final C value) {
127 ++_histogram[index(value)];
128 ++_samples;
129 }
130
131 /**
132 * Do binary search for the index to use.
133 *
134 * @param value the value to search.
135 * @return the histogram index.
136 */
137 final int index(final C value) {
138 int low = 0;
139 int high = _separators.length - 1;
140
141 while (low <= high) {
142 if (_comparator.compare(value, _separators[low]) < 0) {
143 return low;
144 }
145 if (_comparator.compare(value, _separators[high]) >= 0) {
146 return high + 1;
147 }
148
149 final int mid = (low + high) >>> 1;
150 if (_comparator.compare(value, _separators[mid]) < 0) {
151 high = mid;
152 } else if (_comparator.compare(value, _separators[mid]) >= 0) {
153 low = mid + 1;
154 }
155 }
156
157 assert (false): "This line will never be reached.";
158 return -1;
159 }
160
161 /**
162 * Add the given {@code histogram} to this in a newly created one.
163 *
164 * @param histogram the histogram to add.
165 * @return a new histogram with the added values of this and the given one.
166 * @throws IllegalArgumentException if the {@link #length()} and the
167 * separators of {@code this} and the given {@code histogram} are
168 * not the same.
169 * @throws NullPointerException if the given {@code histogram} is {@code null}.
170 */
171 public Histogram<C> plus(final Histogram<C> histogram) {
172 if (!_comparator.equals(histogram._comparator)) {
173 throw new IllegalArgumentException(
174 "The histogram comparators are not equals."
175 );
176 }
177 if (!Arrays.equals(_separators, histogram._separators)) {
178 throw new IllegalArgumentException(
179 "The histogram separators are not equals."
180 );
181 }
182
183 final long[] data = new long[_histogram.length];
184 for (int i = 0; i < data.length; ++i) {
185 data[i] = _histogram[i] + histogram._histogram[i];
186 }
187
188 return new Histogram<>(data, _comparator, _separators);
189 }
190
191 /**
192 * Return the comparator used for class search.
193 *
194 * @return the comparator.
195 */
196 public Comparator<C> getComparator() {
197 return _comparator;
198 }
199
200 /**
201 * Return a copy of the class separators.
202 *
203 * @return the class separators.
204 */
205 public C[] getSeparators() {
206 return _separators.clone();
207 }
208
209 /**
210 * Copy the histogram into the given array. If the array is big enough
211 * the same array is returned, otherwise a new array is created and
212 * returned. The length of the histogram array is the number of separators
213 * plus one ({@code getSeparators().length + 1}).
214 *
215 * @param histogram array to copy the histogram.
216 * @return the histogram array.
217 * @throws NullPointerException if the given array is {@code null}.
218 */
219 public long[] getHistogram(final long[] histogram) {
220 requireNonNull(histogram);
221
222 long[] hist = histogram;
223 if (histogram.length >= _histogram.length) {
224 System.arraycopy(_histogram, 0, hist, 0, _histogram.length);
225 } else {
226 hist = _histogram.clone();
227 }
228
229 return hist;
230 }
231
232 /**
233 * Return a copy of the current histogram.
234 *
235 * @return a copy of the current histogram.
236 */
237 public long[] getHistogram() {
238 return getHistogram(new long[_histogram.length]);
239 }
240
241 /**
242 * Return the number of classes of this histogram.
243 *
244 * @return the number of classes of this histogram.
245 */
246 public int length() {
247 return _histogram.length;
248 }
249
250 /**
251 * Return the <i>histogram</i> as probability array.
252 *
253 * @return the class probabilities.
254 */
255 public double[] getProbabilities() {
256 final double[] probabilities = new double[_histogram.length];
257
258 assert (sum(_histogram) == _samples);
259 for (int i = 0; i < probabilities.length; ++i) {
260 probabilities[i] = (double)_histogram[i]/(double)_samples;
261 }
262
263 return probabilities;
264 }
265
266 /**
267 * Calculate the χ2 value of the current histogram for the assumed
268 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
269 * Cumulative density function</a> {@code cdf}.
270 *
271 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
272 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
273 *
274 * @param cdf the assumed Probability density function.
275 * @param min the lower limit of the CDF domain. A {@code null} value means
276 * an open interval.
277 * @param max the upper limit of the CDF domain. A {@code null} value means
278 * an open interval.
279 * @return the χ2 value of the current histogram.
280 * @throws NullPointerException if {@code cdf} is {@code null}.
281 */
282 public double χ2(final Function<C, Float64> cdf, final C min, final C max) {
283 double χ2 = 0;
284 for (int j = 0; j < _histogram.length; ++j) {
285 final long n0j = n0(j, cdf, min, max);
286 χ2 += ((_histogram[j] - n0j)*(_histogram[j] - n0j))/(double)n0j;
287 }
288 return χ2;
289 }
290
291 /**
292 * Calculate the χ2 value of the current histogram for the assumed
293 * <a href="http://en.wikipedia.org/wiki/Cumulative_distribution_function">
294 * Cumulative density function</a> {@code cdf}.
295 *
296 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_test">χ2-test</a>
297 * @see <a href="http://en.wikipedia.org/wiki/Chi-square_distribution">χ2-distribution</a>
298 *
299 * @param cdf the assumed Probability density function.
300 * @return the χ2 value of the current histogram.
301 * @throws NullPointerException if {@code cdf} is {@code null}.
302 */
303 public double χ2(final Function<C, Float64> cdf) {
304 return χ2(cdf, null, null);
305 }
306
307 private long n0(final int j, final Function<C, Float64> cdf, final C min, final C max) {
308 Float64 p0j = Float64.ZERO;
309 if (j == 0) {
310 p0j = cdf.apply(_separators[0]);
311 if (min != null) {
312 p0j = p0j.minus(cdf.apply(min));
313 }
314 } else if (j == _histogram.length - 1) {
315 if (max != null) {
316 p0j = cdf.apply(max).minus(cdf.apply(_separators[_separators.length - 1]));
317 } else {
318 p0j = Float64.ONE.minus(cdf.apply(_separators[_separators.length - 1]));
319 }
320 } else {
321 p0j = cdf.apply(_separators[j]).minus(cdf.apply(_separators[j - 1]));
322 }
323
324 return max(round(p0j.doubleValue()*_samples), 1L);
325 }
326
327 /**
328 * @see #χ2(Function)
329 */
330 public double chisqr(final Function<C, Float64> cdf) {
331 return χ2(cdf);
332 }
333
334 /**
335 * @see #χ2(Function, Object, Object)
336 */
337 public double chisqr(final Function<C, Float64> cdf, final C min, final C max) {
338 return χ2(cdf, min, max);
339 }
340
341 @Override
342 public int hashCode() {
343 return hashCodeOf(getClass()).
344 and(super.hashCode()).
345 and(_separators).
346 and(_histogram).value();
347 }
348
349 @Override
350 public boolean equals(final Object obj) {
351 if (obj == this) {
352 return true;
353 }
354 if (obj == null || getClass() != obj.getClass()) {
355 return false;
356 }
357
358 final Histogram<?> histogram = (Histogram<?>)obj;
359 return eq(_separators, histogram._separators) &&
360 eq(_histogram, histogram._histogram) &&
361 super.equals(obj);
362 }
363
364 @Override
365 public String toString() {
366 return Arrays.toString(_separators) + "\n" + Arrays.toString(getHistogram()) +
367 "\nSamples: " + _samples;
368 }
369
370 @Override
371 public Histogram<C> clone() {
372 return (Histogram<C>)super.clone();
373 }
374
375
376 /**
377 * Create a new Histogram with the given class separators. The classes are
378 * sorted by its natural order.
379 *
380 * @param separators the class separators.
381 * @return a new Histogram.
382 * @throws NullPointerException if the {@code separators} are {@code null}.
383 * @throws IllegalArgumentException if {@code separators.length == 0}.
384 */
385 @SuppressWarnings("unchecked")
386 public static <C extends Comparable<? super C>> Histogram<C> valueOf(
387 final C... separators
388 ) {
389 return new Histogram<C>(COMPARATOR, separators);
390 }
391
392 @SuppressWarnings({"rawtypes", "unchecked"})
393 private static final Comparator COMPARATOR = new Comparator() {
394 @Override
395 public int compare(final Object o1, final Object o2) {
396 return ((Comparable)o1).compareTo(o2);
397 }
398 };
399
400 /**
401 * Return a <i>histogram</i> for {@link Float64} values. The <i>histogram</i>
402 * array of the returned {@link Histogram} will look like this:
403 *
404 * <pre>
405 * min max
406 * +----+----+----+----+ ~ +----+
407 * | 1 | 2 | 3 | 4 | | nc |
408 * +----+----+----+----+ ~ +----+
409 * </pre>
410 *
411 * The range of all classes will be equal: {@code (max - min)/nclasses}.
412 *
413 * @param min the minimum range value of the returned histogram.
414 * @param max the maximum range value of the returned histogram.
415 * @param nclasses the number of classes of the returned histogram. The
416 * number of separators will be {@code nclasses - 1}.
417 * @return a new <i>histogram</i> for {@link Float64} values.
418 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
419 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
420 * {@code nclasses < 2}.
421 */
422 public static Histogram<Float64> valueOf(
423 final Float64 min,
424 final Float64 max,
425 final int nclasses
426 ) {
427 return valueOf(arrays.map(
428 toSeparators(min.doubleValue(), max.doubleValue(), nclasses),
429 new Float64[nclasses - 1],
430 DoubleToFloat64
431 ));
432 }
433
434 /**
435 * @see #valueOf(Float64, Float64, int)
436 */
437 public static Histogram<Double> valueOf(
438 final Double min,
439 final Double max,
440 final int nclasses
441 ) {
442 return valueOf(toSeparators(min, max, nclasses));
443 }
444
445 private static Double[] toSeparators(
446 final Double min,
447 final Double max,
448 final int nclasses
449 ) {
450 check(min, max, nclasses);
451
452 final double stride = (max - min)/nclasses;
453 final Double[] separators = new Double[nclasses - 1];
454 for (int i = 0; i < separators.length; ++i) {
455 separators[i] = min + stride*(i + 1);
456 }
457
458 return separators;
459 }
460
461 /**
462 * Return a <i>histogram</i> for {@link Integer64} values. The <i>histogram</i>
463 * array of the returned {@link Histogram} will look like this:
464 *
465 * <pre>
466 * min max
467 * +----+----+----+----+ ~ +----+
468 * | 1 | 2 | 3 | 4 | | nc |
469 * +----+----+----+----+ ~ +----+
470 * </pre>
471 *
472 * The range of all classes are more or less the same. But this is not
473 * always possible due to integer rounding issues. Calling this method with
474 * {@code min = 13} and {@code max = 99} will generate the following class
475 * separators for the given number of classes:
476 * <pre>
477 * nclasses = 2: [56]
478 * nclasses = 3: [41, 70]
479 * nclasses = 4: [34, 55, 77]
480 * nclasses = 5: [30, 47, 64, 81]
481 * nclasses = 6: [27, 41, 55, 69, 84]
482 * nclasses = 7: [25, 37, 49, 61, 73, 86]
483 * nclasses = 8: [23, 33, 44, 55, 66, 77, 88]
484 * nclasses = 9: [22, 31, 40, 49, 59, 69, 79, 89]
485 * </pre>
486 *
487 * @param min the minimum range value of the returned histogram.
488 * @param max the maximum range value of the returned histogram.
489 * @param nclasses the number of classes of the returned histogram. The
490 * number of separators will be {@code nclasses - 1}.
491 * @return a new <i>histogram</i> for {@link Integer64} values.
492 * @throws NullPointerException if {@code min} or {@code max} is {@code null}.
493 * @throws IllegalArgumentException if {@code min.compareTo(max) >= 0} or
494 * {@code nclasses < 2}.
495 */
496 public static Histogram<Integer64> valueOf(
497 final Integer64 min,
498 final Integer64 max,
499 final int nclasses
500 ) {
501 return valueOf(arrays.map(
502 toSeparators(min.longValue(), max.longValue(), nclasses),
503 new Integer64[0],
504 LongToInteger64
505 ));
506 }
507
508 /**
509 * @see #valueOf(Integer64, Integer64, int)
510 */
511 public static Histogram<Long> valueOf(
512 final Long min,
513 final Long max,
514 final int nclasses
515 ) {
516 return valueOf(toSeparators(min, max, nclasses));
517 }
518
519 private static Long[] toSeparators(
520 final Long min,
521 final Long max,
522 final int nclasses
523 ) {
524 check(min, max, nclasses);
525
526 final int size = (int)(max - min);
527 final int pts = Math.min(size, nclasses);
528 final int bulk = size/pts;
529 final int rest = size%pts;
530 assert ((bulk*pts + rest) == size);
531
532 final Long[] separators = new Long[pts - 1];
533 for (int i = 1, n = pts - rest; i < n; ++i) {
534 separators[i - 1] = i*bulk + min;
535 }
536 for (int i = 0; i < rest; ++i) {
537 separators[separators.length - rest + i] =
538 (pts - rest)*bulk + i*(bulk + 1) + min;
539 }
540
541 return separators;
542 }
543
544 /*
545 * Check the input values of the valueOf methods.
546 */
547 private static <C extends Comparable<? super C>> void
548 check(final C min, final C max, final int nclasses)
549 {
550 requireNonNull(min, "Minimum");
551 requireNonNull(max, "Maximum");
552 if (min.compareTo(max) >= 0) {
553 throw new IllegalArgumentException(format(
554 "Min must be smaller than max: %s < %s failed.", min, max
555 ));
556 }
557 if (nclasses < 2) {
558 throw new IllegalArgumentException(format(
559 "nclasses should be < 2, but was %s.", nclasses
560 ));
561 }
562 }
563
564
565 public <A extends Appendable> A print(final A out) throws IOException {
566 Object min = "...";
567 Object max = null;
568 for (int i = 0; i < length() - 1; ++i) {
569 max = _separators[i];
570 out.append("[" + min + "," + max + ")");
571 out.append(" " + _histogram[i] + "\n");
572 min = max;
573 }
574 if (length() - 1 > 0) {
575 out.append("[" + min + ",...)");
576 out.append(" " + _histogram[length() - 1] + "\n");
577 }
578
579 return out;
580 }
581
582 }
583
584
585
586
587
588
589
590
591
|