Importing the EstimateHistogram from origin

This commit is contained in:
Amnon Heiman 2015-06-30 18:22:45 +03:00
parent baf7d101d1
commit 919785bc04

View File

@ -0,0 +1,306 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright 2015 Cloudius Systems
*
* Modified by Cloudius Systems
*/
package com.cloudius.urchin.utils;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;
import com.google.common.base.Objects;
import org.slf4j.Logger;
public class EstimatedHistogram {
/**
* The series of values to which the counts in `buckets` correspond: 1, 2,
* 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc. Thus, a `buckets` of [0, 0, 1,
* 10] would mean we had seen one value of 3 and 10 values of 4.
*
* The series starts at 1 and grows by 1.2 each time (rounding and removing
* duplicates). It goes from 1 to around 36M by default (creating 90+1
* buckets), which will give us timing resolution from microseconds to 36
* seconds, with less precision as the numbers get larger.
*
* Each bucket represents values from (previous bucket offset, current
* offset].
*/
private final long[] bucketOffsets;
// buckets is one element longer than bucketOffsets -- the last element is
// values greater than the last offset
final AtomicLongArray buckets;
public EstimatedHistogram() {
this(90);
}
public EstimatedHistogram(int bucketCount) {
bucketOffsets = newOffsets(bucketCount);
buckets = new AtomicLongArray(bucketOffsets.length + 1);
}
public EstimatedHistogram(long[] offsets, long[] bucketData) {
assert bucketData.length == offsets.length + 1;
bucketOffsets = offsets;
buckets = new AtomicLongArray(bucketData);
}
private static long[] newOffsets(int size) {
long[] result = new long[size];
long last = 1;
result[0] = last;
for (int i = 1; i < size; i++) {
long next = Math.round(last * 1.2);
if (next == last)
next++;
result[i] = next;
last = next;
}
return result;
}
/**
* @return the histogram values corresponding to each bucket index
*/
public long[] getBucketOffsets() {
return bucketOffsets;
}
/**
* Increments the count of the bucket closest to n, rounding UP.
*
* @param n
*/
public void add(long n) {
int index = Arrays.binarySearch(bucketOffsets, n);
if (index < 0) {
// inexact match, take the first bucket higher than n
index = -index - 1;
}
// else exact match; we're good
buckets.incrementAndGet(index);
}
/**
* @return the count in the given bucket
*/
long get(int bucket) {
return buckets.get(bucket);
}
/**
* @param reset
* zero out buckets afterwards if true
* @return a long[] containing the current histogram buckets
*/
public long[] getBuckets(boolean reset) {
final int len = buckets.length();
long[] rv = new long[len];
if (reset)
for (int i = 0; i < len; i++)
rv[i] = buckets.getAndSet(i, 0L);
else
for (int i = 0; i < len; i++)
rv[i] = buckets.get(i);
return rv;
}
/**
* @return the smallest value that could have been added to this histogram
*/
public long min() {
for (int i = 0; i < buckets.length(); i++) {
if (buckets.get(i) > 0)
return i == 0 ? 0 : 1 + bucketOffsets[i - 1];
}
return 0;
}
/**
* @return the largest value that could have been added to this histogram.
* If the histogram overflowed, returns Long.MAX_VALUE.
*/
public long max() {
int lastBucket = buckets.length() - 1;
if (buckets.get(lastBucket) > 0)
return Long.MAX_VALUE;
for (int i = lastBucket - 1; i >= 0; i--) {
if (buckets.get(i) > 0)
return bucketOffsets[i];
}
return 0;
}
/**
* @param percentile
* @return estimated value at given percentile
*/
public long percentile(double percentile) {
assert percentile >= 0 && percentile <= 1.0;
int lastBucket = buckets.length() - 1;
if (buckets.get(lastBucket) > 0)
throw new IllegalStateException(
"Unable to compute when histogram overflowed");
long pcount = (long) Math.floor(count() * percentile);
if (pcount == 0)
return 0;
long elements = 0;
for (int i = 0; i < lastBucket; i++) {
elements += buckets.get(i);
if (elements >= pcount)
return bucketOffsets[i];
}
return 0;
}
/**
* @return the mean histogram value (average of bucket offsets, weighted by
* count)
* @throws IllegalStateException
* if any values were greater than the largest bucket threshold
*/
public long mean() {
int lastBucket = buckets.length() - 1;
if (buckets.get(lastBucket) > 0)
throw new IllegalStateException(
"Unable to compute ceiling for max when histogram overflowed");
long elements = 0;
long sum = 0;
for (int i = 0; i < lastBucket; i++) {
long bCount = buckets.get(i);
elements += bCount;
sum += bCount * bucketOffsets[i];
}
return (long) Math.ceil((double) sum / elements);
}
/**
* @return the total number of non-zero values
*/
public long count() {
long sum = 0L;
for (int i = 0; i < buckets.length(); i++)
sum += buckets.get(i);
return sum;
}
/**
* @return true if this histogram has overflowed -- that is, a value larger
* than our largest bucket could bound was added
*/
public boolean isOverflowed() {
return buckets.get(buckets.length() - 1) > 0;
}
/**
* log.debug() every record in the histogram
*
* @param log
*/
public void log(Logger log) {
// only print overflow if there is any
int nameCount;
if (buckets.get(buckets.length() - 1) == 0)
nameCount = buckets.length() - 1;
else
nameCount = buckets.length();
String[] names = new String[nameCount];
int maxNameLength = 0;
for (int i = 0; i < nameCount; i++) {
names[i] = nameOfRange(bucketOffsets, i);
maxNameLength = Math.max(maxNameLength, names[i].length());
}
// emit log records
String formatstr = "%" + maxNameLength + "s: %d";
for (int i = 0; i < nameCount; i++) {
long count = buckets.get(i);
// sort-of-hack to not print empty ranges at the start that are only
// used to demarcate the
// first populated range. for code clarity we don't omit this record
// from the maxNameLength
// calculation, and accept the unnecessary whitespace prefixes that
// will occasionally occur
if (i == 0 && count == 0)
continue;
log.debug(String.format(formatstr, names[i], count));
}
}
private static String nameOfRange(long[] bucketOffsets, int index) {
StringBuilder sb = new StringBuilder();
appendRange(sb, bucketOffsets, index);
return sb.toString();
}
private static void appendRange(StringBuilder sb, long[] bucketOffsets,
int index) {
sb.append("[");
if (index == 0)
if (bucketOffsets[0] > 0)
// by original definition, this histogram is for values greater
// than zero only;
// if values of 0 or less are required, an entry of lb-1 must be
// inserted at the start
sb.append("1");
else
sb.append("-Inf");
else
sb.append(bucketOffsets[index - 1] + 1);
sb.append("..");
if (index == bucketOffsets.length)
sb.append("Inf");
else
sb.append(bucketOffsets[index]);
sb.append("]");
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (!(o instanceof EstimatedHistogram))
return false;
EstimatedHistogram that = (EstimatedHistogram) o;
return Arrays.equals(getBucketOffsets(), that.getBucketOffsets())
&& Arrays.equals(getBuckets(false), that.getBuckets(false));
}
@Override
public int hashCode() {
return Objects.hashCode(getBucketOffsets(), getBuckets(false));
}
}