diff --git a/src/main/java/com/cloudius/urchin/utils/EstimatedHistogram.java b/src/main/java/com/cloudius/urchin/utils/EstimatedHistogram.java new file mode 100644 index 0000000..9042a7b --- /dev/null +++ b/src/main/java/com/cloudius/urchin/utils/EstimatedHistogram.java @@ -0,0 +1,306 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Copyright 2015 Cloudius Systems + * + * Modified by Cloudius Systems + */ + +package com.cloudius.urchin.utils; + +import java.util.Arrays; +import java.util.concurrent.atomic.AtomicLongArray; + +import com.google.common.base.Objects; + +import org.slf4j.Logger; + +public class EstimatedHistogram { + /** + * The series of values to which the counts in `buckets` correspond: 1, 2, + * 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc. Thus, a `buckets` of [0, 0, 1, + * 10] would mean we had seen one value of 3 and 10 values of 4. + * + * The series starts at 1 and grows by 1.2 each time (rounding and removing + * duplicates). It goes from 1 to around 36M by default (creating 90+1 + * buckets), which will give us timing resolution from microseconds to 36 + * seconds, with less precision as the numbers get larger. + * + * Each bucket represents values from (previous bucket offset, current + * offset]. + */ + private final long[] bucketOffsets; + + // buckets is one element longer than bucketOffsets -- the last element is + // values greater than the last offset + final AtomicLongArray buckets; + + public EstimatedHistogram() { + this(90); + } + + public EstimatedHistogram(int bucketCount) { + bucketOffsets = newOffsets(bucketCount); + buckets = new AtomicLongArray(bucketOffsets.length + 1); + } + + public EstimatedHistogram(long[] offsets, long[] bucketData) { + assert bucketData.length == offsets.length + 1; + bucketOffsets = offsets; + buckets = new AtomicLongArray(bucketData); + } + + private static long[] newOffsets(int size) { + long[] result = new long[size]; + long last = 1; + result[0] = last; + for (int i = 1; i < size; i++) { + long next = Math.round(last * 1.2); + if (next == last) + next++; + result[i] = next; + last = next; + } + + return result; + } + + /** + * @return the histogram values corresponding to each bucket index + */ + public long[] getBucketOffsets() { + return bucketOffsets; + } + + /** + * Increments the count of the bucket closest to n, rounding UP. + * + * @param n + */ + public void add(long n) { + int index = Arrays.binarySearch(bucketOffsets, n); + if (index < 0) { + // inexact match, take the first bucket higher than n + index = -index - 1; + } + // else exact match; we're good + buckets.incrementAndGet(index); + } + + /** + * @return the count in the given bucket + */ + long get(int bucket) { + return buckets.get(bucket); + } + + /** + * @param reset + * zero out buckets afterwards if true + * @return a long[] containing the current histogram buckets + */ + public long[] getBuckets(boolean reset) { + final int len = buckets.length(); + long[] rv = new long[len]; + + if (reset) + for (int i = 0; i < len; i++) + rv[i] = buckets.getAndSet(i, 0L); + else + for (int i = 0; i < len; i++) + rv[i] = buckets.get(i); + + return rv; + } + + /** + * @return the smallest value that could have been added to this histogram + */ + public long min() { + for (int i = 0; i < buckets.length(); i++) { + if (buckets.get(i) > 0) + return i == 0 ? 0 : 1 + bucketOffsets[i - 1]; + } + return 0; + } + + /** + * @return the largest value that could have been added to this histogram. + * If the histogram overflowed, returns Long.MAX_VALUE. + */ + public long max() { + int lastBucket = buckets.length() - 1; + if (buckets.get(lastBucket) > 0) + return Long.MAX_VALUE; + + for (int i = lastBucket - 1; i >= 0; i--) { + if (buckets.get(i) > 0) + return bucketOffsets[i]; + } + return 0; + } + + /** + * @param percentile + * @return estimated value at given percentile + */ + public long percentile(double percentile) { + assert percentile >= 0 && percentile <= 1.0; + int lastBucket = buckets.length() - 1; + if (buckets.get(lastBucket) > 0) + throw new IllegalStateException( + "Unable to compute when histogram overflowed"); + + long pcount = (long) Math.floor(count() * percentile); + if (pcount == 0) + return 0; + + long elements = 0; + for (int i = 0; i < lastBucket; i++) { + elements += buckets.get(i); + if (elements >= pcount) + return bucketOffsets[i]; + } + return 0; + } + + /** + * @return the mean histogram value (average of bucket offsets, weighted by + * count) + * @throws IllegalStateException + * if any values were greater than the largest bucket threshold + */ + public long mean() { + int lastBucket = buckets.length() - 1; + if (buckets.get(lastBucket) > 0) + throw new IllegalStateException( + "Unable to compute ceiling for max when histogram overflowed"); + + long elements = 0; + long sum = 0; + for (int i = 0; i < lastBucket; i++) { + long bCount = buckets.get(i); + elements += bCount; + sum += bCount * bucketOffsets[i]; + } + + return (long) Math.ceil((double) sum / elements); + } + + /** + * @return the total number of non-zero values + */ + public long count() { + long sum = 0L; + for (int i = 0; i < buckets.length(); i++) + sum += buckets.get(i); + return sum; + } + + /** + * @return true if this histogram has overflowed -- that is, a value larger + * than our largest bucket could bound was added + */ + public boolean isOverflowed() { + return buckets.get(buckets.length() - 1) > 0; + } + + /** + * log.debug() every record in the histogram + * + * @param log + */ + public void log(Logger log) { + // only print overflow if there is any + int nameCount; + if (buckets.get(buckets.length() - 1) == 0) + nameCount = buckets.length() - 1; + else + nameCount = buckets.length(); + String[] names = new String[nameCount]; + + int maxNameLength = 0; + for (int i = 0; i < nameCount; i++) { + names[i] = nameOfRange(bucketOffsets, i); + maxNameLength = Math.max(maxNameLength, names[i].length()); + } + + // emit log records + String formatstr = "%" + maxNameLength + "s: %d"; + for (int i = 0; i < nameCount; i++) { + long count = buckets.get(i); + // sort-of-hack to not print empty ranges at the start that are only + // used to demarcate the + // first populated range. for code clarity we don't omit this record + // from the maxNameLength + // calculation, and accept the unnecessary whitespace prefixes that + // will occasionally occur + if (i == 0 && count == 0) + continue; + log.debug(String.format(formatstr, names[i], count)); + } + } + + private static String nameOfRange(long[] bucketOffsets, int index) { + StringBuilder sb = new StringBuilder(); + appendRange(sb, bucketOffsets, index); + return sb.toString(); + } + + private static void appendRange(StringBuilder sb, long[] bucketOffsets, + int index) { + sb.append("["); + if (index == 0) + if (bucketOffsets[0] > 0) + // by original definition, this histogram is for values greater + // than zero only; + // if values of 0 or less are required, an entry of lb-1 must be + // inserted at the start + sb.append("1"); + else + sb.append("-Inf"); + else + sb.append(bucketOffsets[index - 1] + 1); + sb.append(".."); + if (index == bucketOffsets.length) + sb.append("Inf"); + else + sb.append(bucketOffsets[index]); + sb.append("]"); + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + + if (!(o instanceof EstimatedHistogram)) + return false; + + EstimatedHistogram that = (EstimatedHistogram) o; + return Arrays.equals(getBucketOffsets(), that.getBucketOffsets()) + && Arrays.equals(getBuckets(false), that.getBuckets(false)); + } + + @Override + public int hashCode() { + return Objects.hashCode(getBucketOffsets(), getBuckets(false)); + } +}