scylla-jmx/src/main/java/com/scylladb/jmx/utils/EstimatedHistogram.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one

 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Copyright 2015 Cloudius Systems
 *
 * Modified by Cloudius Systems
 */

package com.scylladb.jmx.utils;

import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;

import com.google.common.base.Objects;

import org.slf4j.Logger;

public class EstimatedHistogram {
    /**
     * The series of values to which the counts in `buckets` correspond: 1, 2,
     * 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc. Thus, a `buckets` of [0, 0, 1,
     * 10] would mean we had seen one value of 3 and 10 values of 4.
     *
     * The series starts at 1 and grows by 1.2 each time (rounding and removing
     * duplicates). It goes from 1 to around 36M by default (creating 90+1
     * buckets), which will give us timing resolution from microseconds to 36
     * seconds, with less precision as the numbers get larger.
     *
     * Each bucket represents values from (previous bucket offset, current
     * offset].
     */
    private final long[] bucketOffsets;

    // buckets is one element longer than bucketOffsets -- the last element is
    // values greater than the last offset
    final AtomicLongArray buckets;

    public EstimatedHistogram() {
        this(90);
    }

    public EstimatedHistogram(int bucketCount) {
        bucketOffsets = newOffsets(bucketCount);
        buckets = new AtomicLongArray(bucketOffsets.length + 1);
    }

    public EstimatedHistogram(long[] offsets, long[] bucketData) {
        assert bucketData.length == offsets.length + 1;
        bucketOffsets = offsets;
        buckets = new AtomicLongArray(bucketData);
    }


    public EstimatedHistogram(long[] bucketData) {
        bucketOffsets = newOffsets(bucketData.length - 1);
        buckets = new AtomicLongArray(bucketData);
    }

    private static long[] newOffsets(int size) {
        if (size <= 0) {
            return new long[0];
        }
        long[] result = new long[size];
        long last = 1;
        result[0] = last;
        for (int i = 1; i < size; i++) {
            long next = Math.round(last * 1.2);
            if (next == last)
                next++;
            result[i] = next;
            last = next;
        }

        return result;
    }

    /**
     * @return the histogram values corresponding to each bucket index
     */
    public long[] getBucketOffsets() {
        return bucketOffsets;
    }

    /**
     * Increments the count of the bucket closest to n, rounding UP.
     * 
     * @param n
     */
    public void add(long n) {
        int index = Arrays.binarySearch(bucketOffsets, n);
        if (index < 0) {
            // inexact match, take the first bucket higher than n
            index = -index - 1;
        }
        // else exact match; we're good
        buckets.incrementAndGet(index);
    }

    /**
     * @return the count in the given bucket
     */
    long get(int bucket) {
        return buckets.get(bucket);
    }

    /**
     * @param reset
     *            zero out buckets afterwards if true
     * @return a long[] containing the current histogram buckets
     */
    public long[] getBuckets(boolean reset) {
        final int len = buckets.length();
        long[] rv = new long[len];

        if (reset)
            for (int i = 0; i < len; i++)
                rv[i] = buckets.getAndSet(i, 0L);
        else
            for (int i = 0; i < len; i++)
                rv[i] = buckets.get(i);

        return rv;
    }

    /**
     * @return the smallest value that could have been added to this histogram
     */
    public long min() {
        for (int i = 0; i < buckets.length(); i++) {
            if (buckets.get(i) > 0)
                return i == 0 ? 0 : 1 + bucketOffsets[i - 1];
        }
        return 0;
    }

    /**
     * @return the largest value that could have been added to this histogram.
     *         If the histogram overflowed, returns Long.MAX_VALUE.
     */
    public long max() {
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0)
            return Long.MAX_VALUE;

        for (int i = lastBucket - 1; i >= 0; i--) {
            if (buckets.get(i) > 0)
                return bucketOffsets[i];
        }
        return 0;
    }

    /**
     * @param percentile
     * @return estimated value at given percentile
     */
    public long percentile(double percentile) {
        assert percentile >= 0 && percentile <= 1.0;
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0)
            throw new IllegalStateException(
                    "Unable to compute when histogram overflowed");

        long pcount = (long) Math.floor(count() * percentile);
        if (pcount == 0)
            return 0;

        long elements = 0;
        for (int i = 0; i < lastBucket; i++) {
            elements += buckets.get(i);
            if (elements >= pcount)
                return bucketOffsets[i];
        }
        return 0;
    }

    /**
     * @return the mean histogram value (average of bucket offsets, weighted by
     *         count)
     * @throws IllegalStateException
     *             if any values were greater than the largest bucket threshold
     */
    public long mean() {
        int lastBucket = buckets.length() - 1;
        if (buckets.get(lastBucket) > 0)
            throw new IllegalStateException(
                    "Unable to compute ceiling for max when histogram overflowed");

        long elements = 0;
        long sum = 0;
        for (int i = 0; i < lastBucket; i++) {
            long bCount = buckets.get(i);
            elements += bCount;
            sum += bCount * bucketOffsets[i];
        }

        return (long) Math.ceil((double) sum / elements);
    }

    /**
     * @return the total number of non-zero values
     */
    public long count() {
        long sum = 0L;
        for (int i = 0; i < buckets.length(); i++)
            sum += buckets.get(i);
        return sum;
    }

    /**
     * @return true if this histogram has overflowed -- that is, a value larger
     *         than our largest bucket could bound was added
     */
    public boolean isOverflowed() {
        return buckets.get(buckets.length() - 1) > 0;
    }

    /**
     * log.debug() every record in the histogram
     *
     * @param log
     */
    public void log(Logger log) {
        // only print overflow if there is any
        int nameCount;
        if (buckets.get(buckets.length() - 1) == 0)
            nameCount = buckets.length() - 1;
        else
            nameCount = buckets.length();
        String[] names = new String[nameCount];

        int maxNameLength = 0;
        for (int i = 0; i < nameCount; i++) {
            names[i] = nameOfRange(bucketOffsets, i);
            maxNameLength = Math.max(maxNameLength, names[i].length());
        }

        // emit log records
        String formatstr = "%" + maxNameLength + "s: %d";
        for (int i = 0; i < nameCount; i++) {
            long count = buckets.get(i);
            // sort-of-hack to not print empty ranges at the start that are only
            // used to demarcate the
            // first populated range. for code clarity we don't omit this record
            // from the maxNameLength
            // calculation, and accept the unnecessary whitespace prefixes that
            // will occasionally occur
            if (i == 0 && count == 0)
                continue;
            log.debug(String.format(formatstr, names[i], count));
        }
    }

    private static String nameOfRange(long[] bucketOffsets, int index) {
        StringBuilder sb = new StringBuilder();
        appendRange(sb, bucketOffsets, index);
        return sb.toString();
    }

    private static void appendRange(StringBuilder sb, long[] bucketOffsets,
            int index) {
        sb.append("[");
        if (index == 0)
            if (bucketOffsets[0] > 0)
                // by original definition, this histogram is for values greater
                // than zero only;
                // if values of 0 or less are required, an entry of lb-1 must be
                // inserted at the start
                sb.append("1");
            else
                sb.append("-Inf");
        else
            sb.append(bucketOffsets[index - 1] + 1);
        sb.append("..");
        if (index == bucketOffsets.length)
            sb.append("Inf");
        else
            sb.append(bucketOffsets[index]);
        sb.append("]");
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;

        if (!(o instanceof EstimatedHistogram))
            return false;

        EstimatedHistogram that = (EstimatedHistogram) o;
        return Arrays.equals(getBucketOffsets(), that.getBucketOffsets())
                && Arrays.equals(getBuckets(false), that.getBuckets(false));
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(getBucketOffsets(), getBuckets(false));
    }
}
Importing the EstimateHistogram from origin 2015-06-30 17:22:45 +02:00			`/*`
			`* Licensed to the Apache Software Foundation (ASF) under one`

			`* or more contributor license agreements. See the NOTICE file`
			`* distributed with this work for additional information`
			`* regarding copyright ownership. The ASF licenses this file`
			`* to you under the Apache License, Version 2.0 (the`
			`* "License"); you may not use this file except in compliance`
			`* with the License. You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`/*`
			`* Copyright 2015 Cloudius Systems`
			`*`
			`* Modified by Cloudius Systems`
			`*/`

Rename "com.cloudius.urchin" package to "com.scylladb.jmx" Move the Scylla JMX code under "com.scylladb.jmx" package. Signed-off-by: Pekka Enberg <penberg@scylladb.com> 2015-12-17 08:26:19 +01:00			`package com.scylladb.jmx.utils;`
Importing the EstimateHistogram from origin 2015-06-30 17:22:45 +02:00
			`import java.util.Arrays;`
			`import java.util.concurrent.atomic.AtomicLongArray;`

			`import com.google.common.base.Objects;`

			`import org.slf4j.Logger;`

			`public class EstimatedHistogram {`
			`/**`
			* The series of values to which the counts in `buckets` correspond: 1, 2,
			* 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc. Thus, a `buckets` of [0, 0, 1,
			`* 10] would mean we had seen one value of 3 and 10 values of 4.`
			`*`
			`* The series starts at 1 and grows by 1.2 each time (rounding and removing`
			`* duplicates). It goes from 1 to around 36M by default (creating 90+1`
			`* buckets), which will give us timing resolution from microseconds to 36`
			`* seconds, with less precision as the numbers get larger.`
			`*`
			`* Each bucket represents values from (previous bucket offset, current`
			`* offset].`
			`*/`
			`private final long[] bucketOffsets;`

			`// buckets is one element longer than bucketOffsets -- the last element is`
			`// values greater than the last offset`
			`final AtomicLongArray buckets;`

			`public EstimatedHistogram() {`
			`this(90);`
			`}`

			`public EstimatedHistogram(int bucketCount) {`
			`bucketOffsets = newOffsets(bucketCount);`
			`buckets = new AtomicLongArray(bucketOffsets.length + 1);`
			`}`

			`public EstimatedHistogram(long[] offsets, long[] bucketData) {`
			`assert bucketData.length == offsets.length + 1;`
			`bucketOffsets = offsets;`
			`buckets = new AtomicLongArray(bucketData);`
EstimatedHistogram: Add constructor from data This patch allows to create an EstimatedHistogram from an array of data value. It will be used by the APIClient to return EstimatedHistogram Signed-off-by: Amnon Heiman <amnon@cloudius-systems.com> 2015-10-21 10:34:15 +02:00			`}`


			`public EstimatedHistogram(long[] bucketData) {`
			`bucketOffsets = newOffsets(bucketData.length - 1);`
			`buckets = new AtomicLongArray(bucketData);`
Importing the EstimateHistogram from origin 2015-06-30 17:22:45 +02:00			`}`

			`private static long[] newOffsets(int size) {`
EstimatedHistogram: Support empty histogram When creating an estimated histogram from buckets it is a valid option to get a zero size array as the buckets array. In that case the newOffsets method would get a negative value for its size, which should result in a zero length array of offsets. Signed-off-by: Amnon Heiman <amnon@scylladb.com> 2015-11-19 10:52:17 +01:00			`if (size <= 0) {`
			`return new long[0];`
			`}`
Importing the EstimateHistogram from origin 2015-06-30 17:22:45 +02:00			`long[] result = new long[size];`
			`long last = 1;`
			`result[0] = last;`
			`for (int i = 1; i < size; i++) {`
			`long next = Math.round(last * 1.2);`
			`if (next == last)`
			`next++;`
			`result[i] = next;`
			`last = next;`
			`}`

			`return result;`
			`}`

			`/**`
			`* @return the histogram values corresponding to each bucket index`
			`*/`
			`public long[] getBucketOffsets() {`
			`return bucketOffsets;`
			`}`

			`/**`
			`* Increments the count of the bucket closest to n, rounding UP.`
			`*`
			`* @param n`
			`*/`
			`public void add(long n) {`
			`int index = Arrays.binarySearch(bucketOffsets, n);`
			`if (index < 0) {`
			`// inexact match, take the first bucket higher than n`
			`index = -index - 1;`
			`}`
			`// else exact match; we're good`
			`buckets.incrementAndGet(index);`
			`}`

			`/**`
			`* @return the count in the given bucket`
			`*/`
			`long get(int bucket) {`
			`return buckets.get(bucket);`
			`}`

			`/**`
			`* @param reset`
			`* zero out buckets afterwards if true`
			`* @return a long[] containing the current histogram buckets`
			`*/`
			`public long[] getBuckets(boolean reset) {`
			`final int len = buckets.length();`
			`long[] rv = new long[len];`

			`if (reset)`
			`for (int i = 0; i < len; i++)`
			`rv[i] = buckets.getAndSet(i, 0L);`
			`else`
			`for (int i = 0; i < len; i++)`
			`rv[i] = buckets.get(i);`

			`return rv;`
			`}`

			`/**`
			`* @return the smallest value that could have been added to this histogram`
			`*/`
			`public long min() {`
			`for (int i = 0; i < buckets.length(); i++) {`
			`if (buckets.get(i) > 0)`
			`return i == 0 ? 0 : 1 + bucketOffsets[i - 1];`
			`}`
			`return 0;`
			`}`

			`/**`
			`* @return the largest value that could have been added to this histogram.`
			`* If the histogram overflowed, returns Long.MAX_VALUE.`
			`*/`
			`public long max() {`
			`int lastBucket = buckets.length() - 1;`
			`if (buckets.get(lastBucket) > 0)`
			`return Long.MAX_VALUE;`

			`for (int i = lastBucket - 1; i >= 0; i--) {`
			`if (buckets.get(i) > 0)`
			`return bucketOffsets[i];`
			`}`
			`return 0;`
			`}`

			`/**`
			`* @param percentile`
			`* @return estimated value at given percentile`
			`*/`
			`public long percentile(double percentile) {`
			`assert percentile >= 0 && percentile <= 1.0;`
			`int lastBucket = buckets.length() - 1;`
			`if (buckets.get(lastBucket) > 0)`
			`throw new IllegalStateException(`
			`"Unable to compute when histogram overflowed");`

			`long pcount = (long) Math.floor(count() * percentile);`
			`if (pcount == 0)`
			`return 0;`

			`long elements = 0;`
			`for (int i = 0; i < lastBucket; i++) {`
			`elements += buckets.get(i);`
			`if (elements >= pcount)`
			`return bucketOffsets[i];`
			`}`
			`return 0;`
			`}`

			`/**`
			`* @return the mean histogram value (average of bucket offsets, weighted by`
			`* count)`
			`* @throws IllegalStateException`
			`* if any values were greater than the largest bucket threshold`
			`*/`
			`public long mean() {`
			`int lastBucket = buckets.length() - 1;`
			`if (buckets.get(lastBucket) > 0)`
			`throw new IllegalStateException(`
			`"Unable to compute ceiling for max when histogram overflowed");`

			`long elements = 0;`
			`long sum = 0;`
			`for (int i = 0; i < lastBucket; i++) {`
			`long bCount = buckets.get(i);`
			`elements += bCount;`
			`sum += bCount * bucketOffsets[i];`
			`}`

			`return (long) Math.ceil((double) sum / elements);`
			`}`

			`/**`
			`* @return the total number of non-zero values`
			`*/`
			`public long count() {`
			`long sum = 0L;`
			`for (int i = 0; i < buckets.length(); i++)`
			`sum += buckets.get(i);`
			`return sum;`
			`}`

			`/**`
			`* @return true if this histogram has overflowed -- that is, a value larger`
			`* than our largest bucket could bound was added`
			`*/`
			`public boolean isOverflowed() {`
			`return buckets.get(buckets.length() - 1) > 0;`
			`}`

			`/**`
			`* log.debug() every record in the histogram`
			`*`
			`* @param log`
			`*/`
			`public void log(Logger log) {`
			`// only print overflow if there is any`
			`int nameCount;`
			`if (buckets.get(buckets.length() - 1) == 0)`
			`nameCount = buckets.length() - 1;`
			`else`
			`nameCount = buckets.length();`
			`String[] names = new String[nameCount];`

			`int maxNameLength = 0;`
			`for (int i = 0; i < nameCount; i++) {`
			`names[i] = nameOfRange(bucketOffsets, i);`
			`maxNameLength = Math.max(maxNameLength, names[i].length());`
			`}`

			`// emit log records`
			`String formatstr = "%" + maxNameLength + "s: %d";`
			`for (int i = 0; i < nameCount; i++) {`
			`long count = buckets.get(i);`
			`// sort-of-hack to not print empty ranges at the start that are only`
			`// used to demarcate the`
			`// first populated range. for code clarity we don't omit this record`
			`// from the maxNameLength`
			`// calculation, and accept the unnecessary whitespace prefixes that`
			`// will occasionally occur`
			`if (i == 0 && count == 0)`
			`continue;`
			`log.debug(String.format(formatstr, names[i], count));`
			`}`
			`}`

			`private static String nameOfRange(long[] bucketOffsets, int index) {`
			`StringBuilder sb = new StringBuilder();`
			`appendRange(sb, bucketOffsets, index);`
			`return sb.toString();`
			`}`

			`private static void appendRange(StringBuilder sb, long[] bucketOffsets,`
			`int index) {`
			`sb.append("[");`
			`if (index == 0)`
			`if (bucketOffsets[0] > 0)`
			`// by original definition, this histogram is for values greater`
			`// than zero only;`
			`// if values of 0 or less are required, an entry of lb-1 must be`
			`// inserted at the start`
			`sb.append("1");`
			`else`
			`sb.append("-Inf");`
			`else`
			`sb.append(bucketOffsets[index - 1] + 1);`
			`sb.append("..");`
			`if (index == bucketOffsets.length)`
			`sb.append("Inf");`
			`else`
			`sb.append(bucketOffsets[index]);`
			`sb.append("]");`
			`}`

			`@Override`
			`public boolean equals(Object o) {`
			`if (this == o)`
			`return true;`

			`if (!(o instanceof EstimatedHistogram))`
			`return false;`

			`EstimatedHistogram that = (EstimatedHistogram) o;`
			`return Arrays.equals(getBucketOffsets(), that.getBucketOffsets())`
			`&& Arrays.equals(getBuckets(false), that.getBuckets(false));`
			`}`

			`@Override`
			`public int hashCode() {`
			`return Objects.hashCode(getBucketOffsets(), getBuckets(false));`
			`}`
			`}`