Chris Vest 68795fb1a5 Introduce ByteIterator, and Buf.iterate
We need a simple API to efficiently iterate a buffer.
We've used the ByteProcessor so far, and while its internal iteration API is simple, it loses some efficiency by forcing code to only consider one byte at a time.

The ByteIterator fills the same niche as the ByteProcessor, but uses external iteration instead of internal iteration.
This allows integrators to control the pace of iteration, and it makes it possible to expose methods for consuming bytes in bulk; one long of 8 bytes at a time.
This makes it possible to use the iterator in SIMD-Within-A-Register, or SWAR, data processing algorithms.

We have a ByteIterator for efficiently processing data within a buffer.
2020-11-17 15:26:57 +01:00

121 lines
3.7 KiB

* Copyright 2019 The Netty Project
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
package io.netty.buffer;
import io.netty.buffer.b2.Allocator;
import io.netty.buffer.b2.Buf;
import io.netty.microbench.util.AbstractMicrobenchmark;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import java.util.concurrent.TimeUnit;
@Warmup(iterations = 5, time = 1500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
public class MemSegBufAccessBenchmark extends AbstractMicrobenchmark {
/**
 * Selects which kind of {@link Buf} the benchmark operates on.
 * Every constant must supply its own {@link #newBuffer()}, since that method is declared abstract.
 *
 * NOTE(review): the constant names and several braces are not visible in this listing (lines
 * appear to have been dropped during extraction) — restore them from the original file before
 * relying on the structure shown here.
 */
public enum BBufType {
Buf newBuffer() {
// Disabled variant, kept for reference: a slice of the UNSAFE buffer.
// @Override
// Buf newBuffer() {
// return UNSAFE.newBuffer().slice(16, 48);
// }
// },
Buf newBuffer() {
// Buffer obtained from the heap allocator with a size argument of 64 (presumably bytes — confirm).
return Allocator.heap().allocate(64);
// Disabled variant, kept for reference: a wrapped pair of UNSAFE and HEAP buffers.
// @Override
// Buf newBuffer() {
// return Unpooled.wrappedBuffer(UNSAFE.newBuffer(), HEAP.newBuffer());
// }
// },
// Disabled variant, kept for reference: an NIO-backed facade.
// NIO {
// @Override
// Buf newBuffer() {
// return new NioFacade(BBuffer.allocateDirect(64));
// }
// }
/**
 * Creates a fresh buffer of the kind represented by this constant.
 *
 * @return the newly created buffer
 */
abstract Buf newBuffer();
/**
 * Kind of buffer under test; {@code setup()} calls its {@code newBuffer()} to create the buffer.
 * NOTE(review): presumably a JMH {@code @Param} field whose annotation is missing from this listing — confirm.
 */
public BBufType bufferType;
/** Number of {@code readByte()} calls made per {@code readBatch()} invocation; the only declared value is 8. */
@Param({ "8" })
public int batchSize; // applies only to readBatch benchmark
/** Creates the buffer under test from the selected {@link BBufType}. */
public void setup() {
buffer = bufferType.newBuffer();
/** Buffer used by every benchmark method in this class; assigned in {@code setup()}. */
private Buf buffer;
// NOTE(review): the body of tearDown() is not visible in this listing; presumably it releases
// the buffer created in setup() — confirm against the original file.
public void tearDown() {
/**
 * Calls {@code writeLong(0, 1)} on the buffer, then {@code readLong(0)} on the result.
 * NOTE(review): presumably the first argument is an offset and 1 is the value written — confirm against the Buf API.
 *
 * @return the long obtained from {@code readLong(0)}
 */
public long setGetLong() {
return buffer.writeLong(0, 1).readLong(0);
/**
 * Calls {@code writeLong(0, 1)} on the buffer.
 * NOTE(review): presumably the first argument is an offset and 1 is the value written — confirm against the Buf API.
 *
 * @return whatever {@code writeLong} returns, so the call has a result for the benchmark to hand back
 */
public Buf setLong() {
return buffer.writeLong(0, 1);
/**
 * Calls {@code buffer.readByte()} {@code batchSize} times in a row and adds up the results.
 * NOTE(review): no reset of the buffer's read position is visible in this listing — confirm
 * against the original file whether one precedes the loop.
 *
 * @return the sum of the bytes read
 */
public int readBatch() {
int result = 0;
// Please do not replace this sum loop with a Blackhole::consume loop:
// Blackhole::consume could prevent the JVM from performing certain optimizations,
// forcing Buf::readByte to be executed in order.
// The purpose of the benchmark is to mimic accesses on a buffer
// as in a real (single-threaded) case, i.e. without (compiler) memory barriers that would
// disable certain optimizations or would make bounds checks (if enabled)
// happen on each access.
for (int i = 0, size = batchSize; i < size; i++) {
result += buffer.readByte();
return result;