libicsneo/include/icsneo/third-party/readerwriterqueue/benchmarks/bench.cpp

435 lines
14 KiB
C++

// ©2013-2015 Cameron Desrochers.
// Distributed under the simplified BSD license (see the LICENSE file that
// should have come with this file).
// Benchmarks for moodycamel::ReaderWriterQueue.
#if defined(_MSC_VER) && _MSC_VER < 1700
#define NO_FOLLY_SUPPORT
#endif
#if !defined(__amd64__) && !defined(_M_X64) && !defined(__x86_64__) && !defined(_M_IX86) && !defined(__i386__)
#define NO_SPSC_SUPPORT // SPSC implementation is for x86 only
#endif
#include "ext/1024cores/spscqueue.h" // Dmitry's (on Intel site)
#ifndef NO_FOLLY_SUPPORT
#include "ext/folly/ProducerConsumerQueue.h" // Facebook's folly (GitHub)
#endif
#include "../readerwriterqueue.h" // Mine
#include "systemtime.h"
#include "../tests/common/simplethread.h"
#include <iostream>
#include <iomanip>
#include <numeric> // For std::accumulate
#include <algorithm>
#include <random>
#include <ctime>
#ifndef UNUSED
#define UNUSED(x) ((void)x);
#endif
using namespace moodycamel;
#ifndef NO_FOLLY_SUPPORT
using namespace folly;
#endif
typedef std::minstd_rand RNG_t;
enum BenchmarkType {
bench_raw_add,
bench_raw_remove,
bench_empty_remove,
bench_single_threaded,
bench_mostly_add,
bench_mostly_remove,
bench_heavy_concurrent,
bench_random_concurrent,
BENCHMARK_COUNT
};
// Returns the number of seconds elapsed (high-precision), and the number of enqueue/dequeue
// operations performed (in the out_Ops parameter)
template<typename TQueue>
double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& out_Ops);
const int BENCHMARK_NAME_MAX = 17; // Not including null terminator
const char* benchmarkName(BenchmarkType benchmark);
int main(int argc, char** argv)
{
#ifdef NDEBUG
const int TEST_COUNT = 25;
#else
const int TEST_COUNT = 2;
#endif
assert(TEST_COUNT >= 2);
const double FASTEST_PERCENT_CONSIDERED = 20; // Consider only the fastest runs in the top 20%
double rwqResults[BENCHMARK_COUNT][TEST_COUNT];
double spscResults[BENCHMARK_COUNT][TEST_COUNT];
double follyResults[BENCHMARK_COUNT][TEST_COUNT];
// Also calculate a rough heuristic of "ops/s" (across all runs, not just fastest)
double rwqOps[BENCHMARK_COUNT][TEST_COUNT];
double spscOps[BENCHMARK_COUNT][TEST_COUNT];
double follyOps[BENCHMARK_COUNT][TEST_COUNT];
// Make sure the randomness of each benchmark run is identical
unsigned int randSeeds[BENCHMARK_COUNT];
for (unsigned int i = 0; i != BENCHMARK_COUNT; ++i) {
randSeeds[i] = ((unsigned int)time(NULL)) * i;
}
// Run benchmarks
for (int benchmark = 0; benchmark < BENCHMARK_COUNT; ++benchmark) {
for (int i = 0; i < TEST_COUNT; ++i) {
rwqResults[benchmark][i] = runBenchmark<ReaderWriterQueue<int>>((BenchmarkType)benchmark, randSeeds[benchmark], rwqOps[benchmark][i]);
}
#ifndef NO_SPSC_SUPPORT
for (int i = 0; i < TEST_COUNT; ++i) {
spscResults[benchmark][i] = runBenchmark<spsc_queue<int>>((BenchmarkType)benchmark, randSeeds[benchmark], spscOps[benchmark][i]);
}
#else
for (int i = 0; i < TEST_COUNT; ++i) {
spscResults[benchmark][i] = 0;
spscOps[benchmark][i] = 0;
}
#endif
#ifndef NO_FOLLY_SUPPORT
for (int i = 0; i < TEST_COUNT; ++i) {
follyResults[benchmark][i] = runBenchmark<ProducerConsumerQueue<int>>((BenchmarkType)benchmark, randSeeds[benchmark], follyOps[benchmark][i]);
}
#else
for (int i = 0; i < TEST_COUNT; ++i) {
follyResults[benchmark][i] = 0;
follyOps[benchmark][i] = 0;
}
#endif
}
// Sort results
for (int benchmark = 0; benchmark < BENCHMARK_COUNT; ++benchmark) {
std::sort(&rwqResults[benchmark][0], &rwqResults[benchmark][0] + TEST_COUNT);
std::sort(&spscResults[benchmark][0], &spscResults[benchmark][0] + TEST_COUNT);
std::sort(&follyResults[benchmark][0], &follyResults[benchmark][0] + TEST_COUNT);
}
// Display results
int max = std::max(2, (int)(TEST_COUNT * FASTEST_PERCENT_CONSIDERED / 100));
assert(max > 0);
#ifdef NO_SPSC_SUPPORT
std::cout << "Note: SPSC queue not supported on this platform, discount its timings" << std::endl;
#endif
#ifdef NO_FOLLY_SUPPORT
std::cout << "Note: Folly queue not supported by this compiler, discount its timings" << std::endl;
#endif
std::cout << std::setw(BENCHMARK_NAME_MAX) << " " << " |----------- Min ------------|------------ Max ------------|------------ Avg ------------|\n";
std::cout << std::left << std::setw(BENCHMARK_NAME_MAX) << "Benchmark" << " | RWQ | SPSC | Folly | RWQ | SPSC | Folly | RWQ | SPSC | Folly | xSPSC | xFolly\n";
std::cout.fill('-');
std::cout << std::setw(BENCHMARK_NAME_MAX) << "---------" << "-+---------+---------+---------+---------+---------+---------+---------+---------+---------+-------+-------\n";
std::cout.fill(' ');
double rwqOpsPerSec = 0, spscOpsPerSec = 0, follyOpsPerSec = 0;
int opTimedBenchmarks = 0;
for (int benchmark = 0; benchmark < BENCHMARK_COUNT; ++benchmark) {
double rwqMin = rwqResults[benchmark][0], rwqMax = rwqResults[benchmark][max - 1];
double spscMin = spscResults[benchmark][0], spscMax = spscResults[benchmark][max - 1];
double follyMin = follyResults[benchmark][0], follyMax = follyResults[benchmark][max - 1];
double rwqAvg = std::accumulate(&rwqResults[benchmark][0], &rwqResults[benchmark][0] + max, 0.0) / max;
double spscAvg = std::accumulate(&spscResults[benchmark][0], &spscResults[benchmark][0] + max, 0.0) / max;
double follyAvg = std::accumulate(&follyResults[benchmark][0], &follyResults[benchmark][0] + max, 0.0) / max;
double spscMult = rwqAvg < 0.00001 ? 0 : spscAvg / rwqAvg;
double follyMult = follyAvg < 0.00001 ? 0 : follyAvg / rwqAvg;
if (rwqResults[benchmark][0] != -1) {
double rwqTotalAvg = std::accumulate(&rwqResults[benchmark][0], &rwqResults[benchmark][0] + TEST_COUNT, 0.0) / TEST_COUNT;
double spscTotalAvg = std::accumulate(&spscResults[benchmark][0], &spscResults[benchmark][0] + TEST_COUNT, 0.0) / TEST_COUNT;
double follyTotalAvg = std::accumulate(&follyResults[benchmark][0], &follyResults[benchmark][0] + TEST_COUNT, 0.0) / TEST_COUNT;
rwqOpsPerSec += rwqTotalAvg == 0 ? 0 : std::accumulate(&rwqOps[benchmark][0], &rwqOps[benchmark][0] + TEST_COUNT, 0.0) / TEST_COUNT / rwqTotalAvg;
spscOpsPerSec += spscTotalAvg == 0 ? 0 : std::accumulate(&spscOps[benchmark][0], &spscOps[benchmark][0] + TEST_COUNT, 0.0) / TEST_COUNT / spscTotalAvg;
follyOpsPerSec += follyTotalAvg == 0 ? 0 : std::accumulate(&follyOps[benchmark][0], &follyOps[benchmark][0] + TEST_COUNT, 0.0) / TEST_COUNT / follyTotalAvg;
++opTimedBenchmarks;
}
std::cout
<< std::left << std::setw(BENCHMARK_NAME_MAX) << benchmarkName((BenchmarkType)benchmark) << " | "
<< std::fixed << std::setprecision(4) << rwqMin << "s | "
<< std::fixed << std::setprecision(4) << spscMin << "s | "
<< std::fixed << std::setprecision(4) << follyMin << "s | "
<< std::fixed << std::setprecision(4) << rwqMax << "s | "
<< std::fixed << std::setprecision(4) << spscMax << "s | "
<< std::fixed << std::setprecision(4) << follyMax << "s | "
<< std::fixed << std::setprecision(4) << rwqAvg << "s | "
<< std::fixed << std::setprecision(4) << spscAvg << "s | "
<< std::fixed << std::setprecision(4) << follyAvg << "s | "
<< std::fixed << std::setprecision(2) << spscMult << "x | "
<< std::fixed << std::setprecision(2) << follyMult << "x"
<< "\n"
;
}
rwqOpsPerSec /= opTimedBenchmarks;
spscOpsPerSec /= opTimedBenchmarks;
follyOpsPerSec /= opTimedBenchmarks;
std::cout
<< "\nAverage ops/s:\n"
<< " ReaderWriterQueue: " << std::fixed << std::setprecision(2) << rwqOpsPerSec / 1000000 << " million\n"
<< " SPSC queue: " << std::fixed << std::setprecision(2) << spscOpsPerSec / 1000000 << " million\n"
<< " Folly queue: " << std::fixed << std::setprecision(2) << follyOpsPerSec / 1000000 << " million\n"
;
std::cout << std::endl;
return 0;
}
template<typename TQueue>
double runBenchmark(BenchmarkType benchmark, unsigned int randomSeed, double& out_Ops)
{
typedef unsigned long long counter_t;
SystemTime start;
double result = 0;
volatile int forceNoOptimizeDummy;
switch (benchmark) {
case bench_raw_add: {
const counter_t MAX = 100 * 1000;
out_Ops = MAX;
TQueue q(MAX);
int num = 0;
start = getSystemTime();
for (counter_t i = 0; i != MAX; ++i) {
q.enqueue(num);
++num;
}
result = getTimeDelta(start);
int temp = -1;
q.try_dequeue(temp);
forceNoOptimizeDummy = temp;
} break;
case bench_raw_remove: {
const counter_t MAX = 100 * 1000;
out_Ops = MAX;
TQueue q(MAX);
int num = 0;
for (counter_t i = 0; i != MAX; ++i) {
q.enqueue(num);
++num;
}
int element = -1;
int total = 0;
num = 0;
start = getSystemTime();
for (counter_t i = 0; i != MAX; ++i) {
bool success = q.try_dequeue(element);
assert(success && num++ == element);
UNUSED(success);
total += element;
}
result = getTimeDelta(start);
assert(!q.try_dequeue(element));
forceNoOptimizeDummy = total;
} break;
case bench_empty_remove: {
const counter_t MAX = 2000 * 1000;
out_Ops = MAX;
TQueue q(MAX);
int total = 0;
start = getSystemTime();
SimpleThread consumer([&]() {
int element;
for (counter_t i = 0; i != MAX; ++i) {
if (q.try_dequeue(element)) {
total += element;
}
}
});
SimpleThread producer([&]() {
int num = 0;
for (counter_t i = 0; i != MAX / 2; ++i) {
if ((i & 32767) == 0) { // Just to make sure the loops aren't optimized out entirely
q.enqueue(num);
++num;
}
}
});
producer.join();
consumer.join();
result = getTimeDelta(start);
forceNoOptimizeDummy = total;
} break;
case bench_single_threaded: {
const counter_t MAX = 200 * 1000;
out_Ops = MAX;
RNG_t rng(randomSeed);
std::uniform_int_distribution<int> rand(0, 1);
TQueue q(MAX);
int num = 0;
int element = -1;
start = getSystemTime();
for (counter_t i = 0; i != MAX; ++i) {
if (rand(rng) == 1) {
q.enqueue(num);
++num;
}
else {
q.try_dequeue(element);
}
}
result = getTimeDelta(start);
forceNoOptimizeDummy = (int)(q.try_dequeue(element));
} break;
case bench_mostly_add: {
const counter_t MAX = 1200 * 1000;
out_Ops = MAX;
int readOps = 0;
RNG_t rng(randomSeed);
std::uniform_int_distribution<int> rand(0, 3);
TQueue q(MAX);
int element = -1;
start = getSystemTime();
SimpleThread consumer([&]() {
for (counter_t i = 0; i != MAX / 10; ++i) {
if (rand(rng) == 0) {
q.try_dequeue(element);
++readOps;
}
}
});
SimpleThread producer([&]() {
int num = 0;
for (counter_t i = 0; i != MAX; ++i) {
q.enqueue(num);
++num;
}
});
producer.join();
consumer.join();
result = getTimeDelta(start);
forceNoOptimizeDummy = (int)(q.try_dequeue(element));
out_Ops += readOps;
} break;
case bench_mostly_remove: {
const counter_t MAX = 1200 * 1000;
out_Ops = MAX;
int writeOps = 0;
RNG_t rng(randomSeed);
std::uniform_int_distribution<int> rand(0, 3);
TQueue q(MAX);
int element = -1;
start = getSystemTime();
SimpleThread consumer([&]() {
for (counter_t i = 0; i != MAX; ++i) {
q.try_dequeue(element);
}
});
SimpleThread producer([&]() {
int num = 0;
for (counter_t i = 0; i != MAX / 10; ++i) {
if (rand(rng) == 0) {
q.enqueue(num);
++num;
}
}
writeOps = num;
});
producer.join();
consumer.join();
result = getTimeDelta(start);
forceNoOptimizeDummy = (int)(q.try_dequeue(element));
out_Ops += writeOps;
} break;
case bench_heavy_concurrent: {
const counter_t MAX = 1000 * 1000;
out_Ops = MAX * 2;
TQueue q(MAX);
int element = -1;
start = getSystemTime();
SimpleThread consumer([&]() {
for (counter_t i = 0; i != MAX; ++i) {
q.try_dequeue(element);
}
});
SimpleThread producer([&]() {
int num = 0;
for (counter_t i = 0; i != MAX; ++i) {
q.enqueue(num);
++num;
}
});
producer.join();
consumer.join();
result = getTimeDelta(start);
forceNoOptimizeDummy = (int)(q.try_dequeue(element));
} break;
case bench_random_concurrent: {
const counter_t MAX = 800 * 1000;
int readOps = 0, writeOps = 0;
TQueue q(MAX);
int element = -1;
start = getSystemTime();
SimpleThread consumer([&]() {
RNG_t rng(randomSeed);
std::uniform_int_distribution<int> rand(0, 15);
for (counter_t i = 0; i != MAX; ++i) {
if (rand(rng) == 0) {
q.try_dequeue(element);
++readOps;
}
}
});
SimpleThread producer([&]() {
RNG_t rng(randomSeed * 3 - 1);
std::uniform_int_distribution<int> rand(0, 15);
int num = 0;
for (counter_t i = 0; i != MAX; ++i) {
if (rand(rng) == 0) {
q.enqueue(num);
++num;
}
}
writeOps = num;
});
producer.join();
consumer.join();
result = getTimeDelta(start);
forceNoOptimizeDummy = (int)(q.try_dequeue(element));
out_Ops = readOps + writeOps;
} break;
default:
assert(false);
out_Ops = 0;
return 0;
}
UNUSED(forceNoOptimizeDummy);
return result / 1000.0;
}
const char* benchmarkName(BenchmarkType benchmark)
{
switch (benchmark) {
case bench_raw_add: return "Raw add";
case bench_raw_remove: return "Raw remove";
case bench_empty_remove: return "Raw empty remove";
case bench_single_threaded: return "Single-threaded";
case bench_mostly_add: return "Mostly add";
case bench_mostly_remove: return "Mostly remove";
case bench_heavy_concurrent: return "Heavy concurrent";
case bench_random_concurrent: return "Random concurrent";
default: return "";
}
}