|
| 1 | +#!/usr/bin/env ruby |
| 2 | +# frozen_string_literal: true |
| 3 | + |
| 4 | +# This helps benchmark current performance of Dalli |
| 5 | +# as well as compare performance of optimized and non-optimized calls like multi-set vs set |
| 6 | +# |
| 7 | +# run with: |
| 8 | +# bundle exec bin/benchmark |
| 9 | +# RUBY_YJIT_ENABLE=1 BENCH_TARGET=get bundle exec bin/benchmark |
| 10 | +require 'bundler/inline' |
| 11 | +require 'json' |
| 12 | + |
| 13 | +gemfile do |
| 14 | + source 'https://rubygems.org' |
| 15 | + gem 'benchmark-ips' |
| 16 | + gem 'logger' |
| 17 | +end |
| 18 | + |
| 19 | +require_relative '../lib/dalli' |
| 20 | +require 'benchmark/ips' |
| 21 | +require 'monitor' |
| 22 | + |
##
# Pass-through serializer: both directions hand the value back untouched,
# so the benchmarks do not pay for Marshal or JSON encoding.
##
class StringSerializer
  class << self
    # Returns +value+ as-is (nothing is encoded).
    def dump(value)
      value
    end

    # Returns +value+ as-is (nothing is decoded).
    def load(value)
      value
    end
  end
end
| 35 | + |
# Benchmark configuration, each value overridable through the environment:
#   BENCH_CACHE_URL    - server string handed to the single-server Dalli client
#   BENCH_TARGET       - suite to run: set, get, get_multi, set_multi or all
#   BENCH_TIME         - value passed to benchmark-ips as `time:`
#   BENCH_WARMUP       - value passed to benchmark-ips as `warmup:`
#   BENCH_PAYLOAD_SIZE - byte size of the value used by the single-key benchmarks
dalli_url = ENV.fetch('BENCH_CACHE_URL', '127.0.0.1:11211')
bench_target = ENV.fetch('BENCH_TARGET', 'set')
bench_time = ENV.fetch('BENCH_TIME', 10).to_i
bench_warmup = ENV.fetch('BENCH_WARMUP', 3).to_i
bench_payload_size = ENV.fetch('BENCH_PAYLOAD_SIZE', 700_000).to_i
payload = 'B' * bench_payload_size
TERMINATOR = "\r\n"
# RubyVM::YJIT is not defined on every Ruby build; report false there instead
# of aborting the whole script with a NameError.
yjit_enabled = defined?(RubyVM::YJIT) ? RubyVM::YJIT.enabled? : false
puts "yjit: #{yjit_enabled}"
| 44 | + |
# Dalli clients under test. Both skip serialization and compression so the
# benchmarks measure the client itself rather than payload encoding.
client = Dalli::Client.new(dalli_url, serializer: StringSerializer, compress: false, raw: true)
# NOTE: the two-server client always targets localhost:11211 and localhost:11222.
multi_client = Dalli::Client.new('localhost:11211,localhost:11222', serializer: StringSerializer, compress: false,
                                                                    raw: true)

# The raw socket implementation is used to benchmark the performance of dalli & the overhead of the various
# abstractions in the library.
# It talks to the first server listed in dalli_url so that both sides of each comparison hit the same memcached
# instance. Only TCP "host[:port[:weight]]" entries are understood here; an optional scheme or credentials prefix
# is stripped, and a missing host or port falls back to 127.0.0.1 / 11211.
sock_server = dalli_url.split(',').first.to_s.sub(%r{\A\w+://}, '').sub(/\A.*@/, '')
sock_host, sock_port = sock_server.split(':')
sock_host = '127.0.0.1' if sock_host.nil? || sock_host.empty?
sock_port = '11211' if sock_port.nil? || sock_port.empty?
sock = TCPSocket.new(sock_host, sock_port, connect_timeout: 1)
sock.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, true)
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, true)
# Benchmarks didn't see any performance gains from increasing the SO_RCVBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, ::Socket::SO_RCVBUF, 1024 * 1024 * 8)
# Benchmarks did see an improvement in performance when increasing the SO_SNDBUF buffer size
# sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDBUF, 1024 * 1024 * 8)
| 58 | + |
# Smoke-test every connection before benchmarking: store the payload through
# each client and through the raw socket, then read it back and compare.
client.set('key', payload)
multi_client.set('multi_key', payload)
sock.write("set sock_key 0 3600 #{payload.bytesize}\r\n")
sock.write(payload)
sock.write(TERMINATOR)
sock.flush
sock.readline # discard the server's reply to the set

raise 'dalli client mismatch' unless client.get('key') == payload

raise 'multi dalli client mismatch' unless multi_client.get('multi_key') == payload

sock.write("mg sock_key v\r\n")
sock.readline # skip the header line that precedes the value
sock_value = sock.read(payload.bytesize)
sock.read(TERMINATOR.bytesize) # consume the terminator that follows the value
raise 'sock mismatch' unless sock_value == payload
| 77 | + |
# Seed the 100 keys (multi_0 .. multi_99) read by the get_multi benchmark and
# rewritten by the set_multi benchmark; each holds a payload a tenth of the
# configured size.
payload_smaller = 'B' * (bench_payload_size / 10)
pairs = Array.new(100) { |i| ["multi_#{i}", payload_smaller] }.to_h
client.quiet do
  pairs.each { |key, value| client.set(key, value, 3600, raw: true) }
end
| 89 | + |
###
# GCSuite
# Suite object handed to benchmark-ips through x.config(suite: ...).
# The warming and running hooks force a full collection and then leave the
# garbage collector disabled, so collections do not skew the measurements.
# The remaining hooks are intentionally empty.
###
class GCSuite
  def warmup_stats(*); end

  def add_report(*); end

  def warming(*)
    collect_then_pause_gc
  end

  def running(*)
    collect_then_pause_gc
  end

  private

  # Re-enable the GC, run one collection, then switch it off again.
  def collect_then_pause_gc
    GC.enable
    GC.start
    GC.disable
  end
end
suite = GCSuite.new
| 116 | + |
# rubocop:disable Metrics/MethodLength
# rubocop:disable Metrics/PerceivedComplexity
# rubocop:disable Metrics/AbcSize
# rubocop:disable Metrics/CyclomaticComplexity

# Fetches every key of +pairs+ over a raw socket using pipelined meta-get
# ("mg") requests and returns a hash of the keys that were found.
#
# sock  - IO-like object responding to write, flush, readline and read
# pairs - hash whose keys are the cache keys to fetch (values are ignored)
#
# Every request except the final one carries the quiet flag, so only the
# final request is guaranteed a reply. Reading therefore stops at the reply
# to that final key, whether it is a value ("VA ...") or a miss ("EN").
# Keys that missed are simply absent from the returned hash.
def sock_get_multi(sock, pairs)
  # Nothing is sent for an empty request, so there is no reply to wait for.
  return {} if pairs.empty?

  last_key = pairs.keys.last
  pairs.each_key do |key|
    tail = key == last_key ? '' : 'q'
    sock.write("mg #{key} v f k #{tail}\r\n")
  end
  sock.flush

  # read the memcached responses back and build a hash of key value pairs
  results = {}
  final_key = last_key.to_s
  loop do
    # chomp (not chomp!) so a line without the terminator never becomes nil
    line = sock.readline.chomp(TERMINATOR)
    break if line.empty?
    # A miss is reported as a bare "EN"; it ends the read loop because only
    # the final, non-quiet request can produce it.
    break if line == 'EN' || line.start_with?('EN ')
    next unless line.start_with?('VA ')

    _, value_length, _flags, key_token = line.split
    key = key_token[1..] # drop the leading "k" of the returned key flag
    results[key] = sock.read(value_length.to_i)
    sock.read(TERMINATOR.bytesize)
    break if key == final_key
  end
  results
end
# rubocop:enable Metrics/MethodLength
# rubocop:enable Metrics/PerceivedComplexity
# rubocop:enable Metrics/AbcSize
# rubocop:enable Metrics/CyclomaticComplexity
| 148 | + |
# "set" suite: one large value stored through Dalli vs. the same value stored
# with a meta-set ("ms") request written directly to the raw socket.
if %w[all set].include?(bench_target)
  Benchmark.ips do |bench|
    bench.config(warmup: bench_warmup, time: bench_time, suite: suite)
    bench.report('client set') { client.set('key', payload) }
    # bench.report('multi client set') { multi_client.set('string_key', payload) }
    bench.report('raw sock set') do
      sock.write("ms sock_key #{payload.bytesize} T3600 MS\r\n")
      sock.write(payload)
      sock.write(TERMINATOR)
      sock.flush
      sock.readline # consume the reply so it does not leak into the next iteration
    end
    bench.compare!
  end
end
| 164 | + |
# Monitor taken around every iteration of the 'get sock non-blocking' report.
@lock = Monitor.new
# "get" suite: one large value read through Dalli, through blocking reads on
# the raw socket, and through lock-guarded non-blocking reads on the raw socket.
if %w[all get].include?(bench_target)
  Benchmark.ips do |x|
    x.config(warmup: bench_warmup, time: bench_time, suite: suite)
    x.report('get dalli') do
      result = client.get('key')
      raise 'mismatch' unless result == payload
    end
    # NOTE: while this is the fastest it is not thread safe and is blocking vs IO sharing friendly
    x.report('get sock') do
      sock.write("mg sock_key v\r\n")
      sock.readline # header line that precedes the value
      result = sock.read(payload.bytesize)
      sock.read(TERMINATOR.bytesize)
      raise 'mismatch' unless result == payload
    end
    # NOTE: This shows that when adding thread safety & non-blocking IO we are slower for single process/thread use case
    x.report('get sock non-blocking') do
      @lock.synchronize do
        sock.write("mg sock_key v\r\n")
        sock.readline # header line that precedes the value
        count = payload.bytesize
        value = String.new(capacity: count + 1)
        loop do
          begin
            value << sock.read_nonblock(count - value.bytesize)
          rescue IO::WaitReadable
            # IO::WaitReadable is the documented retry signal for read_nonblock;
            # it covers both the EAGAIN and the EWOULDBLOCK flavours.
            sock.wait_readable
            retry
          rescue EOFError
            # Connection closed early: stop reading; the comparison below then fails.
            puts 'EOFError'
            break
          end
          break if value.bytesize == count
        end
        sock.read(TERMINATOR.bytesize)
        raise 'mismatch' unless value == payload
      end
    end
    x.compare!
  end
end
| 207 | + |
# "get_multi" suite: the 100 seeded keys fetched with Dalli's get_multi vs.
# the pipelined raw-socket reader (sock_get_multi). Each report verifies that
# the fetched hash equals the seeded pairs.
if %w[all get_multi].include?(bench_target)
  Benchmark.ips do |bench|
    bench.config(warmup: bench_warmup, time: bench_time, suite: suite)
    bench.report('get 100 keys') do
      fetched = client.get_multi(pairs.keys)
      raise 'mismatch' unless fetched == pairs
    end
    bench.report('get 100 keys raw sock') do
      fetched = sock_get_multi(sock, pairs)
      raise 'mismatch' unless fetched == pairs
    end
    bench.compare!
  end
end
| 222 | + |
# "set_multi" suite: the 100 seeded keys rewritten one by one through Dalli in
# quiet mode vs. pipelined meta-set ("ms") requests on the raw socket.
if %w[all set_multi].include?(bench_target)
  Benchmark.ips do |bench|
    bench.config(warmup: bench_warmup, time: bench_time, suite: suite)
    bench.report('write 100 keys simple') do
      client.quiet do
        pairs.each { |key, value| client.set(key, value, 3600, raw: true) }
      end
    end
    # TODO: uncomment this once we add PR adding set_multi
    # bench.report('multi client set_multi 100') do
    #   multi_client.set_multi(pairs, 3600, raw: true)
    # end
    bench.report('write 100 keys rawsock') do
      remaining = pairs.length
      value_bytesize = payload_smaller.bytesize
      ttl = 3600

      pairs.each do |key, value|
        remaining -= 1
        # every request but the final one carries the quiet flag
        suffix = remaining.zero? ? '' : 'q'
        request = String.new("ms #{key} #{value_bytesize} c F0 T#{ttl} MS #{suffix}\r\n",
                             capacity: key.size + value_bytesize + 40)
        request << value << TERMINATOR
        sock.write(request)
      end
      sock.flush
      sock.gets(TERMINATOR) # consume the reply to the final, non-quiet request
    end
    # bench.report('write_multi 100 keys') { client.set_multi(pairs, 3600, raw: true) }
    bench.compare!
  end
end
0 commit comments