diff --git a/Gemfile.lock b/Gemfile.lock index 39079c4..ef24419 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - graphql-hive (0.3.4) + graphql-hive (0.4.0) graphql (>= 2.3, < 3) GEM diff --git a/README.md b/README.md index 720b5e5..9e3433c 100644 --- a/README.md +++ b/README.md @@ -146,26 +146,39 @@ class MySchema < GraphQL::Schema use( GraphQL::Hive, { + # mandatory token: 'YOUR-TOKEN', - collect_usage: true, # optional - report_schema: true, # optional - enabled: true, # Enable/Disable Hive Client (optional) + + # optional + enabled: true, # enable/disable Hive Client debug: false, # verbose logs - logger: MyLogger.new, # optional - endpoint: 'app.graphql-hive.com', # optional - port: 80, # optional + logger: MyLogger.new, + endpoint: 'app.graphql-hive.com', + port: 80, buffer_size: 50, # forward the operations data to Hive every 50 requests - collect_usage_sampling: 1.0, - reporting: { # mandatory if `report_schema: true` - # mandatory member of `reporting` + + collect_usage: true, # report usage to Hive + collect_usage_sampling: { + # optional members of `collect_usage_sampling` + sample_rate: 0.5, # % of operations reported + sampler: proc { |context| context.operation_name.includes?('someQuery') 1 : 0.5 }, # assign custom sampling rates (overrides `sampling rate`) + at_least_once: true, # sample every distinct operation at least once + key_generator: proc { |context| context.operation_name } # assign custom keys to distinguish between distinct operations + } + + report_schema: true, # publish schema to Hive + # mandatory if `report_schema: true` + reporting: { + # mandatory members of `reporting` author: 'Author of the latest change', - # mandatory member of `reporting` commit: 'git sha or any identifier', - service_name: '', # optional - service_url: '', # optional + # optional members of `reporting + service_name: '', + service_url: '', }, - # you can pass an optional proc that will help identify the client (ex: Apollo web app) that performed the query - client_info: Proc.new { |context| { name: context.client_name, version: context.client_version } } + + # pass an optional proc to client_info to help identify the client (ex: Apollo web app) that performed the query + client_info: proc { |context| { name: context.client_name, version: context.client_version } } } ) @@ -174,6 +187,8 @@ class MySchema < GraphQL::Schema end ``` +See default options for the optional parameters [here](https://github.com/charlypoly/graphql-ruby-hive/blob/01407d8fed80912a7006fee503bf2967fa20a79c/lib/graphql-hive.rb#L53). +
**A note on `buffer_size` and performances** diff --git a/k6/graphql-api/Gemfile.lock b/k6/graphql-api/Gemfile.lock index 2dcc4b1..c018f7e 100644 --- a/k6/graphql-api/Gemfile.lock +++ b/k6/graphql-api/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: ../.. specs: - graphql-hive (0.3.4) + graphql-hive (0.4.0) graphql (>= 2.3, < 3) GEM diff --git a/lib/graphql-hive.rb b/lib/graphql-hive.rb index b4641b5..681e473 100644 --- a/lib/graphql-hive.rb +++ b/lib/graphql-hive.rb @@ -7,31 +7,9 @@ require 'graphql-hive/usage_reporter' require 'graphql-hive/client' -# class MySchema < GraphQL::Schema -# use( -# GraphQL::Hive, -# { -# token: 'YOUR-TOKEN', -# collect_usage: true, -# report_schema: true, -# enabled: true, // Enable/Disable Hive Client -# debug: true, // Debugging mode -# logger: MyLogger.new, -# endpoint: 'app.graphql-hive.com', -# port: 80, -# reporting: { -# author: 'Author of the latest change', -# commit: 'git sha or any identifier', -# service_name: '', -# service_url: '', -# }, -# client_info: Proc.new { |context| { name: context.client_name, version: context.client_version } } -# } -# ) -# -# # ... -# -# end +require 'graphql-hive/sampler' +require 'graphql-hive/sampling/basic_sampler' +require 'graphql-hive/sampling/dynamic_sampler' module GraphQL # GraphQL Hive usage collector and schema reporter @@ -116,10 +94,7 @@ def platform_trace(platform_key, _key, data) elapsed = ending - starting duration = (elapsed.to_f * (10**9)).to_i - # rubocop:disable Layout/LineLength - report_usage(timestamp, queries, results, duration) if !queries.empty? && SecureRandom.random_number <= @options[:collect_usage_sampling] - # rubocop:enable Layout/LineLength - + report_usage(timestamp, queries, results, duration) unless queries.empty? results else yield diff --git a/lib/graphql-hive/sampler.rb b/lib/graphql-hive/sampler.rb new file mode 100644 index 0000000..e1b1bb1 --- /dev/null +++ b/lib/graphql-hive/sampler.rb @@ -0,0 +1,40 @@ +# frozen_string_literal: true + +module GraphQL + class Hive < GraphQL::Tracing::PlatformTracing + # Sampler instance for usage reporter + class Sampler + def initialize(sampling_options, logger = nil) + # backwards compatibility with old `collect_usage_sampling` field + if sampling_options.is_a?(Numeric) + logger&.warn( + '`collect_usage_sampling` is deprecated for fixed sampling rates, ' \ + 'use `collect_usage_sampling: { sample_rate: XX }` instead' + ) + passed_sampling_rate = sampling_options + sampling_options = { sample_rate: passed_sampling_rate } + end + + sampling_options ||= {} + + @sampler = if sampling_options[:sampler] + Sampling::DynamicSampler.new( + sampling_options[:sampler], + sampling_options[:at_least_once], + sampling_options[:key_generator] + ) + else + Sampling::BasicSampler.new( + sampling_options[:sample_rate], + sampling_options[:at_least_once], + sampling_options[:key_generator] + ) + end + end + + def sample?(operation) + @sampler.sample?(operation) + end + end + end +end diff --git a/lib/graphql-hive/sampling/basic_sampler.rb b/lib/graphql-hive/sampling/basic_sampler.rb new file mode 100644 index 0000000..29a0e44 --- /dev/null +++ b/lib/graphql-hive/sampling/basic_sampler.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'graphql-hive/sampling/sampling_context' + +module GraphQL + class Hive + module Sampling + # Basic sampling for operations reporting + class BasicSampler + include GraphQL::Hive::Sampling::SamplingContext + + def initialize(client_sample_rate, at_least_once, key_generator) + @sample_rate = client_sample_rate || 1 + @tracked_operations = {} + @key_generator = key_generator || DEFAULT_SAMPLE_KEY if at_least_once + end + + def sample?(operation) + if @key_generator + sample_context = get_sample_context(operation) + operation_key = @key_generator.call(sample_context) + + unless @tracked_operations.key?(operation_key) + @tracked_operations[operation_key] = true + return true + end + end + + SecureRandom.random_number <= @sample_rate + end + end + end + end +end diff --git a/lib/graphql-hive/sampling/dynamic_sampler.rb b/lib/graphql-hive/sampling/dynamic_sampler.rb new file mode 100644 index 0000000..efe2201 --- /dev/null +++ b/lib/graphql-hive/sampling/dynamic_sampler.rb @@ -0,0 +1,34 @@ +# frozen_string_literal: true + +require 'graphql-hive/sampling/sampling_context' + +module GraphQL + class Hive + module Sampling + # Dynamic sampling for operations reporting + class DynamicSampler + include GraphQL::Hive::Sampling::SamplingContext + + def initialize(client_sampler, at_least_once, key_generator) + @sampler = client_sampler + @tracked_operations = {} + @key_generator = key_generator || DEFAULT_SAMPLE_KEY if at_least_once + end + + def sample?(operation) + sample_context = get_sample_context(operation) + + if @key_generator + operation_key = @key_generator.call(sample_context) + unless @tracked_operations.key?(operation_key) + @tracked_operations[operation_key] = true + return true + end + end + + SecureRandom.random_number <= @sampler.call(sample_context) + end + end + end + end +end diff --git a/lib/graphql-hive/sampling/sampling_context.rb b/lib/graphql-hive/sampling/sampling_context.rb new file mode 100644 index 0000000..7d14ebe --- /dev/null +++ b/lib/graphql-hive/sampling/sampling_context.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module GraphQL + class Hive + module Sampling + # Helper methods for sampling + module SamplingContext + private + + DEFAULT_SAMPLE_KEY = proc { |sample_context| + md5 = Digest::MD5.new + md5.update sample_context[:document].to_query_string + md5.hexdigest + } + + def get_sample_context(operation) + _, queries, results, = operation + + operation_name = queries.map(&:operations).map(&:keys).flatten.compact.join(', ') + + parsed_definitions = [] + queries.each do |query| + query_document = query.document + parsed_definitions.concat(query_document.definitions) if query_document + end + document = GraphQL::Language::Nodes::Document.new(definitions: parsed_definitions) + + context_value = results[0].query.context + + { + operation_name: operation_name, + document: document, + context_value: context_value + } + end + end + end + end +end diff --git a/lib/graphql-hive/usage_reporter.rb b/lib/graphql-hive/usage_reporter.rb index a6b0b50..cc6147a 100644 --- a/lib/graphql-hive/usage_reporter.rb +++ b/lib/graphql-hive/usage_reporter.rb @@ -10,11 +10,6 @@ class Hive < GraphQL::Tracing::PlatformTracing class UsageReporter @@instance = nil - @queue = nil - @thread = nil - @operations_buffer = nil - @client = nil - def self.instance @@instance end @@ -28,6 +23,8 @@ def initialize(options, client) @options_mutex = Mutex.new @queue = Queue.new + @sampler = Sampler.new(options[:collect_usage_sampling], options[:logger]) # NOTE: logs for deprecated field + start_thread end @@ -55,8 +52,9 @@ def start_thread @thread = Thread.new do buffer = [] while (operation = @queue.pop(false)) - @options[:logger].debug("add operation to buffer: #{operation}") - buffer << operation + @options[:logger].debug("processing operation from queue: #{operation}") + buffer << operation if @sampler.sample?(operation) + @options_mutex.synchronize do if buffer.size >= @options[:buffer_size] @options[:logger].debug('buffer is full, sending!') @@ -65,6 +63,7 @@ def start_thread end end end + unless buffer.empty? @options[:logger].debug('shuting down with buffer, sending!') process_operations(buffer) diff --git a/lib/graphql-hive/version.rb b/lib/graphql-hive/version.rb index 2198442..49e8915 100644 --- a/lib/graphql-hive/version.rb +++ b/lib/graphql-hive/version.rb @@ -2,6 +2,6 @@ module Graphql module Hive - VERSION = '0.3.4' + VERSION = '0.4.0' end end diff --git a/spec/graphql/graphql-hive/sampler/basic_sampler_spec.rb b/spec/graphql/graphql-hive/sampler/basic_sampler_spec.rb new file mode 100644 index 0000000..9809ed2 --- /dev/null +++ b/spec/graphql/graphql-hive/sampler/basic_sampler_spec.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe GraphQL::Hive::Sampling::BasicSampler do + let(:sampler_instance) { described_class.new(sample_rate, at_least_once, key_generator) } + let(:sample_rate) { 0 } + let(:at_least_once) { false } + let(:key_generator) { nil } + + describe '#initialize' do + it 'sets the sample rate' do + expect(sampler_instance.instance_variable_get(:@sample_rate)).to eq(0) + end + end + + describe '#sample?' do + let(:schema) { GraphQL::Schema.from_definition('type Query { test: String }') } + let(:timestamp) { 1_720_705_946_333 } + let(:queries) { [GraphQL::Query.new(schema, query: '{ test }', context: { header: 'value' })] } + let(:results) { [GraphQL::Query::Result.new(query: queries.first, values: { 'data' => { 'test' => 'test' } })] } + let(:duration) { 100 } + let(:operation) { [timestamp, queries, results, duration] } + + it 'follows the sample rate for all operations' do + expect(sampler_instance.sample?(operation)).to eq(false) + end + + context 'with at least once sampling' do + let(:at_least_once) { true } + + it 'returns true for the first operation, then follows the sample rate for remaining operations' do + expect(sampler_instance.sample?(operation)).to eq(true) + expect(sampler_instance.sample?(operation)).to eq(false) + end + + context 'when provided a custom key generator' do + let(:key_generator) { proc { |_sample_context| 'same_key' } } + + it 'tracks operations by their custom keys' do + expect(sampler_instance.sample?(operation)).to eq(true) + + queries = [GraphQL::Query.new(schema, query: '{ something_else }')] + different_operation = [timestamp, queries, results, duration] + + expect(sampler_instance.sample?(different_operation)).to eq(false) + end + end + end + end +end diff --git a/spec/graphql/graphql-hive/sampler/dynamic_sampler_spec.rb b/spec/graphql/graphql-hive/sampler/dynamic_sampler_spec.rb new file mode 100644 index 0000000..4b4c282 --- /dev/null +++ b/spec/graphql/graphql-hive/sampler/dynamic_sampler_spec.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe GraphQL::Hive::Sampling::DynamicSampler do + let(:sampler_instance) { described_class.new(sampler, at_least_once, key_generator) } + let(:sampler) { proc { |_sample_context| 0 } } + let(:at_least_once) { false } + let(:key_generator) { nil } + + describe '#initialize' do + it 'sets the sampler and tracked operations hash' do + expect(sampler_instance.instance_variable_get(:@sampler)).to eq(sampler) + expect(sampler_instance.instance_variable_get(:@tracked_operations)).to eq({}) + end + end + + describe '#sample?' do + let(:schema) { GraphQL::Schema.from_definition('type Query { test: String }') } + let(:timestamp) { 1_720_705_946_333 } + let(:queries) { [GraphQL::Query.new(schema, query: '{ test }')] } + let(:results) { [GraphQL::Query::Result.new(query: queries.first, values: { 'data' => { 'test' => 'test' } })] } + let(:duration) { 100 } + let(:operation) { [timestamp, queries, results, duration] } + + it 'follows the sampler for all operations' do + expect(sampler_instance.sample?(operation)).to eq(false) + end + + context 'when the sampler does not return a number' do + let(:sampler) { proc { |_sample_context| 'not a number' } } + + it 'raises an error' do + expect { sampler_instance.sample?(operation) }.to raise_error(ArgumentError) + end + end + + context 'with at least once sampling' do + let(:at_least_once) { true } + + it 'returns true for the first operation, then follows the sampler for remaining operations' do + expect(sampler_instance.sample?(operation)).to eq(true) + expect(sampler_instance.sample?(operation)).to eq(false) + end + + context 'when provided a custom key generator' do + let(:key_generator) { proc { |_sample_context| 'same_key' } } + + it 'tracks operations by their custom keys' do + expect(sampler_instance.sample?(operation)).to eq(true) + + queries = [GraphQL::Query.new(schema, query: '{ something_else }')] + different_operation = [timestamp, queries, results, duration] + + expect(sampler_instance.sample?(different_operation)).to eq(false) + end + end + end + end +end diff --git a/spec/graphql/graphql-hive/sampler_spec.rb b/spec/graphql/graphql-hive/sampler_spec.rb new file mode 100644 index 0000000..20ff0b1 --- /dev/null +++ b/spec/graphql/graphql-hive/sampler_spec.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe GraphQL::Hive::Sampler do + let(:sampler_instance) { described_class.new(sampling_options, logger) } + let(:sampling_options) { nil } + let(:logger) { instance_double('Logger') } + + describe '#initialize' do + it 'creates a basic sampler' do + expect(sampler_instance.instance_variable_get(:@sampler)).to be_an_instance_of(GraphQL::Hive::Sampling::BasicSampler) + end + + context 'when provided a sampling rate' do + let(:sampling_options) { { sample_rate: 0.5 } } + + it 'creates a basic sampler' do + expect(sampler_instance.instance_variable_get(:@sampler)).to be_an_instance_of(GraphQL::Hive::Sampling::BasicSampler) + end + + context 'using the deprecated field' do + let(:sampling_options) { 1 } + + before do + allow(logger).to receive(:warn) + end + + it 'creates a basic sampler' do + expect(sampler_instance.instance_variable_get(:@sampler)).to be_an_instance_of(GraphQL::Hive::Sampling::BasicSampler) + end + + it 'logs a warning' do + sampler_instance + expect(logger).to have_received(:warn) + end + end + end + + context 'when provided a sampler' do + let(:sampling_options) { { sampler: proc {} } } + + it 'creates a dynamic sampler' do + expect(sampler_instance.instance_variable_get(:@sampler)).to be_an_instance_of(GraphQL::Hive::Sampling::DynamicSampler) + end + end + end +end diff --git a/spec/graphql/graphql-hive/usage_reporter_spec.rb b/spec/graphql/graphql-hive/usage_reporter_spec.rb index 72608bc..889e70d 100644 --- a/spec/graphql/graphql-hive/usage_reporter_spec.rb +++ b/spec/graphql/graphql-hive/usage_reporter_spec.rb @@ -3,125 +3,143 @@ require 'spec_helper' RSpec.describe GraphQL::Hive::UsageReporter do - let(:subject) { described_class.instance } - let(:client) { instance_double('Hive::Client') } - let(:options) do - { - logger: logger, - buffer_size: 1 - } - end + let(:usage_reporter_instance) { described_class.new(options, client) } + let(:options) { { logger: logger } } let(:logger) { instance_double('Logger') } + let(:client) { instance_double('Hive::Client') } + + let(:timestamp) { 1_720_705_946_333 } + let(:queries) { [] } + let(:results) { [] } + let(:duration) { 100_000 } + let(:operation) { [timestamp, queries, results, duration] } before do allow(logger).to receive(:warn) allow(logger).to receive(:debug) + allow(logger).to receive(:error) + end + + # NOTE: creating a new instance of usage_reporter starts a new thread, so we must call on_exit after each test to close the thread + + after do + usage_reporter_instance.on_exit end describe '#initialize' do it 'sets the instance' do - expect(described_class.instance).to eq(nil) - described_class.new(options, client) - expect(described_class.instance).to_not eq(nil) + expect(usage_reporter_instance.instance_variable_get(:@options)).to eq(options) + expect(usage_reporter_instance.instance_variable_get(:@client)).to eq(client) + + expect(usage_reporter_instance.instance_variable_get(:@options_mutex)).to be_an_instance_of(Mutex) + expect(usage_reporter_instance.instance_variable_get(:@queue)).to be_an_instance_of(Queue) + expect(usage_reporter_instance.instance_variable_get(:@sampler)).to be_an_instance_of(GraphQL::Hive::Sampler) end end describe '#add_operation' do it 'adds an operation to the queue' do operation = { operation: 'test' } - described_class.new(options, client) - subject.add_operation(operation) - expect(subject.instance_variable_get(:@queue).pop).to eq(operation) + usage_reporter_instance.add_operation(operation) + expect(usage_reporter_instance.instance_variable_get(:@queue).pop).to eq(operation) end end describe '#on_exit' do it 'closes the queue and joins the thread' do - described_class.new(options, client) - expect(subject.instance_variable_get(:@queue)).to receive(:close) - expect(subject.instance_variable_get(:@thread)).to receive(:join) - subject.on_exit + usage_reporter_instance = described_class.new(options, client) + + expect(usage_reporter_instance.instance_variable_get(:@queue)).to receive(:close) + expect(usage_reporter_instance.instance_variable_get(:@thread)).to receive(:join) + + usage_reporter_instance.on_exit end end describe '#on_start' do it 'starts the thread' do - described_class.new(options, client) - expect(subject).to receive(:start_thread) - subject.on_start + expect(usage_reporter_instance).to receive(:start_thread) + usage_reporter_instance.on_start end end describe '#start_thread' do it 'logs a warning if the thread is already alive' do - described_class.new(options, client) - subject.instance_variable_set(:@thread, Thread.new { p 'test' }) + usage_reporter_instance.instance_variable_set(:@thread, Thread.new do + # do nothing + end) expect(logger).to receive(:warn) - subject.on_start - end - end - - describe '#add_operation' do - let(:timestamp) { 1_720_705_946_333 } - let(:queries) { [] } - let(:results) { [] } - let(:duration) { 100_000 } - let(:operation) do - [timestamp, queries, results, duration] - end - let(:schema) do - GraphQL::Schema.from_definition('type Query { test: String }') - end - let(:query_string) { 'query TestingHive { test }' } - - it 'adds an operation to the buffer' do - described_class.new(options, client) - subject.add_operation(operation) - expect(subject.instance_variable_get(:@queue).pop).to eq(operation) + usage_reporter_instance.on_start end - context 'successful operation' do + context 'when configured with sampling' do let(:options) do - { logger: logger, buffer_size: 2 } - end - let(:queries) do - [GraphQL::Query.new(schema, query_string, variables: {})] - end - let(:results) do - [GraphQL::Query::Result.new(query: queries.first, values: { 'data' => { 'test' => 'test' } })] + { + logger: logger, + buffer_size: 1 + } end + let(:sampler_class) { class_double(GraphQL::Hive::Sampler).as_stubbed_const } + let(:sampler_instance) { instance_double('GraphQL::Hive::Sampler') } + before do + allow(sampler_class).to receive(:new).and_return(sampler_instance) allow(client).to receive(:send) end - it 'processes the operations if the buffer is full' do - described_class.new(options, client) - subject.add_operation(operation) - - # call process_operation with send - subject.send(:process_operations, [operation]) - expect(client).to have_received(:send).with( - '/usage', - { - map: { '8b8412ce86f3ea7accb931b1a5de335d' => - { - fields: %w[Query Query.test], - operation: "query TestingHive {\n test\n}", - operationName: 'TestingHive' - } }, - operations: [ - { - execution: { duration: 100_000, errors: [], errorsTotal: 0, ok: true }, - operationMapKey: '8b8412ce86f3ea7accb931b1a5de335d', - timestamp: 1_720_705_946_333 - } - ], - size: 1 - }, - :usage - ) + it 'reports the operation if it should be included' do + allow(sampler_instance).to receive(:sample?).and_return(true) + + expect(sampler_instance).to receive(:sample?).with(operation) + expect(client).to receive(:send) + + usage_reporter_instance.add_operation(operation) end + + it 'does not report the operation if it should not be included' do + allow(sampler_instance).to receive(:sample?).and_return(false) + + expect(sampler_instance).to receive(:sample?).with(operation) + expect(client).not_to receive(:send) + + usage_reporter_instance.add_operation(operation) + end + end + end + + describe '#process_operation' do + let(:schema) { GraphQL::Schema.from_definition('type Query { test: String }') } + let(:query_string) { 'query TestingHive { test }' } + let(:queries) { [GraphQL::Query.new(schema, query_string, variables: {})] } + let(:results) { [GraphQL::Query::Result.new(query: queries.first, values: { 'data' => { 'test' => 'test' } })] } + + before do + allow(client).to receive(:send) + end + + it 'processes and reports the operation to the client' do + usage_reporter_instance.send(:process_operations, [operation]) + expect(client).to have_received(:send).with( + '/usage', + { + map: { '8b8412ce86f3ea7accb931b1a5de335d' => + { + fields: %w[Query Query.test], + operation: "query TestingHive {\n test\n}", + operationName: 'TestingHive' + } }, + operations: [ + { + execution: { duration: 100_000, errors: [], errorsTotal: 0, ok: true }, + operationMapKey: '8b8412ce86f3ea7accb931b1a5de335d', + timestamp: 1_720_705_946_333 + } + ], + size: 1 + }, + :usage + ) end end end