Merge branch 'v0.8.0'
Sergey Noskov committed Dec 15, 2021
2 parents c1b165d + 9301c95 commit afd073a
Showing 26 changed files with 1,938 additions and 1,935 deletions.
38 changes: 35 additions & 3 deletions CHANGELOG.md
@@ -1,8 +1,40 @@
# Release 0.7.1 #
# Release 0.8.0 #
**WARNING!** This release has **lots** of incompatible changes. Please make sure to read the section below before making
important changes in critical infrastructure.

* fixed a bug where all snapshots were going to the first thread, making it busier than the others and bottlenecking aggregation
## Incompatible changes ##
* Consul support has been completely removed. Make sure to remove the `consul` section from your configuration files and use the internal Raft instead.
* Threading model received a big rework and should lead to lower memory consumption and lower latencies, especially on higher metric cardinalities. See docs/threading.md for more details.
* Due to changes in threading:
* the following options are removed: `aggregation.mode`, `aggregation.threads`
* the following option has been added: `p-threads`
* the `c-threads` option is now `a-threads`
* statsd sampling rate is now completely supported
* The peer protocol and its capnp schema are now considered internal-use only. The protocol received a new version and is better structured for this purpose and for the internal
metrics representation. Due to these changes:
* the new option `network.peer-protocol` has been added to specify an exact version
* version 2 **is the default value in 0.8.0**, please consider setting `network.peer-protocol = "1"` explicitly in client configs before upgrading
* version 1 will be removed in 0.9.0; all users are recommended to migrate to v2 as soon as possible after its release
* the diff-counter metric type has been deprecated for being unintuitive and therefore effectively unused
* bioyino's own internal metric `egress` is now called `egress-carbon`, which better reflects its real meaning
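For the peer-protocol migration described above, the version pin can be expressed in a client config before upgrading. A hypothetical minimal fragment, using the key names given in this changelog:

```toml
[network]
# keep peer protocol v1 until all peers run 0.8.0, then switch to "2" (the 0.8.0 default)
peer-protocol = "1"
```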

See config.toml for further configuration instructions

## Major changes ##
* the `.rate` aggregate has been added to show the number of incoming values per second
* a new internal metric, `egress-peer`, has been added, showing the number of metrics sent to peers per second

## Internal changes ##
* the internal structure of metrics has changed to make the type system more helpful
* many libraries have their versions updated to latest major versions

## Minor changes ##
* bioyino can now be compiled with 32-bit floats instead of 64-bit. This may bring some storage savings at the price of
lower precision
* two new internal metrics have been added:
* `slow-q-len` - number of pending tasks in "slow" threadpool queue
* `ingress-metrics-peer` - number of metrics received via TCP


# Release 0.7.0 #

## Incompatible changes ##
48 changes: 27 additions & 21 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "bioyino"
version = "0.7.2"
version = "0.8.0"
authors = ["Sergey Noskov aka Albibek <[email protected]>"]
description = "StatsD-compatible, high-performance, fault-tolerant metric aggregator"
edition = "2018"
@@ -18,27 +18,30 @@ thiserror="^1.0"

libc="^0.2"
num_cpus="^1.8"
futures="^0.1"
tokio="^0.1"
futures1 = { package = "futures", version = "^0.1" }
tokio1 = { package = "tokio", version = "^0.1" }
tokio-io="^0.1"
#tokio-codec="^0.1"

# new wave
tokio2 = { package = "tokio", version = "^0.2", features = ["net", "time", "sync", "macros", "io-util", "io-driver", "rt-core", "rt-util", "rt-threaded"] }
futures3 = { package = "futures", version = "^0.3", features = ["compat"] }
hyper13 = { package = "hyper", version = "^0.13" }
trust-dns-resolver = { version = "^0.19" }
tokio = { version = "^1.5", features = ["net", "time", "sync", "macros", "rt", "rt-multi-thread", "io-util"] }
tokio-stream = { version = "^0.1", features = ["sync" ]}
tokio-util = { version = "^0.6", features = ["codec", "compat"] }
futures = { version = "^0.3", features = ["compat"] }
hyper = { version = "^0.14", features = ["client", "server", "http1", "http2", "tcp"] }
trust-dns-resolver = { version = "^0.20" }
url = "^2.1"
tokio-util = { version = "^0.3", features = ["codec", "compat"] }
bytes = { version = "^0.5", features = [ "serde" ] }
ring-channel = {version = "^0.8", features = ["futures_api"]}
capnp = "^0.13"
capnp-futures = "^0.13"
bytes = { version = "^1.0", features = [ "serde" ] }
async-channel = "^1.6"
ring-channel = { version = "^0.9", features = ["futures_api"] }
capnp = "^0.14"
capnp-futures = "^0.14"
crossbeam-channel = "^0.5"

resolve="^0.2"
socket2 = { version = "^0.3", features = ["reuseport"] }
array-init="^2.0"
socket2 = { version = "^0.4", features = ["all"] }
combine="^4.0"
hyper="^0.12"
mime="^0.3"
serde="^1.0"
serde_derive="^1.0"
@@ -48,16 +51,19 @@ slog-term="^2.4"
slog-async="^2.3"
slog-scope="^4.0"
slog-stdlog="^4.0"
slog-syslog="^0.12"
slog-syslog="^0.13"
log="^0.4"
toml="^0.5"
ftoa = "^0.1"
dtoa = "^0.4"

raft-tokio = { git = "https://github.com/Albibek/raft-tokio" }
rand = "^0.7"
rayon = "^1.0"
bioyino-metric = "^0.3"
rand = "^0.8"
bioyino-metric = "^0.5"

[build-dependencies]
capnpc = "^0.12"
vergen = "^3"
capnpc = "^0.14"
vergen = "^5"

[features]
default = []
f32 = []
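The `f32` feature above trades precision for storage, as the changelog notes. A small illustration of the trade-off (a generic floating-point sketch, not bioyino's aggregation code): `f32` stops representing consecutive integers past 2^24, while `f64` stays exact up to 2^53.

```rust
fn main() {
    // With 32-bit floats, integer precision runs out at 2^24;
    // 64-bit floats keep exact integers up to 2^53.
    let big32: f32 = 16_777_216.0; // 2^24
    let big64: f64 = 16_777_216.0;

    assert_eq!(big32 + 1.0, big32); // the increment is silently lost in f32
    assert_eq!(big64 + 1.0, 16_777_217.0); // still exact in f64
    println!("f32 stops distinguishing unit increments at {}", big32);
}
```

This matters mostly for large counters and high-magnitude timer sums; low-cardinality gauges are unlikely to notice the difference.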
10 changes: 8 additions & 2 deletions build.rs
@@ -1,5 +1,11 @@
use vergen::{generate_cargo_keys, ConstantsFlags};
use vergen::{vergen, Config, ShaKind};

fn main() {
generate_cargo_keys(ConstantsFlags::all()).expect("Unable to generate cargo keys!");
let mut config = Config::default();
*config.build_mut().timestamp_mut() = true;
*config.git_mut().commit_timestamp_mut() = true;
*config.git_mut().sha_kind_mut() = ShaKind::Short;

// Generate the instructions
vergen(config).expect("Unable to generate cargo keys!");
}
72 changes: 33 additions & 39 deletions config.toml
@@ -16,10 +16,18 @@ verbosity-console = "warn"
# Number of network worker threads in any mode, use 0(not recommended) to use all CPU cores
n-threads = 4

# Number of aggregating and counting threads, use 0(not recommended) to use all CPU cores
# Number of parsing "fast" threads, use 0(not recommended) to use all CPU cores
p-threads = 4

# Number of "slow" worker threads, use 0(not recommended) to use all CPU cores
w-threads = 4

# Queue size for single counting thread before task is dropped
# Number of asynchronous threads
# use 0(not recommended) to use all CPU cores
# setting this value lower than 4 may lead to random lockups and timer delays
a-threads = 4

# Queue size for fast and slow threads
task-queue-size = 1024

# If the server should become a leader from the very start
@@ -32,7 +40,7 @@ stats-interval = 10000
# Prefix for sending own stats
stats-prefix = "resources.monitoring.bioyino"

# What consensus to use: "consul", "internal" or "none"
# What consensus to use: "internal" or "none"
consensus = "none"

[carbon]
@@ -60,12 +68,17 @@ connect-delay-max = 10000
# note that 0 means 1 try
send-retries = 30

# The resulting aggregated set can be split into chunks to be sent, e.g. to a cluster
# of receiving nodes, over `chunks` parallel TCP connections.
# This may speed up delivery and allow postprocessing in parallel
chunks = 1
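The chunking idea above can be sketched as follows (an illustrative sketch, not bioyino's actual sender code; the names are hypothetical): one aggregated set is split into roughly equal chunks, one per parallel connection.

```rust
fn main() {
    // 10 aggregated metrics split across 3 parallel senders
    let metrics: Vec<String> = (0..10).map(|i| format!("metric.{}", i)).collect();
    let connections = 3;
    // ceiling division so every metric lands in some chunk
    let per_chunk = (metrics.len() + connections - 1) / connections;
    for (i, chunk) in metrics.chunks(per_chunk).enumerate() {
        println!("connection {} sends {} metrics", i, chunk.len());
    }
}
```

With `chunks = 1` (the default shown above) the whole set goes over a single connection.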

# Network settings
[network]
# Address:port to listen for metrics at
# address and UDP port to listen for statsd metrics at
listen = "127.0.0.1:8125"

# Address and port for replication server to listen on
# Address and port for peer protocol TCP server to listen on
peer-listen = "127.0.0.1:8136"

# Address for peer client to bind to
@@ -118,7 +131,7 @@ async-sockets = 4
# List of nodes to replicate metrics to
nodes = []

# Interval to send snapshots to nodes, ms
# Interval (milliseconds) to take snapshots for aggregation and remote replication
snapshot-interval = 1000

[metrics]
@@ -127,7 +140,7 @@ snapshot-interval = 1000
# it to false if you have such use case
# consistent-parsing = true

# Log all buffers being dropped due to parsing errors. Can be very spammy.
# Log all buffers being dropped due to parsing errors. Can be very spammy if a lot of incorrect data is coming in.
# log-parse-errors = false

# Size of buffer that parser considers invalid. Used to avoid DoS attacks on parser.
@@ -137,41 +150,38 @@ snapshot-interval = 1000
# Size of tags part of a metric (after semicolon character, not including the leading semicolon itself)
# max-tags-len = 9000

# Since tagged metric becomes a totally different metric in many systems the timeseries for such metrics
# In some systems a tagged metric becomes a totally different metric. This means the timeseries for such metrics
# can be broken. To avoid this while the metric is being adopted, this option allows creating a copy of
# such a tagged metric, but without tags
# Please note that this is effectively the double amount of memory and processing times
# Please note that this effectively doubles the amount of memory and processing time
# create-untagged-copy = false

[aggregation]
# round timestamp to interval of aggregation
# possible values:
# "up" - round to uppor bound of interval
# "up" - round to upper bound of interval
# "down" - round to lower bound
# "no" - use exact timestamp, i.e. no rounding
# round-timestamp = "no"
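The three rounding modes above can be sketched with plain integer arithmetic (an illustrative sketch, not bioyino's actual code; the function name is hypothetical):

```rust
/// Round a unix timestamp to an aggregation interval boundary.
fn round_ts(ts: u64, interval: u64, mode: &str) -> u64 {
    match mode {
        "down" => ts - ts % interval,                 // lower bound of the interval
        "up" => ts + (interval - ts % interval) % interval, // upper bound (no-op on a boundary)
        _ => ts,                                      // "no": keep the exact timestamp
    }
}

fn main() {
    assert_eq!(round_ts(1003, 10, "down"), 1000);
    assert_eq!(round_ts(1003, 10, "up"), 1010);
    assert_eq!(round_ts(1000, 10, "up"), 1000);
    println!("ok");
}
```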

# the threading mode of aggregation:
# single - in a single thread
# common - in the general thread pool along with parsing ang aggregating metrics
# separate - in a separate thread pool
mode = "single"

# updates aggregate is usually a debug value than some real one
# also this is the only one used for every type of metric, not only ms
# so it's reasonable to avoid receiving a doubled amount of all metrics
# This option allows to receive only metrics updated too often
# The `updates` aggregate is usually more of a debug value than a real one.
# Usually one wants it to see which metrics come too fast and take too many resources.
# Since it is the only aggregate counted for every type of metric, not only timers,
# it is reasonable to avoid receiving a doubled amount of all metrics.
#
# This option allows receiving the `updates` aggregate only for metrics updated too often.
# In this example it is more than 200 per interval.
update-count-threshold = 200
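The filtering described above amounts to a simple threshold over per-interval update counts (an illustrative sketch, not bioyino's actual code; metric names are made up):

```rust
fn main() {
    // Emit the `updates` aggregate only for metrics whose update count
    // exceeded the threshold during this aggregation interval.
    let update_count_threshold = 200u64;
    let counts = vec![("api.requests", 512u64), ("rarely.updated", 3)];
    let frequent: Vec<&str> = counts
        .iter()
        .filter(|(_, n)| *n > update_count_threshold)
        .map(|(name, _)| *name)
        .collect();
    assert_eq!(frequent, vec!["api.requests"]);
    println!("noisy metrics: {:?}", frequent);
}
```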

# a list of aggregates gathered per type of metric
# A list of aggregates gathered per type of metric.
# Setting this value defines the exact set, so e.g. ["mean"] will gather ONLY ONE aggregate
#
# To add new percentiles, an integer value with the "percentile-" prefix should be used.
# It will then be "appended" (in reality this is not a string-based calculation) to "0."
# For example, "percentile-9999" will become the 0.9999th percentile
#
# this is the full list of default values for timer
#aggregates.timer = [ "count", "last", "min", "max", "sum", "median", "mean", "updates", "percentile-75", "percentile-95", "percentile-98", "percentile-99", "percentile-999" ]
#aggregates.timer = [ "count", "rate", "last", "min", "max", "sum", "median", "mean", "updates", "percentile-75", "percentile-95", "percentile-98", "percentile-99", "percentile-999" ]
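The "percentile-" naming rule above can be sketched numerically (an illustrative sketch; `percentile_value` is a hypothetical name, not bioyino's API): the digits after the prefix are scaled down by one power of ten per digit, i.e. prefixed with "0.".

```rust
/// "percentile-9999" -> Some(0.9999); non-percentile names -> None.
fn percentile_value(name: &str) -> Option<f64> {
    let digits = name.strip_prefix("percentile-")?;
    let n: u32 = digits.parse().ok()?;
    Some(n as f64 / 10f64.powi(digits.len() as i32))
}

fn main() {
    assert_eq!(percentile_value("percentile-9999"), Some(0.9999));
    assert_eq!(percentile_value("percentile-75"), Some(0.75));
    assert_eq!(percentile_value("median"), None);
    println!("ok");
}
```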

# an example for gauge with "updates" aggregate turned off
#aggregates.gauge = [ "value" ]
@@ -181,6 +191,7 @@ update-count-threshold = 200
# Keys of map in this section are metric types: gauge, counter, diff-counter, timer, set
# All sections have same set of parameters.
# More examples can be seen in docs/aggregation.md file.

[naming.default]
# These settings will be applied for all unspecified naming parts.
# For example, if naming.set does not exist or naming.set.tag-values is not specified,
@@ -246,20 +257,3 @@ nodes = {}
# allow binding raft outgoing connections to a specific IP
# default: not specified, so no bind happens
# client-bind = "127.0.0.1:8138"

[consul]
# Start in disabled leader finding mode. This only works while consul is bootstrapping.
# Can be helpful when there is a danger of agent being inaccessible.
start-as = "disabled"

# Consul agent address
agent = "127.0.0.1:8500"

# TTL of consul session, ms (Consul cannot set it to less than 10s)
session-ttl = 11000

# How often to renew Consul session, ms
renew-time = 1000

# Key name to lock in Consul
key-name = "service/bioyino/lock"
39 changes: 6 additions & 33 deletions doc/aggregation.md
@@ -4,11 +4,10 @@ Each metric type has a different list of aggregates which can be counted:

| Metric type name | statsd name | Aggregates |
| --- | --- | --- |
| counter | c | value, updates |
| diff-counter | C | value, updates |
| timer | ms | last, min, max, sum, median, mean, updates, percentile-\* |
| gauge | g | value, updates |
| set | s | count, updates |
| counter | c | value, updates, rate |
| timer | ms | last, min, max, sum, median, mean, updates, rate, percentile-\* |
| gauge | g | value, updates, rate |
| set | s | count, updates, rate |

`value` is the aggregate of the resulting value of that type.

@@ -19,6 +18,8 @@ By default the percentiles for timer are: 75th, 95th, 98th, 99th and 999th.
`updates` is a special aggregate, showing the number of metrics with this name that came in during the aggregation period.
It is always counted, even if disabled by configuration, and can be additionally filtered by the `aggregation.update-count-threshold` parameter.

The `rate` aggregate takes the sampling rate into account and tries to restore the original rate of values at the metric source, rather than the "system" rate at which the server receives the metric
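The distinction can be sketched numerically (an illustrative sketch, not bioyino's aggregation code): with statsd sampling, a client sends only a fraction of events and tags each one with its sample rate (e.g. `@0.1`), so each received value is scaled by `1/sample_rate` to estimate the source rate.

```rust
fn main() {
    // 30 samples received in a 10s interval, each sent at sample rate 0.1,
    // i.e. the client actually observed ~300 events.
    let received: Vec<(u64, f64)> = vec![(1, 0.1); 30]; // (value_count, sample_rate)
    let interval_secs = 10.0;

    // "system" rate: what the server physically received
    let system_rate = received.len() as f64 / interval_secs;
    // restored source rate: each sample stands in for 1/sample_rate events
    let source_rate: f64 =
        received.iter().map(|(n, sr)| *n as f64 / sr).sum::<f64>() / interval_secs;

    assert_eq!(system_rate, 3.0);
    assert_eq!(source_rate, 30.0);
    println!("system: {}/s, restored source: {}/s", system_rate, source_rate);
}
```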

By default, bioyino counts all the aggregates. This behaviour can be changed by `aggregation.aggregates` option.
If a type is not specified, the default value (meaning all aggregates) will be used.

@@ -93,31 +94,3 @@ prefix-overrides = {"updates" = "global.namespace.debug-updates.gauges" }
destination = "tag"
tag = "agg"
```

# Aggregation modes
Depending of aggregation heaviness, there may be different ways to perform it internally.

The most notable parameter here is the size of and array for a single metric. There may be a lot of i.e. `ms`-typed
metrics, but when only few metrics come during the aggregation period, counting stats for all of them is fast and one
thread will most probably be enough.

## Single threaded
In this mode the whole array of metrics is counted in a single thread. Usually this is enough for a low-sized batches
i.e. when only a few metrics with the same name are received during aggregation period. Another use case is when
aggregation time is not important and it is ok to wait some time leaving all other cores for processing.

## Common pool multithreaded
Can be enabled by setting `aggregation-mode` to "common" in `metrics` section.

The aggregation is distributed between same worker threads, that do parsing and initial metric processing.
Only one thread is started to join the results received from these workers.

The biggest disadvantage of this approach is that aggregation of some big metric packs can block worker threads, so for
some time they could not process incoming data and therefore UDP drops may increase.

## Separate pool multithreaded
Can be enabled by setting `aggregation-mode` to "separate" in `metrics` section.

This mode runs a totally separate thread pool which will try to aggregate all the metrics as fast as possible.
To set the size of the pool `aggregaton-threads` option can be used. It only works in "separate" mode. If set to 0,
the number of threads will be taken automatically by number of cpu cores.
