diff --git a/.github/workflows/multichain-integration.yml b/.github/workflows/multichain-integration.yml
index bceab31fa..20ba6457c 100644
--- a/.github/workflows/multichain-integration.yml
+++ b/.github/workflows/multichain-integration.yml
@@ -39,11 +39,11 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Pull Relayer & Sandbox Docker Images
+      - name: Pull Relayer, Sandbox, Redis Docker Images
        run: |
-          docker pull ghcr.io/near/os-relayer:12ba6e35690df3979fce0b36a41d0ca0db9c0ab4
           docker pull ghcr.io/near/near-lake-indexer:node-2.3.0
           docker pull localstack/localstack:3.5.0
+          docker pull redis:7.0.15
 
       - name: Install stable toolchain
         uses: actions-rs/toolchain@v1
diff --git a/Dockerfile.multichain b/Dockerfile.multichain
index 3d02c9d7c..d65cc2c9d 100644
--- a/Dockerfile.multichain
+++ b/Dockerfile.multichain
@@ -19,9 +19,18 @@ RUN sed -i 's#target-dir = "../target"#target-dir = "target"#' .cargo/config.tom
 RUN cargo build --release --package mpc-node
 
 FROM debian:stable-slim as runtime
-RUN apt-get update && apt-get install --assume-yes libssl-dev ca-certificates curl
+RUN apt-get update && apt-get install --assume-yes libssl-dev ca-certificates curl redis-server
+
 RUN update-ca-certificates
+
 COPY --from=builder /usr/src/app/target/release/mpc-node /usr/local/bin/mpc-node
+COPY chain-signatures/node/redis.conf /etc/redis/redis.conf
+
+# Create a script to start both Redis and the Rust app
+RUN echo "#!/bin/bash\nchown redis:redis /data\nservice redis-server start &\nexec mpc-node start" > /start.sh \
+    && chmod +x /start.sh
+
 WORKDIR /usr/local/bin
-ENTRYPOINT [ "mpc-node" ]
\ No newline at end of file
+# Start Redis and the Rust application
+ENTRYPOINT [ "/start.sh" ]
diff --git a/chain-signatures/Cargo.lock b/chain-signatures/Cargo.lock
index 5df45e303..fecf0a24a 100644
--- a/chain-signatures/Cargo.lock
+++ b/chain-signatures/Cargo.lock
@@ -182,6 +182,12 @@ dependencies = [
  "derive_arbitrary",
 ]
 
+[[package]]
+name = "arc-swap"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
+
 [[package]]
 name = "arrayvec"
 version = "0.7.4"
@@ -1297,6 +1303,16 @@ dependencies = [
  "windows-sys 0.48.0",
 ]
 
+[[package]]
+name = "combine"
+version = "4.6.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd"
+dependencies = [
+ "bytes",
+ "memchr",
+]
+
 [[package]]
 name = "concurrent-queue"
 version = "2.5.0"
@@ -2560,7 +2576,7 @@ dependencies = [
  "httpdate",
  "itoa",
  "pin-project-lite",
- "socket2 0.4.10",
+ "socket2 0.5.7",
  "tokio",
  "tower-service",
  "tracing",
@@ -2959,7 +2975,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4"
 dependencies = [
  "cfg-if 1.0.0",
- "windows-targets 0.48.5",
+ "windows-targets 0.52.6",
 ]
 
 [[package]]
@@ -3197,6 +3213,7 @@ dependencies = [
  "once_cell",
  "prometheus",
  "rand",
+ "redis",
  "reqwest 0.11.27",
  "semver",
  "serde",
@@ -4067,6 +4084,16 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "num-conv"
 version = "0.1.0"
@@ -4089,7 +4116,7 @@ source =
"registry+https://github.com/rust-lang/crates.io-index" checksum = "12ac428b1cb17fce6f731001d307d351ec70a6d202fc2e60f7d4c5e42d8f4f07" dependencies = [ "autocfg", - "num-bigint", + "num-bigint 0.3.3", "num-integer", "num-traits", "serde", @@ -4668,6 +4695,23 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redis" +version = "0.27.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6baebe319ef5e4b470f248335620098d1c2e9261e995be05f56f719ca4bdb2" +dependencies = [ + "arc-swap", + "combine", + "itoa", + "num-bigint 0.4.6", + "percent-encoding 2.3.1", + "ryu", + "sha1_smol", + "socket2 0.5.7", + "url 2.5.2", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -5396,6 +5440,12 @@ dependencies = [ "digest", ] +[[package]] +name = "sha1_smol" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" + [[package]] name = "sha2" version = "0.10.8" diff --git a/chain-signatures/node/Cargo.toml b/chain-signatures/node/Cargo.toml index 68b91637d..cdd8ce22e 100644 --- a/chain-signatures/node/Cargo.toml +++ b/chain-signatures/node/Cargo.toml @@ -60,4 +60,5 @@ itertools = "0.12.0" http = "1.1.0" prometheus = { version = "0.13.3" } once_cell = "1.13.1" +redis = "0.27.2" sysinfo = "0.32.0" diff --git a/chain-signatures/node/redis.conf b/chain-signatures/node/redis.conf new file mode 100644 index 000000000..4c98a8708 --- /dev/null +++ b/chain-signatures/node/redis.conf @@ -0,0 +1,1316 @@ +# Redis configuration file example. +# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. + +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. +# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. +# +# include /path/to/local.conf +# include /path/to/other.conf + +################################## MODULES ##################################### + +# Load modules at startup. If the server is not able to load modules +# it will abort. It is possible to use multiple loadmodule directives. +# +# loadmodule /path/to/my_module.so +# loadmodule /path/to/other_module.so + +################################## NETWORK ##################################### + +# By default, if no "bind" configuration directive is specified, Redis listens +# for connections from all the network interfaces available on the server. 
+# It is possible to listen to just one or multiple selected interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +# bind 127.0.0.1 ::1 +# +# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the +# internet, binding to all the interfaces is dangerous and will expose the +# instance to everybody on the internet. So by default we uncomment the +# following bind directive, that will force Redis to listen only into +# the IPv4 lookback interface address (this means Redis will be able to +# accept connections only from clients running into the same computer it +# is running). +# +# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES +# JUST COMMENT THE FOLLOWING LINE. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +bind 127.0.0.1 + +# Protected mode is a layer of security protection, in order to avoid that +# Redis instances left open on the internet are accessed and exploited. +# +# When protected mode is on and if: +# +# 1) The server is not binding explicitly to a set of addresses using the +# "bind" directive. +# 2) No password is configured. +# +# The server only accepts connections from clients connecting from the +# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain +# sockets. +# +# By default protected mode is enabled. You should disable it only if +# you are sure you want clients from other hosts to connect to Redis +# even if no authentication is configured, nor a specific set of interfaces +# are explicitly listed using the "bind" directive. +protected-mode yes + +# Accept connections on the specified port, default is 6379 (IANA #815344). +# If port 0 is specified Redis will not listen on a TCP socket. +port 6379 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need an high backlog in order +# to avoid slow clients connections issues. Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# Unix socket. +# +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. +# +# unixsocket /tmp/redis.sock +# unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Take the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 300 seconds, which is the new +# Redis default starting with Redis 3.2.1. +tcp-keepalive 300 + +################################# GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +daemonize no + +# If you run Redis from upstart or systemd, Redis can interact with your +# supervision tree. 
Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting Redis into SIGSTOP mode +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready." +# They do not enable continuous liveness pings back to your supervisor. +supervised no + +# If a pid file is specified, Redis writes it where specified at startup +# and removes it at exit. +# +# When the server runs non daemonized, no pid file is created if none is +# specified in the configuration. When the server is daemonized, the pid file +# is used even if not specified, defaulting to "/var/run/redis.pid". +# +# Creating a pid file is best effort: if Redis is not able to create it +# nothing bad happens, the server will start and run normally. +pidfile /var/run/redis_6379.pid + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel notice + +# Specify the log file name. Also the empty string can be used to force +# Redis to log on the standard output. Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +logfile "" + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. +# syslog-ident redis + +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# syslog-facility local0 + +# Set the number of databases. The default database is DB 0, you can select +# a different one on a per-connection basis using SELECT where +# dbid is a number between 0 and 'databases'-1 +databases 16 + +# By default Redis shows an ASCII art logo only when started to log to the +# standard output and if the standard output is a TTY. Basically this means +# that normally a logo is displayed only in interactive sessions. +# +# However it is possible to force the pre-4.0 behavior and always show a +# ASCII art logo in startup logs by setting the following option to yes. +always-show-logo yes + +################################ SNAPSHOTTING ################################ +# +# Save the DB on disk: +# +# save +# +# Will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred. +# +# In the example below the behaviour will be to save: +# after 900 sec (15 min) if at least 1 key changed +# after 300 sec (5 min) if at least 10 keys changed +# after 60 sec if at least 10000 keys changed +# +# Note: you can disable saving completely by commenting out all "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" + +save 900 1 +save 300 10 +save 60 10000 + +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. 
+# This will make the user aware (in a hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# disaster will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usual even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + +# Compress string objects using LZF when dump .rdb databases? +# For default that's set to 'yes' as it's almost always a win. +# If you want to save some CPU in the saving child set it to 'no' but +# the dataset will likely be bigger if you have compressible values or keys. +rdbcompression yes + +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. +# This makes the format more resistant to corruption but there is a performance +# hit to pay (around 10%) when saving and loading RDB files, so you can disable it +# for maximum performances. +# +# RDB files created with checksum disabled have a checksum of zero that will +# tell the loading code to skip the check. +rdbchecksum yes + +# The filename where to dump the DB +dbfilename dump.rdb + +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. +# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir /data + +################################# REPLICATION ################################# + +# Master-Slave replication. Use slaveof to make a Redis instance a copy of +# another Redis server. A few things to understand ASAP about Redis replication. +# +# 1) Redis replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of slaves. +# 2) Redis slaves are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition slaves automatically try to reconnect to masters +# and resynchronize with them. +# +# slaveof + +# If the master is password protected (using the "requirepass" configuration +# directive below) it is possible to tell the slave to authenticate before +# starting the replication synchronization process, otherwise the master will +# refuse the slave request. +# +# masterauth + +# When a slave loses its connection with the master, or when the replication +# is still in progress, the slave can act in two different ways: +# +# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization. +# +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with +# an error "SYNC with master in progress" to all the kind of commands +# but to INFO and SLAVEOF. +# +slave-serve-stale-data yes + +# You can configure a slave instance to accept writes or not. 
Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +# +# Since Redis 2.6 by default slaves are read-only. +# +# Note: read only slaves are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only slave exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve +# security of read only slaves using 'rename-command' to shadow all the +# administrative / dangerous commands. +slave-read-only yes + +# Replication SYNC strategy: disk or socket. +# +# ------------------------------------------------------- +# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY +# ------------------------------------------------------- +# +# New slaves and reconnecting slaves that are not able to continue the replication +# process just receiving differences, need to do what is called a "full +# synchronization". An RDB file is transmitted from the master to the slaves. +# The transmission can happen in two different ways: +# +# 1) Disk-backed: The Redis master creates a new process that writes the RDB +# file on disk. Later the file is transferred by the parent +# process to the slaves incrementally. +# 2) Diskless: The Redis master creates a new process that directly writes the +# RDB file to slave sockets, without touching the disk at all. +# +# With disk-backed replication, while the RDB file is generated, more slaves +# can be queued and served with the RDB file as soon as the current child producing +# the RDB file finishes its work. With diskless replication instead once +# the transfer starts, new slaves arriving will be queued and a new transfer +# will start when the current one terminates. +# +# When diskless replication is used, the master waits a configurable amount of +# time (in seconds) before starting the transfer in the hope that multiple slaves +# will arrive and the transfer can be parallelized. +# +# With slow disks and fast (large bandwidth) networks, diskless replication +# works better. +repl-diskless-sync no + +# When diskless replication is enabled, it is possible to configure the delay +# the server waits in order to spawn the child that transfers the RDB via socket +# to the slaves. +# +# This is important since once the transfer starts, it is not possible to serve +# new slaves arriving, that will be queued for the next RDB transfer, so the server +# waits a delay in order to let more slaves arrive. +# +# The delay is specified in seconds, and by default is 5 seconds. To disable +# it entirely just set it to 0 seconds and the transfer will start ASAP. +repl-diskless-sync-delay 5 + +# Slaves send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_slave_period option. The default value is 10 +# seconds. +# +# repl-ping-slave-period 10 + +# The following option sets the replication timeout for: +# +# 1) Bulk transfer I/O during SYNC, from the point of view of slave. +# 2) Master timeout from the point of view of slaves (data, pings). +# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). 
+# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-slave-period otherwise a timeout will be detected +# every time there is low traffic between the master and the slave. +# +# repl-timeout 60 + +# Disable TCP_NODELAY on the slave socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to slaves. But this can add a delay for +# the data to appear on the slave side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the slave side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and slaves are many hops away, turning this to "yes" may +# be a good idea. +repl-disable-tcp-nodelay no + +# Set the replication backlog size. The backlog is a buffer that accumulates +# slave data when slaves are disconnected for some time, so that when a slave +# wants to reconnect again, often a full resync is not needed, but a partial +# resync is enough, just passing the portion of data the slave missed while +# disconnected. +# +# The bigger the replication backlog, the longer the time the slave can be +# disconnected and later be able to perform a partial resynchronization. +# +# The backlog is only allocated once there is at least a slave connected. +# +# repl-backlog-size 1mb + +# After a master has no longer connected slaves for some time, the backlog +# will be freed. The following option configures the amount of seconds that +# need to elapse, starting from the time the last slave disconnected, for +# the backlog buffer to be freed. +# +# Note that slaves never free the backlog for timeout, since they may be +# promoted to masters later, and should be able to correctly "partially +# resynchronize" with the slaves: hence they should always accumulate backlog. +# +# A value of 0 means to never release the backlog. +# +# repl-backlog-ttl 3600 + +# The slave priority is an integer number published by Redis in the INFO output. +# It is used by Redis Sentinel in order to select a slave to promote into a +# master if the master is no longer working correctly. +# +# A slave with a low priority number is considered better for promotion, so +# for instance if there are three slaves with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the slave as not able to perform the +# role of master, so a slave with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +slave-priority 100 + +# It is possible for a master to stop accepting writes if there are less than +# N slaves connected, having a lag less or equal than M seconds. +# +# The N slaves need to be in "online" state. +# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the slave, that is usually sent every second. +# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough slaves +# are available, to the specified number of seconds. +# +# For example to require at least 3 slaves with a lag <= 10 seconds use: +# +# min-slaves-to-write 3 +# min-slaves-max-lag 10 +# +# Setting one or the other to 0 disables the feature. 
+# +# By default min-slaves-to-write is set to 0 (feature disabled) and +# min-slaves-max-lag is set to 10. + +# A Redis master is able to list the address and port of the attached +# slaves in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover slave instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP and address normally reported by a slave is obtained +# in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the slave to connect with the master. +# +# Port: The port is communicated by the slave during the replication +# handshake, and is normally the port that the slave is using to +# list for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the slave may be actually reachable via different IP and port +# pairs. The following two options can be used by a slave in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# slave-announce-ip 5.5.5.5 +# slave-announce-port 1234 + +################################## SECURITY ################################### + +# Require clients to issue AUTH before processing any other +# commands. This might be useful in environments in which you do not trust +# others with access to the host running redis-server. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since Redis is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +# requirepass foobared + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment. For instance the CONFIG command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to slaves may cause problems. + +################################### CLIENTS #################################### + +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). +# +# Once the limit is reached Redis will close all the new connections sending +# an error 'max number of clients reached'. +# +# maxclients 10000 + +############################## MEMORY MANAGEMENT ################################ + +# Set a memory usage limit to the specified amount of bytes. +# When the memory limit is reached Redis will try to remove keys +# according to the eviction policy selected (see maxmemory-policy). 
+# +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. +# +# This option is usually useful when using Redis as an LRU or LFU cache, or to +# set a hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves are subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). +# +# maxmemory + +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached. You can select among five behaviors: +# +# volatile-lru -> Evict using approximated LRU among the keys with an expire set. +# allkeys-lru -> Evict any key using approximated LRU. +# volatile-lfu -> Evict using approximated LFU among the keys with an expire set. +# allkeys-lfu -> Evict any key using approximated LFU. +# volatile-random -> Remove a random key among the ones with an expire set. +# allkeys-random -> Remove a random key, any key. +# volatile-ttl -> Remove the key with the nearest expire time (minor TTL) +# noeviction -> Don't evict anything, just return an error on write operations. +# +# LRU means Least Recently Used +# LFU means Least Frequently Used +# +# Both LRU, LFU and volatile-ttl are implemented using approximated +# randomized algorithms. +# +# Note: with any of the above policies, Redis will return an error on write +# operations, when there are no suitable keys for eviction. +# +# At the date of writing these commands are: set setnx setex append +# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd +# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby +# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby +# getset mset msetnx exec sort +# +# The default is: +# +# maxmemory-policy noeviction + +# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can tune it for speed or +# accuracy. For default Redis will check five keys and pick the one that was +# used less recently, you can change the sample size using the following +# configuration directive. +# +# The default of 5 produces good enough results. 10 Approximates very closely +# true LRU but costs more CPU. 3 is faster but not very accurate. +# +# maxmemory-samples 5 + +############################# LAZY FREEING #################################### + +# Redis has two primitives to delete keys. One is called DEL and is a blocking +# deletion of the object. It means that the server stops processing new commands +# in order to reclaim all the memory associated with an object in a synchronous +# way. If the key deleted is associated with a small object, the time needed +# in order to execute the DEL command is very small and comparable to most other +# O(1) or O(log_N) commands in Redis. 
However if the key is associated with an +# aggregated value containing millions of elements, the server can block for +# a long time (even seconds) in order to complete the operation. +# +# For the above reasons Redis also offers non blocking deletion primitives +# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and +# FLUSHDB commands, in order to reclaim memory in background. Those commands +# are executed in constant time. Another thread will incrementally free the +# object in the background as fast as possible. +# +# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. +# It's up to the design of the application to understand when it is a good +# idea to use one or the other. However the Redis server sometimes has to +# delete keys or flush the whole database as a side effect of other operations. +# Specifically Redis deletes objects independently of a user call in the +# following scenarios: +# +# 1) On eviction, because of the maxmemory and maxmemory policy configurations, +# in order to make room for new data, without going over the specified +# memory limit. +# 2) Because of expire: when a key with an associated time to live (see the +# EXPIRE command) must be deleted from memory. +# 3) Because of a side effect of a command that stores data on a key that may +# already exist. For example the RENAME command may delete the old key +# content when it is replaced with another one. Similarly SUNIONSTORE +# or SORT with STORE option may delete existing keys. The SET command +# itself removes any old content of the specified key in order to replace +# it with the specified string. +# 4) During replication, when a slave performs a full resynchronization with +# its master, the content of the whole database is removed in order to +# load the RDB file just transfered. +# +# In all the above cases the default is to delete objects in a blocking way, +# like if DEL was called. However you can configure each case specifically +# in order to instead release memory in a non-blocking way like if UNLINK +# was called, using the following configuration directives: + +lazyfree-lazy-eviction no +lazyfree-lazy-expire no +lazyfree-lazy-server-del no +slave-lazy-flush no + +############################## APPEND ONLY MODE ############################### + +# By default Redis asynchronously dumps the dataset on disk. This mode is +# good enough in many applications, but an issue with the Redis process or +# a power outage may result into a few minutes of writes lost (depending on +# the configured save points). +# +# The Append Only File is an alternative persistence mode that provides +# much better durability. For instance using the default data fsync policy +# (see later in the config file) Redis can lose just one second of writes in a +# dramatic event like a server power outage, or a single write if something +# wrong with the Redis process itself happens, but the operating system is +# still running correctly. +# +# AOF and RDB persistence can be enabled at the same time without problems. +# If the AOF is enabled on startup Redis will load the AOF, that is the file +# with the better durability guarantees. +# +# Please check http://redis.io/topics/persistence for more information. + +appendonly yes + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. 
Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. +# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performances (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". + +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync none". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. +# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size is +# bigger than the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature. + +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. +# This may happen when the system where Redis is running +# crashes, especially when an ext4 filesystem is mounted without the +# data=ordered option (however this can't happen when Redis itself +# crashes or aborts but the operating system still works correctly). 
+# +# Redis can either exit with an error when this happens, or load as much +# data as possible (the default now) and start if the AOF file is found +# to be truncated at the end. The following option controls this behavior. +# +# If aof-load-truncated is set to yes, a truncated AOF file is loaded and +# the Redis server starts emitting a log to inform the user of the event. +# Otherwise if the option is set to no, the server aborts with an error +# and refuses to start. When the option is set to no, the user requires +# to fix the AOF file using the "redis-check-aof" utility before to restart +# the server. +# +# Note that if the AOF file will be found to be corrupted in the middle +# the server will still exit with an error. This option only applies when +# Redis will try to read more data from the AOF file but not enough bytes +# will be found. +aof-load-truncated yes + +# When rewriting the AOF file, Redis is able to use an RDB preamble in the +# AOF file for faster rewrites and recoveries. When this option is turned +# on the rewritten AOF file is composed of two different stanzas: +# +# [RDB file][AOF tail] +# +# When loading Redis recognizes that the AOF file starts with the "REDIS" +# string and loads the prefixed RDB file, and continues loading the AOF +# tail. +# +# This is currently turned off by default in order to avoid the surprise +# of a format change, but will at some point be used as the default. +aof-use-rdb-preamble no + +################################ LUA SCRIPTING ############################### + +# Max execution time of a Lua script in milliseconds. +# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceeds the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that did not yet called write commands. The second +# is the only way to shut down the server in the case a write command was +# already issued by the script but the user doesn't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### +# +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however +# in order to mark it as "mature" we need to wait for a non trivial percentage +# of users to deploy it in production. +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# +# Normal Redis instances can't be part of a Redis Cluster; only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system do not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# Cluster node timeout is the amount of milliseconds a node must be unreachable +# for it to be considered in failure state. 
+# Most other internal time limits are multiple of the node timeout. +# +# cluster-node-timeout 15000 + +# A slave of a failing master will avoid to start a failover if its data +# looks too old. +# +# There is no simple way for a slave to actually have an exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple slaves able to failover, they exchange messages +# in order to try to give an advantage to the slave with the best +# replication offset (more data from the master processed). +# Slaves will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single slave computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the slave will not try to failover +# at all. +# +# The point "2" can be tuned by user. Specifically a slave will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * slave-validity-factor) + repl-ping-slave-period +# +# So for example if node-timeout is 30 seconds, and the slave-validity-factor +# is 10, and assuming a default repl-ping-slave-period of 10 seconds, the +# slave will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large slave-validity-factor may allow slaves with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a slave at all. +# +# For maximum availability, it is possible to set the slave-validity-factor +# to a value of 0, which means, that slaves will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank). +# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-slave-validity-factor 10 + +# Cluster slaves are able to migrate to orphaned masters, that are masters +# that are left without working slaves. This improves the cluster ability +# to resist to failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working slaves. +# +# Slaves migrate to orphaned masters only if there are still at least a +# given number of other working slaves for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a slave +# will migrate only if there is at least 1 other working slave for its master +# and so forth. It usually reflects the number of slaves you want for every +# master in your cluster. +# +# Default is 1 (slaves migrate only if their masters remain with at least +# one slave). To disable migration just set it to a very large value. +# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least an hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. 
+# It automatically returns available as soon as all the slots are covered again. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# This option, when set to yes, prevents slaves from trying to failover its +# master during master failures. However the master can still perform a +# manual failover, if forced to do so. +# +# This is useful in different scenarios, especially in the case of multiple +# data center operations, where we want one side to never be promoted if not +# in the case of a total DC failure. +# +# cluster-slave-no-failover no + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site. + +########################## CLUSTER DOCKER/NAT support ######################## + +# In certain deployments, Redis Cluster nodes address discovery fails, because +# addresses are NAT-ted or because ports are forwarded (the typical case is +# Docker and other containers). +# +# In order to make Redis Cluster working in such environments, a static +# configuration where each node knows its public address is needed. The +# following two options are used for this scope, and are: +# +# * cluster-announce-ip +# * cluster-announce-port +# * cluster-announce-bus-port +# +# Each instruct the node about its address, client port, and cluster message +# bus port. The information is then published in the header of the bus packets +# so that other nodes will be able to correctly map the address of the node +# publishing the information. +# +# If the above options are not used, the normal Redis Cluster auto-detection +# will be used instead. +# +# Note that when remapped, the bus port may not be at the fixed offset of +# clients port + 10000, so you can specify any port and bus-port depending +# on how they get remapped. If the bus-port is not set, a fixed offset of +# 10000 will be used as usually. +# +# Example: +# +# cluster-announce-ip 10.1.1.5 +# cluster-announce-port 6379 +# cluster-announce-bus-port 6380 + +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. 
+slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. +# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. +# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold " if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at http://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@__ prefix. +# E Keyevent events, published with __keyevent@__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... +# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# A Alias for g$lshzxe, so that the "AKE" string means all the events. +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled. +# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered. +notify-keyspace-events "" + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Lists are also encoded in a special way to save a lot of space. +# The number of entries allowed per internal list node can be specified +# as a fixed maximum size or a maximum number of elements. 
+# For a fixed maximum size, use -5 through -1, meaning: +# -5: max size: 64 Kb <-- not recommended for normal workloads +# -4: max size: 32 Kb <-- not recommended +# -3: max size: 16 Kb <-- probably not recommended +# -2: max size: 8 Kb <-- good +# -1: max size: 4 Kb <-- good +# Positive numbers mean store up to _exactly_ that number of elements +# per list node. +# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), +# but if your use case is unique, adjust the settings as necessary. +list-max-ziplist-size -2 + +# Lists may also be compressed. +# Compress depth is the number of quicklist ziplist nodes from *each* side of +# the list to *exclude* from compression. The head and tail of the list +# are always uncompressed for fast push/pop operations. Settings are: +# 0: disable all list compression +# 1: depth 1 means "don't start compressing until after 1 node into the list, +# going from either the head or tail" +# So: [head]->node->node->...->node->[tail] +# [head], [tail] will always be uncompressed; inner nodes will compress. +# 2: [head]->[next]->node->node->...->node->[prev]->[tail] +# 2 here means: don't compress head or head->next or tail->prev or tail, +# but compress all nodes between them. +# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] +# etc. +list-compress-depth 0 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. +# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operation you run into a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. 
+# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). +# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# slave -> slave clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously). +# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and slave clients, since +# subscribers and slaves receive data in a push fashion. +# +# Both the hard or the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit slave 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Client query buffers accumulate new commands. They are limited to a fixed +# amount by default in order to avoid that a protocol desynchronization (for +# instance due to a bug in the client) will lead to unbound memory usage in +# the query buffer. However you can configure it here if you have very special +# needs, such us huge multi/exec requests or alike. +# +# client-query-buffer-limit 1gb + +# In the Redis protocol, bulk requests, that are, elements representing single +# strings, are normally limited ot 512 mb. However you can change this limit +# here. +# +# proto-max-bulk-len 512mb + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10. Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. 
+aof-rewrite-incremental-fsync yes + +# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good +# idea to start with the default settings and only change them after investigating +# how to improve the performances and how the keys LFU change over time, which +# is possible to inspect via the OBJECT FREQ command. +# +# There are two tunable parameters in the Redis LFU implementation: the +# counter logarithm factor and the counter decay time. It is important to +# understand what the two parameters mean before changing them. +# +# The LFU counter is just 8 bits per key, it's maximum value is 255, so Redis +# uses a probabilistic increment with logarithmic behavior. Given the value +# of the old counter, when a key is accessed, the counter is incremented in +# this way: +# +# 1. A random number R between 0 and 1 is extracted. +# 2. A probability P is calculated as 1/(old_value*lfu_log_factor+1). +# 3. The counter is incremented only if R < P. +# +# The default lfu-log-factor is 10. This is a table of how the frequency +# counter changes with a different number of accesses with different +# logarithmic factors: +# +# +--------+------------+------------+------------+------------+------------+ +# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | +# +--------+------------+------------+------------+------------+------------+ +# | 0 | 104 | 255 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 1 | 18 | 49 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 10 | 10 | 18 | 142 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 100 | 8 | 11 | 49 | 143 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# +# NOTE: The above table was obtained by running the following commands: +# +# redis-benchmark -n 1000000 incr foo +# redis-cli object freq foo +# +# NOTE 2: The counter initial value is 5 in order to give new objects a chance +# to accumulate hits. +# +# The counter decay time is the time, in minutes, that must elapse in order +# for the key counter to be divided by two (or decremented if it has a value +# less <= 10). +# +# The default value for the lfu-decay-time is 1. A Special value of 0 means to +# decay the counter every time it happens to be scanned. +# +# lfu-log-factor 10 +# lfu-decay-time 1 + +########################### ACTIVE DEFRAGMENTATION ####################### +# +# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested +# even in production and manually tested by multiple engineers for some +# time. +# +# What is active defragmentation? +# ------------------------------- +# +# Active (online) defragmentation allows a Redis server to compact the +# spaces left between small allocations and deallocations of data in memory, +# thus allowing to reclaim back memory. +# +# Fragmentation is a natural process that happens with every allocator (but +# less so with Jemalloc, fortunately) and certain workloads. Normally a server +# restart is needed in order to lower the fragmentation, or at least to flush +# away all the data and create it again. However thanks to this feature +# implemented by Oran Agra for Redis 4.0 this process can happen at runtime +# in an "hot" way, while the server is running. 
+# +# Basically when the fragmentation is over a certain level (see the +# configuration options below) Redis will start to create new copies of the +# values in contiguous memory regions by exploiting certain specific Jemalloc +# features (in order to understand if an allocation is causing fragmentation +# and to allocate it in a better place), and at the same time, will release the +# old copies of the data. This process, repeated incrementally for all the keys +# will cause the fragmentation to drop back to normal values. +# +# Important things to understand: +# +# 1. This feature is disabled by default, and only works if you compiled Redis +# to use the copy of Jemalloc we ship with the source code of Redis. +# This is the default with Linux builds. +# +# 2. You never need to enable this feature if you don't have fragmentation +# issues. +# +# 3. Once you experience fragmentation, you can enable this feature when +# needed with the command "CONFIG SET activedefrag yes". +# +# The configuration parameters are able to fine tune the behavior of the +# defragmentation process. If you are not sure about what they mean it is +# a good idea to leave the defaults untouched. + +# Enabled active defragmentation +# activedefrag yes + +# Minimum amount of fragmentation waste to start active defrag +# active-defrag-ignore-bytes 100mb + +# Minimum percentage of fragmentation to start active defrag +# active-defrag-threshold-lower 10 + +# Maximum percentage of fragmentation at which we use maximum effort +# active-defrag-threshold-upper 100 + +# Minimal effort for defrag in CPU percentage +# active-defrag-cycle-min 25 + +# Maximal effort for defrag in CPU percentage +# active-defrag-cycle-max 75 diff --git a/chain-signatures/node/src/cli.rs b/chain-signatures/node/src/cli.rs index 25bbf7449..f08c6c644 100644 --- a/chain-signatures/node/src/cli.rs +++ b/chain-signatures/node/src/cli.rs @@ -1,6 +1,7 @@ use crate::config::{Config, LocalConfig, NetworkConfig, OverrideConfig}; use crate::gcp::GcpService; use crate::protocol::{MpcSignProtocol, SignQueue}; +use crate::storage::presignature_storage::LockRedisPresignatureStorage; use crate::storage::triple_storage::LockTripleNodeStorageBox; use crate::{http_client, indexer, mesh, storage, web}; use clap::Parser; @@ -106,6 +107,8 @@ impl Cli { cipher_pk, "--cipher-sk".to_string(), cipher_sk, + "--redis-url".to_string(), + storage_options.redis_url.to_string(), ]; if let Some(sign_sk) = sign_sk { args.extend(["--sign-sk".to_string(), sign_sk.to_string()]); @@ -208,6 +211,11 @@ pub fn run(cmd: Cli) -> anyhow::Result<()> { storage::triple_storage::init(Some(&gcp_service), &account_id), )); + let redis_url: Url = Url::parse(storage_options.redis_url.as_str())?; + let presignature_storage: LockRedisPresignatureStorage = Arc::new(RwLock::new( + storage::presignature_storage::init(redis_url, &account_id), + )); + let sign_sk = sign_sk.unwrap_or_else(|| account_sk.clone()); let my_address = my_address .map(|mut addr| { @@ -240,6 +248,7 @@ pub fn run(cmd: Cli) -> anyhow::Result<()> { sign_queue, key_storage, triple_storage, + presignature_storage, Config::new(LocalConfig { over: override_config.unwrap_or_else(Default::default), network: NetworkConfig { diff --git a/chain-signatures/node/src/protocol/consensus.rs b/chain-signatures/node/src/protocol/consensus.rs index af03d18a5..50cf67020 100644 --- a/chain-signatures/node/src/protocol/consensus.rs +++ b/chain-signatures/node/src/protocol/consensus.rs @@ -13,6 +13,7 @@ use 
crate::protocol::presignature::PresignatureManager; use crate::protocol::signature::SignatureManager; use crate::protocol::state::{GeneratingState, ResharingState}; use crate::protocol::triple::TripleManager; +use crate::storage::presignature_storage::LockRedisPresignatureStorage; use crate::storage::secret_storage::SecretNodeStorageBox; use crate::storage::triple_storage::LockTripleNodeStorageBox; use crate::storage::triple_storage::TripleData; @@ -42,6 +43,7 @@ pub trait ConsensusCtx { fn sign_queue(&self) -> Arc>; fn secret_storage(&self) -> &SecretNodeStorageBox; fn triple_storage(&self) -> LockTripleNodeStorageBox; + fn presignature_storage(&self) -> LockRedisPresignatureStorage; fn cfg(&self) -> &Config; fn message_options(&self) -> http_client::Options; } @@ -134,12 +136,6 @@ impl ConsensusProtocol for StartedState { tracing::info!( "started: contract state is running and we are already a participant" ); - let presignature_manager = PresignatureManager::new( - me, - contract_state.threshold, - epoch, - ctx.my_account_id(), - ); let triple_manager = Arc::new(RwLock::new(TripleManager::new( me, contract_state.threshold, @@ -148,6 +144,24 @@ impl ConsensusProtocol for StartedState { ctx.triple_storage(), ctx.my_account_id(), ))); + + let presignature_manager = + Arc::new(RwLock::new(PresignatureManager::new( + me, + contract_state.threshold, + epoch, + ctx.my_account_id(), + ctx.presignature_storage(), + ))); + + let signature_manager = + Arc::new(RwLock::new(SignatureManager::new( + me, + public_key, + epoch, + ctx.my_account_id(), + ))); + let stuck_monitor = Arc::new(RwLock::new( StuckMonitor::new(&triple_manager).await, )); @@ -161,17 +175,8 @@ impl ConsensusProtocol for StartedState { sign_queue, stuck_monitor, triple_manager, - presignature_manager: Arc::new(RwLock::new( - presignature_manager, - )), - signature_manager: Arc::new(RwLock::new( - SignatureManager::new( - me, - contract_state.public_key, - epoch, - ctx.my_account_id(), - ), - )), + presignature_manager, + signature_manager, messages: Arc::new(RwLock::new(MessageQueue::new( ctx.message_options().clone(), ))), @@ -376,6 +381,22 @@ impl ConsensusProtocol for WaitingForConsensusState { ctx.triple_storage(), ctx.my_account_id(), ))); + + let presignature_manager = Arc::new(RwLock::new(PresignatureManager::new( + me, + self.threshold, + self.epoch, + ctx.my_account_id(), + ctx.presignature_storage(), + ))); + + let signature_manager = Arc::new(RwLock::new(SignatureManager::new( + me, + self.public_key, + self.epoch, + ctx.my_account_id(), + ))); + let stuck_monitor = Arc::new(RwLock::new(StuckMonitor::new(&triple_manager).await)); @@ -388,18 +409,8 @@ impl ConsensusProtocol for WaitingForConsensusState { sign_queue: ctx.sign_queue(), stuck_monitor, triple_manager, - presignature_manager: Arc::new(RwLock::new(PresignatureManager::new( - me, - self.threshold, - self.epoch, - ctx.my_account_id(), - ))), - signature_manager: Arc::new(RwLock::new(SignatureManager::new( - me, - self.public_key, - self.epoch, - ctx.my_account_id(), - ))), + presignature_manager, + signature_manager, messages: self.messages, })) } diff --git a/chain-signatures/node/src/protocol/cryptography.rs b/chain-signatures/node/src/protocol/cryptography.rs index 92e46da9a..8f67670d6 100644 --- a/chain-signatures/node/src/protocol/cryptography.rs +++ b/chain-signatures/node/src/protocol/cryptography.rs @@ -407,20 +407,23 @@ impl CryptographicProtocol for RunningState { tracing::warn!(?err, "running: failed to stockpile presignatures"); } 
drop(triple_manager); - for (p, msg) in presignature_manager.poke() { + for (p, msg) in presignature_manager.poke().await { let info = self.fetch_participant(&p)?; messages.push(info.clone(), MpcMessage::Presignature(msg)); } crate::metrics::NUM_PRESIGNATURES_MINE .with_label_values(&[my_account_id.as_str()]) - .set(presignature_manager.my_len() as i64); + .set(presignature_manager.count_mine().await as i64); crate::metrics::NUM_PRESIGNATURES_TOTAL .with_label_values(&[my_account_id.as_str()]) - .set(presignature_manager.len() as i64); + .set(presignature_manager.count_all().await as i64); crate::metrics::NUM_PRESIGNATURE_GENERATORS_TOTAL .with_label_values(&[my_account_id.as_str()]) - .set(presignature_manager.potential_len() as i64 - presignature_manager.len() as i64); + .set( + presignature_manager.count_potential().await as i64 + - presignature_manager.count_all().await as i64, + ); // NOTE: signatures should only use stable and not active participants. The difference here is that // stable participants utilizes more than the online status of a node, such as whether or not their @@ -442,13 +445,15 @@ impl CryptographicProtocol for RunningState { .set(my_requests.len() as i64); let mut signature_manager = self.signature_manager.write().await; - signature_manager.handle_requests( - self.threshold, - &stable, - my_requests, - &mut presignature_manager, - protocol_cfg, - ); + signature_manager + .handle_requests( + self.threshold, + &stable, + my_requests, + &mut presignature_manager, + protocol_cfg, + ) + .await; drop(sign_queue); drop(presignature_manager); diff --git a/chain-signatures/node/src/protocol/message.rs b/chain-signatures/node/src/protocol/message.rs index 1d91071c8..fd4b67791 100644 --- a/chain-signatures/node/src/protocol/message.rs +++ b/chain-signatures/node/src/protocol/message.rs @@ -309,7 +309,7 @@ impl MessageHandler for RunningState { } let protocol = match presignature_manager - .get_or_generate( + .get_or_start_generation( participants, *id, *triple0, @@ -412,17 +412,20 @@ impl MessageHandler for RunningState { // continue; // }; // TODO: Validate that the message matches our sign_queue - let protocol = match signature_manager.get_or_generate( - participants, - *receipt_id, - *proposer, - *presignature_id, - request, - *epsilon, - *entropy, - &mut presignature_manager, - protocol_cfg, - ) { + let protocol = match signature_manager + .get_or_start_protocol( + participants, + *receipt_id, + *proposer, + *presignature_id, + request, + *epsilon, + *entropy, + &mut presignature_manager, + protocol_cfg, + ) + .await + { Ok(protocol) => protocol, Err(GenerationError::PresignatureIsGenerating(_)) => { // We will revisit this this signature request later when the presignature has been generated. 
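The hunks above propagate `async` through the presignature path: `poke()`, the pool counters, and the signature-request handling now read from the Redis-backed storage and are awaited by their callers. As a rough illustration of how a caller consumes the renamed counters, here is a hypothetical helper (only the `PresignatureManager` methods and the `tokio::sync::RwLock` wrapping come from this diff; the function itself is not part of the change):

```rust
use std::sync::Arc;

use mpc_node::protocol::presignature::PresignatureManager;
use tokio::sync::RwLock;

// Illustrative helper (not part of this change) mirroring the metrics update
// in the cryptography.rs hunk above.
async fn log_presignature_stockpile(manager: &Arc<RwLock<PresignatureManager>>) {
    let manager = manager.read().await;
    // Backed by HLEN on the per-account Redis hash of presignatures.
    let total = manager.count_all().await;
    // Backed by SCARD on the Redis set of presignature ids this node owns.
    let mine = manager.count_mine().await;
    // Completed presignatures plus generation protocols still in flight.
    let potential = manager.count_potential().await;
    // Saturate in case the pool shrinks between the two reads above.
    let in_flight = potential.saturating_sub(total);
    tracing::info!(total, mine, in_flight, "presignature stockpile");
}
```

Note that each count is a separate round trip to Redis, so the values are not a consistent snapshot of the pool; for metrics that is acceptable, which is why the hunk above likewise derives the generator backlog from two independent awaits.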
diff --git a/chain-signatures/node/src/protocol/mod.rs b/chain-signatures/node/src/protocol/mod.rs index 7c87e9b4f..6d407fef8 100644 --- a/chain-signatures/node/src/protocol/mod.rs +++ b/chain-signatures/node/src/protocol/mod.rs @@ -30,6 +30,7 @@ use crate::protocol::consensus::ConsensusProtocol; use crate::protocol::cryptography::CryptographicProtocol; use crate::protocol::message::{MessageHandler, MpcMessageQueue}; use crate::rpc_client; +use crate::storage::presignature_storage::LockRedisPresignatureStorage; use crate::storage::secret_storage::SecretNodeStorageBox; use crate::storage::triple_storage::LockTripleNodeStorageBox; @@ -54,6 +55,7 @@ struct Ctx { sign_queue: Arc>, secret_storage: SecretNodeStorageBox, triple_storage: LockTripleNodeStorageBox, + presignature_storage: LockRedisPresignatureStorage, cfg: Config, mesh: Mesh, message_options: http_client::Options, @@ -100,6 +102,10 @@ impl ConsensusCtx for &mut MpcSignProtocol { self.ctx.triple_storage.clone() } + fn presignature_storage(&self) -> LockRedisPresignatureStorage { + self.ctx.presignature_storage.clone() + } + fn message_options(&self) -> http_client::Options { self.ctx.message_options.clone() } @@ -173,6 +179,7 @@ impl MpcSignProtocol { sign_queue: Arc>, secret_storage: SecretNodeStorageBox, triple_storage: LockTripleNodeStorageBox, + presignature_storage: LockRedisPresignatureStorage, cfg: Config, mesh_options: mesh::Options, message_options: http_client::Options, @@ -200,6 +207,7 @@ impl MpcSignProtocol { signer, secret_storage, triple_storage, + presignature_storage, cfg, mesh: Mesh::new(mesh_options), message_options, diff --git a/chain-signatures/node/src/protocol/presignature.rs b/chain-signatures/node/src/protocol/presignature.rs index b0158b2ed..71f830e08 100644 --- a/chain-signatures/node/src/protocol/presignature.rs +++ b/chain-signatures/node/src/protocol/presignature.rs @@ -1,6 +1,7 @@ use super::message::PresignatureMessage; use super::triple::{Triple, TripleId, TripleManager}; use crate::protocol::contract::primitives::Participants; +use crate::storage::presignature_storage::LockRedisPresignatureStorage; use crate::types::{PresignatureProtocol, SecretKeyShare}; use crate::util::AffinePointExt; @@ -8,11 +9,13 @@ use cait_sith::protocol::{Action, InitializationError, Participant, ProtocolErro use cait_sith::{KeygenOutput, PresignArguments, PresignOutput}; use chrono::Utc; use crypto_shared::PublicKey; -use k256::Secp256k1; +use k256::{AffinePoint, Scalar, Secp256k1}; use mpc_contract::config::ProtocolConfig; +use serde::ser::SerializeStruct; +use serde::{Deserialize, Serialize}; use sha3::{Digest, Sha3_256}; use std::collections::hash_map::Entry; -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashMap, HashSet}; use std::time::{Duration, Instant}; use near_account_id::AccountId; @@ -29,6 +32,49 @@ pub struct Presignature { pub participants: Vec, } +impl Serialize for Presignature { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut state = serializer.serialize_struct("Presignature", 5)?; + state.serialize_field("id", &self.id)?; + state.serialize_field("output_big_r", &self.output.big_r)?; + state.serialize_field("output_k", &self.output.k)?; + state.serialize_field("output_sigma", &self.output.sigma)?; + state.serialize_field("participants", &self.participants)?; + state.end() + } +} + +impl<'de> Deserialize<'de> for Presignature { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + 
#[derive(Deserialize)] + struct PresignatureFields { + id: PresignatureId, + output_big_r: AffinePoint, + output_k: Scalar, + output_sigma: Scalar, + participants: Vec, + } + + let fields = PresignatureFields::deserialize(deserializer)?; + + Ok(Self { + id: fields.id, + output: PresignOutput { + big_r: fields.output_big_r, + k: fields.output_k, + sigma: fields.output_sigma, + }, + participants: fields.participants, + }) + } +} + /// An ongoing presignature generator. pub struct PresignatureGenerator { pub participants: Vec, @@ -97,17 +143,16 @@ pub enum GenerationError { PresignatureIsMissing(PresignatureId), #[error("presignature {0} is in garbage collection")] PresignatureIsGarbageCollected(TripleId), + #[error("presignature bad parameters")] + PresignatureBadParameters, } /// Abstracts how triples are generated by providing a way to request a new triple that will be /// complete some time in the future and a way to take an already generated triple. pub struct PresignatureManager { - /// Completed unspent presignatures. - presignatures: HashMap, + presignature_storage: LockRedisPresignatureStorage, /// Ongoing presignature generation protocols. generators: HashMap, - /// List of presignature ids generation of which was initiated by the current node. - mine: VecDeque, /// The set of presignatures that were introduced to the system by the current node. introduced: HashSet, /// Garbage collection for presignatures that have either been taken or failed. This @@ -121,11 +166,16 @@ pub struct PresignatureManager { } impl PresignatureManager { - pub fn new(me: Participant, threshold: usize, epoch: u64, my_account_id: &AccountId) -> Self { + pub fn new( + me: Participant, + threshold: usize, + epoch: u64, + my_account_id: &AccountId, + presignature_storage: LockRedisPresignatureStorage, + ) -> Self { Self { - presignatures: HashMap::new(), + presignature_storage, generators: HashMap::new(), - mine: VecDeque::new(), introduced: HashSet::new(), gc: HashMap::new(), me, @@ -135,25 +185,131 @@ impl PresignatureManager { } } + pub async fn insert(&mut self, presignature: Presignature) { + tracing::info!(id = ?presignature.id, "inserting presignature"); + if let Err(e) = self.presignature_storage.write().await.insert(presignature) { + tracing::error!(?e, "failed to insert presignature"); + } + } + + pub async fn insert_mine(&mut self, presignature: Presignature) { + tracing::info!(id = ?presignature.id, "inserting mine presignature"); + // Remove from taken list if it was there + self.gc.remove(&presignature.id); + if let Err(e) = self + .presignature_storage + .write() + .await + .insert_mine(presignature) + { + tracing::error!(?e, "failed to insert mine presignature"); + } + } + + /// Returns true if the presignature with the given id is already generated + pub async fn contains(&self, id: &PresignatureId) -> bool { + self.presignature_storage + .write() + .await + .contains(id) + .map_err(|e| { + tracing::warn!(?e, "failed to check if presignature exist"); + }) + .unwrap_or(false) + } + + /// Returns true if the mine presignature with the given id is already generated + pub async fn contains_mine(&self, id: &PresignatureId) -> bool { + self.presignature_storage + .write() + .await + .contains_mine(id) + .map_err(|e| { + tracing::warn!(?e, "failed to check if mine presignature exist"); + }) + .unwrap_or(false) + } + + pub async fn take(&mut self, id: PresignatureId) -> Result { + if let Some(presignature) = + self.presignature_storage + .write() + .await + .take(&id) + .map_err(|e| { + 
tracing::error!(?e, "failed to look for presignature"); + GenerationError::PresignatureIsMissing(id) + })? + { + self.gc.insert(id, Instant::now()); + tracing::info!(id, "took presignature"); + return Ok(presignature); + }; + + if self.generators.contains_key(&id) { + tracing::warn!(id, "presignature is still generating"); + return Err(GenerationError::PresignatureIsGenerating(id)); + } + if self.gc.contains_key(&id) { + tracing::warn!(id, "presignature was garbage collected"); + return Err(GenerationError::PresignatureIsGarbageCollected(id)); + } + tracing::warn!(id, "presignature is missing"); + Err(GenerationError::PresignatureIsMissing(id)) + } + + pub async fn take_mine(&mut self) -> Option { + if let Some(presignature) = self + .presignature_storage + .write() + .await + .take_mine() + .map_err(|e| { + tracing::error!(?e, "failed to look for mine presignature"); + }) + .ok()? + { + tracing::info!(id = ?presignature.id, "took presignature of mine"); + return Some(presignature); + } + None + } + /// Returns the number of unspent presignatures available in the manager. - pub fn len(&self) -> usize { - self.presignatures.len() + pub async fn count_all(&self) -> usize { + self.presignature_storage + .write() + .await + .count_all() + .map_err(|e| { + tracing::error!(?e, "failed to count all presignatures"); + }) + .unwrap_or(0) } /// Returns the number of unspent presignatures assigned to this node. - pub fn my_len(&self) -> usize { - self.mine.len() + pub async fn count_mine(&self) -> usize { + self.presignature_storage + .write() + .await + .count_mine() + .map_err(|e| { + tracing::error!(?e, "failed to count mine presignatures"); + }) + .unwrap_or(0) } - /// Returns the number of unspent presignatures we will have in the manager once - /// all ongoing generation protocols complete. - pub fn potential_len(&self) -> usize { - self.presignatures.len() + self.generators.len() + /// Returns if there are unspent presignatures available in the manager. + pub async fn is_empty(&self) -> bool { + self.count_all().await == 0 } - /// Returns if there are unspent presignatures available in the manager. - pub fn is_empty(&self) -> bool { - self.len() == 0 + /// Returns the number of unspent presignatures we will have in the manager once + /// all ongoing generation protocols complete. + pub async fn count_potential(&self) -> usize { + let complete_presignatures = self.count_all().await; + let ongoing_generators = self.generators.len(); + complete_presignatures + ongoing_generators } pub fn garbage_collect(&mut self, cfg: &ProtocolConfig) { @@ -212,7 +368,7 @@ impl PresignatureManager { } /// Starts a new presignature generation protocol. - pub fn generate( + pub async fn generate( &mut self, participants: &Participants, triple0: Triple, @@ -225,7 +381,7 @@ impl PresignatureManager { // Check if the `id` is already in the system. Error out and have the next cycle try again. if self.generators.contains_key(&id) - || self.presignatures.contains_key(&id) + || self.contains(&id).await || self.gc.contains_key(&id) { tracing::warn!(id, "presignature id collision"); @@ -269,11 +425,11 @@ impl PresignatureManager { // Stopgap to prevent too many presignatures in the system. This should be around min_presig*nodes*2 // for good measure so that we have enough presignatures to do sig generation while also maintain // the minimum number of presignature where a single node can't flood the system. 
- if self.potential_len() >= cfg.presignature.max_presignatures as usize { + if self.count_potential().await >= cfg.presignature.max_presignatures as usize { false } else { // We will always try to generate a new triple if we have less than the minimum - self.my_len() < cfg.presignature.min_presignatures as usize + self.count_mine().await < cfg.presignature.min_presignatures as usize && self.introduced.len() < cfg.max_concurrent_introduction as usize } }; @@ -304,7 +460,8 @@ impl PresignatureManager { pk, sk_share, cfg.presignature.generation_timeout, - )?; + ) + .await?; } } else { tracing::warn!("running: we don't have enough triples to generate a presignature"); @@ -321,7 +478,7 @@ impl PresignatureManager { /// 4) Depends on triples (`triple0`/`triple1`) that are unknown to the node // TODO: What if the presignature completed generation and is already spent? #[allow(clippy::too_many_arguments)] - pub async fn get_or_generate( + pub async fn get_or_start_generation( &mut self, participants: &Participants, id: PresignatureId, @@ -332,8 +489,11 @@ impl PresignatureManager { private_share: &SecretKeyShare, cfg: &ProtocolConfig, ) -> Result<&mut PresignatureProtocol, GenerationError> { - if self.presignatures.contains_key(&id) { - tracing::warn!(id, "presignature already generated"); + if id != hash_as_id(triple0, triple1) { + tracing::error!(id, "presignature id does not match the expected hash"); + Err(GenerationError::PresignatureBadParameters) + } else if self.contains(&id).await { + tracing::debug!(id, "presignature already generated"); Err(GenerationError::AlreadyGenerated) } else if self.gc.contains_key(&id) { tracing::warn!(id, "presignature was garbage collected"); @@ -403,48 +563,15 @@ impl PresignatureManager { } } - pub fn take_mine(&mut self) -> Option { - let my_presignature_id = self.mine.pop_front()?; - tracing::info!(my_presignature_id, "take presignature of mine"); - // SAFETY: This unwrap is safe because taking mine will always succeed since it is only - // present when generation completes where the determination of ownership is made. - Some(self.take(my_presignature_id).unwrap()) - } - - pub fn take(&mut self, id: PresignatureId) -> Result { - if let Some(presignature) = self.presignatures.remove(&id) { - self.gc.insert(id, Instant::now()); - tracing::info!(id, "took presignature"); - return Ok(presignature); - } - - if self.generators.contains_key(&id) { - tracing::warn!(id, "presignature is still generating"); - return Err(GenerationError::PresignatureIsGenerating(id)); - } - if self.gc.contains_key(&id) { - tracing::warn!(id, "presignature was garbage collected"); - return Err(GenerationError::PresignatureIsGarbageCollected(id)); - } - tracing::warn!(id, "presignature is missing"); - Err(GenerationError::PresignatureIsMissing(id)) - } - - pub fn insert_mine(&mut self, presig: Presignature) { - tracing::debug!(id = ?presig.id, "inserting presignature"); - // Remove from taken list if it was there - self.gc.remove(&presig.id); - self.mine.push_back(presig.id); - self.presignatures.insert(presig.id, presig); - } - /// Pokes all of the ongoing generation protocols and returns a vector of /// messages to be sent to the respective participant. /// /// An empty vector means we cannot progress until we receive a new message. 
- pub fn poke(&mut self) -> Vec<(Participant, PresignatureMessage)> { + pub async fn poke(&mut self) -> Vec<(Participant, PresignatureMessage)> { let mut messages = Vec::new(); let mut errors = Vec::new(); + let mut new_presignatures = Vec::new(); + let mut new_mine_presignatures = Vec::new(); self.generators.retain(|id, generator| { loop { let action = match generator.poke() { @@ -500,20 +627,19 @@ impl PresignatureManager { big_r = ?output.big_r.to_base58(), "completed presignature generation" ); - self.presignatures.insert( - *id, - Presignature { - id: *id, - output, - participants: generator.participants.clone(), - }, - ); + let presignature = Presignature { + id: *id, + output, + participants: generator.participants.clone(), + }; if generator.mine { tracing::info!(id, "assigning presignature to myself"); - self.mine.push_back(*id); + new_mine_presignatures.push(presignature); crate::metrics::NUM_TOTAL_HISTORICAL_PRESIGNATURE_GENERATORS_MINE_SUCCESS .with_label_values(&[self.my_account_id.as_str()]) .inc(); + } else { + new_presignatures.push(presignature); } self.introduced.remove(id); @@ -530,6 +656,14 @@ impl PresignatureManager { } }); + for presignature in new_presignatures { + self.insert(presignature).await; + } + + for presignature in new_mine_presignatures { + self.insert_mine(presignature).await; + } + if !errors.is_empty() { tracing::warn!(?errors, "failed to generate some presignatures"); } @@ -557,3 +691,39 @@ const fn first_8_bytes(input: [u8; 32]) -> [u8; 8] { } output } + +#[cfg(test)] +mod tests { + use cait_sith::{protocol::Participant, PresignOutput}; + use k256::{elliptic_curve::CurveArithmetic, Secp256k1}; + + use crate::protocol::presignature::Presignature; + + #[tokio::test] + async fn test_presignature_serialize_deserialize() { + let presignature = Presignature { + id: 1, + output: PresignOutput { + big_r: ::AffinePoint::default(), + k: ::Scalar::ZERO, + sigma: ::Scalar::ONE, + }, + participants: vec![Participant::from(1), Participant::from(2)], + }; + + // Serialize Presignature to JSON + let serialized = + serde_json::to_string(&presignature).expect("Failed to serialize Presignature"); + + // Deserialize JSON back to Presignature + let deserialized: Presignature = + serde_json::from_str(&serialized).expect("Failed to deserialize Presignature"); + + // Assert that the original and deserialized Presignature are equal + assert_eq!(presignature.id, deserialized.id); + assert_eq!(presignature.output.big_r, deserialized.output.big_r); + assert_eq!(presignature.output.k, deserialized.output.k); + assert_eq!(presignature.output.sigma, deserialized.output.sigma); + assert_eq!(presignature.participants, deserialized.participants); + } +} diff --git a/chain-signatures/node/src/protocol/signature.rs b/chain-signatures/node/src/protocol/signature.rs index e148ca107..8b42e54be 100644 --- a/chain-signatures/node/src/protocol/signature.rs +++ b/chain-signatures/node/src/protocol/signature.rs @@ -428,7 +428,7 @@ impl SignatureManager { /// 4) Depends on triples (`triple0`/`triple1`) that are unknown to the node // TODO: What if the presignature completed generation and is already spent? 
#[allow(clippy::too_many_arguments)] - pub fn get_or_generate( + pub async fn get_or_start_protocol( &mut self, participants: &Participants, receipt_id: ReceiptId, @@ -447,7 +447,7 @@ impl SignatureManager { match self.generators.entry(receipt_id) { Entry::Vacant(entry) => { tracing::info!(%receipt_id, me = ?self.me, presignature_id, "joining protocol to generate a new signature"); - let presignature = match presignature_manager.take(presignature_id) { + let presignature = match presignature_manager.take(presignature_id).await { Ok(presignature) => presignature, Err(err @ GenerationError::PresignatureIsGenerating(_)) => { tracing::warn!(me = ?self.me, presignature_id, "presignature is generating, can't join signature generation protocol"); @@ -481,7 +481,7 @@ impl SignatureManager { ) { Ok(generator) => generator, Err((presignature, err @ InitializationError::BadParameters(_))) => { - presignature_manager.insert_mine(presignature); + presignature_manager.insert_mine(presignature).await; tracing::warn!(%receipt_id, presignature_id, ?err, "failed to start signature generation"); return Err(GenerationError::CaitSithInitializationError(err)); } @@ -604,7 +604,7 @@ impl SignatureManager { messages } - pub fn handle_requests( + pub async fn handle_requests( &mut self, threshold: usize, stable: &Participants, @@ -626,7 +626,7 @@ impl SignatureManager { if self.failed.is_empty() && my_requests.is_empty() { None } else { - presignature_manager.take_mine() + presignature_manager.take_mine().await } } { let sig_participants = stable.intersection(&[&presignature.participants]); @@ -659,7 +659,7 @@ impl SignatureManager { continue; } - if let Some(another_presignature) = presignature_manager.take_mine() { + if let Some(another_presignature) = presignature_manager.take_mine().await { presignature = another_presignature; } else { break; @@ -689,7 +689,7 @@ impl SignatureManager { // add back the failed presignatures that were incompatible to be made into // signatures due to failures or lack of participants. for presignature in failed_presigs { - presignature_manager.insert_mine(presignature); + presignature_manager.insert_mine(presignature).await; } } diff --git a/chain-signatures/node/src/protocol/triple.rs b/chain-signatures/node/src/protocol/triple.rs index 2bcca581b..13c294181 100644 --- a/chain-signatures/node/src/protocol/triple.rs +++ b/chain-signatures/node/src/protocol/triple.rs @@ -164,28 +164,28 @@ impl TripleManager { } /// Returns the number of unspent triples available in the manager. - pub fn len(&self) -> usize { + pub fn count(&self) -> usize { self.triples.len() } /// Returns if there's any unspent triple in the manager. pub fn is_empty(&self) -> bool { - self.len() == 0 + self.count() == 0 } /// Returns the number of unspent triples assigned to this node. - pub fn my_len(&self) -> usize { + pub fn count_mine(&self) -> usize { self.mine.len() } /// Returns the number of unspent triples we will have in the manager once /// all ongoing generation protocols complete. - pub fn potential_len(&self) -> usize { - self.len() + self.generators.len() + pub fn count_potential(&self) -> usize { + self.count() + self.generators.len() } pub fn has_min_triples(&self, cfg: &ProtocolConfig) -> bool { - self.my_len() >= cfg.triple.min_triples as usize + self.count_mine() >= cfg.triple.min_triples as usize } /// Clears an entry from failed triples if that triple protocol was created more than 2 hrs ago @@ -256,11 +256,11 @@ impl TripleManager { // Stopgap to prevent too many triples in the system. 
This should be around min_triple*nodes*2 // for good measure so that we have enough triples to do presig generation while also maintain // the minimum number of triples where a single node can't flood the system. - if self.potential_len() >= cfg.triple.max_triples as usize { + if self.count_potential() >= cfg.triple.max_triples as usize { false } else { // We will always try to generate a new triple if we have less than the minimum - self.my_len() < cfg.triple.min_triples as usize + self.count_mine() < cfg.triple.min_triples as usize && self.introduced.len() < cfg.max_concurrent_introduction as usize && self.generators.len() < cfg.max_concurrent_generation as usize } @@ -412,7 +412,7 @@ impl TripleManager { if self.triples.contains_key(&id) || self.gc.contains_key(&id) { Ok(None) } else { - let potential_len = self.potential_len(); + let potential_len = self.count_potential(); match self.generators.entry(id) { Entry::Vacant(e) => { if potential_len >= cfg.triple.max_triples as usize { diff --git a/chain-signatures/node/src/storage/mod.rs b/chain-signatures/node/src/storage/mod.rs index 393356c03..550fe2378 100644 --- a/chain-signatures/node/src/storage/mod.rs +++ b/chain-signatures/node/src/storage/mod.rs @@ -1,3 +1,4 @@ +pub mod presignature_storage; pub mod secret_storage; pub mod triple_storage; @@ -20,6 +21,8 @@ pub struct Options { pub gcp_datastore_url: Option, #[arg(long, env("MPC_SK_SHARE_LOCAL_PATH"))] pub sk_share_local_path: Option, + #[arg(long, env("MPC_REDIS_URL"))] + pub redis_url: String, } impl Options { diff --git a/chain-signatures/node/src/storage/presignature_storage.rs b/chain-signatures/node/src/storage/presignature_storage.rs new file mode 100644 index 000000000..d1d523eb9 --- /dev/null +++ b/chain-signatures/node/src/storage/presignature_storage.rs @@ -0,0 +1,136 @@ +use std::sync::Arc; + +use anyhow::Ok; +use near_sdk::AccountId; +use redis::{Commands, Connection, FromRedisValue, RedisWrite, ToRedisArgs}; +use tokio::sync::RwLock; +use url::Url; + +use crate::protocol::presignature::{Presignature, PresignatureId}; + +type PresigResult = std::result::Result; +pub type LockRedisPresignatureStorage = Arc>; + +// Can be used to "clear" redis storage in case of a breaking change +const STORAGE_VERSION: &str = "v1"; + +pub fn init(redis_url: Url, node_account_id: &AccountId) -> RedisPresignatureStorage { + RedisPresignatureStorage::new(redis_url, node_account_id) +} + +pub struct RedisPresignatureStorage { + redis_connection: Connection, + node_account_id: AccountId, +} + +impl RedisPresignatureStorage { + fn new(redis_url: Url, node_account_id: &AccountId) -> Self { + Self { + redis_connection: redis::Client::open(redis_url.as_str()) + .expect("Failed to connect to Redis") + .get_connection() + .expect("Failed to get Redis connection"), + node_account_id: node_account_id.clone(), + } + } +} + +impl RedisPresignatureStorage { + pub fn insert(&mut self, presignature: Presignature) -> PresigResult<()> { + self.redis_connection + .hset::<&str, PresignatureId, Presignature, ()>( + &self.presignature_key(), + presignature.id, + presignature, + )?; + Ok(()) + } + + pub fn insert_mine(&mut self, presignature: Presignature) -> PresigResult<()> { + self.redis_connection + .sadd::<&str, PresignatureId, ()>(&self.mine_key(), presignature.id)?; + self.insert(presignature)?; + Ok(()) + } + + pub fn contains(&mut self, id: &PresignatureId) -> PresigResult { + let result: bool = self.redis_connection.hexists(self.presignature_key(), id)?; + Ok(result) + } + + pub fn contains_mine(&mut 
self, id: &PresignatureId) -> PresigResult { + let result: bool = self.redis_connection.sismember(self.mine_key(), id)?; + Ok(result) + } + + pub fn take(&mut self, id: &PresignatureId) -> PresigResult> { + let result: Option = + self.redis_connection.hget(self.presignature_key(), id)?; + match result { + Some(presignature) => { + self.redis_connection + .hdel::<&str, PresignatureId, ()>(&self.presignature_key(), *id)?; + Ok(Some(presignature)) + } + None => Ok(None), + } + } + + pub fn take_mine(&mut self) -> PresigResult> { + let id: Option = self.redis_connection.spop(self.mine_key())?; + match id { + Some(id) => self.take(&id), + None => Ok(None), + } + } + + pub fn count_all(&mut self) -> PresigResult { + let result: usize = self.redis_connection.hlen(self.presignature_key())?; + Ok(result) + } + + pub fn count_mine(&mut self) -> PresigResult { + let result: usize = self.redis_connection.scard(self.mine_key())?; + Ok(result) + } + + fn presignature_key(&self) -> String { + format!("presignatures:{}:{}", STORAGE_VERSION, self.node_account_id) + } + + fn mine_key(&self) -> String { + format!( + "presignatures_mine:{}:{}", + STORAGE_VERSION, self.node_account_id + ) + } +} + +impl ToRedisArgs for Presignature { + fn write_redis_args(&self, out: &mut W) + where + W: ?Sized + RedisWrite, + { + match serde_json::to_string(self) { + std::result::Result::Ok(json) => out.write_arg(json.as_bytes()), + Err(e) => { + tracing::error!("Failed to serialize Presignature: {}", e); + out.write_arg("failed_to_serialize".as_bytes()) + } + } + } +} + +impl FromRedisValue for Presignature { + fn from_redis_value(v: &redis::Value) -> redis::RedisResult { + let json: String = String::from_redis_value(v)?; + + serde_json::from_str(&json).map_err(|e| { + redis::RedisError::from(( + redis::ErrorKind::TypeError, + "Failed to deserialize Presignature", + e.to_string(), + )) + }) + } +} diff --git a/chain-signatures/node/src/test_utils.rs b/chain-signatures/node/src/test_utils.rs index 4771441e9..8027295f5 100644 --- a/chain-signatures/node/src/test_utils.rs +++ b/chain-signatures/node/src/test_utils.rs @@ -41,6 +41,7 @@ impl TestTripleManagers { gcp_datastore_url: Some(url.clone()), env: "triple-test".to_string(), sk_share_local_path: None, + redis_url: "redis://localhost".to_string(), // not used }; Some( GcpService::init(&account_id, &storage_options) @@ -186,8 +187,8 @@ pub async fn test_triple_generation(datastore_url: Option) { let inputs = tm.managers.into_iter().map(|m| { ( - m.my_len(), - m.len(), + m.count_mine(), + m.count(), m.generators, m.triples, m.triple_storage, diff --git a/chain-signatures/node/src/web/mod.rs b/chain-signatures/node/src/web/mod.rs index 9161b05a1..33b9e96f7 100644 --- a/chain-signatures/node/src/web/mod.rs +++ b/chain-signatures/node/src/web/mod.rs @@ -137,13 +137,13 @@ async fn state(Extension(state): Extension>) -> Result { let triple_manager_read = state.triple_manager.read().await; - let triple_potential_count = triple_manager_read.potential_len(); - let triple_count = triple_manager_read.len(); - let triple_mine_count = triple_manager_read.my_len(); + let triple_potential_count = triple_manager_read.count_potential(); + let triple_count = triple_manager_read.count(); + let triple_mine_count = triple_manager_read.count_mine(); let presignature_read = state.presignature_manager.read().await; - let presignature_count = presignature_read.len(); - let presignature_mine_count = presignature_read.my_len(); - let presignature_potential_count = presignature_read.potential_len(); + 
let presignature_count = presignature_read.count_all().await; + let presignature_mine_count = presignature_read.count_mine().await; + let presignature_potential_count = presignature_read.count_potential().await; let participants = state.participants.keys_vec(); Ok(Json(StateView::Running { diff --git a/infra/multichain-dev/main.tf b/infra/multichain-dev/main.tf index b864fa427..72b579fe4 100644 --- a/infra/multichain-dev/main.tf +++ b/infra/multichain-dev/main.tf @@ -11,9 +11,16 @@ module "gce-container" { container = { image = "us-east1-docker.pkg.dev/pagoda-discovery-platform-dev/multichain-public/multichain-dev:latest" - args = ["start"] port = "3000" + volumeMounts = [ + { + mountPath = "/data" + name = "host-path" + readOnly = false + } + ] + env = concat(var.static_env, [ { name = "MPC_NODE_ID" @@ -58,9 +65,22 @@ module "gce-container" { { name = "MPC_ENV", value = var.env + }, + { + name = "MPC_REDIS_URL", + value = var.redis_url } ]) } + + volumes = [ + { + name = "host-path" + hostPath = { + path = "/var/redis" + } + } + ] } resource "google_compute_address" "internal_ips" { diff --git a/infra/multichain-dev/variables.tf b/infra/multichain-dev/variables.tf index 37426e8dc..d712fc960 100644 --- a/infra/multichain-dev/variables.tf +++ b/infra/multichain-dev/variables.tf @@ -79,6 +79,11 @@ variable "env" { default = "dev" } +variable "redis_url" { + type = string + default = "redis://127.0.0.1:6379" +} + variable "static_env" { type = list(object({ name = string @@ -99,7 +104,7 @@ variable "static_env" { }, { name = "MPC_INDEXER_START_BLOCK_HEIGHT" - value = 175970237 + value = 177069137 }, { name = "AWS_DEFAULT_REGION" diff --git a/integration-tests/README.md b/integration-tests/README.md index c14113225..03a9db88d 100644 --- a/integration-tests/README.md +++ b/integration-tests/README.md @@ -7,6 +7,7 @@ Running integration tests requires you to have relayer and sandbox docker images ```BASH docker pull ghcr.io/near/os-relayer docker pull ghcr.io/near/sandbox +docker pull redis:7.0.15 ``` For M1 you may want to pull the following image instead: diff --git a/integration-tests/chain-signatures/Cargo.lock b/integration-tests/chain-signatures/Cargo.lock index e76ab76df..6f2d03ed8 100644 --- a/integration-tests/chain-signatures/Cargo.lock +++ b/integration-tests/chain-signatures/Cargo.lock @@ -182,6 +182,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arc-swap" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" + [[package]] name = "arrayvec" version = "0.7.4" @@ -1384,6 +1390,16 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "concurrent-queue" version = "2.5.0" @@ -3722,6 +3738,7 @@ dependencies = [ "once_cell", "prometheus", "rand 0.8.5", + "redis", "reqwest 0.11.27", "semver", "serde", @@ -4592,6 +4609,16 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -4614,7 +4641,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "12ac428b1cb17fce6f731001d307d351ec70a6d202fc2e60f7d4c5e42d8f4f07" dependencies = [ "autocfg", - "num-bigint", + "num-bigint 0.3.3", "num-integer", "num-traits", "serde", @@ -5353,6 +5380,23 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "redis" +version = "0.27.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc6baebe319ef5e4b470f248335620098d1c2e9261e995be05f56f719ca4bdb2" +dependencies = [ + "arc-swap", + "combine", + "itoa", + "num-bigint 0.4.6", + "percent-encoding 2.3.1", + "ryu", + "sha1_smol", + "socket2 0.5.7", + "url 2.5.1", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -6172,6 +6216,12 @@ dependencies = [ "digest 0.10.7", ] +[[package]] +name = "sha1_smol" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" + [[package]] name = "sha2" version = "0.10.8" diff --git a/integration-tests/chain-signatures/src/containers.rs b/integration-tests/chain-signatures/src/containers.rs index b6b69d173..460d51d23 100644 --- a/integration-tests/chain-signatures/src/containers.rs +++ b/integration-tests/chain-signatures/src/containers.rs @@ -622,3 +622,43 @@ impl<'a> Datastore<'a> { }) } } + +pub struct Redis<'a> { + pub container: Container<'a, GenericImage>, + pub internal_address: String, + pub external_address: String, +} + +impl<'a> Redis<'a> { + const DEFAULT_REDIS_PORT: u16 = 6379; + + pub async fn run(docker_client: &'a DockerClient, network: &str) -> anyhow::Result> { + tracing::info!("Running Redis container..."); + let image = GenericImage::new("redis", "7.0.15") + .with_exposed_port(Self::DEFAULT_REDIS_PORT) + .with_wait_for(WaitFor::message_on_stdout("Ready to accept connections")); + let image: RunnableImage = image.into(); + let image = image.with_network(network); + let container = docker_client.cli.run(image); + let network_ip = docker_client + .get_network_ip_address(&container, network) + .await?; + + let external_address = format!("redis://{}:{}", network_ip, Self::DEFAULT_REDIS_PORT); + + let host_port = container.get_host_port_ipv4(Self::DEFAULT_REDIS_PORT); + let internal_address = format!("redis://127.0.0.1:{host_port}"); + + tracing::info!( + "Redis container is running. External address: {}. 
Internal address: {}", + external_address, + internal_address + ); + + Ok(Redis { + container, + internal_address, + external_address, + }) + } +} diff --git a/integration-tests/chain-signatures/src/lib.rs b/integration-tests/chain-signatures/src/lib.rs index 0771a0ca0..6bc2ad5b6 100644 --- a/integration-tests/chain-signatures/src/lib.rs +++ b/integration-tests/chain-signatures/src/lib.rs @@ -236,6 +236,9 @@ pub async fn setup(docker_client: &DockerClient) -> anyhow::Result> let datastore = crate::containers::Datastore::run(docker_client, docker_network, gcp_project_id).await?; + let redis_container = crate::containers::Redis::run(docker_client, docker_network).await?; + let redis_url = redis_container.internal_address.clone(); + let sk_share_local_path = "multichain-integration-secret-manager".to_string(); let storage_options = mpc_node::storage::Options { env: "local-test".to_string(), @@ -243,6 +246,7 @@ pub async fn setup(docker_client: &DockerClient) -> anyhow::Result> sk_share_secret_id: None, gcp_datastore_url: Some(datastore.local_address.clone()), sk_share_local_path: Some(sk_share_local_path), + redis_url, }; let mesh_options = mpc_node::mesh::Options { diff --git a/integration-tests/chain-signatures/tests/cases/mod.rs b/integration-tests/chain-signatures/tests/cases/mod.rs index 2cb2cad70..4a79177e7 100644 --- a/integration-tests/chain-signatures/tests/cases/mod.rs +++ b/integration-tests/chain-signatures/tests/cases/mod.rs @@ -1,19 +1,29 @@ use std::str::FromStr; +use std::sync::Arc; use crate::actions::{self, add_latency, wait_for}; use crate::with_multichain_nodes; +use cait_sith::protocol::Participant; +use cait_sith::PresignOutput; use crypto_shared::{self, derive_epsilon, derive_key, x_coordinate, ScalarExt}; +use elliptic_curve::CurveArithmetic; use integration_tests_chain_signatures::containers::{self, DockerClient}; use integration_tests_chain_signatures::MultichainConfig; use k256::elliptic_curve::point::AffineCoordinates; +use k256::Secp256k1; use mpc_contract::config::Config; use mpc_contract::update::ProposeUpdateArgs; use mpc_node::kdf::into_eth_sig; -use mpc_node::test_utils; +use mpc_node::protocol::presignature::{Presignature, PresignatureId, PresignatureManager}; +use mpc_node::storage::presignature_storage::LockRedisPresignatureStorage; use mpc_node::types::LatestBlockHeight; use mpc_node::util::NearPublicKeyExt; +use mpc_node::{storage, test_utils}; +use near_account_id::AccountId; use test_log::test; +use tokio::sync::RwLock; +use url::Url; pub mod nightly; @@ -226,6 +236,87 @@ async fn test_triples_persistence_for_deletion() -> anyhow::Result<()> { Ok(()) } +#[test(tokio::test)] +async fn test_presignature_persistence() -> anyhow::Result<()> { + let docker_client = DockerClient::default(); + let docker_network = "test-presignature-persistence"; + docker_client.create_network(docker_network).await?; + let redis = containers::Redis::run(&docker_client, docker_network).await?; + let redis_url = Url::parse(&&redis.internal_address.as_str())?; + let presignature_storage: LockRedisPresignatureStorage = Arc::new(RwLock::new( + storage::presignature_storage::init(redis_url, &AccountId::from_str("test.near").unwrap()), + )); + let mut presignature_manager = PresignatureManager::new( + Participant::from(0), + 5, + 123, + &AccountId::from_str("test.near").unwrap(), + presignature_storage, + ); + + let presignature = dummy_presignature(); + let presignature_id: PresignatureId = presignature.id; + + // Check that the storage is empty at the start + 
assert!(!presignature_manager.contains(&presignature_id).await); + assert!(!presignature_manager.contains_mine(&presignature_id).await); + assert_eq!(presignature_manager.count_all().await, 0); + assert_eq!(presignature_manager.count_mine().await, 0); + assert!(presignature_manager.is_empty().await); + assert_eq!(presignature_manager.count_potential().await, 0); + + presignature_manager.insert(presignature).await; + + // Check that the storage contains the foreign presignature + assert!(presignature_manager.contains(&presignature_id).await); + assert!(!presignature_manager.contains_mine(&presignature_id).await); + assert_eq!(presignature_manager.count_all().await, 1); + assert_eq!(presignature_manager.count_mine().await, 0); + assert_eq!(presignature_manager.count_potential().await, 1); + + // Take presignature and check that it is removed from the storage + presignature_manager.take(presignature_id).await.unwrap(); + assert!(!presignature_manager.contains(&presignature_id).await); + assert!(!presignature_manager.contains_mine(&presignature_id).await); + assert_eq!(presignature_manager.count_all().await, 0); + assert_eq!(presignature_manager.count_mine().await, 0); + assert_eq!(presignature_manager.count_potential().await, 0); + + let mine_presignature = dummy_presignature(); + let mine_presig_id: PresignatureId = mine_presignature.id; + + // Add mine presignature and check that it is in the storage + presignature_manager.insert_mine(mine_presignature).await; + assert!(presignature_manager.contains(&mine_presig_id).await); + assert!(presignature_manager.contains_mine(&mine_presig_id).await); + assert_eq!(presignature_manager.count_all().await, 1); + assert_eq!(presignature_manager.count_mine().await, 1); + assert_eq!(presignature_manager.count_potential().await, 1); + + // Take mine presignature and check that it is removed from the storage + presignature_manager.take_mine().await.unwrap(); + assert!(!presignature_manager.contains(&mine_presig_id).await); + assert!(!presignature_manager.contains_mine(&mine_presig_id).await); + assert_eq!(presignature_manager.count_all().await, 0); + assert_eq!(presignature_manager.count_mine().await, 0); + assert!(presignature_manager.is_empty().await); + assert_eq!(presignature_manager.count_potential().await, 0); + + Ok(()) +} + +fn dummy_presignature() -> Presignature { + Presignature { + id: 1, + output: PresignOutput { + big_r: ::AffinePoint::default(), + k: ::Scalar::ZERO, + sigma: ::Scalar::ONE, + }, + participants: vec![Participant::from(1), Participant::from(2)], + } +} + #[test(tokio::test)] async fn test_latest_block_height() -> anyhow::Result<()> { with_multichain_nodes(MultichainConfig::default(), |ctx| {