From 3673094bf31aa91099eca0dd65b4464179636303 Mon Sep 17 00:00:00 2001 From: louis Date: Fri, 16 Apr 2021 17:25:05 +0200 Subject: [PATCH 1/2] make expiration duration for pads configurable --- README.md | 28 +++++++---- cmd/metrics.go | 7 ++- cmd/purge.go | 31 +++++++----- pkg/helper/expiration.go | 89 +++++++++++++++++++++++++++++++++++ pkg/helper/expiration_test.go | 77 ++++++++++++++++++++++++++++++ pkg/helper/sort.go | 20 -------- pkg/helper/sort_test.go | 16 ------- pkg/metrics/collector.go | 6 ++- pkg/purge/purger.go | 46 +++++++++--------- 9 files changed, 235 insertions(+), 85 deletions(-) create mode 100644 pkg/helper/expiration.go create mode 100644 pkg/helper/expiration_test.go delete mode 100644 pkg/helper/sort.go delete mode 100644 pkg/helper/sort_test.go diff --git a/README.md b/README.md index 7352cc6..f3d51ef 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,8 @@ Usage: Flags: -h, --help help for metrics - --listen.addr string (default ":9012") + --listen.addr string Address on which to expose metrics. (default ":9012") + --suffixes string Suffixes to group the pads. (default "keep,temp") ``` ### Move Pad @@ -91,19 +92,28 @@ Flags: ### Purge -The command checks every Pad if the last edited date is older than the defined limit. Older Pads will be deleted. +The command checks every Pad for its last edited date. If it is older than the defined limit, the pad will be deleted. -Pads without any changes (revisions) will be deleted. -Pads without a suffix will be deleted after 30 days of inactivity. -Pads with the suffix "-temp" will be deleted after 24 hours of inactivity. -Pads with the suffix "-keep" will be deleted after 365 days of inactivity. +Pads without any changes (revisions) will be deleted. This can happen when no content was changed in the pad +(e.g. a person misspells a pad name). +Pads will be grouped by the pre-defined suffixes. Every suffix has a defined expiration time. 
If the pad is older than the +defined expiration time, the pad will be deleted. + +Example: + +`etherpad-toolkit purge --expiration "default:720h,temp:24h,keep:8760h"` + +This configuration will group the pads in three clusters: default (expiration: 30 days, this entry is required!), +temp (expiration: 24 hours), keep (expiration: 365 days). If pads in a cluster are older than the given expiration, the +pads will be deleted. ``` Usage: etherpad-toolkit purge [flags] Flags: - --concurrency int Concurrency for the purge process (default 4) - --dry-run Enable dry-run - -h, --help help for purge + --concurrency int Concurrency for the purge process (default 4) + --dry-run Enable dry-run + --expiration string Configuration for pad expiration duration. Example: "default:720h,temp:24h,keep:8760h" + -h, --help help for purge ``` diff --git a/cmd/metrics.go b/cmd/metrics.go index adb12b1..9e1ccd5 100644 --- a/cmd/metrics.go +++ b/cmd/metrics.go @@ -2,6 +2,7 @@ package cmd import ( "net/http" + "strings" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -13,6 +14,7 @@ import ( var ( listenAddr string + suffixes string metricsCmd = &cobra.Command{ Use: "metrics", @@ -23,14 +25,15 @@ var ( ) func init() { - metricsCmd.Flags().StringVar(&listenAddr, "listen.addr", ":9012", "") + metricsCmd.Flags().StringVar(&listenAddr, "listen.addr", ":9012", "Address on which to expose metrics.") + metricsCmd.Flags().StringVar(&suffixes, "suffixes", "keep,temp", "Suffixes to group the pads.") rootCmd.AddCommand(metricsCmd) } func runMetrics(cmd *cobra.Command, args []string) { etherpad := pkg.NewEtherpadClient(etherpadUrl, etherpadApiKey) - prometheus.MustRegister(metrics.NewPadCollector(etherpad)) + prometheus.MustRegister(metrics.NewPadCollector(etherpad, strings.Split(suffixes, ","))) http.Handle("/metrics", promhttp.Handler()) log.Fatal(http.ListenAndServe(listenAddr, nil)) diff --git a/cmd/purge.go b/cmd/purge.go index 
dc008b1..7102b40 100644 --- a/cmd/purge.go +++ b/cmd/purge.go @@ -11,14 +11,23 @@ import ( var ( concurrency int dryRun bool + expiration string longDescription = ` -The command checks every Pad if the last edited date is older than the defined limit. Older Pads will be deleted. +The command checks every Pad for it’s last edited date. If it is older than the defined limit, the pad will be deleted. -Pads without any changes (revisions) will be deleted. -Pads without a suffix will be deleted after 30 days of inactivity. -Pads with the suffix "-temp" will be deleted after 24 hours of inactivity. -Pads with the suffix "-keep" will be deleted after 365 days of inactivity. +Pads without any changes (revisions) will be deleted. This can happen when no content was changed in the pad +(e.g. a person misspelles a pad). +Pads will grouped by the pre-defined suffixes. Every suffix has a defined expiration time. If the pad is older than the +defined expiration time, the pad will be deleted. + +Example: + +etherpad-toolkit purge --expiration "default:720h,temp:24h,keep:8760h" + +This configuration will group the pads in three clusters: default (expiration: 30 days, suffix is required!), +temp (expiration: 24 hours), keep (expiration: 365 days). If pads in the clusters older than the given expiration the +pads will be deleted. ` purgeCmd = &cobra.Command{ @@ -30,6 +39,7 @@ Pads with the suffix "-keep" will be deleted after 365 days of inactivity. ) func init() { + purgeCmd.Flags().StringVar(&expiration, "expiration", "", "Configuration for pad expiration duration. 
Example: \"default:720h,temp:24h,keep:8760h\"") purgeCmd.Flags().IntVar(&concurrency, "concurrency", 4, "Concurrency for the purge process") purgeCmd.Flags().BoolVar(&dryRun, "dry-run", false, "Enable dry-run") @@ -38,14 +48,11 @@ func init() { func runPurger(cmd *cobra.Command, args []string) { etherpad := pkg.NewEtherpadClient(etherpadUrl, etherpadApiKey) - purger := purge.NewPurger(etherpad, dryRun) - - pads, err := etherpad.ListAllPads() + exp, err := helper.ParsePadExpiration(expiration) if err != nil { - log.WithError(err).Error("failed to fetch pads") + log.WithError(err).Error("failed to parse expiration string") return } - sorted := helper.SortPads(pads) - - purger.PurgePads(sorted, concurrency) + purger := purge.NewPurger(etherpad, exp, dryRun) + purger.PurgePads(concurrency) } diff --git a/pkg/helper/expiration.go b/pkg/helper/expiration.go new file mode 100644 index 0000000..dacd5cb --- /dev/null +++ b/pkg/helper/expiration.go @@ -0,0 +1,89 @@ +package helper + +import ( + "errors" + "fmt" + "strings" + "time" + + log "github.com/sirupsen/logrus" +) + +const DefaultSuffix = "default" + +type PadExpiration map[string]time.Duration + +// ParsePadExpiration parses a string of the form "default:720h,temp:24h,keep:8760h" into a PadExpiration; durations use time.ParseDuration syntax (there is no "d" unit) and malformed entries are logged and skipped. +// The key "default" is mandatory in the input string. 
+func ParsePadExpiration(s string) (PadExpiration, error) { + exp := make(PadExpiration) + + if s == "" { + return exp, errors.New("input string is empty") + } + + for _, str := range strings.Split(s, ",") { + split := strings.Split(str, ":") + if len(split) != 2 { + log.WithField("string", str).Error("string is not valid") + continue + } + duration, err := time.ParseDuration(split[1]) + if err != nil { + log.WithError(err).WithField("duration", split[1]).Error("unable to parse the duration") + continue + } + + exp[split[0]] = duration + } + + if _, ok := exp[DefaultSuffix]; !ok { + return exp, errors.New("missing default expiration duration") + } + + return exp, nil +} + +// GetDuration returns the negated expiration duration of a configured suffix matching the pad name ("<pad>-<suffix>"), or the negated default duration if no suffix matches. +func (pe *PadExpiration) GetDuration(pad string) time.Duration { + for suffix, duration := range *pe { + if strings.HasSuffix(pad, fmt.Sprintf("-%s", suffix)) { + return -duration + } + } + + return -(*pe)[DefaultSuffix] +} + +// GroupPadsByExpiration groups pads by the suffixes configured in expiration (the default key itself is not used as a suffix) and returns a map from suffix to pad names. +func GroupPadsByExpiration(pads []string, expiration PadExpiration) map[string][]string { + var suffixes []string + for suffix := range expiration { + if suffix == DefaultSuffix { + continue + } + suffixes = append(suffixes, suffix) + } + + return GroupPadsBySuffixes(pads, suffixes) +} + +// GroupPadsBySuffixes groups pads by the given suffixes and returns a map from suffix to pad names; pads matching no suffix are stored under DefaultSuffix. 
+func GroupPadsBySuffixes(pads, suffixes []string) map[string][]string { + sorted := make(map[string][]string) + + for _, pad := range pads { + found := false + for _, suffix := range suffixes { + if strings.HasSuffix(pad, fmt.Sprintf("-%s", suffix)) { + sorted[suffix] = append(sorted[suffix], pad) + found = true + } + } + if !found { + sorted[DefaultSuffix] = append(sorted[DefaultSuffix], pad) + } + } + + return sorted +} diff --git a/pkg/helper/expiration_test.go b/pkg/helper/expiration_test.go new file mode 100644 index 0000000..8713bc3 --- /dev/null +++ b/pkg/helper/expiration_test.go @@ -0,0 +1,77 @@ +package helper + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestParsePadExpiration(t *testing.T) { + s := "default:720h,temp:24h,keep:262800h" + exp, err := ParsePadExpiration(s) + assert.Nil(t, err) + assert.Equal(t, time.Duration(2592000000000000), exp[DefaultSuffix]) + assert.Equal(t, time.Duration(946080000000000000), exp["keep"]) + assert.Equal(t, time.Duration(86400000000000), exp["temp"]) + + s = "wrong:1d" + + _, err = ParsePadExpiration(s) + assert.Error(t, err) + assert.Equal(t, "missing default expiration duration", err.Error()) + + s = "wrong:1h:2h" + _, err = ParsePadExpiration(s) + assert.Error(t, err) + assert.Equal(t, "missing default expiration duration", err.Error()) + + s = "" + _, err = ParsePadExpiration(s) + assert.Error(t, err) + assert.Equal(t, "input string is empty", err.Error()) +} + +func TestPadExpiration_GetDuration(t *testing.T) { + s := "default:24h" + exp, err := ParsePadExpiration(s) + assert.Nil(t, err) + + dur := exp.GetDuration("pad") + assert.Equal(t, "-24h0m0s", dur.String()) + + s = "default:24h,temp:10m" + exp, err = ParsePadExpiration(s) + assert.Nil(t, err) + + dur = exp.GetDuration("pad") + assert.Equal(t, "-24h0m0s", dur.String()) + + dur = exp.GetDuration("pad-temp") + assert.Equal(t, "-10m0s", dur.String()) +} + +func TestGroupPadsByExpiration(t *testing.T) { + s := 
"default:720h,temp:24h,keep:262800h" + pads := []string{"pad", "pad2", "pad-keep", "pad-temp"} + exp, err := ParsePadExpiration(s) + assert.Nil(t, err) + + sorted := GroupPadsByExpiration(pads, exp) + + if _, ok := sorted[DefaultSuffix]; !ok { + t.Fail() + } + + if _, ok := sorted["keep"]; !ok { + t.Fail() + } + + if _, ok := sorted["temp"]; !ok { + t.Fail() + } + + assert.Equal(t, []string{"pad", "pad2"}, sorted[DefaultSuffix]) + assert.Equal(t, []string{"pad-keep"}, sorted["keep"]) + assert.Equal(t, []string{"pad-temp"}, sorted["temp"]) +} diff --git a/pkg/helper/sort.go b/pkg/helper/sort.go deleted file mode 100644 index 48fa289..0000000 --- a/pkg/helper/sort.go +++ /dev/null @@ -1,20 +0,0 @@ -package helper - -import "strings" - -// SortPads will put the padIds into a string map organized by their suffixes -func SortPads(padIds []string) map[string][]string { - sorted := make(map[string][]string) - - for _, pad := range padIds { - if strings.HasSuffix(pad, "-keep") { - sorted["keep"] = append(sorted["keep"], pad) - } else if strings.HasSuffix(pad, "-temp") { - sorted["temp"] = append(sorted["temp"], pad) - } else { - sorted["none"] = append(sorted["none"], pad) - } - } - - return sorted -} diff --git a/pkg/helper/sort_test.go b/pkg/helper/sort_test.go deleted file mode 100644 index ce73055..0000000 --- a/pkg/helper/sort_test.go +++ /dev/null @@ -1,16 +0,0 @@ -package helper - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestSortPads(t *testing.T) { - pads := []string{"pad", "pad-keep", "pad-temp"} - sorted := SortPads(pads) - - assert.Equal(t, []string{"pad"}, sorted["none"]) - assert.Equal(t, []string{"pad-keep"}, sorted["keep"]) - assert.Equal(t, []string{"pad-temp"}, sorted["temp"]) -} diff --git a/pkg/metrics/collector.go b/pkg/metrics/collector.go index eb2e207..e345f0c 100644 --- a/pkg/metrics/collector.go +++ b/pkg/metrics/collector.go @@ -9,12 +9,14 @@ import ( type PadCollector struct { etherpad *pkg.Etherpad + suffixes 
[]string PadGaugeDesc *prometheus.Desc } -func NewPadCollector(etherpad *pkg.Etherpad) *PadCollector { +func NewPadCollector(etherpad *pkg.Etherpad, suffixes []string) *PadCollector { return &PadCollector{ etherpad: etherpad, + suffixes: suffixes, PadGaugeDesc: prometheus.NewDesc("etherpad_toolkit_pads", "The current number of pads", []string{"suffix"}, nil), } } @@ -30,7 +32,7 @@ func (pc *PadCollector) Collect(ch chan<- prometheus.Metric) { return } - sorted := helper.SortPads(allPads) + sorted := helper.GroupPadsBySuffixes(allPads, pc.suffixes) for suffix, pads := range sorted { ch <- prometheus.MustNewConstMetric( diff --git a/pkg/purge/purger.go b/pkg/purge/purger.go index e7189dd..89531d5 100644 --- a/pkg/purge/purger.go +++ b/pkg/purge/purger.go @@ -1,30 +1,39 @@ package purge import ( - "strings" "sync" "time" log "github.com/sirupsen/logrus" "github.com/systemli/etherpad-toolkit/pkg" + "github.com/systemli/etherpad-toolkit/pkg/helper" ) -// Purger type Purger struct { - Etherpad *pkg.Etherpad - DryRun bool + etherpad *pkg.Etherpad + expiration helper.PadExpiration + dryRun bool } // NewPurger returns a instance of Purger. -func NewPurger(ep *pkg.Etherpad, dryRun bool) *Purger { +func NewPurger(ep *pkg.Etherpad, exp helper.PadExpiration, dryRun bool) *Purger { return &Purger{ - Etherpad: ep, - DryRun: dryRun, + etherpad: ep, + expiration: exp, + dryRun: dryRun, } } // PurgePads loops over a sorted map of pads and removes pads which are not edited for some times. 
-func (p *Purger) PurgePads(sorted map[string][]string, concurrency int) { +func (p *Purger) PurgePads(concurrency int) { + pads, err := p.etherpad.ListAllPads() + if err != nil { + log.WithError(err).Error("failed to list all pads") + return + } + + sorted := helper.GroupPadsByExpiration(pads, p.expiration) + var wg sync.WaitGroup for suffix, padIds := range sorted { @@ -66,42 +75,31 @@ func (p *Purger) worker(pads chan string, out chan int) { for pad := range pads { log.WithField("pad", pad).Debug("Process Pad") - revisions, err := p.Etherpad.GetRevisionsCount(pad) + revisions, err := p.etherpad.GetRevisionsCount(pad) if err != nil { log.WithError(err).WithField("pad", pad).Error("failed to get last edited time") continue } - lastEdited, err := p.Etherpad.GetLastEdited(pad) + lastEdited, err := p.etherpad.GetLastEdited(pad) if err != nil { log.WithError(err).Error("") continue } - deletable := lastEdited.Before(time.Now().Add(padDuration(pad))) || revisions == 0 + deletable := lastEdited.Before(time.Now().Add(p.expiration.GetDuration(pad))) || revisions == 0 if !deletable { continue } log.WithFields(log.Fields{"pad": pad, "lastEdited": lastEdited, "revisions": revisions}).Info("Delete Pad") - if p.DryRun { + if p.dryRun { continue } - err = p.Etherpad.DeletePad(pad) + err = p.etherpad.DeletePad(pad) if err != nil { log.WithError(err).WithField("pad", pad).Error("failed to delete pad") } } out <- 0 } - -// padDuration returns the time frame in which the pad should be edited. 
-func padDuration(padID string) time.Duration { - if strings.HasSuffix(padID, "-keep") { - return -365 * 24 * time.Hour - } else if strings.HasSuffix(padID, "-temp") { - return -24 * time.Hour - } else { - return -30 * 24 * time.Hour - } -} From 20b4e48acd994b75bcc0e6a0ec0866a6731c781b Mon Sep 17 00:00:00 2001 From: louis Date: Sat, 17 Apr 2021 19:07:09 +0200 Subject: [PATCH 2/2] test: add testcase for purger --- pkg/purge/purger_test.go | 173 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 pkg/purge/purger_test.go diff --git a/pkg/purge/purger_test.go b/pkg/purge/purger_test.go new file mode 100644 index 0000000..fb91dc4 --- /dev/null +++ b/pkg/purge/purger_test.go @@ -0,0 +1,173 @@ +package purge + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/systemli/etherpad-toolkit/pkg" + "github.com/systemli/etherpad-toolkit/pkg/helper" +) + +var pads = map[string]struct { + Revisions int + LastEdited time.Time +}{ + "pad": { + Revisions: 30, + LastEdited: time.Now().Add(-1 * time.Hour), + }, + "pad+empty": { + Revisions: 0, + LastEdited: time.Now().Add(-1 * time.Hour), + }, + "pad+expired": { + Revisions: 1, + LastEdited: time.Now().Add(-999 * time.Hour), + }, +} + +var handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if strings.Contains(r.URL.String(), "listAllPads") { + var body struct { + Code int `json:"code"` + Message string `json:"message"` + Data struct { + PadIDs []string `json:"padIDs"` + } `json:"data"` + } + + var padIDs []string + + for padId, _ := range pads { + padIDs = append(padIDs, padId) + } + + body.Code = 0 + body.Message = "ok" + body.Data.PadIDs = padIDs + + b, err := json.Marshal(body) + if err != nil { + w.WriteHeader(500) + return + } + + w.WriteHeader(200) + w.Write(b) + return + } + + if strings.Contains(r.URL.String(), "getRevisionsCount") { + padID := 
r.URL.Query().Get("padID") + revisions := pads[padID].Revisions + + var body struct { + Code int `json:"code"` + Message string `json:"message"` + Data struct { + Revisions int `json:"revisions"` + } + } + + body.Code = 0 + body.Message = "ok" + body.Data.Revisions = revisions + + b, err := json.Marshal(body) + if err != nil { + w.WriteHeader(500) + return + } + + w.WriteHeader(200) + w.Write(b) + return + } + + if strings.Contains(r.URL.String(), "getLastEdited") { + padID := r.URL.Query().Get("padID") + lastEdited := pads[padID].LastEdited + + var body struct { + Code int `json:"code"` + Message string `json:"message"` + Data struct { + LastEdited int64 `json:"lastEdited"` + } `json:"data"` + } + + body.Code = 0 + body.Message = "ok" + body.Data.LastEdited = lastEdited.Unix() * 1000 + + b, err := json.Marshal(body) + if err != nil { + w.WriteHeader(500) + return + } + + w.WriteHeader(200) + w.Write(b) + return + } + + if strings.Contains(r.URL.String(), "deletePad") { + padID := r.URL.Query().Get("padID") + delete(pads, padID) + + var body struct { + Code int `json:"code"` + Message string `json:"message"` + } + + body.Code = 0 + body.Message = "ok" + + b, err := json.Marshal(body) + if err != nil { + w.WriteHeader(500) + return + } + + w.WriteHeader(200) + w.Write(b) + return + } +}) + +func TestPurger_PurgePads_DryRun(t *testing.T) { + rec := httptest.NewServer(handler) + etherpad := pkg.NewEtherpadClient(rec.URL, "") + expiration, err := helper.ParsePadExpiration("default:720h") + if err != nil { + t.Fail() + } + purger := NewPurger(etherpad, expiration, true) + + assert.Equal(t, 3, len(pads)) + + purger.PurgePads(1) + + assert.Equal(t, 3, len(pads)) +} + +func TestPurger_PurgePads(t *testing.T) { + rec := httptest.NewServer(handler) + etherpad := pkg.NewEtherpadClient(rec.URL, "") + expiration, err := helper.ParsePadExpiration("default:720h") + if err != nil { + t.Fail() + } + purger := NewPurger(etherpad, expiration, false) + + assert.Equal(t, 3, len(pads)) 
+ + purger.PurgePads(1) + + assert.Equal(t, 1, len(pads)) +}