Skip to content

Commit

Permalink
Merge pull request #4 from DEXPRO-Solutions-GmbH/feat/sqz-benchmark
Browse files Browse the repository at this point in the history
Add "squeeze bench" command
  • Loading branch information
fabiante authored Aug 14, 2024
2 parents 8e5a199 + 404ede7 commit 6effad2
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 11 deletions.
6 changes: 5 additions & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package cmd

import "github.com/spf13/cobra"
import (
"github.com/DEXPRO-Solutions-GmbH/xd/cmd/squeeze"
"github.com/spf13/cobra"
)

func NewRootCmd() *cobra.Command {
cmd := &cobra.Command{
Expand All @@ -9,6 +12,7 @@ func NewRootCmd() *cobra.Command {
}

cmd.AddCommand(newGenCmd())
cmd.AddCommand(squeeze.NewRootCmd())

return cmd
}
167 changes: 167 additions & 0 deletions cmd/squeeze/benchmark.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
package squeeze

import (
"context"
"fmt"
"io/fs"
"log/slog"
"os"
"path/filepath"
"strings"
"time"

squeezegoclient "github.com/dexpro-solutions-gmbh/squeeze-go-client"
"github.com/spf13/cobra"
)

// newBenchmarkCmd builds the "bench" sub-command.
//
// When run, the command:
//  1. reads the current document count of the Squeeze "Validation" queue step,
//  2. uploads every file below --data-dir that passes filterFile,
//  3. waits --delay so the user can start the worker,
//  4. polls the "Validation" step every --poll-interval until the count has
//     grown by at least the number of uploaded documents, and logs the
//     elapsed time.
//
// The whole run is bounded by --timeout. Fatal errors (API failures, walk
// errors, timeout) terminate the command via panic.
func newBenchmarkCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use: "bench",
	}

	flags := cmd.Flags()

	sqzKey := flags.String("sqz-key", "", "Squeeze API key")
	sqzBasePath := flags.String("sqz-base-path", "http://squeeze.docker.localhost/api/v2", "Squeeze API base path")

	dataDir := flags.String("data-dir", "", "Directory containing files to be uploaded")
	delay := flags.Duration("delay", 5*time.Second, "How long should the tool wait before starting the benchmark?")
	pollInterval := flags.Duration("poll-interval", 5*time.Second, "How often should the tool check for completion of the benchmark?")
	timeout := flags.Duration("timeout", 30*time.Minute, "When should the benchmark be considered failed if it hasn't completed yet?")

	cmd.Run = func(cmd *cobra.Command, _ []string) {
		ctx, cancel := context.WithTimeout(cmd.Context(), *timeout)
		defer cancel()

		slog.Info("Setting up API client", "basePath", *sqzBasePath)
		time.Sleep(time.Second) // Give users some time to abort if wrong URL was set.

		client := squeezegoclient.NewClient(*sqzBasePath)
		client.ApiKey = *sqzKey

		documentCount := 0

		// Get initial count in validation step so we can later check if everything reached the validation.
		// This also ensures the API credentials are valid.
		initialStepCount, err := getStepCount(client.Queue, "Validation")
		if err != nil {
			panic(err)
		}

		slog.Info("Initial Validation step Count", "count", initialStepCount)

		err = filepath.WalkDir(*dataDir, func(path string, d fs.DirEntry, walkErr error) error {
			if walkErr != nil {
				return walkErr
			}

			if !filterFile(path, d) {
				return nil
			}

			file, err := os.Open(path)
			if err != nil {
				return err
			}
			// The callback runs once per entry, so this defer closes the file
			// before the walk moves on to the next one.
			defer func() {
				if err := file.Close(); err != nil {
					slog.Error("Closing path failed", "err", err)
				}
			}()

			fileName := filepath.Base(path)

			// NOTE(review): the meaning of the leading arguments (1, 0, "", nil) is
			// defined by the Squeeze client and is not visible here - confirm.
			_, resErr := client.Document.ProcessDocument(1, 0, "", nil, file, fileName)
			if resErr != nil {
				// Ignore the upload error and continue with next file - this allows the benchmark
				// to still be run. That is most often what we want since Squeeze may decide
				// to reject uploads of invalid files / file types.
				slog.Error("Failed to upload document", "path", fileName, "err", resErr)
				return nil
			}

			slog.Info("File uploaded", "path", fileName)

			documentCount++

			return nil
		})
		if err != nil {
			panic(err)
		}

		if documentCount == 0 {
			slog.Error("No files uploaded, stopping benchmark")
			return
		}

		slog.Info("All files uploaded", "count", documentCount)

		slog.Info("Waiting you can start the worker to process the document", "duration", *delay)

		// Honour the overall timeout / cancellation while waiting instead of
		// sleeping unconditionally.
		if err := waitOrDone(ctx, *delay); err != nil {
			panic(err)
		}

		slog.Info("Timer started")
		start := time.Now()

		for {
			// Check if all documents have been processed
			count, err := getStepCount(client.Queue, "Validation")
			if err != nil {
				panic(err)
			}

			doneCount := count - initialStepCount

			// Use >= rather than ==: if any other document reaches the step
			// during the run, the count overshoots and an equality check
			// would never succeed.
			if doneCount >= documentCount {
				break
			}

			slog.Debug("Validation count", "done", doneCount)

			if err := waitOrDone(ctx, *pollInterval); err != nil {
				panic(err)
			}
		}

		elapsed := time.Since(start)
		slog.Info("All documents processed", "elapsed", elapsed)
	}

	return cmd
}

// waitOrDone blocks until d has elapsed or ctx is done, whichever comes first.
//
// It returns nil when the duration elapsed and ctx.Err() when the context
// ended first. The timer is stopped on return so it does not linger.
func waitOrDone(ctx context.Context, d time.Duration) error {
	timer := time.NewTimer(d)
	defer timer.Stop()

	select {
	case <-ctx.Done():
		return ctx.Err()
	case <-timer.C:
		return nil
	}
}

// filterFile reports whether a directory entry should be uploaded as part of
// a benchmark run.
//
// Entries that Squeeze would not process are rejected:
//   - directories
//   - hidden files, i.e. names starting with "." (the Unix convention)
//
// The path argument is currently unused; only the entry itself is inspected.
//
// Returns true if the file should be uploaded, false otherwise.
func filterFile(_ string, d fs.DirEntry) bool {
	switch {
	case d.IsDir():
		// Directories are never uploaded themselves.
		return false
	case strings.HasPrefix(d.Name(), "."):
		// Hidden file (at least on Unix like systems where . prefix is used).
		return false
	default:
		return true
	}
}

// getStepCount looks up the queue step called stepName through the given
// queue API client and returns that step's Count field.
//
// If the lookup fails, 0 is returned together with the wrapped error.
func getStepCount(client *squeezegoclient.QueueApi, stepName string) (int, error) {
	queueStep, lookupErr := client.GetQueueStep(stepName)
	if lookupErr == nil {
		return queueStep.Count, nil
	}

	return 0, fmt.Errorf("failed to get step count: %w", lookupErr)
}
80 changes: 80 additions & 0 deletions cmd/squeeze/benchmark_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package squeeze

import (
"io/fs"
"testing"

"github.com/stretchr/testify/require"
)

// mockDirEntry is a minimal directory-entry stand-in for tests. Only the
// name and the directory flag are backed by data; Type and Info panic.
type mockDirEntry struct {
	name string
	dir  bool
}

// Name returns the configured entry name.
func (e mockDirEntry) Name() string { return e.name }

// IsDir reports whether the entry was configured as a directory.
func (e mockDirEntry) IsDir() bool { return e.dir }

// Type is not supported by the mock and always panics.
func (e mockDirEntry) Type() fs.FileMode {
	panic("not implemented")
}

// Info is not supported by the mock and always panics.
func (e mockDirEntry) Info() (fs.FileInfo, error) {
	panic("not implemented")
}

// Test_filterFile runs filterFile against a table of directory entries and
// checks the keep/skip decision for regular files, a hidden file and a
// directory.
func Test_filterFile(t *testing.T) {
	cases := []struct {
		name     string
		path     string
		entry    mockDirEntry
		expected bool
	}{
		{name: "valid pdf", path: "invoice.pdf", entry: mockDirEntry{name: "invoice.pdf"}, expected: true},
		{name: "valid xml", path: "invoice.xml", entry: mockDirEntry{name: "invoice.xml"}, expected: true},
		{name: "hidden path", path: ".env", entry: mockDirEntry{name: ".env"}, expected: false},
		{name: "directory", path: "my-dir", entry: mockDirEntry{name: "my-dir", dir: true}, expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := filterFile(tc.path, tc.entry)
			require.Equal(t, tc.expected, got)
		})
	}
}
13 changes: 13 additions & 0 deletions cmd/squeeze/root.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package squeeze

import "github.com/spf13/cobra"

// NewRootCmd returns the "squeeze" parent command with the benchmark
// sub-command registered on it.
func NewRootCmd() *cobra.Command {
	root := &cobra.Command{Use: "squeeze"}
	root.AddCommand(newBenchmarkCmd())

	return root
}
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@ module github.com/DEXPRO-Solutions-GmbH/xd
go 1.21.4

require (
github.com/dexpro-solutions-gmbh/squeeze-go-client v0.0.0-20221218135945-bb2e6f6c7ee4
github.com/google/uuid v1.5.0
github.com/spf13/cobra v1.8.0
github.com/stretchr/testify v1.8.4
)

require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.0 // indirect
github.com/stretchr/testify v1.8.4 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
11 changes: 3 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dexpro-solutions-gmbh/squeeze-go-client v0.0.0-20221218135945-bb2e6f6c7ee4 h1:dv+rT0sIha+bk+Ggta49CTsEiLdu8888U003pTjVTH0=
github.com/dexpro-solutions-gmbh/squeeze-go-client v0.0.0-20221218135945-bb2e6f6c7ee4/go.mod h1:P/fmG65Y2vIWDfv1EyRk1/gEKZJmZo8h6DsWs01sJzI=
github.com/google/uuid v1.5.0 h1:1p67kYwdtXjb0gL0BPiP1Av9wiZPo5A8z2cWkTZ+eyU=
github.com/google/uuid v1.5.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
Expand All @@ -13,15 +14,9 @@ github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0=
github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

0 comments on commit 6effad2

Please sign in to comment.