From c89de5edf22b9b6c2e01f18e870aa879595642d5 Mon Sep 17 00:00:00 2001 From: Yufan Sheng Date: Wed, 16 Nov 2022 20:47:07 +0800 Subject: [PATCH] fix: chinese character in zip file. --- go.mod | 2 +- go.sum | 4 +- internal/client/client.go | 2 +- internal/fetcher/fetcher.go | 3 +- internal/fetcher/wordpress.go | 2 +- internal/unzip/unzip.go | 152 ++++++++++++++++++++++++++++++++++ 6 files changed, 158 insertions(+), 7 deletions(-) create mode 100644 internal/unzip/unzip.go diff --git a/go.mod b/go.mod index 40e9ffd..b5a2fcd 100644 --- a/go.mod +++ b/go.mod @@ -16,10 +16,10 @@ require ( github.com/spf13/cobra v1.6.1 github.com/stretchr/testify v1.8.1 github.com/tickstep/cloudpan189-api v0.0.9 - github.com/yi-ge/unzip v1.0.2 go.uber.org/ratelimit v0.2.0 golang.org/x/net v0.2.0 golang.org/x/term v0.2.0 + golang.org/x/text v0.4.0 ) require ( diff --git a/go.sum b/go.sum index 7e6fafa..d6cac02 100644 --- a/go.sum +++ b/go.sum @@ -138,8 +138,6 @@ github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= github.com/ugorji/go v1.1.7/go.mod h1:kZn38zHttfInRq0xu/PH0az30d+z6vm202qpg1oXVMw= github.com/ugorji/go/codec v1.1.7 h1:2SvQaVZ1ouYrrKKwoSk2pzd4A9evlKJb9oTL+OaLUSs= github.com/ugorji/go/codec v1.1.7/go.mod h1:Ax+UKWsSmolVDwsd+7N3ZtXu+yMGCf907BLYF3GoBXY= -github.com/yi-ge/unzip v1.0.2 h1:Mz1+LxvTNV78eWDSyYOitiB+wNS7aSGzDj8KG0SeTaA= -github.com/yi-ge/unzip v1.0.2/go.mod h1:CdcCDDd6KIIr9DK3TaaliK0hJzzTlw9WypEcTcdlYKI= go.opentelemetry.io/otel v1.11.1 h1:4WLLAmcfkmDk2ukNXJyq3/kiz/3UzCaYq6PskJsaou4= go.opentelemetry.io/otel v1.11.1/go.mod h1:1nNhXBbWSD0nsL38H6btgnFN2k4i0sNLHNNMZMSbUGE= go.opentelemetry.io/otel/trace v1.11.1 h1:ofxdnzsNrGBYXbP7t7zpUK281+go5rF7dvdIZXF8gdQ= @@ -185,6 +183,8 @@ golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.6/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg= +golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/internal/client/client.go b/internal/client/client.go index fdca094..5bdd3ff 100644 --- a/internal/client/client.go +++ b/internal/client/client.go @@ -151,7 +151,7 @@ func New(c *Config) (*Client, error) { SetRetryWaitTime(3*time.Second). SetRetryMaxWaitTime(10*time.Second). SetAllowGetMethodPayload(true). - SetTimeout(1*time.Minute). + SetTimeout(5*time.Minute). SetContentLength(true). SetDebug(log.EnableDebug). SetDisableWarn(true). diff --git a/internal/fetcher/fetcher.go b/internal/fetcher/fetcher.go index 62fc699..fbf8056 100644 --- a/internal/fetcher/fetcher.go +++ b/internal/fetcher/fetcher.go @@ -5,12 +5,11 @@ import ( "path/filepath" "sync" - "github.com/yi-ge/unzip" - "github.com/bookstairs/bookhunter/internal/driver" "github.com/bookstairs/bookhunter/internal/file" "github.com/bookstairs/bookhunter/internal/log" "github.com/bookstairs/bookhunter/internal/progress" + "github.com/bookstairs/bookhunter/internal/unzip" ) const ( diff --git a/internal/fetcher/wordpress.go b/internal/fetcher/wordpress.go index b1daf16..d569970 100644 --- a/internal/fetcher/wordpress.go +++ b/internal/fetcher/wordpress.go @@ -121,7 +121,7 @@ func (w *wordpressService) formats(id int64) (map[Format]driver.Share, error) { return map[Format]driver.Share{}, nil } -func (w *wordpressService) fetch(i int64, format Format, share driver.Share, writer file.Writer) error { +func (w *wordpressService) fetch(_ int64, _ Format, share driver.Share, writer 
file.Writer) error { content, size, err := w.driver.Download(share) if err != nil { return err diff --git a/internal/unzip/unzip.go b/internal/unzip/unzip.go new file mode 100644 index 0000000..5b9a5c9 --- /dev/null +++ b/internal/unzip/unzip.go @@ -0,0 +1,152 @@ +package unzip + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "golang.org/x/text/encoding/simplifiedchinese" + "golang.org/x/text/transform" +) + +// Encoding to use. Since this implements the encoding.Encoding +// interface from golang.org/x/text/encoding you can trivially +// change this out for any of the other implemented encoders, +// e.g. `traditionalchinese.Big5`, `charmap.Windows1252`, +// `korean.EUCKR`, etc. +var encoding = simplifiedchinese.GB18030 + +// Unzip - struct +type Unzip struct { + Src string + Dest string +} + +// New - Create a new Unzip. +func New(src string, dest string) Unzip { + return Unzip{src, dest} +} + +// Extract - Extract zip file. +func (uz Unzip) Extract() error { + r, err := zip.OpenReader(uz.Src) + if err != nil { + return err + } + defer func() { + if err := r.Close(); err != nil { + panic(err) + } + }() + + _ = os.MkdirAll(uz.Dest, 0755) + + for _, f := range r.File { + err := uz.extractAndWriteFile(f) + if err != nil { + return err + } + } + + return nil +} + +// Closure to address file descriptors issue with all the deferred Close() methods. 
+//
+// It writes a single archive entry below uz.Dest and returns the first error
+// encountered, including errors from closing the entry reader. Directory
+// entries are created with the mode stored in the archive, symbolic links are
+// recreated after their target has been validated, and every other entry is
+// written as a regular file.
+func (uz Unzip) extractAndWriteFile(f *zip.File) (err error) {
+	rc, err := f.Open()
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// Surface a failed Close as an error rather than panicking, but never
+		// mask an error that is already being returned.
+		if cerr := rc.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	// archive/zip sets NonUTF8 when the stored name is not UTF-8. Names that
+	// are already UTF-8 must not be pushed through the legacy decoder, which
+	// would turn them into mojibake.
+	name := f.Name
+	if f.NonUTF8 {
+		name = encodingFilename(name)
+	}
+
+	path, err := sanitizeArchivePath(uz.Dest, name)
+	if err != nil {
+		return err
+	}
+	// An entry has to live strictly below the destination directory.
+	if path == filepath.Clean(uz.Dest) || !withinDir(uz.Dest, path) {
+		return fmt.Errorf("%s: illegal file path", path)
+	}
+
+	mode := f.Mode()
+	switch {
+	case mode.IsDir():
+		return os.MkdirAll(path, mode)
+	case mode&os.ModeType == os.ModeSymlink:
+		// The content of a symlink entry is the link target.
+		data, readErr := io.ReadAll(rc)
+		if readErr != nil {
+			return readErr
+		}
+		target := string(data)
+		// Lexical check only: refuse links that would let a later entry be
+		// written outside of the destination directory.
+		if filepath.IsAbs(target) || !withinDir(uz.Dest, filepath.Join(filepath.Dir(path), target)) {
+			return fmt.Errorf("%s: symlink target %q points outside of the destination", path, target)
+		}
+		return writeSymbolicLink(path, target)
+	default:
+		return writeFileEntry(path, mode, rc)
+	}
+}
+
+// copyChunkSize is the number of bytes copied per io.CopyN call when an entry
+// is written to disk.
+const copyChunkSize = 32 * 1024
+
+// writeFileEntry streams src into a newly created (or truncated) file at path
+// using the given mode. Missing parent directories are created first. An error
+// from closing the file is returned because it can indicate lost data.
+func writeFileEntry(path string, mode os.FileMode, src io.Reader) (err error) {
+	// Parent directories need the search bit, which a regular file mode such
+	// as 0644 does not carry, so the file mode must not be reused here.
+	if mkErr := os.MkdirAll(filepath.Dir(path), 0755); mkErr != nil {
+		return mkErr
+	}
+
+	outFile, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if cerr := outFile.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	// G110: the copy is done in bounded chunks. Note that this does not cap the
+	// total size of the extracted data.
+	for {
+		if _, copyErr := io.CopyN(outFile, src, copyChunkSize); copyErr != nil {
+			if copyErr == io.EOF {
+				return nil
+			}
+			return copyErr
+		}
+	}
+}
+
+// writeSymbolicLink creates a symbolic link at filePath that points to
+// targetPath, creating missing parent directories. An entry that already
+// exists at filePath is replaced, mirroring the truncate-and-overwrite
+// behaviour used for regular files.
+func writeSymbolicLink(filePath string, targetPath string) error {
+	if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
+		return err
+	}
+
+	err := os.Symlink(targetPath, filePath)
+	if err == nil || !os.IsExist(err) {
+		return err
+	}
+
+	// Something is already there, e.g. from a previous extraction.
+	if err := os.Remove(filePath); err != nil {
+		return err
+	}
+	return os.Symlink(targetPath, filePath)
+}
+
+// withinDir reports whether p is root itself or is located below root. The
+// comparison is purely lexical; symbolic links are not resolved.
+func withinDir(root, p string) bool {
+	rel, err := filepath.Rel(root, p)
+	if err != nil {
+		return false
+	}
+	return rel != ".." && !strings.HasPrefix(rel, ".."+string(os.PathSeparator))
+}
+
+// sanitizeArchivePath joins the archive entry name t onto the directory d and
+// returns the result. It returns an error when the joined path leaves d, see
+// "G305: Zip Slip vulnerability". The check is made on path elements, so a
+// sibling directory that merely shares a name prefix with d is rejected.
+func sanitizeArchivePath(d, t string) (v string, err error) {
+	v = filepath.Join(d, t)
+	if withinDir(d, v) {
+		return v, nil
+	}
+
+	return "", fmt.Errorf("%s: %s", "content filepath is tainted", t)
+}
+
+// encodingFilename converts a file name from the package-level encoding into
+// UTF-8. The name is returned unchanged when it cannot be decoded, or when
+// decoding produced replacement characters although the name is already
+// well-formed UTF-8.
+func encodingFilename(name string) string {
+	const replacement = '\uFFFD'
+
+	raw := []byte(name)
+	decoded, err := io.ReadAll(transform.NewReader(bytes.NewReader(raw), encoding.NewDecoder()))
+	if err != nil {
+		// Fallback to default UTF-8 encoding
+		return name
+	}
+
+	// The decoder reports undecodable input by emitting U+FFFD rather than by
+	// returning an error. bytes.ContainsRune with U+FFFD also matches malformed
+	// UTF-8, so the second test means "the raw name is well-formed UTF-8".
+	if bytes.ContainsRune(decoded, replacement) && !bytes.ContainsRune(raw, replacement) {
+		return name
+	}
+	return string(decoded)
+}