Skip to content

Commit

Permalink
参数 -vol=3:6 多册范围
Browse files Browse the repository at this point in the history
  • Loading branch information
zhudw committed Apr 20, 2024
1 parent 9c90aaf commit 2012e35
Show file tree
Hide file tree
Showing 46 changed files with 703 additions and 129 deletions.
2 changes: 1 addition & 1 deletion app/berlin.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (p *Berlin) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down
2 changes: 1 addition & 1 deletion app/bluk.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (p *Bluk) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down
4 changes: 2 additions & 2 deletions app/cafaedu.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func (p *CafaEdu) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down Expand Up @@ -118,7 +118,7 @@ func (p *CafaEdu) getVolumes(sUrl string, jar *cookiejar.Jar) (volumes []string,
if err != nil {
return nil, err
}
jsonUrl := fmt.Sprintf("https://%s/api/viewer/lgiiif?url=/srv/www/limbgallery/medias/%s/&max=%d", p.dt.UrlParsed.Host, iiifId, 1000)
jsonUrl := fmt.Sprintf("https://%s/api/viewer/lgiiif?url=/srv/www/limbgallery/medias/%s/&max=%d", p.dt.UrlParsed.Host, iiifId, 10000)
volumes = append(volumes, jsonUrl)
return volumes, err
}
Expand Down
89 changes: 67 additions & 22 deletions app/cuhk.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,21 @@ func (r *Cuhk) Init(iTask int, sUrl string) (msg string, err error) {
return "requested URL was not found.", err
}
r.dt.Jar, _ = cookiejar.New(nil)
OpenWebBrowser(sUrl, []string{})
//OpenWebBrowser(sUrl, []string{})
WaitNewCookie()
return r.download()
}

func (r *Cuhk) download() (msg string, err error) {
name := util.GenNumberSorted(r.dt.Index)
log.Printf("Get %s %s\n", name, r.dt.Url)

respVolume, err := r.getVolumes(r.dt.Url, r.dt.Jar)
if err != nil {
fmt.Println(err)
return "getVolumes", err
}
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
vid := util.GenNumberSorted(i + 1)
Expand All @@ -77,6 +77,44 @@ func (r *Cuhk) do(imgUrls []string) (msg string, err error) {
if imgUrls == nil {
return
}
if config.Conf.UseDziRs {
r.doDezoomifyRs(imgUrls)
} else {
r.doNormal(imgUrls)
}
return "", err
}

// doDezoomifyRs walks the given IIIF URLs and hands every page that still
// needs downloading to util.StartProcess (presumably the dezoomify-rs
// launcher — confirm against util).
//
// A page is skipped when its URL is empty, when config.PageRange rejects its
// index, or when the destination file already exists under r.dt.SavePath.
// Returns false only when iiifUrls is nil; otherwise true, regardless of how
// many pages were actually processed.
func (r *Cuhk) doDezoomifyRs(iiifUrls []string) bool {
	if iiifUrls == nil {
		return false
	}
	total := len(iiifUrls)
	// The escaped book URL is sent as both the Origin and the Referer header.
	escapedUrl := url.QueryEscape(r.dt.Url)
	for idx := 0; idx < total; idx++ {
		pageUrl := iiifUrls[idx]
		if pageUrl == "" {
			continue
		}
		if !config.PageRange(idx, total) {
			continue
		}
		// Files are named by their 1-based page number plus the configured extension.
		target := r.dt.SavePath + util.GenNumberSorted(idx+1) + config.Conf.FileExt
		if FileExist(target) {
			continue
		}
		log.Printf("Get %d/%d %s\n", idx+1, total, pageUrl)
		// The cookie file is re-read for every page rather than once up front.
		cookieHeader := "cookie:" + gohttp.ReadCookieFile(config.Conf.CookieFile)
		procArgs := []string{
			"--dezoomer=deepzoom",
			"-H", "Origin:" + escapedUrl,
			"-H", "Referer:" + escapedUrl,
			"-H", "User-Agent:" + config.Conf.UserAgent,
			"-H", cookieHeader,
		}
		util.StartProcess(pageUrl, target, procArgs)
	}
	return true
}

func (r *Cuhk) doNormal(imgUrls []string) {
fmt.Println()
referer := r.dt.Url
size := len(imgUrls)
Expand Down Expand Up @@ -109,22 +147,18 @@ func (r *Cuhk) do(imgUrls []string) (msg string, err error) {
if err == nil && resp.GetStatusCode() == 200 {
break
}
WaitNewCookie()
WaitNewCookieWithMsg(uri)
}
util.PrintSleepTime(config.Conf.Speed)
fmt.Println()
}
fmt.Println()
return "", err

}

func (r *Cuhk) getVolumes(sUrl string, jar *cookiejar.Jar) (volumes []string, err error) {
bs, err := r.getBody(sUrl, jar)
if err != nil {
return
}
text := string(bs)
subText := util.SubText(text, "id=\"block-islandora-compound-object-compound-navigation-select-list\"", "id=\"book-viewer\">")
bs, err := r.getBodyWithLoop(sUrl, jar)
subText := util.SubText(string(bs), "id=\"block-islandora-compound-object-compound-navigation-select-list\"", "id=\"book-viewer\">")
matches := regexp.MustCompile(`value=['"]([A-z\d:_-]+)['"]`).FindAllStringSubmatch(subText, -1)
if matches == nil {
volumes = append(volumes, sUrl)
Expand All @@ -143,10 +177,7 @@ func (r *Cuhk) getVolumes(sUrl string, jar *cookiejar.Jar) (volumes []string, er
}

func (r *Cuhk) getCanvases(sUrl string, jar *cookiejar.Jar) (canvases []string, err error) {
bs, err := r.getBody(sUrl, jar)
if err != nil {
return
}
bs, err := r.getBodyWithLoop(sUrl, jar)
var resp ResponsePage
matches := regexp.MustCompile(`"pages":([^]]+)]`).FindSubmatch(bs)
if matches == nil {
Expand All @@ -158,16 +189,33 @@ func (r *Cuhk) getCanvases(sUrl string, jar *cookiejar.Jar) (canvases []string,
}
for _, page := range resp.ImagePage {
var imgUrl string
if config.Conf.FileExt == ".jpg" {
imgUrl = fmt.Sprintf("https://%s/iiif/2/%s/%s", r.dt.UrlParsed.Host, page.Identifier, config.Conf.Format)
if config.Conf.UseDziRs {
//dezoomify-rs URL
imgUrl = fmt.Sprintf("https://%s/iiif/2/%s/info.json", r.dt.UrlParsed.Host, page.Identifier)
} else {
imgUrl = fmt.Sprintf("https://%s/islandora/object/%s/datastream/JP2", r.dt.UrlParsed.Host, page.Pid)
if config.Conf.FileExt == ".jpg" {
imgUrl = fmt.Sprintf("https://%s/iiif/2/%s/%s", r.dt.UrlParsed.Host, page.Identifier, config.Conf.Format)
} else {
imgUrl = fmt.Sprintf("https://%s/islandora/object/%s/datastream/JP2", r.dt.UrlParsed.Host, page.Pid)
}
}
canvases = append(canvases, imgUrl)
}
return canvases, err
}

// getBodyWithLoop fetches sUrl through getBody, retrying on failure.
//
// After every failed attempt it calls WaitNewCookie, which blocks until a
// fresh cookie file appears (or its own polling limit runs out), and then
// tries again, for at most 1000 attempts.
//
// It returns the body of the first successful attempt with a nil error. If
// every attempt fails, the error from the last attempt is returned instead of
// being discarded, so callers can tell an exhausted retry loop from success.
func (r *Cuhk) getBodyWithLoop(sUrl string, jar *cookiejar.Jar) (bs []byte, err error) {
	const maxAttempts = 1000
	for i := 0; i < maxAttempts; i++ {
		bs, err = r.getBody(sUrl, jar)
		if err == nil {
			return bs, nil
		}
		// The request failed; wait for the user to refresh the cookie before retrying.
		WaitNewCookie()
	}
	// All attempts failed: surface the last error rather than reporting success.
	return bs, err
}

func (r *Cuhk) getBody(apiUrl string, jar *cookiejar.Jar) ([]byte, error) {
referer := url.QueryEscape(apiUrl)
ctx := context.Background()
Expand All @@ -191,10 +239,7 @@ func (r *Cuhk) getBody(apiUrl string, jar *cookiejar.Jar) ([]byte, error) {
}

func (r *Cuhk) getCanvasesJPEG2000(sUrl string, jar *cookiejar.Jar) (imagePage []ImagePage) {
bs, err := r.getBody(sUrl, jar)
if err != nil {
return
}
bs, err := r.getBodyWithLoop(sUrl, jar)
var resp ResponsePage
matches := regexp.MustCompile(`"pages":([^]]+)]`).FindSubmatch(bs)
if matches != nil {
Expand Down
30 changes: 25 additions & 5 deletions app/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"net/http/cookiejar"
"net/url"
"os"
"strings"
"sync"
"time"
)
Expand Down Expand Up @@ -137,7 +138,8 @@ func FileExist(path string) bool {

func CreateDirectory(domain, bookId, volumeId string) string {
bookIdEncode := getBookId(bookId)
dirPath := config.Conf.SaveFolder + string(os.PathSeparator) + domain + "_" + bookIdEncode + string(os.PathSeparator)
domainNew := strings.ReplaceAll(domain, ":", "_")
dirPath := config.Conf.SaveFolder + string(os.PathSeparator) + domainNew + "_" + bookIdEncode + string(os.PathSeparator)
if volumeId != "" {
dirPath += "vol." + volumeId + string(os.PathSeparator)
}
Expand All @@ -161,13 +163,13 @@ func OpenWebBrowser(sUrl string, args []string) {

//WaitNewCookie
_ = os.Remove(config.Conf.CookieFile)
fmt.Println("请使用 bookget-gui 浏览器,打开图书网址,完成「真人验证 / 登录用户」,然后 「刷新」 网页.")
go func() {
defer wg.Done()
for i := 0; i < 3600; i++ {
for i := 0; i < 3600*8; i++ {
if FileExist(config.Conf.CookieFile) {
break
}
fmt.Printf("请使用 bookget-gui 浏览器,打开图书网址,完成「真人验证 / 登录用户」,然后 「刷新」 网页.\r")
time.Sleep(time.Second * 3)
}
}()
Expand All @@ -179,13 +181,31 @@ func WaitNewCookie() {
_ = os.Remove(config.Conf.CookieFile)
var wg sync.WaitGroup
wg.Add(1)
fmt.Println("请使用 bookget-gui 浏览器,打开图书网址,完成「真人验证 / 登录用户」,然后 「刷新」 网页.")
go func() {
defer wg.Done()
for i := 0; i < 3600; i++ {
for i := 0; i < 3600*8; i++ {
if FileExist(config.Conf.CookieFile) {
break
}
time.Sleep(time.Second * 3)
}
}()
wg.Wait()
}

func WaitNewCookieWithMsg(uri string) {
_ = os.Remove(config.Conf.CookieFile)
var wg sync.WaitGroup
wg.Add(1)
fmt.Println("请使用 bookget-gui 浏览器打开下面 URL,完成「真人验证 / 登录用户」,然后 「刷新」 网页.")
fmt.Println(uri)
go func() {
defer wg.Done()
for i := 0; i < 3600*8; i++ {
if FileExist(config.Conf.CookieFile) {
break
}
fmt.Printf("请使用 bookget-gui 浏览器,打开图书网址,完成「真人验证 / 登录用户」,然后 「刷新」 网页.\r")
time.Sleep(time.Second * 3)
}
}()
Expand Down
2 changes: 1 addition & 1 deletion app/emuseum.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func (p *Emuseum) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down
2 changes: 1 addition & 1 deletion app/harvard.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func (p *Harvard) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down
2 changes: 1 addition & 1 deletion app/hkulib.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (r *Hkulib) download() (msg string, err error) {
return "getVolumes", err
}
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
vid := util.GenNumberSorted(i + 1)
Expand Down
2 changes: 1 addition & 1 deletion app/huawen.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func (r *Huawen) download() (msg string, err error) {
return "getVolumes", err
}
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
r.dt.SavePath = CreateDirectory(r.dt.UrlParsed.Host, r.dt.BookId, "")
Expand Down
2 changes: 1 addition & 1 deletion app/keio.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (r *Keio) download() (msg string, err error) {
return "getVolumes", err
}
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
vid := util.GenNumberSorted(i + 1)
Expand Down
2 changes: 1 addition & 1 deletion app/kokusho.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (p *Kokusho) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down
2 changes: 1 addition & 1 deletion app/korea.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func (p *Korea) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down
4 changes: 2 additions & 2 deletions app/kyoto.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (p *Kyoto) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down Expand Up @@ -86,7 +86,7 @@ func (p *Kyoto) getVolumes(sUrl string, jar *cookiejar.Jar) (volumes []string, e
if err != nil {
return
}
match := regexp.MustCompile(`https://(.+)/manifest.json`).FindSubmatch(bs)
match := regexp.MustCompile(`https://(.+?)/manifest.json`).FindSubmatch(bs)
if match == nil {
return
}
Expand Down
2 changes: 1 addition & 1 deletion app/kyotou.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func (p *Kyotou) download() (msg string, err error) {
}
sizeVol := len(respVolume)
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
if sizeVol == 1 {
Expand Down
2 changes: 1 addition & 1 deletion app/kyudbsnu.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (r *KyudbSnu) download() (msg string, err error) {
return "getVolumes", err
}
for i, vol := range respVolume {
if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
if !config.VolumeRange(i) {
continue
}
r.dt.SavePath = CreateDirectory(r.dt.UrlParsed.Host, r.dt.BookId, vol)
Expand Down
Loading

0 comments on commit 2012e35

Please sign in to comment.