Skip to content

Commit

Permalink
[新增]日本龙谷大学图书馆
Browse files Browse the repository at this point in the history
  • Loading branch information
zhudw committed Jul 12, 2023
1 parent 3995f65 commit cfb3624
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 2 deletions.
234 changes: 234 additions & 0 deletions app/ryukoku.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
package app

import (
"bookget/config"
"bookget/lib/gohttp"
"bookget/lib/util"
"context"
"encoding/json"
"errors"
"fmt"
"log"
"net/http/cookiejar"
"net/url"
"os"
"regexp"
"sync"
)

// Ryukoku downloads books from the Ryukoku University Library digital
// archive (routed for the host da.library.ryukoku.ac.jp).
type Ryukoku struct {
// dt holds the state of the current download task; Init allocates it.
dt *DownloadTask
}

// Init prepares a fresh download task for the page at sUrl and immediately
// runs the download.
//
// iTask is the index of this task and is stored on the task for log output.
// sUrl must parse as a URL and contain a "/page/<id>" segment from which the
// book id is taken.
//
// It returns a short status message and an error. A non-nil error is returned
// when sUrl cannot be parsed or carries no book id; otherwise the result of
// the download is returned unchanged.
func (r *Ryukoku) Init(iTask int, sUrl string) (msg string, err error) {
	r.dt = new(DownloadTask)
	r.dt.UrlParsed, err = url.Parse(sUrl)
	if err != nil {
		// Stop here: a nil UrlParsed would be dereferenced later when the
		// manifest URLs are built from its Host.
		return "requested URL was not found.", err
	}
	r.dt.Url = sUrl
	r.dt.Index = iTask
	r.dt.BookId = r.getBookId(r.dt.Url)
	if r.dt.BookId == "" {
		// Report the failure through err as well as msg so that callers
		// checking only the error still notice it.
		return "requested URL was not found.", errors.New("ryukoku: no book id found in URL " + sUrl)
	}
	// The error is discarded: no options are passed, and the jar is only
	// used as an in-memory cookie store for this task.
	r.dt.Jar, _ = cookiejar.New(nil)
	return r.download()
}

// ryukokuBookIDRe captures the alphanumeric identifier that follows "/page/"
// in a URL. It is compiled once at package scope instead of on every call.
var ryukokuBookIDRe = regexp.MustCompile(`/page/([A-Za-z0-9]+)`)

// getBookId returns the identifier that follows "/page/" in sUrl, or the
// empty string when sUrl contains no such segment.
func (r *Ryukoku) getBookId(sUrl string) (bookId string) {
	if m := ryukokuBookIDRe.FindStringSubmatch(sUrl); m != nil {
		return m[1]
	}
	return ""
}

// download resolves every volume manifest of the current task and fetches
// the pages of each selected volume.
//
// When config.Conf.Volume is greater than zero only the volume with that
// 1-based position is processed. For each processed volume the save
// directory is created, the manifest is expanded into page URLs and the
// URLs are handed to do. A volume whose manifest cannot be read, or which
// lists no pages, is reported and skipped; the remaining volumes are still
// processed.
//
// It returns ("getVolumes", err) when the volume list cannot be obtained and
// ("", nil) otherwise.
func (r *Ryukoku) download() (msg string, err error) {
	name := util.GenNumberSorted(r.dt.Index)
	log.Printf("Get %s %s\n", name, r.dt.Url)

	respVolume, err := r.getVolumes(r.dt.Url, r.dt.Jar)
	if err != nil {
		fmt.Println(err)
		return "getVolumes", err
	}
	total := len(respVolume)
	for i, vol := range respVolume {
		if config.Conf.Volume > 0 && config.Conf.Volume != i+1 {
			continue
		}
		vid := util.GenNumberSorted(i + 1)
		r.dt.VolumeId = r.dt.BookId + "_vol." + vid
		r.dt.SavePath = config.CreateDirectory(r.dt.Url, r.dt.VolumeId)
		// A distinct name keeps the per-volume error from shadowing the
		// named result err.
		canvases, volErr := r.getCanvases(vol, r.dt.Jar)
		if volErr != nil {
			fmt.Println(volErr)
			continue
		}
		if len(canvases) == 0 {
			// Previously this case printed a bare "<nil>"; say what
			// actually happened instead.
			log.Printf("no pages found in %s\n", vol)
			continue
		}
		log.Printf(" %d/%d volume, %d pages \n", i+1, total, len(canvases))
		r.do(canvases)
	}
	return "", nil
}

// do dispatches the page URLs to the downloader selected by
// config.Conf.UseDziRs: doDezoomifyRs when it is set, doNormal otherwise.
// The boolean result of the chosen downloader is discarded and the function
// always returns ("", nil).
func (r *Ryukoku) do(imgUrls []string) (msg string, err error) {
	fetch := r.doNormal
	if config.Conf.UseDziRs {
		fetch = r.doDezoomifyRs
	}
	fetch(imgUrls)
	return "", nil
}

// doDezoomifyRs passes each IIIF info URL to util.StartProcess together with
// the destination file and a fixed set of "-H" header arguments.
//
// Entries that are empty, fall outside config.PageRange, or whose destination
// file already exists are skipped. Destination files are named after the
// 1-based page position plus config.Conf.FileExt and placed in the task's
// save path.
//
// It returns false when iiifUrls is nil and true otherwise.
func (r *Ryukoku) doDezoomifyRs(iiifUrls []string) bool {
	if iiifUrls == nil {
		return false
	}
	// NOTE(review): the task URL is query-escaped before being used as the
	// Origin and Referer values; confirm that the server expects this form.
	escaped := url.QueryEscape(r.dt.Url)
	headerArgs := []string{
		"-H", "Origin:" + escaped,
		"-H", "Referer:" + escaped,
		"-H", "User-Agent:" + config.Conf.UserAgent,
	}
	total := len(iiifUrls)
	for idx := 0; idx < total; idx++ {
		infoUrl := iiifUrls[idx]
		if infoUrl == "" {
			continue
		}
		if !config.PageRange(idx, total) {
			continue
		}
		page := idx + 1
		target := r.dt.SavePath + string(os.PathSeparator) + util.GenNumberSorted(page) + config.Conf.FileExt
		if FileExist(target) {
			// Already downloaded.
			continue
		}
		log.Printf("Get %d/%d %s\n", page, total, infoUrl)
		util.StartProcess(infoUrl, target, headerArgs)
	}
	return true
}

// doNormal downloads each image URL to the task's save path through a queue
// created with config.Conf.Threads, and waits for all downloads to finish.
//
// Entries that are empty, fall outside config.PageRange, or whose destination
// file already exists are skipped. Destination files are named after the
// 1-based page position plus the extension util.FileExt derives from the URL.
// A failed download is printed and does not stop the remaining ones.
//
// It returns false when imgUrls is nil and true otherwise.
func (r *Ryukoku) doNormal(imgUrls []string) bool {
	if imgUrls == nil {
		return false
	}
	size := len(imgUrls)
	fmt.Println()
	var wg sync.WaitGroup
	q := QueueNew(int(config.Conf.Threads))
	for i, uri := range imgUrls {
		if uri == "" || !config.PageRange(i, size) {
			continue
		}
		// Take a per-iteration copy. Before Go 1.22 the range variable is
		// shared by all iterations, so a queued closure that runs after the
		// loop has advanced would fetch a different URL into dest.
		uri := uri
		ext := util.FileExt(uri)
		sortId := util.GenNumberSorted(i + 1)
		filename := sortId + ext
		dest := r.dt.SavePath + string(os.PathSeparator) + filename
		if FileExist(dest) {
			// Already downloaded.
			continue
		}
		log.Printf("Get %d/%d %s\n", i+1, size, uri)
		wg.Add(1)
		q.Go(func() {
			defer wg.Done()
			ctx := context.Background()
			opts := gohttp.Options{
				DestFile:    dest,
				Overwrite:   false,
				Concurrency: 1,
				CookieFile:  config.Conf.CookieFile,
				CookieJar:   r.dt.Jar,
				Headers: map[string]interface{}{
					"User-Agent": config.Conf.UserAgent,
				},
			}
			_, err := gohttp.FastGet(ctx, uri, opts)
			if err != nil {
				fmt.Println(err)
			}
			fmt.Println()
		})
	}
	wg.Wait()
	fmt.Println()
	return true
}

// ryukokuVolumeLinkRe matches viewer links of the form
// href="/view/<a>/<b>" and captures both path segments. The character class
// is spelled A-Za-z0-9 on purpose: the range A-z would also match the
// characters [ \ ] ^ _ and the backtick.
var ryukokuVolumeLinkRe = regexp.MustCompile(`href="/view/([A-Za-z0-9]+)/([A-Za-z0-9]+)"`)

// getVolumes fetches the page at sUrl and returns one IIIF manifest URL per
// viewer link found in it, in page order.
//
// Each manifest URL has the form
// https://<host>/iiif/<a>/<b>/manifest.json, where <host> comes from the
// task's parsed URL and <a>/<b> from the link. It returns (nil, nil) when the
// page contains no viewer link, and a non-nil error when the page cannot be
// fetched or the task has no parsed URL.
func (r *Ryukoku) getVolumes(sUrl string, jar *cookiejar.Jar) (volumes []string, err error) {
	if r.dt.UrlParsed == nil {
		// Without a parsed URL there is no host to build manifest URLs from.
		return nil, errors.New("ryukoku: task URL has not been parsed")
	}
	bs, err := r.getBody(sUrl, jar)
	if err != nil {
		return nil, err
	}
	//text := util.SubText(string(bs), "<div id=\"linkbox\">", "<div id=\"bottombox\">")
	matches := ryukokuVolumeLinkRe.FindAllStringSubmatch(string(bs), -1)
	if matches == nil {
		return nil, nil
	}

	volumes = make([]string, 0, len(matches))
	for i, m := range matches {
		// NOTE(review): the second match is always skipped; presumably the
		// page repeats a link there. Confirm against the live markup.
		if i == 1 {
			continue
		}
		jsonUrl := fmt.Sprintf("https://%s/iiif/%s/%s/manifest.json", r.dt.UrlParsed.Host, m[1], m[2])
		volumes = append(volumes, jsonUrl)
	}
	return volumes, nil
}

// getCanvases fetches the IIIF manifest at sUrl and returns one URL per image
// of the first sequence, in manifest order.
//
// When config.Conf.UseDziRs is set each URL is the image service's info.json
// (the input for dezoomify-rs). Otherwise it is a JPEG URL of the form
// <service id>/<region>/<size>/0/default.jpg, with the size chosen from
// config.Conf.FullImageWidth:
//   - 1000 to 6400: the full region scaled to that width ("full/<w>,")
//   - anything else: the original image ("full/full")
//
// It returns (nil, nil) when the manifest has no sequences, and a non-nil
// error when the manifest cannot be fetched or decoded.
func (r *Ryukoku) getCanvases(sUrl string, jar *cookiejar.Jar) (canvases []string, err error) {
	bs, err := r.getBody(sUrl, jar)
	if err != nil {
		return nil, err
	}
	manifest := new(ResponseManifest)
	if err = json.Unmarshal(bs, manifest); err != nil {
		log.Printf("json.Unmarshal failed: %s\n", err)
		return nil, err
	}
	if len(manifest.Sequences) == 0 {
		return nil, nil
	}

	// Widths above 6400 request the original image. Widths below 1000 used
	// to leave the segment empty, which produced the invalid URL
	// "<id>//0/default.jpg"; they now fall back to the original image too.
	sizeSegment := "full/full"
	if w := config.Conf.FullImageWidth; w >= 1000 && w <= 6400 {
		sizeSegment = fmt.Sprintf("full/%d,", w)
	}

	pages := manifest.Sequences[0].Canvases
	canvases = make([]string, 0, len(pages))
	for _, canvas := range pages {
		for _, image := range canvas.Images {
			if config.Conf.UseDziRs {
				//iifUrl, _ := url.QueryUnescape(image.Resource.Service.Id)
				// dezoomify-rs URL
				iiifInfo := fmt.Sprintf("%s/info.json", image.Resource.Service.Id)
				canvases = append(canvases, iiifInfo)
			} else {
				// JPEG URL
				imgUrl := fmt.Sprintf("%s/%s/0/default.jpg", image.Resource.Service.Id, sizeSegment)
				canvases = append(canvases, imgUrl)
			}
		}
	}
	return canvases, nil
}

// getBody performs a GET request for apiUrl with the given cookie jar and
// returns the response body.
//
// The request carries the configured User-Agent and a Referer built from the
// query-escaped apiUrl. An error is returned when the request fails, when the
// status code is not 200, or when the body is nil; in the last two cases the
// error text contains the status code and reason phrase.
func (r *Ryukoku) getBody(apiUrl string, jar *cookiejar.Jar) ([]byte, error) {
	headers := map[string]interface{}{
		"User-Agent": config.Conf.UserAgent,
		"Referer":    url.QueryEscape(apiUrl),
	}
	client := gohttp.NewClient(context.Background(), gohttp.Options{
		CookieFile: config.Conf.CookieFile,
		CookieJar:  jar,
		Headers:    headers,
	})

	resp, err := client.Get(apiUrl)
	if err != nil {
		return nil, err
	}

	// The second result of GetBody is discarded; a nil body is treated as a
	// failure by the check below.
	body, _ := resp.GetBody()
	if status := resp.GetStatusCode(); status != 200 || body == nil {
		return nil, errors.New(fmt.Sprintf("ErrCode:%d, %s", status, resp.GetReasonPhrase()))
	}
	return body, nil
}
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ver="1.1.8"
ver="1.1.9"

mkdir -p target/bookget-${ver}.linux/
mkdir -p target/bookget-${ver}.macOS/
Expand Down
2 changes: 1 addition & 1 deletion config/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
var Conf Input
var UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50"

const version = "1.1.8"
const version = "1.1.9"

// initSeq false = 最小值 <= 当前页码 <= 最大值
func initSeq() {
Expand Down
2 changes: 2 additions & 0 deletions router/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ func FactoryRouter(siteID string, sUrl []string) (map[string]interface{}, error)
//[日本]市立米泽图书馆
Router["www.library.yonezawa.yamagata.jp"] = new(LibYonezawa)
Router["webarchives.tnm.jp"] = new(WebarchivesTnm)
//[日本]龙谷大学
Router["da.library.ryukoku.ac.jp"] = new(Ryukoku)
//}}} -----------------------------------------------------------------

//{{{---------------美国、欧洲--------------------------------------------------
Expand Down
10 changes: 10 additions & 0 deletions router/japan.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,13 @@ func (p Waseda) getRouterInit(sUrl []string) (map[string]interface{}, error) {
}
return nil, nil
}

// Ryukoku is the router entry for the Ryukoku University Library site.
type Ryukoku struct{}

// getRouterInit runs one app.Ryukoku download task per URL in sUrl, numbering
// the tasks from 1 in slice order. The message and error returned by each
// task are discarded, and the function always returns (nil, nil).
func (p Ryukoku) getRouterInit(sUrl []string) (map[string]interface{}, error) {
	for idx := range sUrl {
		task := new(app.Ryukoku)
		task.Init(idx+1, sUrl[idx])
	}
	return nil, nil
}

0 comments on commit cfb3624

Please sign in to comment.