Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
  • Loading branch information
deweizhu committed Oct 15, 2024
0 parents commit ad16b8c
Show file tree
Hide file tree
Showing 180 changed files with 19,992 additions and 0 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# This workflow will build a golang project
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go

name: Go

on:
  push:
    branches: [ "master" ]
  pull_request:
    branches: [ "master" ]

jobs:

  build:
    runs-on: ubuntu-latest
    steps:
      # v4/v5 of these actions replace the v3 releases, which run on the
      # deprecated Node 16 runtime.
      - uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          # Quoted so YAML keeps the version a string; an unquoted value such
          # as 1.20 would be parsed as the number 1.2.
          go-version: '1.23'

      - name: Build
        run: go build -v ./...
#
#      - name: Test
#        run: go test -v ./...
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Default ignored files
/target/
/dist/
*.exe
/.idea/
.fleet/
*.xml
*.7z
*.bak
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# 簡介

bookget 是一款数字古籍图书下载工具,已支持 50 多个数字图书馆。截至 2024/01/16,本项目已停止更新。

### 使用説明
1. 打开 [https://github.com/deweizhu/bookget/releases](https://github.com/deweizhu/bookget/releases/latest) 下载最新版。
1. [必读]使用手册wiki https://github.com/deweizhu/bookget/wiki
1. 此项目代码仅供学习研究使用,欢迎有能力的朋友git clone 代码二次开发维护您自己的版本。

#### 源碼編譯
从源码构建,仅对计算机程序员参考。普通用户可直接跳过阅读。
阅读 [golang 官方文档](https://golang.google.cn/doc/install) ,给您的电脑安装 golang 开发环境。
```shell
git clone --depth=1 https://github.com/deweizhu/bookget.git
cd bookget
go build .
```

- Windows 可用环境:Windows 10 x64 / Windows 11(自 2024/01/12 起引入的 bookget-gui 仅适用于 Windows 10/11 x64 系统)。

# 新書推薦

本書即以清代避諱為例,試探如何運用融合數位與傳統的e考據之法,重新定義避諱學2.0該有的研究模式。這將是治清代的文史工作者案頭不可或缺的一本專著。[^1]
- [清代避諱研究](https://gpi.culture.tw/books/1011300273)
- 作者: 黃一農 / 黄一农
- 出版社: 台灣清華大學出版社
- 副标题: e考據的學術實踐
- 出版年: 2024-4
- ISBN: 9786269724987

[^1]: [https://gpi.culture.tw/books/1011300273](https://gpi.culture.tw/books/1011300273)

### 第三方社區/網站
- 书格 https://www.shuge.org
- Arch Linux AUR https://aur.archlinux.org/packages/bookget-git


134 changes: 134 additions & 0 deletions app/berkeley.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
package app

import (
"bookget/config"
"bookget/lib/gohttp"
"bookget/lib/util"
"context"
"errors"
"fmt"
"log"
"net/http/cookiejar"
"net/url"
"path/filepath"
"regexp"
)

// Berkeley downloads the PDF files linked from a "record/<id>" page.
// All per-run state is kept in a single DownloadTask.
// NOTE(review): the type name suggests a UC Berkeley library site, but no host
// is hard-coded in this file — confirm against the router that selects it.
type Berkeley struct {
// dt is the task state: Init fills in the URL, index, book ID and cookie
// jar, and download adds the save path.
dt *DownloadTask
}

// Init builds a fresh DownloadTask for sUrl and immediately runs the download.
//
// iTask is stored as the task index; download uses it to build its log label.
// msg is a short human-readable status and is empty on success.
//
// A URL that cannot be parsed is rejected with the parse error. A URL without
// a "record/<id>" segment is rejected with the same message and a nil error,
// as before. Neither case reaches the network.
func (r *Berkeley) Init(iTask int, sUrl string) (msg string, err error) {
	parsed, err := url.Parse(sUrl)
	if err != nil {
		// download dereferences UrlParsed.Host, so a failed parse must stop here
		// instead of leaving a nil *url.URL behind.
		return "requested URL was not found.", err
	}

	r.dt = new(DownloadTask)
	r.dt.UrlParsed = parsed
	r.dt.Url = sUrl
	r.dt.Index = iTask
	r.dt.BookId = r.getBookId(r.dt.Url)
	if r.dt.BookId == "" {
		return "requested URL was not found.", nil
	}

	jar, err := cookiejar.New(nil)
	if err != nil {
		return "cookie jar could not be created.", err
	}
	r.dt.Jar = jar
	return r.download()
}

// berkeleyRecordRe captures the identifier that follows "record/" in a URL.
// The class is spelled A-Za-z on purpose: the range A-z would also admit the
// ASCII punctuation that sits between 'Z' and 'a' ([ \ ] ^ and the backtick).
// It is compiled once at package scope rather than on every call.
var berkeleyRecordRe = regexp.MustCompile(`(?i)record/([A-Za-z0-9_-]+)`)

// getBookId returns the identifier following "record/" in sUrl (matched
// case-insensitively), or "" when sUrl has no such segment. The identifier
// consists of ASCII letters, digits, '_' and '-'; it ends at the first other
// character.
func (r *Berkeley) getBookId(sUrl string) (bookId string) {
	m := berkeleyRecordRe.FindStringSubmatch(sUrl)
	if m == nil {
		return ""
	}
	return m[1]
}

// download resolves the save directory for the current task, collects the
// file URLs advertised on the record page, and hands them to do.
//
// It returns "requested URL was not found." (together with any error from
// getCanvases) when the page yields no URLs. Otherwise it returns whatever do
// reports, so a failure in the fetch loop is no longer discarded.
func (r *Berkeley) download() (msg string, err error) {
	name := util.GenNumberSorted(r.dt.Index)
	log.Printf("Get %s %s\n", name, r.dt.Url)

	r.dt.SavePath = CreateDirectory(r.dt.UrlParsed.Host, r.dt.BookId, "")
	canvases, err := r.getCanvases(r.dt.Url, r.dt.Jar)
	if err != nil || len(canvases) == 0 {
		return "requested URL was not found.", err
	}
	log.Printf(" %d files \n", len(canvases))
	return r.do(canvases)
}

// do walks canvases in order and fetches each entry into the task's save path.
//
// An entry is skipped when it is empty, when config.PageRange rejects its
// zero-based position, or when its destination file already exists. The
// destination name is the one-based position formatted by
// util.GenNumberSorted plus the extension taken from the URL. Every request
// carries the configured User-Agent and the task URL as Referer.
//
// A nil canvases returns immediately. The result of gohttp.FastGet is not
// inspected, so the function always returns an empty msg and a nil error.
func (r *Berkeley) do(canvases []string) (msg string, err error) {
	if canvases == nil {
		return
	}
	fmt.Println()

	pageURL := r.dt.Url
	total := len(canvases)
	for idx := range canvases {
		link := canvases[idx]
		if link == "" {
			continue
		}
		if !config.PageRange(idx, total) {
			continue
		}

		target := r.dt.SavePath + util.GenNumberSorted(idx+1) + filepath.Ext(link)
		if FileExist(target) {
			continue
		}

		log.Printf("Get %d/%d, URL: %s\n", idx+1, total, link)
		// The options are rebuilt for every file so each request gets its own
		// header map.
		headers := map[string]interface{}{
			"User-Agent": config.Conf.UserAgent,
			"Referer":    pageURL,
		}
		gohttp.FastGet(context.Background(), link, gohttp.Options{
			DestFile:    target,
			Overwrite:   false,
			Concurrency: 1,
			CookieFile:  config.Conf.CookieFile,
			CookieJar:   r.dt.Jar,
			Headers:     headers,
		})
		fmt.Println()
	}
	return "", nil
}

// getVolumes is a placeholder: volume discovery is not implemented for this
// downloader. It always returns a nil slice and a non-nil error, so a caller
// can handle the missing feature instead of crashing on a panic.
func (r *Berkeley) getVolumes(sUrl string, jar *cookiejar.Jar) (volumes []string, err error) {
	// TODO: implement volume discovery.
	return nil, errors.New("berkeley: getVolumes is not implemented")
}

// getCanvases fetches the page at sUrl and collects every https URL ending in
// ".pdf" that appears as a value="…" attribute, in document order.
//
// An error from getBody is returned unchanged. If the page contains no such
// attribute, the result is nil with the error "not match".
func (r *Berkeley) getCanvases(sUrl string, jar *cookiejar.Jar) (canvases []string, err error) {
	page, err := r.getBody(sUrl, jar)
	if err != nil {
		return nil, err
	}

	pdfAttr := regexp.MustCompile(`value="https://([^"]+)\.pdf"`)
	hits := pdfAttr.FindAllSubmatch(page, -1)
	if hits == nil {
		return nil, errors.New("not match")
	}

	canvases = make([]string, 0, len(hits))
	for i := range hits {
		// hits[i][1] is the part between the scheme and the ".pdf" suffix.
		canvases = append(canvases, "https://"+string(hits[i][1])+".pdf")
	}
	return canvases, nil
}

// getBody issues a GET for apiUrl using the configured cookie file, the given
// cookie jar and the configured User-Agent, and returns the response body.
//
// It returns an error when the request fails, when the status code is not
// 200 (message "ErrCode:<status>, <reason>"), when the body cannot be read,
// or when the body is nil.
func (r *Berkeley) getBody(apiUrl string, jar *cookiejar.Jar) ([]byte, error) {
	// NOTE(review): the Referer is sent query-escaped ("https%3A%2F%2F…"),
	// whereas do sends the plain task URL. This looks unintended but is kept
	// as-is here; confirm what the server expects before changing it.
	referer := url.QueryEscape(apiUrl)
	cli := gohttp.NewClient(context.Background(), gohttp.Options{
		CookieFile: config.Conf.CookieFile,
		CookieJar:  jar,
		Headers: map[string]interface{}{
			"User-Agent": config.Conf.UserAgent,
			"Referer":    referer,
		},
	})
	resp, err := cli.Get(apiUrl)
	if err != nil {
		return nil, err
	}

	bs, err := resp.GetBody()
	// The status is checked first so a non-200 response keeps its ErrCode
	// message even if the body could not be read.
	if status := resp.GetStatusCode(); status != 200 {
		return nil, fmt.Errorf("ErrCode:%d, %s", status, resp.GetReasonPhrase())
	}
	if err != nil {
		// Previously dropped: a failed read could return a truncated body as success.
		return nil, fmt.Errorf("reading response body of %s: %w", apiUrl, err)
	}
	if bs == nil {
		return nil, fmt.Errorf("ErrCode:%d, %s", resp.GetStatusCode(), resp.GetReasonPhrase())
	}
	return bs, nil
}
Empty file added app/berkeley.go:Zone.Identifier
Empty file.
Loading

0 comments on commit ad16b8c

Please sign in to comment.