-
Notifications
You must be signed in to change notification settings - Fork 0
/
extractor.js
executable file
·100 lines (88 loc) · 2.22 KB
/
extractor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env node
import { execFile } from 'child_process';
import util from 'util';
import { globby } from 'globby';
import yargs from 'yargs';
import { hideBin } from 'yargs/helpers';
// Promise-returning variant of child_process.execFile, so external commands can be awaited.
const aExecFile = util.promisify(execFile);
/**
 * Parsed command-line options.
 *
 *   -s, --source     source path pattern specification (string, required)
 *   -r, --regex      search regular expression (string, required)
 *   -v, --verbosity  logging level, 0 is quiet (number, defaults to 1)
 */
const { argv } = yargs(hideBin(process.argv))
  .option('source', {
    string: true,
    alias: 's',
    describe: 'source path pattern specification',
  })
  .option('regex', {
    string: true,
    alias: 'r',
    describe: 'search regular expression',
  })
  .option('verbosity', {
    number: true,
    alias: 'v',
    describe: 'logging level (0 is quiet)',
  })
  // Both the path pattern and the expression are mandatory; this message is shown when either is missing.
  .demandOption(['source', 'regex'], 'Please specify a source path pattern and a search regular expression.')
  .default('verbosity', 1)
  .help();
/**
 * Logger used at verbosity 0: errors still go to stderr, while regular and
 * debug messages are discarded.
 */
const quietLogger = {
  errMessage: (text) => {
    // eslint-disable-next-line no-console
    console.error(text);
  },
  message: () => {},
  debugMessage: () => {},
};
/**
 * Default logger: everything quietLogger provides, except that regular
 * messages are printed to stdout. Debug messages remain discarded.
 */
const normalLogger = {
  ...quietLogger,
  message: (text) => {
    // eslint-disable-next-line no-console
    console.log(text);
  },
};
/**
 * Most talkative logger: everything normalLogger provides, plus debug
 * messages printed to stdout.
 */
const verboseLogger = {
  ...normalLogger,
  debugMessage: (text) => {
    // eslint-disable-next-line no-console
    console.log(text);
  },
};
/**
 * Logger selected once from the --verbosity option:
 * 2 selects the verbose logger, 0 the quiet one, any other value the normal one.
 */
const logger = ((level) => {
  if (level === 2) {
    return verboseLogger;
  }
  if (level === 0) {
    return quietLogger;
  }
  return normalLogger;
})(argv.verbosity);
/**
 * Converts one PDF to text with `pdftotext` and logs every match of `exp`.
 *
 * @param {string} file - Path of the PDF passed to `pdftotext`.
 * @param {RegExp} exp - Search expression; it is handed to
 *   `String.prototype.matchAll`, which requires the `g` flag.
 * @returns {Promise<void>} Resolves once the result for this file has been
 *   logged; rejects if the `pdftotext` invocation fails.
 */
async function processFile(file, exp) {
  // '-' makes pdftotext write to stdout; allow up to 64 MiB of extracted text.
  const { stdout } = await aExecFile('pdftotext', [file, '-'], { maxBuffer: 64 * 1024 * 1024 });
  // matchAll returns an iterator, which is always truthy, so materialize the
  // matches and decide on their count instead of on the iterator itself.
  const matches = Array.from(stdout.matchAll(exp), (m) => m[0]);
  if (matches.length === 0) {
    logger.debugMessage(` (No matches in ${file})`);
    return;
  }
  logger.message(`${file} . . . .\n${matches.join('\n')}`);
}
/**
 * Entry point: expands the --source pattern into a file list and searches
 * every file concurrently for the --regex expression.
 *
 * Failures while globbing or processing are logged and end the process with
 * exit code 1.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  const { source: src, regex } = argv;
  const exp = new RegExp(regex, 'g');
  logger.debugMessage(`Running with ${src} | ${exp}`);
  try {
    // The second pattern excludes entries whose name starts with "._".
    const pdfs = await globby([src, '!._*']);
    logger.debugMessage('Starting search------------------');
    const pending = pdfs.map((file) => processFile(file, exp));
    await Promise.all(pending);
  } catch (error) {
    logger.errMessage(error);
    process.exit(1);
  }
};
// Last-resort handler: anything main() rejects with is reported and the
// process ends with exit code 1.
const reportAndExit = (error) => {
  logger.errMessage(error);
  process.exit(1);
};

main().catch(reportAndExit);