Skip to content

Commit

Permalink
Merge pull request #15 from Hopding/FixStreamParsing
Browse files Browse the repository at this point in the history
Fix stream parsing
  • Loading branch information
Hopding authored Jul 7, 2018
2 parents 48692ec + 09f3ab5 commit d06c6d5
Show file tree
Hide file tree
Showing 64 changed files with 524 additions and 199 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ API documentation is available [here](https://github.com/Hopding/pdf-lib/tree/ma
* [`pdfmake`](https://github.com/bpampuch/pdfmake) is a PDF generation library for the browser.
* [`hummus`](https://github.com/galkahana/HummusJS) is a PDF generation and modification library for Node environments. `hummus` is a Node wrapper around a [C++ library](https://github.com/galkahana/PDF-Writer).
* [`react-native-pdf-lib`](https://github.com/Hopding/react-native-pdf-lib) is a PDF generation and modification library for React Native environments. `react-native-pdf-lib` is a wrapper around [C++](https://github.com/galkahana/PDF-Writer) and [Java](https://github.com/TomRoush/PdfBox-Android) libraries.
* [`pdfassembler`](https://github.com/DevelopingMagic/pdfassembler) is a PDF generation and modification library for Node and the browser. Using it requires some knowledge of the logical structure of PDF documents.

## License
[MIT](https://choosealicense.com/licenses/mit/)
1 change: 1 addition & 0 deletions __integration_tests__/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export interface ITestAssets {
with_update_sections: Uint8Array;
linearized_with_object_streams: Uint8Array;
with_large_page_count: Uint8Array;
with_missing_endstream_eol_and_polluted_ctm: Uint8Array;
};
}

Expand Down
3 changes: 3 additions & 0 deletions __integration_tests__/runners/node/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ const assets = {
with_update_sections: readPdf('fd/form/F1040V.pdf'),
linearized_with_object_streams: readPdf('ef/inst/ef_ins_1040.pdf'),
with_large_page_count: fs.readFileSync('pdf_specification.pdf'),
with_missing_endstream_eol_and_polluted_ctm: fs.readFileSync(
'test-pdfs/receipt.pdf',
),
},
};

Expand Down
1 change: 1 addition & 0 deletions __integration_tests__/tests/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ export { default as test3 } from './test3';
export { default as test4 } from './test4';
export { default as test5 } from './test5';
export { default as test6 } from './test6';
export { default as test7 } from './test7';
2 changes: 1 addition & 1 deletion __integration_tests__/tests/test1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ const makeUpperRightQuadrant = (size: number) => [
// with text.
...drawCircle(0.5 * size, 0.5 * size, 250, 350),
...drawCircle(0.5 * size, 0.5 * size, 100, 150),
W.asterisk.operator,
W!.asterisk!.operator,
n.operator,

// Create a text object.
Expand Down
123 changes: 123 additions & 0 deletions __integration_tests__/tests/test7.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import faker from 'faker';
import fs from 'fs';
import _ from 'lodash';

import {
clip,
clipEvenOdd,
closePath,
dashPattern,
drawCircle,
drawEllipse,
drawImage,
drawLinesOfText,
drawRectangle,
drawSquare,
drawText,
endPath,
fillingRgbColor,
fontAndSize,
lineCap,
lineHeight,
lineJoin,
lineTo,
moveTo,
nextLine,
PDFArray,
PDFContentStream,
PDFDictionary,
PDFDocument,
PDFDocumentFactory,
PDFDocumentWriter,
PDFIndirectReference,
PDFName,
PDFPage,
PDFTextObject,
popGraphicsState,
pushGraphicsState,
scale,
Standard14Fonts,
strokingRgbColor,
text,
textRenderingMode,
translate,
} from '../../src';

import { PDFOperators } from '../../src';

// Pulls the `cm` operator off of `PDFOperators`.
// NOTE(review): `cm` is not referenced anywhere else in this file — confirm
// whether it was meant to be used by the overlay content stream or can be dropped.
const { cm } = PDFOperators;

import { ITest, ITestAssets, ITestKernel } from '../models';

/**
 * Builds the overlay content stream for this test.
 *
 * The stream contains, in order:
 *   1. the image registered under the name `Mario`, scaled to 15% of
 *      `marioDims`,
 *   2. a filled rectangle with a 3-unit border, and
 *   3. three lines of caption text set in the font registered as `Ubuntu`.
 *
 * The `Mario` and `Ubuntu` names are only referenced here; whoever attaches
 * the stream to a page is expected to register matching resources on it.
 *
 * @param pdfDoc    Document used to create the content stream.
 * @param marioDims Unscaled width/height of the Mario image.
 * @returns The content stream created by `pdfDoc.createContentStream`.
 */
const makeOverlayContentStream = (
  pdfDoc: PDFDocument,
  marioDims: { width: number; height: number },
) => {
  // Turns 0-255 channel values into the fractional triple used for colors.
  // A fresh array is returned on every call so no color array is shared.
  const rgb = (r: number, g: number, b: number): [number, number, number] => [
    r / 255,
    g / 255,
    b / 255,
  ];

  const marioScale = 0.15;

  const marioOperators = drawImage('Mario', {
    x: 200,
    y: 375,
    width: marioDims.width * marioScale,
    height: marioDims.height * marioScale,
  });

  const captionBoxOperators = drawRectangle({
    x: 120,
    y: 265,
    width: 400,
    height: 90,
    colorRgb: rgb(253, 246, 227),
    borderWidth: 3,
    borderColorRgb: rgb(101, 123, 131),
  });

  const captionLines = [
    'This is an image of Mario running.',
    'This image and text was drawn on',
    'top of an existing PDF using pdf-lib!',
  ];
  const captionOperators = drawLinesOfText(captionLines, {
    x: 125,
    y: 325,
    colorRgb: rgb(101, 123, 131),
    font: 'Ubuntu',
    size: 24,
  });

  return pdfDoc.createContentStream(
    ...marioOperators,
    ...captionBoxOperators,
    ...captionOperators,
  );
};

/**
 * Test kernel: loads the `with_missing_endstream_eol_and_polluted_ctm` PDF
 * asset, embeds the Times-Roman standard font, the Ubuntu TTF font and the
 * small Mario PNG, registers the overlay content stream produced by
 * `makeOverlayContentStream`, attaches all of them to the first page, and
 * returns the document serialized by `PDFDocumentWriter.saveToBytes`.
 *
 * @param assets Test assets providing the source PDF, font and image bytes.
 * @returns The result of `PDFDocumentWriter.saveToBytes` for the modified document.
 */
const kernel: ITestKernel = (assets: ITestAssets) => {
  const sourcePdf = assets.pdfs.with_missing_endstream_eol_and_polluted_ctm;
  const pdfDoc = PDFDocumentFactory.load(sourcePdf);

  // Embed the resources in the same order as before: font, font, image.
  const [timesRomanFont] = pdfDoc.embedStandardFont('Times-Roman');
  const [ubuntuFont] = pdfDoc.embedFont(assets.fonts.ttf.ubuntu_r);
  const [marioImage, marioDims] = pdfDoc.embedPNG(
    assets.images.png.small_mario,
  );

  const firstPage = pdfDoc.getPages()[0];

  // Build the overlay stream, then register it with the document.
  const overlayStream = makeOverlayContentStream(pdfDoc, marioDims);
  const overlayStreamRef = pdfDoc.register(overlayStream);

  // The resource names used below must match the names referenced inside
  // the overlay content stream ('Ubuntu' and 'Mario').
  firstPage
    .addFontDictionary('Times-Roman', timesRomanFont)
    .addFontDictionary('Ubuntu', ubuntuFont)
    .addXObject('Mario', marioImage)
    .addContentStreams(overlayStreamRef);

  return PDFDocumentWriter.saveToBytes(pdfDoc);
};

// Human-readable metadata shown alongside the output of this test.
const testTitle =
  'PDF with Missing "endstream" EOL-Marker and Modified CTM Test';

const testDescription =
  'This tests that PDFs with missing EOL markers before their "endstream" keywords and a modified CTM can be parsed and modified with the default CTM.\nhttps://github.com/Hopding/pdf-lib/issues/12';

// Things to verify by eye in the PDF produced by `kernel`.
const testChecklist = [
  'the background of the PDF is a WaveOC USA, Inc. refund receipt.',
  'an image of Mario running is drawn on top of the receipt.',
  'a box with solarized text is drawn underneath Mario.',
];

/**
 * Test definition: the kernel that produces the PDF plus the title,
 * description and manual checklist that accompany it.
 */
export default {
  kernel,
  title: testTitle,
  description: testDescription,
  checklist: testChecklist,
};
4 changes: 3 additions & 1 deletion __tests__/core/pdf-parser/PDFParser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,14 @@ describe(`PDFParser`, () => {
expect(res.dictionaries).toHaveLength(14);
expect(res.updates).toHaveLength(1);
expect(res).toEqual({
maxObjectNumber: 5,
catalog: expect.any(PDFCatalog),
arrays: expect.any(Array),
dictionaries: expect.any(Array),
original: {
header: expect.any(PDFHeader),
body: expect.any(Map),
linearization: null,
linearization: undefined,
xRefTable: expect.any(PDFXRef.Table),
trailer: expect.any(PDFTrailer),
},
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseArray.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ describe(`parseArray`, () => {
expect(res[1]).toEqual(typedArrayFor('[3 4]'));
});

it(`returns null when the leading input is not a PDF Array`, () => {
it(`returns undefined when the leading input is not a PDF Array`, () => {
const input = typedArrayFor('1 2 [3 4]');
const res = parseArray(input, PDFObjectIndex.create());
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseArray" parseHandler with the parsed PDFArray object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseBool.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ describe(`parseBool`, () => {
expect(res[1]).toEqual(typedArrayFor('FOOBAR'));
});

it(`returns null when leading input is not a PDFBoolean`, () => {
it(`returns undefined when leading input is not a PDFBoolean`, () => {
const input = typedArrayFor('FOOBARtrue');
const res = parseBool(input);
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseBool" parse handler with the parsed PDFBoolean object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseDict.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ describe(`parseDict`, () => {
expect(res[1]).toEqual(typedArrayFor('<< /Qux (Baz) >>'));
});

it(`returns null when the leading input is not a PDF Dictionary`, () => {
it(`returns undefined when the leading input is not a PDF Dictionary`, () => {
const input = typedArrayFor('[(foo)] << /Qux /Baz >>');
const res = parseDict(input, PDFObjectIndex.create());
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseDict" parseHandler with the parsed PDFArray object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseDictOrStream.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ describe(`parseDictOrStream`, () => {
);
});

it(`returns null when leading input is not a PDF Dictionary or a PDF Stream`, () => {
it(`returns undefined when leading input is not a PDF Dictionary or a PDF Stream`, () => {
const input = typedArrayFor('(Foo)<< /Stuff /AndThings >>');
const res = parseDictOrStream(input, PDFObjectIndex.create());
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseDict" parseHandler with the parsing a PDFDictionary object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseHeader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ describe(`parseHeader`, () => {
expect(res[1]).toEqual(typedArrayFor('%PDF-1.3'));
});

it(`returns null when leading input is not a PDF Header`, () => {
it(`returns undefined when leading input is not a PDF Header`, () => {
const input = typedArrayFor('(%PDF-1.3)');
const res = parseHeader(input);
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseHeader" parseHandler with the parsed PDFHeader object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseHexString.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ describe(`parseHexString`, () => {
expect(res[1]).toEqual(typedArrayFor('<AND STUFF>'));
});

it(`returns null when leading input is not a PDF Hex String`, () => {
it(`returns undefined when leading input is not a PDF Hex String`, () => {
const input = typedArrayFor('(FOOBAR)<ABD123>');
const res = parseHexString(input);
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseHexString" parseHandler with the parsed PDFHexString object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseIndirectObj.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ describe(`parseIndirectObj`, () => {
expect(res[1]).toEqual(typedArrayFor(`0 2 obj\n<< /Foo /Bar >>\nendobj`));
});

it(`returns null when leading input is not a PDF Indirect Object`, () => {
it(`returns undefined when leading input is not a PDF Indirect Object`, () => {
const input = typedArrayFor(`(foobar)0 1 obj\n[/Foo]\nendobj`);
const res = parseIndirectObj(input, PDFObjectIndex.create());
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseIndirectObj" parseHandler with the parsed PDFArray object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseIndirectRef.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ describe(`parseIndirectRef`, () => {
expect(res[1]).toEqual(typedArrayFor(`1 1 R`));
});

it(`returns null when the leading input is not a PDF Indirect Reference`, () => {
it(`returns undefined when the leading input is not a PDF Indirect Reference`, () => {
const input = typedArrayFor(`(foo)1 1 R`);
const res = parseIndirectRef(input);
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseIndirectRef" parseHandler with the parsed PDFIndirectReference object`, () => {
Expand Down
10 changes: 5 additions & 5 deletions __tests__/core/pdf-parser/parseLinearization.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,15 @@ describe(`parseLinearization`, () => {
expect(res[0]).toEqual({
paramDict: expect.any(PDFIndirectObject),
xref: expect.any(PDFIndirectObject),
trailer: null,
trailer: undefined,
});
expect(res[0].paramDict.pdfObject).toEqual(expect.any(PDFDictionary));
});

it(`returns null when the leading input is not a PDF linearization`, () => {
it(`returns undefined when the leading input is not a PDF linearization`, () => {
const input = typedArrayFor(`(I'm a string!)`);
const res = parseLinearization(input, PDFObjectIndex.create());
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseLinearization" parseHandler with the parsed IPDFLinearization object`, () => {
Expand All @@ -132,10 +132,10 @@ describe(`parseLinearization`, () => {
});
});

it(`returns null if the leading indirect object is not a Linearization Param Dictionary`, () => {
it(`returns undefined if the leading indirect object is not a Linearization Param Dictionary`, () => {
const input = typedArrayFor(`1 0 obj\n<< /Type /Catalog >>\nendobj`);
const res = parseLinearization(input, PDFObjectIndex.create());
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`throws an error if a Linearization Param Dictionary is found, but no xref table or stream is found.`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseName.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ describe(`parseName`, () => {
expect(res).toEqual([PDFName.from('Foo'), typedArrayFor('/Bar')]);
});

it(`returns null when the leading input is not a PDF Name`, () => {
it(`returns undefined when the leading input is not a PDF Name`, () => {
const input = typedArrayFor('(Foo)/Bar');
const res = parseName(input);
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseName" parseHandler with the parsed PDFName object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseNull.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ describe(`parseNull`, () => {
expect(res).toEqual([PDFNull.instance, typedArrayFor('null(foo)')]);
});

it(`returns null when the leading input is not a PDF Null`, () => {
it(`returns undefined when the leading input is not a PDF Null`, () => {
const input = typedArrayFor('(foo)null');
const res = parseNull(input);
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseNull" parseHandler with the parsed PDFNull object`, () => {
Expand Down
4 changes: 2 additions & 2 deletions __tests__/core/pdf-parser/parseNumber.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ describe(`parseNumber`, () => {
expect(res[1]).toEqual(typedArrayFor('.123'));
});

it(`returns null when the leading input is not a PDF Number`, () => {
it(`returns undefined when the leading input is not a PDF Number`, () => {
const input = typedArrayFor('(123)123');
const res = parseNumber(input);
expect(res).toBeNull();
expect(res).toBeUndefined();
});

it(`invokes the "onParseNumber" parseHandler with the parsed PDFNumber object`, () => {
Expand Down
Loading

0 comments on commit d06c6d5

Please sign in to comment.