Working (only tested Split so far, but I'm tired)

This commit is contained in:
Felix Kaspar
2023-11-14 23:14:08 +01:00
parent d7feec32dd
commit 85d61fddf0
12 changed files with 164 additions and 136 deletions

View File

@@ -6,7 +6,7 @@ import { Image } from 'image-js';
import { getImagesOnPage } from "./getImagesOnPage.js";
export async function detectEmptyPages(file: PdfFile, whiteThreashold: number): Promise<number[]> {
const pdfDoc = await file.pdfjsDocuemnt;
const pdfDoc = await file.pdfjsDocument;
const emptyPages: number[] = [];
for (let i = 1; i <= pdfDoc.numPages; i++) {

View File

@@ -1,6 +1,6 @@
import { PDFDocument } from 'pdf-lib';
import { PdfFile } from '../wrappers/PdfFile';
import { PdfFile, RepresentationType } from '../wrappers/PdfFile';
export type MergeParamsType = {
files: PdfFile[];
@@ -15,5 +15,5 @@ export async function mergePDFs(params: MergeParamsType): Promise<PdfFile> {
copiedPages.forEach((page) => mergedPdf.addPage(page));
}
return new PdfFile("mergedPDF", mergedPdf);
return new PdfFile("mergedPDF", mergedPdf, RepresentationType.PDFLibDocument);
};

View File

@@ -16,6 +16,8 @@ export async function splitOn(params: SplitOnParamsType) {
const { file, type, whiteThreashold } = params;
let splitAtPages: number[] = [];
console.log("File: ", file);
switch (type) {
case "BAR_CODE":
@@ -36,6 +38,8 @@ export async function splitOn(params: SplitOnParamsType) {
console.log("Split At Pages: ", splitAtPages);
console.log("File: ", file);
// Remove detected Pages & Split
const pdfDoc = await file.pdflibDocument;
const numberOfPages = pdfDoc.getPageCount();
@@ -66,7 +70,9 @@ export async function splitOn(params: SplitOnParamsType) {
return subDocuments;
async function getPagesWithQRCode(file: PdfFile) {
const pdfDoc = await file.pdfjsDocuemnt;
console.log("FileInQRPrev: ", file);
const pdfDoc = await file.pdfjsDocument;
console.log("FileInQRAfter: ", file);
const pagesWithQR: number[] = [];
for (let i = 0; i < pdfDoc.numPages; i++) {
@@ -74,7 +80,7 @@ export async function splitOn(params: SplitOnParamsType) {
const page = await pdfDoc.getPage(i + 1);
const images = await getImagesOnPage(page);
console.log("images:", images);
// console.log("images:", images);
for (const image of images) {
const data = await checkForQROnImage(image);
if(data == "https://github.com/Frooodle/Stirling-PDF") {

View File

@@ -1,6 +1,6 @@
import { PDFDocument } from 'pdf-lib';
import { PdfFile, fromPdfLib } from '../wrappers/PdfFile.js';
import { PdfFile, RepresentationType } from '../wrappers/PdfFile.js';
import { detectEmptyPages } from "./common/detectEmptyPages.js";
@@ -21,12 +21,11 @@ export async function sortPagesWithPreset(params: SortPagesWithPresetParamsType)
throw new Error("Operation not supported");
}
const byteFile = await file.convertToPdfLibFile();
if (!byteFile?.pdfLib) return byteFile;
const pdflibDocument = await file.pdflibDocument;
const pageCount = byteFile.pdfLib.getPageCount();
const pageCount = pdflibDocument.getPageCount();
const sortIndecies = sortFunction(pageCount);
return selectPages({file:byteFile, pagesToExtractArray:sortIndecies});
return selectPages({file: file, pagesToExtractArray: sortIndecies});
}
export type RearrangePagesParamsType = {
@@ -37,11 +36,10 @@ export type RearrangePagesParamsType = {
/**
 * Creates a new PDF whose pages are chosen and ordered by a page-selector expression.
 *
 * The selector string is handed to `parseFancyPageSelector` together with the
 * document's page count; the resulting index array is forwarded to `selectPages`.
 *
 * @param params.file PDF to take the pages from.
 * @param params.fancyPageSelector Selector expression understood by `parseFancyPageSelector`.
 * @returns The PdfFile produced by `selectPages`.
 */
export async function rearrangePages(params: RearrangePagesParamsType): Promise<PdfFile> {
    const { file, fancyPageSelector } = params;

    const pdflibDocument = await file.pdflibDocument;
    const pagesToExtractArray = parseFancyPageSelector(fancyPageSelector, pdflibDocument.getPageCount());

    return selectPages({ file, pagesToExtractArray });
}
@@ -52,23 +50,22 @@ export type SelectPagesParamsType = {
/**
 * Copies the requested pages of a PDF into a freshly created document.
 *
 * Pages are added to the new document in the order given by `pagesToExtractArray`.
 *
 * @param params.file PDF to copy pages from.
 * @param params.pagesToExtractArray Page indices passed to pdf-lib's `copyPages`.
 * @returns A new PdfFile wrapping the pdf-lib document, keeping the source's
 *          `originalFilename` and `filename`.
 * @throws Error if the largest requested index is greater than or equal to the page count.
 */
export async function selectPages(params: SelectPagesParamsType): Promise<PdfFile> {
    const { file, pagesToExtractArray } = params;

    const pdflibDocument = await file.pdflibDocument;
    const pageCount = pdflibDocument.getPageCount();

    // Check that the largest requested index does not exceed the last page of the PDF.
    // For an empty selection Math.max() yields -Infinity, so the check passes and
    // an empty document is produced.
    const highestIndex = Math.max(...pagesToExtractArray);
    if (highestIndex >= pageCount) {
        throw new Error(`The PDF document only has ${pageCount} pages and you tried to extract page ${highestIndex}`);
    }

    const subDocument = await PDFDocument.create();
    const copiedPages = await subDocument.copyPages(pdflibDocument, pagesToExtractArray);
    for (const page of copiedPages) {
        subDocument.addPage(page);
    }

    return new PdfFile(file.originalFilename, subDocument, RepresentationType.PDFLibDocument, file.filename);
}
export type RemovePagesParamsType = {
@@ -78,11 +75,10 @@ export type RemovePagesParamsType = {
/**
 * Creates a new PDF without the listed pages.
 *
 * The pages to keep are computed by `invertSelection` from `pagesToRemoveArray`
 * and the document's page indices, then extracted via `selectPages`.
 * NOTE(review): presumably `invertSelection` returns every index that is not in
 * `pagesToRemoveArray` — confirm against its implementation.
 *
 * @param params.file PDF to remove pages from.
 * @param params.pagesToRemoveArray Page indices that should not appear in the result.
 * @returns The PdfFile produced by `selectPages`.
 */
export async function removePages(params: RemovePagesParamsType): Promise<PdfFile> {
    const { file, pagesToRemoveArray } = params;

    const pdflibDocument = await file.pdflibDocument;
    const pagesToExtractArray = invertSelection(pagesToRemoveArray, pdflibDocument.getPageIndices());

    return selectPages({ file, pagesToExtractArray });
}
export type RemoveBlankPagesParamsType = {

View File

@@ -1,5 +1,5 @@
import { PdfFile, fromPdfLib } from '../wrappers/PdfFile';
import { PdfFile } from '../wrappers/PdfFile';
export type UpdateMetadataParams = {
file: PdfFile,
@@ -17,7 +17,7 @@ export type UpdateMetadataParams = {
}
export async function updateMetadata(params: UpdateMetadataParams): Promise<PdfFile> {
const pdfDoc = await params.file.getAsPdfLib();
const pdfDoc = await params.file.pdflibDocument;
if (params.deleteAll) {
pdfDoc.setAuthor("");
@@ -49,5 +49,5 @@ export async function updateMetadata(params: UpdateMetadataParams): Promise<PdfF
// TODO add trapped and custom metadata. May need another library
return fromPdfLib(pdfDoc, params.file.filename);
return params.file;
};

View File

@@ -9,8 +9,10 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[
yield* nextOperation(operations, input);
return results;
async function * nextOperation(actions: Action[], input: PdfFile[] | PdfFile): AsyncGenerator<string, void, void> {
if(Array.isArray(actions) && actions.length == 0) { // isEmpty
async function * nextOperation(actions: Action[] | undefined, input: PdfFile[] | PdfFile): AsyncGenerator<string, void, void> {
console.log("Next Operation");
if(actions === undefined || (Array.isArray(actions) && actions.length == 0)) { // isEmpty
console.log("Last Operation");
if(Array.isArray(input)) {
console.log("operation done: " + input[0].filename + (input.length > 1 ? "+" : ""));
results = results.concat(input);
@@ -24,11 +26,12 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[
}
for (let i = 0; i < actions.length; i++) {
yield* computeOperation(actions[i], structuredClone(input));
yield* computeOperation(actions[i], input); // TODO: structuredClone doesn't work in ts need to find another solution to pass by value.
}
}
async function * computeOperation(action: Action, input: PdfFile|PdfFile[]): AsyncGenerator<string, void, void> {
yield "Starting: " + action.type;
switch (action.type) {
case "done": // Skip this, because it is a valid node.
@@ -132,9 +135,7 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[
const input = Array.isArray(inputs) ? inputs : [inputs]; // Convert single values to array, keep arrays as is.
const newInputs = await callback(input);
if (action.actions) {
yield* nextOperation(action.actions, newInputs);
}
yield* nextOperation(action.actions, newInputs);
}
/**
@@ -149,15 +150,11 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[
for (let i = 0; i < input.length; i++) {
output = output.concat(await callback(input[i]));
}
if (action.actions) {
yield* nextOperation(action.actions, output);
}
yield* nextOperation(action.actions, output);
}
else {
const nextInput = await callback(input);
if (action.actions) {
yield* nextOperation(action.actions, nextInput);
}
yield* nextOperation(action.actions, nextInput);
}
}
@@ -167,15 +164,11 @@ export async function * traverseOperations(operations: Action[], input: PdfFile[
for (let i = 0; i < input.length; i++) {
nextInputs.concat(await callback(input[i]));
}
if (action.actions) {
yield* nextOperation(action.actions, nextInputs);
}
yield* nextOperation(action.actions, nextInputs);
}
else {
const nextInput = await callback(input);
if (action.actions) {
yield* nextOperation(action.actions, nextInput);
}
yield* nextOperation(action.actions, nextInput);
}
}
}

View File

@@ -1,75 +1,100 @@
import * as PDFJS from 'pdfjs-dist';
import { PDFDocumentProxy as PDFJSDocument } from 'pdfjs-dist/types/src/display/api';
import type { PDFDocumentProxy as PDFJSDocument } from 'pdfjs-dist/types/src/display/api';
import { PDFDocument as PDFLibDocument } from 'pdf-lib';
import Joi from 'joi';
/**
 * Tag recording which of the three supported forms a PdfFile currently holds
 * in its `representation` field.
 */
export enum RepresentationType {
    /** The representation is a `Uint8Array`. */
    Uint8Array = 0,
    /** The representation is a pdf-lib `PDFDocument`. */
    PDFLibDocument = 1,
    /** The representation is a pdf.js `PDFDocumentProxy`. */
    PDFJSDocument = 2,
}
export class PdfFile {
private representation: Uint8Array | PDFLibDocument | PDFJSDocument;
private representationType: RepresentationType;
originalFilename: string;
filename: string;
/**
 * The PDF as a `Uint8Array`.
 *
 * If the file currently holds a pdf-lib or pdf.js document, the bytes are
 * obtained from `save()` / `getData()` and then stored through the
 * `uint8Array` setter, which switches the held representation to the bytes.
 *
 * The returned promise rejects if the underlying conversion fails.
 *
 * @throws Error synchronously if `representationType` is not a known value.
 */
get uint8Array() : Promise<Uint8Array> {
    switch (this.representationType) {
        case RepresentationType.Uint8Array:
            return Promise.resolve(this.representation as Uint8Array);
        case RepresentationType.PDFLibDocument:
            // Chain on the library promise instead of wrapping it in
            // `new Promise(async ...)`, so a failed save() rejects the result.
            return (this.representation as PDFLibDocument).save().then((bytes) => {
                this.uint8Array = bytes;
                return bytes;
            });
        case RepresentationType.PDFJSDocument:
            return (this.representation as PDFJSDocument).getData().then((bytes) => {
                this.uint8Array = bytes;
                return bytes;
            });
        default:
            console.error("unhandeled PDF type: " + typeof this.representation);
            throw Error("unhandeled PDF type");
    }
}
/**
 * Replaces the held representation with the given bytes and tags the file
 * as `RepresentationType.Uint8Array`.
 */
set uint8Array(value: Uint8Array) {
    this.representationType = RepresentationType.Uint8Array;
    this.representation = value;
}
/**
 * The PDF as a pdf-lib document.
 *
 * If the file does not currently hold a pdf-lib document, its bytes are read
 * through the `uint8Array` getter, loaded with `updateMetadata: false`, and
 * the loaded document is stored through the `pdflibDocument` setter so it
 * becomes the held representation.
 *
 * The returned promise rejects if reading the bytes or loading them fails.
 */
get pdflibDocument() : Promise<PDFLibDocument> {
    switch (this.representationType) {
        case RepresentationType.PDFLibDocument:
            return Promise.resolve(this.representation as PDFLibDocument);
        default:
            // Chain the promises instead of using `new Promise(async ...)`,
            // so a load failure rejects the result instead of hanging it.
            return this.uint8Array
                .then((bytes) => PDFLibDocument.load(bytes, {
                    updateMetadata: false,
                }))
                .then((pdfLibDoc) => {
                    this.pdflibDocument = pdfLibDoc;
                    return pdfLibDoc;
                });
    }
}
/**
 * Replaces the held representation with the given pdf-lib document and tags
 * the file as `RepresentationType.PDFLibDocument`.
 */
set pdflibDocument(value: PDFLibDocument) {
    this.representationType = RepresentationType.PDFLibDocument;
    this.representation = value;
}
/**
 * The PDF as a pdf.js document.
 *
 * If the file does not currently hold a pdf.js document, its bytes are read
 * through the `uint8Array` getter, opened with `PDFJS.getDocument`, and the
 * resulting document is stored through the `pdfjsDocument` setter so it
 * becomes the held representation.
 *
 * The returned promise rejects if reading the bytes or opening them fails.
 */
get pdfjsDocument() : Promise<PDFJSDocument> {
    switch (this.representationType) {
        case RepresentationType.PDFJSDocument:
            return Promise.resolve(this.representation as PDFJSDocument);
        default:
            // Chain the promises instead of using `new Promise(async ...)`,
            // so a parse failure rejects the result instead of hanging it.
            return this.uint8Array
                .then((bytes) => PDFJS.getDocument(bytes).promise)
                .then((pdfjsDoc) => {
                    this.pdfjsDocument = pdfjsDoc;
                    return pdfjsDoc;
                });
    }
}
/**
 * Replaces the held representation with the given pdf.js document and tags
 * the file as `RepresentationType.PDFJSDocument`.
 */
set pdfjsDocument(value: PDFJSDocument) {
    this.representation = value;
    this.representationType = RepresentationType.PDFJSDocument;
}
/**
 * @param originalFilename Name the file was first given.
 * @param representation The PDF in one of the three supported forms.
 * @param representationType Tag stating which form `representation` is; the
 *        getters switch on this value, so it must match the object passed in.
 * @param filename Current name; falls back to `originalFilename` when omitted or empty.
 */
constructor(originalFilename: string, representation: Uint8Array | PDFLibDocument | PDFJSDocument, representationType: RepresentationType, filename?: string) {
    this.originalFilename = originalFilename;
    this.filename = filename ? filename : originalFilename;

    this.representation = representation;
    this.representationType = representationType;
}
/**
 * Wraps an uploaded Multer file in a PdfFile, using the upload's
 * `originalname` as the filename and its `buffer` as a `Uint8Array` representation.
 */
static fromMulterFile(value: Express.Multer.File): PdfFile {
    return new PdfFile(value.originalname, value.buffer as Uint8Array, RepresentationType.Uint8Array);
}
static fromMulterFiles(values: Express.Multer.File[]): PdfFile[] {
return values.map(v => PdfFile.fromMulterFile(v));