Merge branch 'version-2' of https://github.com/Frooodle/Stirling-PDF into version-2

This commit is contained in:
Saud Fatayerji
2023-11-13 02:52:01 +03:00
31 changed files with 99 additions and 129 deletions

View File

@@ -0,0 +1,51 @@
import { PdfFile } from '../../wrappers/PdfFile';
import { PDFPageProxy } from "pdfjs-dist/types/src/display/api.js";
import { Image } from 'image-js';
import { getImagesOnPage } from "./getImagesOnPage.js";
/**
 * Finds the pages of a PDF that contain neither text nor any non-blank image.
 *
 * @param file - The PDF to inspect; it is read through `file.getAsPdfJs()`.
 * @param whiteThreashold - Threshold forwarded unchanged to the image blankness check.
 * @returns Zero-based indices of the pages considered empty, in ascending order.
 */
export async function detectEmptyPages(file: PdfFile, whiteThreashold: number): Promise<number[]> {
    const pdfDoc = await file.getAsPdfJs();

    const emptyPages: number[] = [];
    // Pages are requested with numbers 1..numPages, while results are reported zero-based.
    for (let i = 1; i <= pdfDoc.numPages; i++) {
        const page = await pdfDoc.getPage(i);
        console.log("Checking page " + i);

        if (await hasText(page)) {
            console.log(`Found text on Page ${i}, page is not empty`);
            continue;
        }

        if (!await areImagesBlank(page, whiteThreashold)) {
            console.log(`Found non white image on Page ${i}, page is not empty`);
            continue;
        }

        console.log(`Page ${i} is empty.`);
        emptyPages.push(i - 1);
    }
    return emptyPages;
}

/**
 * Reports whether the page has at least one text content item.
 *
 * @param page - The pdf.js page to inspect.
 * @returns true when `getTextContent()` yields one or more items.
 */
async function hasText(page: PDFPageProxy): Promise<boolean> {
    const textContent = await page.getTextContent();
    return textContent.items.length > 0;
}
/**
 * Checks whether every image found on the page is blank.
 *
 * @param page - The pdf.js page whose images are examined.
 * @param threshold - Threshold forwarded to the per-image blankness check.
 * @returns false as soon as one image is not blank; true otherwise (including when there are no images).
 */
async function areImagesBlank(page: PDFPageProxy, threshold: number): Promise<boolean> {
    for (const candidate of await getImagesOnPage(page)) {
        const blank = await isImageBlank(candidate, threshold);
        if (blank) {
            continue;
        }
        return false;
    }
    return true;
}
/**
 * Decides whether a single image counts as blank.
 *
 * The image is loaded, converted to greyscale, and the first component of its
 * mean is compared against `threshold`.
 *
 * NOTE(review): the image counts as blank when the mean is at or BELOW the
 * threshold. For a "white" threshold one might expect the opposite comparison;
 * confirm the intended direction.
 *
 * @param image - Image source handed to `Image.load`.
 * @param threshold - Upper bound (inclusive) for the greyscale mean.
 * @returns true when the greyscale mean does not exceed the threshold.
 */
async function isImageBlank(image: string | Uint8Array | ArrayBuffer, threshold: number): Promise<boolean> {
    const loaded = await Image.load(image);
    const greyMean = loaded.grey().getMean();
    return greyMean[0] <= threshold;
}

View File

@@ -0,0 +1,15 @@
import { PDFPageProxy } from "pdfjs-dist/types/src/display/api.js";
import * as PDFJS from 'pdfjs-dist';
/**
 * Collects the image objects painted on a page.
 *
 * Walks the page's operator list and, for every `paintImageXObject` operator,
 * looks up the object named by the operator's first argument in `page.objs`.
 *
 * @param page - The pdf.js page to scan.
 * @returns The looked-up image objects, in operator order.
 */
export async function getImagesOnPage(page: PDFPageProxy) {
    const operatorList = await page.getOperatorList();
    const images: any = [];
    for (const [position, operator] of operatorList.fnArray.entries()) {
        if (operator == PDFJS.OPS.paintImageXObject) {
            const objectName = operatorList.argsArray[position][0];
            images.push(page.objs.get(objectName));
        }
    }
    return images;
}

View File

@@ -0,0 +1,47 @@
import { PdfFile, convertAllToPdfLibFile } from '../../wrappers/PdfFile';
/**
 * Sorts a list of PDF files by the requested criterion.
 *
 * The files are first converted with `convertAllToPdfLibFile`; the converted
 * array is then sorted in place and returned.
 *
 * @param files - The files to sort.
 * @param sortType - Sort criterion. "orderProvided" (the default) leaves the order untouched.
 * @returns The converted files in sorted order.
 */
export async function sortPdfs(
    files: PdfFile[],
    sortType: "orderProvided"|"byFileName"|"byDateModified"|"byDateCreated"|"byPDFTitle" = "orderProvided"
): Promise<PdfFile[]> {
    const pdfLibFiles = await convertAllToPdfLibFile(files);

    switch(sortType) {
        case "byFileName":
            pdfLibFiles.sort((a, b) => {
                if (!a || !b) return 0;
                const ad = a.filename, bd = b.filename;
                if (!ad || !bd) return 0;
                return ad.localeCompare(bd);
            });
            break;
        case "byDateModified":
            pdfLibFiles.sort((a, b) => compareOptionalTimestamps(
                a.pdfLib?.getModificationDate()?.getTime(),
                b.pdfLib?.getModificationDate()?.getTime(),
            ));
            break;
        case "byDateCreated":
            pdfLibFiles.sort((a, b) => compareOptionalTimestamps(
                a.pdfLib?.getCreationDate()?.getTime(),
                b.pdfLib?.getCreationDate()?.getTime(),
            ));
            break;
        case "byPDFTitle":
            pdfLibFiles.sort((a, b) => {
                const ad = a.pdfLib?.getTitle();
                const bd = b.pdfLib?.getTitle();
                if (!ad || !bd) return 0;
                return ad.localeCompare(bd);
            });
            break;
    }

    return pdfLibFiles;
}

/**
 * Ascending comparator for two optional timestamps.
 *
 * Returns 0 when either side is missing (or falsy), and otherwise the numeric
 * difference, so equal timestamps compare as equal. The previous
 * `ad > bd ? 1 : -1` form answered -1 for equal values in both directions,
 * which breaks the comparator contract expected by `Array.prototype.sort`.
 */
function compareOptionalTimestamps(first: number | undefined, second: number | undefined): number {
    if (!first || !second) return 0;
    return first - second;
}

View File

@@ -0,0 +1,12 @@
/**
 * Invokes the wrapper's `oneToOne` with a pdfcpu "nup" command line.
 *
 * @param snapshot - Passed through unchanged as the second argument of `oneToOne`.
 * @param nup - Value placed (as a string) in the command line after the output path.
 * @param format - Appended to the "f:" option of the command line.
 * @param pdfcpuWrapper - Object exposing `oneToOne(args, snapshot)`.
 * @returns Whatever `oneToOne` resolves to.
 */
export async function impose(snapshot: any, nup: number, format: string, pdfcpuWrapper: any) {
    const formatOption = `f:${format}`;
    const commandLine = [
        "pdfcpu.wasm",
        "nup",
        "-c",
        "disable",
        formatOption,
        "/output.pdf",
        String(nup),
        "input.pdf",
    ];
    const result = await pdfcpuWrapper.oneToOne(commandLine, snapshot);
    return result;
}

View File

@@ -0,0 +1,18 @@
import { PDFDocument } from 'pdf-lib';
import { PdfFile, convertAllToPdfLibFile, fromPdfLib } from '../wrappers/PdfFile';
/**
 * Concatenates the pages of several PDFs into one new document.
 *
 * @param files - The PDFs to merge, in the order their pages should appear.
 * @returns A new file holding all pages; it is given the filename of the first input.
 */
export async function mergePDFs(files: PdfFile[]): Promise<PdfFile> {
    const sources = await convertAllToPdfLibFile(files);
    const mergedPdf = await PDFDocument.create();

    for (const source of sources) {
        const pdfToMerge = await source.getAsPdfLib();
        const pageIndices = pdfToMerge.getPageIndices();
        for (const copiedPage of await mergedPdf.copyPages(pdfToMerge, pageIndices)) {
            mergedPdf.addPage(copiedPage);
        }
    }

    return fromPdfLib(mergedPdf, files[0].filename);
}

View File

@@ -0,0 +1,26 @@
import { degrees } from 'pdf-lib';
import { PdfFile, fromPdfLib } from '../wrappers/PdfFile';
/**
 * Adds a rotation to the pages of a PDF.
 *
 * @param file - The PDF whose pages are rotated.
 * @param rotation - Angle in degrees added to each page's current rotation:
 *   either one value for all pages or an array with one value per page.
 * @returns The rotated document, keeping the input's filename.
 * @throws Error when an array is given whose length differs from the page count.
 */
export async function rotatePages(file: PdfFile, rotation: number|number[]): Promise<PdfFile> {
    const pdfDoc = await file.getAsPdfLib();
    const pages = pdfDoc.getPages();

    if (!Array.isArray(rotation)) {
        // One angle applied uniformly to every page.
        for (const page of pages) {
            const currentAngle = page.getRotation().angle;
            page.setRotation(degrees(currentAngle + rotation));
        }
        return fromPdfLib(pdfDoc, file.filename);
    }

    if (rotation.length != pages.length) {
        throw new Error(`Number of given rotations '${rotation.length}' is not the same as the number of pages '${pages.length}'`)
    }
    // Per-page angles, matched to pages by position.
    rotation.forEach((extraAngle: number, pageIndex: number) => {
        const currentAngle = pages[pageIndex].getRotation().angle;
        pages[pageIndex].setRotation(degrees(currentAngle + extraAngle));
    });

    return fromPdfLib(pdfDoc, file.filename);
}

View File

@@ -0,0 +1,36 @@
import { PDFPage } from 'pdf-lib';
import { PdfFile, fromPdfLib } from '../wrappers/PdfFile';
/**
 * Scales the content of the pages of a PDF without changing the page size.
 *
 * @param file - The PDF to modify.
 * @param scaleFactor - Either one factor applied to every page or an array with one factor per page.
 * @returns The modified document, keeping the input's filename.
 * @throws Error when an array is given whose length differs from the page count.
 */
export async function scaleContent(file: PdfFile, scaleFactor: number|number[]): Promise<PdfFile> {
    const pdfDoc = await file.getAsPdfLib();
    const pages = pdfDoc.getPages();

    if (!Array.isArray(scaleFactor)) {
        for (const page of pages) {
            scalePage(page, scaleFactor);
        }
        return fromPdfLib(pdfDoc, file.filename);
    }

    if (scaleFactor.length != pages.length) {
        throw new Error(`Number of given scale factors '${scaleFactor.length}' is not the same as the number of pages '${pages.length}'`)
    }
    scaleFactor.forEach((factor: number, pageIndex: number) => {
        scalePage(pages[pageIndex], factor);
    });

    return fromPdfLib(pdfDoc, file.filename);
}
/**
 * Scales a page's content uniformly and re-centres it on the unchanged page.
 *
 * @param page - The page whose content is scaled.
 * @param scaleFactor - Factor applied to both axes.
 */
function scalePage(page: PDFPage, scaleFactor: number) {
    const originalWidth = page.getWidth();
    const originalHeight = page.getHeight();

    page.scaleContent(scaleFactor, scaleFactor);

    // Space freed (or overflowed) by the scaling, rounded to whole units.
    const widthDelta = Math.round(originalWidth - scaleFactor * originalWidth);
    const heightDelta = Math.round(originalHeight - scaleFactor * originalHeight);

    // Shift by half the difference on each axis so the content stays centred.
    const offsetX = Math.round(widthDelta / 2);
    const offsetY = Math.round(heightDelta / 2);
    page.translateContent(offsetX, offsetY);
}

View File

@@ -0,0 +1,56 @@
import { PDFPage } from 'pdf-lib';
import { PdfFile, fromPdfLib } from '../wrappers/PdfFile';
/**
 * Resizes the pages of a PDF by delegating each page to `resize`.
 *
 * @param file - The PDF to modify.
 * @param pageSize - Either one target size for every page or an array with one size per page.
 *   A size may give only a width or only a height.
 * @returns The modified document, keeping the input's filename.
 * @throws Error when an array is given whose length differs from the page count.
 */
export async function scalePage(file: PdfFile, pageSize: {width?:number,height?:number}|{width?:number,height?:number}[]): Promise<PdfFile> {
    const pdfDoc = await file.getAsPdfLib();
    const pages = pdfDoc.getPages();

    if (!Array.isArray(pageSize)) {
        for (const page of pages) {
            resize(page, pageSize);
        }
        return fromPdfLib(pdfDoc, file.filename);
    }

    if (pageSize.length != pages.length) {
        throw new Error(`Number of given sizes '${pageSize.length}' is not the same as the number of pages '${pages.length}'`)
    }
    pageSize.forEach((targetSize: {width?:number,height?:number}, pageIndex: number) => {
        resize(pages[pageIndex], targetSize);
    });

    return fromPdfLib(pdfDoc, file.filename);
}
/**
 * Resizes a page to the requested size and scales its content by the same ratios.
 *
 * @param page - The page to resize.
 * @param newSize - Target size; a missing dimension is derived from the page's aspect ratio.
 * @throws Error when neither width nor height is given.
 */
function resize(page: PDFPage, newSize: {width?:number,height?:number}) {
    const calculatedSize = calculateSize(page, newSize);

    // The ratios must be taken against the ORIGINAL dimensions. Reading them
    // after setSize would compare the new size with itself and always give 1,
    // leaving the content unscaled.
    const xRatio = calculatedSize.width / page.getWidth();
    const yRatio = calculatedSize.height / page.getHeight();

    page.setSize(calculatedSize.width, calculatedSize.height);
    page.scaleContent(xRatio, yRatio);
}

/**
 * Completes a possibly partial target size using the page's current aspect ratio.
 *
 * @param page - The page providing the current size.
 * @param newSize - Requested size with width, height, or both.
 * @returns A size with both dimensions set.
 * @throws Error when neither width nor height is given (or both are 0).
 */
function calculateSize(page: PDFPage, newSize: {width?:number,height?:number}): {width:number,height:number} {
    if (!newSize.width && !newSize.height){
        // Serialise the object explicitly; plain interpolation prints "[object Object]".
        throw new Error(`Sizes '${JSON.stringify(newSize)}' cannot have null width and null height`);
    } else if (!newSize.width && newSize.height) {
        // Only the height is known: derive the width.
        const oldSize = page.getSize();
        const ratio = oldSize.width / oldSize.height;
        return { width: newSize.height * ratio, height: newSize.height };
    } else if (newSize.width && !newSize.height) {
        // Only the width is known: derive the height.
        const oldSize = page.getSize();
        const ratio = oldSize.height / oldSize.width;
        return { width: newSize.width, height: newSize.width * ratio };
    }
    return { width: newSize.width!, height: newSize.height! };
}
// Frozen table of predefined page dimensions, keyed by paper format name.
// Each entry has the { width, height } shape accepted as a target page size.
// NOTE(review): the values look like PDF points (1/72 inch) — confirm the unit.
export const PageSize = Object.freeze({
a4: {
width: 594.96,
height: 841.92
},
letter: {
width: 612,
height: 792
}
});

View File

@@ -0,0 +1,110 @@
import jsQR from "jsqr";
import { detectEmptyPages } from "./common/detectEmptyPages.js";
import { getImagesOnPage } from "./common/getImagesOnPage.js";
import { selectPages } from "./subDocumentFunctions";
import { PdfFile } from '../wrappers/PdfFile.js';
/**
 * Splits a PDF into several documents, using detected separator pages as cut points.
 * The separator pages themselves are left out of the result.
 *
 * @param file - The PDF to split.
 * @param type - How separator pages are detected: "QR_CODE" looks for pages carrying
 *   the marker QR code, "BLANK_PAGE" uses `detectEmptyPages`. "BAR_CODE" is not implemented.
 * @param whiteThreashold - Threshold forwarded to `detectEmptyPages` (used for "BLANK_PAGE" only).
 * @returns The sub-documents in page order; empty runs between separators are skipped.
 * @throws Error when the split type is not implemented or not recognised.
 */
export async function splitOn(
    file: PdfFile,
    type: "BAR_CODE"|"QR_CODE"|"BLANK_PAGE",
    whiteThreashold: number) {
    let splitAtPages: number[] = [];

    switch (type) {
        case "BAR_CODE":
            // TODO: Implement
            throw new Error("This split-type has not been implemented yet");
        case "QR_CODE":
            splitAtPages = await getPagesWithQRCode(file);
            break;
        case "BLANK_PAGE":
            splitAtPages = await detectEmptyPages(file, whiteThreashold);
            break;
        default:
            throw new Error("An invalid split-type was provided.");
    }

    console.log("Split At Pages: ", splitAtPages);

    // A set makes the membership test independent of ordering and duplicates.
    // The previous shift()-based cursor stalled on a repeated index and then
    // ignored every later separator.
    const separatorPages = new Set(splitAtPages);

    // Remove detected Pages & Split
    const pdfDoc = await file.getAsPdfLib();
    const numberOfPages = pdfDoc.getPageCount();

    let pagesArray: number[] = [];
    const subDocuments: PdfFile[] = [];

    for (let i = 0; i < numberOfPages; i++) {
        if (separatorPages.has(i)) {
            // Separator page: close the current run (if any) and skip the page itself.
            if (pagesArray.length > 0) {
                subDocuments.push(await selectPages(file, pagesArray));
                pagesArray = [];
            }
        } else {
            pagesArray.push(i);
        }
    }
    if (pagesArray.length > 0) {
        subDocuments.push(await selectPages(file, pagesArray));
    }

    return subDocuments;
}

/**
 * Returns the zero-based indices of all pages that contain an image decoding
 * to the marker QR code. Each page is reported at most once.
 */
async function getPagesWithQRCode(file: PdfFile): Promise<number[]> {
    const qrSplitMarker = "https://github.com/Frooodle/Stirling-PDF";

    const pdfDoc = await file.getAsPdfJs();
    const pagesWithQR: number[] = [];

    for (let i = 0; i < pdfDoc.numPages; i++) {
        console.log("Page:", i, "/", pdfDoc.numPages);
        const page = await pdfDoc.getPage(i + 1);
        const images = await getImagesOnPage(page);

        for (const image of images) {
            const data = await checkForQROnImage(image);
            if (data == qrSplitMarker) {
                pagesWithQR.push(i);
                // One hit is enough; further images must not add the page again.
                break;
            }
        }
    }

    if (pagesWithQR.length == 0) {
        console.warn("Could not find any QR Codes in the provided PDF.")
    }
    return pagesWithQR;
}

/**
 * Tries to decode a QR code from an image object exposing `data`, `width` and `height`.
 *
 * @returns The decoded text, or null when no QR code was found.
 */
async function checkForQROnImage(image: any): Promise<string | null> {
    // TODO: There is an issue with the jsQR package (The package expects rgba but sometimes we have rgb), and the package seems to be stale, we could create a fork and fix the issue. In the meanwhile we just force rgba:
    let pixels = image.data;

    // Check for rgb and convert to rgba. The conversion goes into a local
    // buffer so the caller's image object is left untouched.
    if (image.data.length == image.width * image.height * 3) {
        const rgba = new Uint8ClampedArray(image.width * image.height * 4);

        // Copy each rgb triple and append a fully opaque alpha channel.
        for (let i = 0, j = 0; i < image.data.length; i += 3, j += 4) {
            rgba[j] = image.data[i]; // Red channel
            rgba[j + 1] = image.data[i + 1]; // Green channel
            rgba[j + 2] = image.data[i + 2]; // Blue channel
            rgba[j + 3] = 255; // Alpha channel (fully opaque)
        }

        pixels = rgba;
    }

    const code = jsQR(pixels, image.width, image.height);
    return code ? code.data : null;
}

View File

@@ -0,0 +1,27 @@
import { selectPages } from "./subDocumentFunctions";
import { PdfFile } from '../wrappers/PdfFile';
/**
 * Splits a PDF into consecutive sub-documents.
 *
 * @param file - The PDF to split.
 * @param splitAfterPageArray - Zero-based page indices after which a new document starts.
 *   Expected in ascending order. The array is not modified.
 * @returns The sub-documents in page order, or an empty array when the file cannot be converted.
 */
export async function splitPDF(file: PdfFile, splitAfterPageArray: number[]): Promise<PdfFile[]> {
    const byteFile = await file.convertToPdfLibFile();
    if (!byteFile?.pdfLib) return [];

    const numberOfPages = byteFile.pdfLib.getPages().length;

    // Consume a copy so the caller's array stays intact.
    const remainingSplits = [...splitAfterPageArray];
    let pagesArray: number[] = [];
    let splitAfter = remainingSplits.shift();
    const subDocuments: PdfFile[] = [];

    for (let i = 0; i < numberOfPages; i++) {
        // Compare against undefined explicitly: index 0 is a valid split point
        // and must not be treated as "no split left".
        if (splitAfter !== undefined && i > splitAfter && pagesArray.length > 0) {
            subDocuments.push(await selectPages(byteFile, pagesArray));
            splitAfter = remainingSplits.shift();
            pagesArray = [];
        }
        pagesArray.push(i);
    }
    // Whatever is left after the last split point forms the final document.
    subDocuments.push(await selectPages(byteFile, pagesArray));

    return subDocuments;
}

View File

@@ -0,0 +1,227 @@
import { PDFDocument } from 'pdf-lib';
import { PdfFile, fromPdfLib } from '../wrappers/PdfFile.js';
import { detectEmptyPages } from "./common/detectEmptyPages.js";
/**
 * Reorders the pages of a PDF according to a named preset.
 *
 * @param file - The PDF to reorder.
 * @param sortPreset - "CUSTOM_PAGE_ORDER" or one of the keys of `sorts`.
 * @param fancyPageSelector - Page selector string; only used for "CUSTOM_PAGE_ORDER".
 * @returns The reordered document.
 * @throws Error("Operation not supported") when the preset is unknown.
 */
export async function sortPagesWithPreset(file: PdfFile, sortPreset: string, fancyPageSelector: string) {
    if (sortPreset === "CUSTOM_PAGE_ORDER") {
        return rearrangePages(file, fancyPageSelector);
    }

    // Accept only presets defined on the table itself. A plain `sorts[sortPreset]`
    // would also find inherited members such as "constructor" or "toString" and
    // then call them as if they were sorters.
    const sortFunction = Object.prototype.hasOwnProperty.call(sorts, sortPreset)
        ? sorts[sortPreset]
        : undefined;
    if (!sortFunction) {
        throw new Error("Operation not supported");
    }

    const byteFile = await file.convertToPdfLibFile();
    if (!byteFile?.pdfLib) return byteFile;

    const pageCount = byteFile.pdfLib.getPageCount();
    const sortIndecies = sortFunction(pageCount);
    return selectPages(byteFile, sortIndecies);
}
/**
 * Builds a new document from the pages named by a page selector string.
 *
 * @param file - The PDF providing the pages.
 * @param fancyPageSelector - Selector string understood by `parseFancyPageSelector`.
 * @returns The document made of the selected pages, in selector order.
 */
export async function rearrangePages(file: PdfFile, fancyPageSelector: string): Promise<PdfFile> {
    const byteFile = await file.convertToPdfLibFile();
    const sourceDocument = byteFile?.pdfLib;
    if (!sourceDocument) return byteFile;

    const totalPages = sourceDocument.getPageCount();
    const requestedPages = parseFancyPageSelector(fancyPageSelector, totalPages);
    return selectPages(byteFile, requestedPages);
}
/**
 * Creates a new document containing copies of the given pages, in the given order.
 *
 * @param file - The PDF providing the pages.
 * @param pagesToExtractArray - Zero-based page indices; repeats are allowed.
 * @returns The new document, keeping the input's filename.
 * @throws Error when an index is not a non-negative integer or lies beyond the last page.
 */
export async function selectPages(file: PdfFile, pagesToExtractArray: number[]): Promise<PdfFile> {
    const byteFile = await file.convertToPdfLibFile();
    if (!byteFile?.pdfLib) return byteFile;

    const pageCount = byteFile.pdfLib.getPageCount();

    // Reject NaN, fractional and negative indices up front. They slip past a
    // pure "max >= pageCount" comparison and otherwise fail later while copying.
    let highestIndex = -Infinity;
    for (const pageIndex of pagesToExtractArray) {
        if (!Number.isInteger(pageIndex) || pageIndex < 0) {
            throw new Error(`Invalid page index '${pageIndex}': page indices must be non-negative integers`);
        }
        if (pageIndex > highestIndex) {
            highestIndex = pageIndex;
        }
    }

    // Check that array max number is not larger pdf pages number
    if (highestIndex >= pageCount) {
        throw new Error(`The PDF document only has ${pageCount} pages and you tried to extract page ${highestIndex}`);
    }

    const subDocument = await PDFDocument.create();
    const copiedPages = await subDocument.copyPages(byteFile.pdfLib, pagesToExtractArray);
    for (const copiedPage of copiedPages) {
        subDocument.addPage(copiedPage);
    }

    return fromPdfLib(subDocument, file.filename);
}
/**
 * Creates a new document without the given pages.
 *
 * @param file - The PDF providing the pages.
 * @param pagesToRemoveArray - Zero-based indices of the pages to drop.
 * @returns The document made of all remaining pages, in original order.
 */
export async function removePages(file: PdfFile, pagesToRemoveArray: number[]): Promise<PdfFile> {
    const byteFile = await file.convertToPdfLibFile();
    const sourceDocument = byteFile?.pdfLib;
    if (!sourceDocument) return byteFile;

    const allIndices = sourceDocument.getPageIndices();
    const remainingPages = invertSelection(pagesToRemoveArray, allIndices);
    return selectPages(byteFile, remainingPages);
}
/**
 * Removes every page that `detectEmptyPages` reports as empty.
 *
 * @param file - The PDF to clean up.
 * @param whiteThreashold - Threshold forwarded to `detectEmptyPages`.
 * @returns The document without the detected pages.
 */
export async function removeBlankPages(file: PdfFile, whiteThreashold: number) {
    const blankPageIndices = await detectEmptyPages(file, whiteThreashold);
    console.log("Empty Pages: ", blankPageIndices);
    return removePages(file, blankPageIndices);
}
/**
 * Parse the page selector string used in the 'PDF Page Organizer'.
 *
 * The selector is a comma separated list; every element appends pages to the result:
 * - `all`      : every page (case-insensitive)
 * - `2n`, `n+1`, `3n-1` : pages produced by the formula for n = 1..totalPages,
 *                keeping only results within 1..totalPages
 * - `3-7`      : an inclusive range; the end is clamped to totalPages
 * - `4`        : a single page
 *
 * Page numbers in the selector are one-based; the result is zero-based.
 *
 * @param pageNumbers The selector string.
 * @param totalPages Number of pages in the document.
 * @returns Zero-based page indices in the order the selector lists them (repeats allowed).
 */
function parseFancyPageSelector(pageNumbers: string, totalPages: number): number[] {
    // Translated to JS from the original Java function
    const functionPattern = /\d*n\+?-?\d*|\d*\+?n/;
    const newPageOrder: number[] = [];

    for (const rawElement of pageNumbers.split(",")) {
        const element = rawElement.trim();

        if (element.toLocaleLowerCase() === "all") {
            for (let i = 0; i < totalPages; i++) {
                newPageOrder.push(i);
            }
            // Nothing else to do for this element; later elements are still processed.
            continue;
        }

        if (functionPattern.test(element)) {
            // Handle page order as a function "<coefficient>n<constant>".
            // The pattern guarantees an "n", so the split yields at least two parts.
            const parts = element.split("n");

            // A coefficient exists only when there is text before the "n".
            // (This check used to be inverted, so "2n" ignored its coefficient
            // and a bare "n" parsed an empty string into NaN.)
            const coefficientExists = Boolean(parts[0]);
            const coefficient = coefficientExists ? parseInt(parts[0]) : 0;

            const constantExists = parts.length > 1 && Boolean(parts[1]);
            const constant = constantExists ? parseInt(parts[1]) : 0;

            for (let i = 1; i <= totalPages; i++) {
                let pageNum = coefficientExists ? coefficient * i : i;
                pageNum += constantExists ? constant : 0;

                if (pageNum <= totalPages && pageNum > 0) {
                    newPageOrder.push(pageNum - 1);
                }
            }
        } else if (element.includes("-")) {
            // split the range into start and end page
            const range = element.split("-");
            const start = parseInt(range[0]);
            // an end beyond the document is clamped to the last page
            const end = Math.min(parseInt(range[1]), totalPages);

            for (let j = start; j <= end; j++) {
                newPageOrder.push(j - 1);
            }
        } else {
            // if the element is a single page
            newPageOrder.push(parseInt(element) - 1);
        }
    }

    return newPageOrder;
}
/**
 * Returns the page indices that are NOT part of the selection.
 *
 * @param selection - Indices to exclude.
 * @param pageIndecies - All candidate indices; this array is not modified.
 * @returns The candidates missing from the selection, in their original order.
 */
function invertSelection(selection: number[], pageIndecies: number[]): number[] {
    const remaining: number[] = [];
    for (const pageIndex of pageIndecies) {
        if (selection.includes(pageIndex)) {
            continue;
        }
        remaining.push(pageIndex);
    }
    return remaining;
}
//////////////////
// Page Sorters //
//////////////////
/**
 * Page order that reverses the document.
 *
 * @param totalPages - Number of pages in the document.
 * @returns The indices totalPages-1 down to 0.
 */
function reverseSort(totalPages: number): number[] {
    const lastIndex = totalPages - 1;
    return Array(totalPages)
        .fill(0)
        .map((_: number, position: number) => lastIndex - position);
}
/**
 * Page order that alternates between pages taken from the front and from the back:
 * first, last, second, second to last, and so on.
 *
 * @param totalPages - Number of pages in the document.
 * @returns The interleaved order; with an odd page count the middle page comes last.
 */
function duplexSort(totalPages: number): number[] {
    // Translated to JS from the original Java function
    const frontCount = Math.floor((totalPages + 1) / 2); // rounds up, so odd counts keep their middle page
    const backCount = totalPages - frontCount;

    const order: number[] = [];
    for (let front = 0; front < frontCount; front++) {
        order.push(front);
        // Pair with a page from the back while there are back pages left.
        if (front + 1 <= backCount) {
            order.push(totalPages - 1 - front);
        }
    }
    return order;
}
/**
 * Page order that pairs pages from the front and the back of the document:
 * first, last, second, second to last, and so on.
 *
 * @param totalPages - Number of pages in the document.
 * @returns Every page index exactly once; with an odd page count the middle page comes last.
 */
function bookletSort(totalPages: number): number[] {
    const newPageOrder: number[] = [];

    // Use a whole number of pairs. Looping up to the fractional totalPages / 2
    // made odd page counts pair the middle page with itself and emit it twice.
    const pairCount = Math.floor(totalPages / 2);
    for (let i = 0; i < pairCount; i++) {
        newPageOrder.push(i);
        newPageOrder.push(totalPages - i - 1);
    }

    // The unpaired middle page of an odd-sized document is added once.
    if (totalPages % 2 === 1) {
        newPageOrder.push(pairCount);
    }
    return newPageOrder;
}
/**
 * Page order for side-stitch booklets: pages are taken in groups of four and
 * each group is emitted as 4th, 1st, 2nd, 3rd.
 *
 * Positions beyond the end of the document are clamped to the last page, so an
 * incomplete final group repeats the last page.
 *
 * @param totalPages - Number of pages in the document.
 * @returns Four indices per group of four pages (rounded up).
 */
function sideStitchBooklet(totalPages: number): number[] {
    const newPageOrder: number[] = [];

    // Integer division, as in the Java original. With a fractional bound the
    // loop ran one group too many (4 pages produced 8 entries, 0 pages produced -1).
    const groupCount = Math.floor((totalPages + 3) / 4);
    for (let i = 0; i < groupCount; i++) {
        const begin = i * 4;
        newPageOrder.push(Math.min(begin + 3, totalPages - 1));
        newPageOrder.push(Math.min(begin, totalPages - 1));
        newPageOrder.push(Math.min(begin + 1, totalPages - 1));
        newPageOrder.push(Math.min(begin + 2, totalPages - 1));
    }
    return newPageOrder;
}
/**
 * Page order that lists all odd-numbered pages (1st, 3rd, ...) first and the
 * even-numbered pages (2nd, 4th, ...) afterwards.
 *
 * @param totalPages - Number of pages in the document.
 * @returns Zero-based indices: odd-numbered pages, then even-numbered pages.
 */
function oddEvenSplit(totalPages: number): number[] {
    const oddNumbered: number[] = [];
    const evenNumbered: number[] = [];

    // Walk the one-based page numbers once and sort each into its group.
    for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
        const target = pageNumber % 2 === 1 ? oddNumbered : evenNumbered;
        target.push(pageNumber - 1);
    }

    return oddNumbered.concat(evenNumbered);
}
/**
 * Page order that drops the first page.
 *
 * @param totalPages - Number of pages in the document.
 * @returns The indices 1..totalPages-1; empty when there is at most one page.
 */
function removeFirst(totalPages: number): number[] {
    // Clamp the length: Array(-1) throws a RangeError for an empty document.
    const remaining = Math.max(0, totalPages - 1);
    return Array.from({ length: remaining }, (_: unknown, i: number) => i + 1);
}
/**
 * Page order that drops the last page.
 *
 * @param totalPages - Number of pages in the document.
 * @returns The indices 0..totalPages-2; empty when there is at most one page.
 */
function removeLast(totalPages: number): number[] {
    // Clamp the length: Array(-1) throws a RangeError for an empty document.
    const remaining = Math.max(0, totalPages - 1);
    return Array.from({ length: remaining }, (_: unknown, i: number) => i);
}
/**
 * Page order that drops both the first and the last page.
 *
 * @param totalPages - Number of pages in the document.
 * @returns The indices 1..totalPages-2; empty when there are at most two pages.
 */
function removeFirstAndLast(totalPages: number): number[] {
    // Clamp the length: a one-page document would otherwise ask for Array(-1),
    // which throws a RangeError.
    const remaining = Math.max(0, totalPages - 2);
    return Array.from({ length: remaining }, (_: unknown, i: number) => i + 1);
}
/** Signature shared by all page sorters: receives the page count and returns an array of page indices. */
export type SortFunction = (totalPages: number) => number[];
/** Shape of the preset table: maps a preset name to its sorter. */
type Sorts = {
[key: string]: SortFunction;
};
/**
 * Frozen table of the available page sort presets, keyed by preset name.
 * Entries are looked up by name at runtime, so the keys are part of the public contract.
 */
export const sorts: Sorts = Object.freeze({
"REVERSE_ORDER": reverseSort,
"DUPLEX_SORT": duplexSort,
"BOOKLET_SORT": bookletSort,
"SIDE_STITCH_BOOKLET_SORT": sideStitchBooklet,
"ODD_EVEN_SPLIT": oddEvenSplit,
"REMOVE_FIRST": removeFirst,
"REMOVE_LAST": removeLast,
"REMOVE_FIRST_AND_LAST": removeFirstAndLast,
});

View File

@@ -0,0 +1,55 @@
import { PdfFile, fromPdfLib } from '../wrappers/PdfFile';
/**
 * Metadata changes to apply to a document. Every field is optional.
 */
export type Metadata = {
deleteAll?: boolean, // Reset all metadata fields handled here before applying the other values
author?: string, // The author of the document
creationDate?: Date, // The creation date of the document, as a Date object
creator?: string, // The creator of the document
keywords?: string, // The keywords for the document, as one comma separated string
modificationDate?: Date, // The modification date of the document, as a Date object
producer?: string, // The producer of the document
subject?: string, // The subject of the document
title?: string, // The title of the document
// Not supported yet:
//trapped?: string, // The trapped status of the document
//allRequestParams?: {[key: string]: [key: string]}, // Map list of key and value of custom parameters. Note these must start with customKey and customValue if they are non-standard
}
/**
 * Updates the metadata of a PDF.
 *
 * When `metadata` is null or `deleteAll` is set, all handled fields are first
 * reset: strings to "", keywords to an empty list, and both dates to the Unix epoch.
 * Afterwards every truthy field of `metadata` is written to the document.
 *
 * @param file - The PDF to modify.
 * @param metadata - The values to set, or null to only reset the metadata.
 * @returns The modified document, keeping the input's filename.
 */
export async function updateMetadata(file: PdfFile, metadata: Metadata|null): Promise<PdfFile> {
    const pdfDoc = await file.getAsPdfLib();

    const resetRequested = !metadata || metadata.deleteAll;
    if (resetRequested) {
        pdfDoc.setAuthor("");
        pdfDoc.setCreationDate(new Date(0));
        pdfDoc.setCreator("");
        pdfDoc.setKeywords([]);
        pdfDoc.setModificationDate(new Date(0));
        pdfDoc.setProducer("");
        pdfDoc.setSubject("");
        pdfDoc.setTitle("");
    }

    // Nothing to apply when no metadata object was given.
    if (!metadata) {
        return fromPdfLib(pdfDoc, file.filename);
    }

    const { author, creationDate, creator, keywords, modificationDate, producer, subject, title } = metadata;

    if (author) { pdfDoc.setAuthor(author); }
    if (creationDate) { pdfDoc.setCreationDate(creationDate); }
    if (creator) { pdfDoc.setCreator(creator); }
    // Keywords arrive as one comma separated string.
    if (keywords) { pdfDoc.setKeywords(keywords.split(",")); }
    if (modificationDate) { pdfDoc.setModificationDate(modificationDate); }
    if (producer) { pdfDoc.setProducer(producer); }
    if (subject) { pdfDoc.setSubject(subject); }
    if (title) { pdfDoc.setTitle(title); }

    // TODO add trapped and custom metadata. May need another library

    return fromPdfLib(pdfDoc, file.filename);
}