Started migrating workflow controller
This commit is contained in:
51
shared-operations/src/functions/common/detectEmptyPages.ts
Normal file
51
shared-operations/src/functions/common/detectEmptyPages.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
|
||||
import { PdfFile } from '../../wrappers/PdfFile';
|
||||
import { PDFPageProxy } from "pdfjs-dist/types/src/display/api.js";
|
||||
import { Image } from 'image-js';
|
||||
|
||||
import { getImagesOnPage } from "./getImagesOnPage.js";
|
||||
|
||||
/**
 * Finds the pages of a PDF that contain neither text nor a non-blank image.
 *
 * Pages are inspected one after another; progress is reported on the console.
 *
 * @param file - PDF wrapper; the document is opened through `getAsPdfJs()`.
 * @param whiteThreashold - Passed through unchanged to the image blankness check.
 * @returns Zero-based indices of the pages judged empty, in ascending order.
 */
export async function detectEmptyPages(file: PdfFile, whiteThreashold: number): Promise<number[]> {
    const pdfDoc = await file.getAsPdfJs();

    const emptyPages: number[] = [];
    // Pages are requested with 1-based numbers, but reported as 0-based indices.
    for (let i = 1; i <= pdfDoc.numPages; i++) {
        const page = await pdfDoc.getPage(i);
        console.log("Checking page " + i);

        if (await hasText(page)) {
            console.log(`Found text on Page ${i}, page is not empty`);
            continue;
        }

        if (!await areImagesBlank(page, whiteThreashold)) {
            console.log(`Found non white image on Page ${i}, page is not empty`);
            continue;
        }

        console.log(`Page ${i} is empty.`);
        emptyPages.push(i - 1);
    }
    return emptyPages;
}

/**
 * Reports whether the page's text content holds at least one item.
 *
 * @param page - Page whose text content is fetched.
 * @returns `true` when one or more text content items exist, `false` otherwise.
 */
async function hasText(page: PDFPageProxy): Promise<boolean> {
    const textContent = await page.getTextContent();
    return textContent.items.length > 0;
}
|
||||
|
||||
/**
 * Checks every image found on the page and reports whether all of them are blank.
 *
 * Images are examined sequentially and the scan stops at the first image
 * that is not blank. A page without images yields `true`.
 *
 * @param page - Page whose images are collected via `getImagesOnPage`.
 * @param threshold - Forwarded to `isImageBlank` for each image.
 * @returns `false` as soon as one non-blank image is found, otherwise `true`.
 */
async function areImagesBlank(page: PDFPageProxy, threshold: number): Promise<boolean> {
    for (const candidate of await getImagesOnPage(page)) {
        const blank = await isImageBlank(candidate, threshold);
        if (!blank) {
            return false;
        }
    }
    return true;
}
|
||||
|
||||
/**
 * Decides whether a single image counts as blank.
 *
 * The image is loaded, converted to grey, and the first entry of its mean
 * is compared against the threshold.
 *
 * NOTE(review): the caller names this value a "white" threshold, yet the
 * comparison accepts images whose mean is at or BELOW it — confirm that
 * this direction is the intended one.
 *
 * @param image - Image source handed to `Image.load`.
 * @param threshold - Upper bound (inclusive) for the grey mean of a blank image.
 * @returns `true` when the grey mean is less than or equal to `threshold`.
 */
async function isImageBlank(image: string | Uint8Array | ArrayBuffer, threshold: number): Promise<boolean> {
    const loaded = await Image.load(image);
    const greyMean = loaded.grey().getMean();
    return greyMean[0] <= threshold;
}
|
||||
15
shared-operations/src/functions/common/getImagesOnPage.ts
Normal file
15
shared-operations/src/functions/common/getImagesOnPage.ts
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
import { PDFPageProxy } from "pdfjs-dist/types/src/display/api.js";
|
||||
import * as PDFJS from 'pdfjs-dist';
|
||||
|
||||
/**
 * Collects the image objects painted on a page.
 *
 * The page's operator list is scanned for `paintImageXObject` operations;
 * for each one, the object named by the operation's first argument is
 * looked up in `page.objs`.
 *
 * @param page - Page whose operator list is inspected.
 * @returns The looked-up image objects, in the order their paint operations occur.
 */
export async function getImagesOnPage(page: PDFPageProxy) {
    const { fnArray, argsArray } = await page.getOperatorList();
    const found: any = [];
    for (const [index, opCode] of fnArray.entries()) {
        if (opCode === PDFJS.OPS.paintImageXObject) {
            found.push(page.objs.get(argsArray[index][0]));
        }
    }
    return found;
}
|
||||
47
shared-operations/src/functions/common/pdf-utils.ts
Normal file
47
shared-operations/src/functions/common/pdf-utils.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
|
||||
import { PdfFile, convertAllToPdfLibFile } from '../../wrappers/PdfFile';
|
||||
|
||||
/**
 * Sorts PDF files by the requested criterion.
 *
 * The files are first converted with `convertAllToPdfLibFile`; the resulting
 * array is sorted in place and returned. With `"orderProvided"` (the default)
 * no sorting takes place.
 *
 * Files that lack the sort key (no filename, no date, no title, or an
 * unreadable date) are placed after all files that have one, so the
 * comparison stays consistent for every pair of files.
 *
 * @param files - Files to sort.
 * @param sortType - Criterion to sort by; defaults to `"orderProvided"`.
 * @returns The converted files in the requested order.
 */
export async function sortPdfs(
    files: PdfFile[],
    sortType: "orderProvided"|"byFileName"|"byDateModified"|"byDateCreated"|"byPDFTitle" = "orderProvided"
): Promise<PdfFile[]> {

    const pdfLibFiles = await convertAllToPdfLibFile(files);

    switch (sortType) {
        case "byFileName":
            pdfLibFiles.sort((a, b) => compareText(a?.filename, b?.filename));
            break;
        case "byDateModified":
            pdfLibFiles.sort((a, b) => compareDates(
                a?.pdfLib?.getModificationDate(),
                b?.pdfLib?.getModificationDate()
            ));
            break;
        case "byDateCreated":
            pdfLibFiles.sort((a, b) => compareDates(
                a?.pdfLib?.getCreationDate(),
                b?.pdfLib?.getCreationDate()
            ));
            break;
        case "byPDFTitle":
            pdfLibFiles.sort((a, b) => compareText(a?.pdfLib?.getTitle(), b?.pdfLib?.getTitle()));
            break;
    }

    return pdfLibFiles;
}

/** Compares two optional keys, ordering `undefined` after every defined key. */
function compareMissingLast<T>(
    a: T | undefined,
    b: T | undefined,
    compare: (x: T, y: T) => number
): number {
    if (a === undefined || b === undefined) {
        // 1 when only `a` is missing, -1 when only `b` is missing, 0 when both are.
        return Number(a === undefined) - Number(b === undefined);
    }
    return compare(a, b);
}

/** Locale-aware comparison of optional strings; empty or absent strings count as missing. */
function compareText(a: string | null | undefined, b: string | null | undefined): number {
    return compareMissingLast(a ? a : undefined, b ? b : undefined, (x, y) => x.localeCompare(y));
}

/** Chronological comparison of optional dates; absent or invalid dates count as missing. */
function compareDates(a: Date | null | undefined, b: Date | null | undefined): number {
    const toTime = (date: Date | null | undefined): number | undefined => {
        const time = date?.getTime();
        return time === undefined || Number.isNaN(time) ? undefined : time;
    };
    // Subtraction yields 0 for equal timestamps, unlike a two-way `>` test.
    return compareMissingLast(toTime(a), toTime(b), (x, y) => x - y);
}
|
||||
Reference in New Issue
Block a user