removeBlankPages. backend only

This commit is contained in:
Felix Kaspar
2024-05-13 20:46:04 +02:00
parent f587797ddb
commit e72f3d5525
6 changed files with 117 additions and 96 deletions

View File

@@ -11,6 +11,7 @@
"dependencies": {
"@stirling-tools/joi": "github:Stirling-Tools/joi",
"buffer": "^6.0.3",
"canvas": "^2.11.2",
"i18next-resources-to-backend": "^1.2.0",
"image-js": "^0.35.5",
"memfs": "^4.9.2",

View File

@@ -0,0 +1,10 @@
{
"friendlyName": "Remove Blank Pages",
"description": "Remove pages without content.",
"values": {
"whiteThreashold": {
"friendlyName": "Image White Threshold",
"description": "0-255 the tolerance for near white images. 0 - Almost white is detected as empty. 255 - No images are detected as empty."
}
}
}

View File

@@ -3,6 +3,8 @@ import { PDFPageProxy } from "pdfjs-dist/types/src/display/api";
import * as PDFJS from "pdfjs-dist";
import { createCanvas } from "canvas";
export interface PDFJSImage {
width: number;
height: number;
@@ -16,7 +18,9 @@ export async function getImagesOnPage(page: PDFPageProxy): Promise<PDFJSImage[]>
const images: PDFJSImage[] = [];
for (let j=0; j < ops.fnArray.length; j++) {
if (ops.fnArray[j] == PDFJS.OPS.paintImageXObject) {
const image = page.objs.get(ops.argsArray[j][0]) as PDFJSImage;
const image: PDFJSImage = page.objs.get(ops.argsArray[j][0])
console.log("Image: ", image);
images.push(image);
}
}

View File

@@ -1,23 +1,58 @@
import { Operator, Progress, oneToOne } from ".";
import Joi from "@stirling-tools/joi";
import { JoiPDFFileSchema } from "../wrappers/PdfFileJoi";
import i18next from "i18next";
import { PdfFile } from "../wrappers/PdfFile";
import { detectEmptyPages } from "./common/detectEmptyPages";
import { getPages } from "./common/getPagesByIndex";
import { invertSelection } from "./common/pageIndexesUtils";
export interface RemoveBlankPagesParamsType {
file: PdfFile;
whiteThreashold: number;
/**
 * Operator that removes blank pages from each input PDF.
 *
 * Blank pages are located with `detectEmptyPages`, driven by the
 * `whiteThreashold` action value (validated below as a number in 0-255).
 * Every other page is carried over into the output through `getPages`.
 */
export class RemoveBlankPages extends Operator {
    static type = "removeBlankPages";

    /**
     * Validation & Localisation
     */

    protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));
    protected static valueSchema = Joi.object({
        // The misspelled key is part of the public value contract and the locale file; keep it as-is.
        whiteThreashold: Joi.number().min(0).max(255).required()
            .label(i18next.t("values.whiteThreashold.friendlyName", { ns: "removeBlankPages" })).description(i18next.t("values.whiteThreashold.description", { ns: "removeBlankPages" }))
            .example("10").example("0").example("255")
    });
    protected static outputSchema = JoiPDFFileSchema.label(i18next.t("outputs.pdffile.name")).description(i18next.t("outputs.pdffile.description"));

    static schema = Joi.object({
        input: RemoveBlankPages.inputSchema,
        values: RemoveBlankPages.valueSchema.required(),
        output: RemoveBlankPages.outputSchema
    }).label(i18next.t("friendlyName", { ns: "removeBlankPages" })).description(i18next.t("description", { ns: "removeBlankPages" }));

    /**
     * Logic
     */

    /**
     * Detect and remove white pages.
     *
     * @param input - PDF files to process; each one yields exactly one output file.
     * @param progressCallback - Receives the per-file progress (`curFileProgress`)
     *   and the overall progress (`operationProgress`) after each step.
     * @returns One PDF per input, with "_removedBlanks" appended to its filename.
     */
    async run(input: PdfFile[], progressCallback: (state: Progress) => void): Promise<PdfFile[]> {
        return oneToOne<PdfFile, PdfFile>(input, async (pdf, index, max) => {
            const operationProgress = index / max;

            const pdfDoc = await pdf.pdfLibDocument;
            const pageCount = pdfDoc.getPageCount();
            progressCallback({ curFileProgress: 0, operationProgress });

            const emptyPages = await detectEmptyPages(pdf, this.actionValues.whiteThreashold);
            // Per-file progress must only ever increase: 0 -> 0.3 -> 0.6 -> 1.
            progressCallback({ curFileProgress: 0.3, operationProgress });

            const pagesToKeep = invertSelection(emptyPages, pageCount);
            progressCallback({ curFileProgress: 0.6, operationProgress });

            const result = await getPages(pdf, pagesToKeep);
            progressCallback({ curFileProgress: 1, operationProgress });

            result.filename += "_removedBlanks";
            return result;
        });
    }
}
/**
 * Build a version of a PDF that no longer contains its blank pages.
 *
 * @param params - `file` is the source PDF; `whiteThreashold` is passed
 *   straight through to `detectEmptyPages`.
 * @returns The file produced by `getPages` for the pages that were not
 *   reported as empty, with "_removedBlanks" appended to its filename.
 */
export async function removeBlankPages(params: RemoveBlankPagesParamsType) {
    const source = params.file;
    const threshold = params.whiteThreashold;

    const totalPages = (await source.pdfLibDocument).getPageCount();

    const blankIndexes = await detectEmptyPages(source, threshold);
    console.debug("Empty Pages: ", blankIndexes);

    // Everything that was not flagged as empty is kept.
    const trimmed = await getPages(source, invertSelection(blankIndexes, totalPages));
    trimmed.filename = trimmed.filename + "_removedBlanks";
    return trimmed;
}

View File

@@ -6,6 +6,9 @@
"baseUrl": "./src", /* Specify the base directory to resolve non-relative module names. */
"paths": {
"#pdfcpu": ["../../shared-operations/src/wasm/pdfcpu/pdfcpu-wrapper.server"],
}
},
"types": [
"vite/client"
],
} /* Specify a set of entries that re-map imports to additional lookup locations. */
}