removeBlankPages. backend only
This commit is contained in:
@@ -11,6 +11,7 @@
|
||||
"dependencies": {
|
||||
"@stirling-tools/joi": "github:Stirling-Tools/joi",
|
||||
"buffer": "^6.0.3",
|
||||
"canvas": "^2.11.2",
|
||||
"i18next-resources-to-backend": "^1.2.0",
|
||||
"image-js": "^0.35.5",
|
||||
"memfs": "^4.9.2",
|
||||
|
||||
10
shared-operations/public/locales/removeBlankPages/en.json
Normal file
10
shared-operations/public/locales/removeBlankPages/en.json
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"friendlyName": "Remove Blank Pages",
|
||||
"description": "Remove pages without content.",
|
||||
"values": {
|
||||
"whiteThreashold": {
|
||||
"friendlyName": "Image White Threshold",
|
||||
"description": "0-255 the tolerance for near white images. 0 - Almost white is detected as empty. 255 - No images are detected as empty."
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,6 +3,8 @@ import { PDFPageProxy } from "pdfjs-dist/types/src/display/api";
|
||||
|
||||
import * as PDFJS from "pdfjs-dist";
|
||||
|
||||
import { createCanvas } from "canvas";
|
||||
|
||||
export interface PDFJSImage {
|
||||
width: number;
|
||||
height: number;
|
||||
@@ -16,7 +18,9 @@ export async function getImagesOnPage(page: PDFPageProxy): Promise<PDFJSImage[]>
|
||||
const images: PDFJSImage[] = [];
|
||||
for (let j=0; j < ops.fnArray.length; j++) {
|
||||
if (ops.fnArray[j] == PDFJS.OPS.paintImageXObject) {
|
||||
const image = page.objs.get(ops.argsArray[j][0]) as PDFJSImage;
|
||||
const image: PDFJSImage = page.objs.get(ops.argsArray[j][0])
|
||||
|
||||
console.log("Image: ", image);
|
||||
images.push(image);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,23 +1,58 @@
|
||||
import { Operator, Progress, oneToOne } from ".";
|
||||
|
||||
import Joi from "@stirling-tools/joi";
|
||||
import { JoiPDFFileSchema } from "../wrappers/PdfFileJoi";
|
||||
|
||||
import i18next from "i18next";
|
||||
|
||||
import { PdfFile } from "../wrappers/PdfFile";
|
||||
import { detectEmptyPages } from "./common/detectEmptyPages";
|
||||
import { getPages } from "./common/getPagesByIndex";
|
||||
import { invertSelection } from "./common/pageIndexesUtils";
|
||||
|
||||
export interface RemoveBlankPagesParamsType {
|
||||
file: PdfFile;
|
||||
whiteThreashold: number;
|
||||
/**
 * Operator that drops the pages reported as empty by `detectEmptyPages`
 * from every input PDF and returns the remaining pages as a new file.
 */
export class RemoveBlankPages extends Operator {
    static type = "removeBlankPages";

    /**
     * Validation & Localisation
     */

    protected static inputSchema = JoiPDFFileSchema.label(i18next.t("inputs.pdffile.name")).description(i18next.t("inputs.pdffile.description"));
    protected static valueSchema = Joi.object({
        // Accepted range is 0-255 inclusive; the value is passed to detectEmptyPages unchanged.
        whiteThreashold: Joi.number().min(0).max(255).required()
            .label(i18next.t("values.whiteThreashold.friendlyName", { ns: "removeBlankPages" })).description(i18next.t("values.whiteThreashold.description", { ns: "removeBlankPages" }))
            .example("10").example("0").example("255")
    });
    protected static outputSchema = JoiPDFFileSchema.label(i18next.t("outputs.pdffile.name")).description(i18next.t("outputs.pdffile.description"));

    static schema = Joi.object({
        input: RemoveBlankPages.inputSchema,
        values: RemoveBlankPages.valueSchema.required(),
        output: RemoveBlankPages.outputSchema
    }).label(i18next.t("friendlyName", { ns: "removeBlankPages" })).description(i18next.t("description", { ns: "removeBlankPages" }));

    /**
     * Logic
     */

    /**
     * Detect and remove white pages.
     *
     * @param input PDF files to process; each one is handled independently via `oneToOne`.
     * @param progressCallback Receives the progress of the current file
     *   (0, 0.3, 0.6, then 1) together with `index/max` as the overall progress.
     * @returns One file per input, built by `getPages` from the page indexes that
     *   were not reported as empty, with "_removedBlanks" appended to its filename.
     */
    async run(input: PdfFile[], progressCallback: (state: Progress) => void): Promise<PdfFile[]> {
        return oneToOne<PdfFile, PdfFile>(input, async (input, index, max) => {
            const pdfDoc = await input.pdfLibDocument;
            const pageCount = pdfDoc.getPageCount();

            progressCallback({ curFileProgress: 0, operationProgress: index/max });
            const emptyPages = await detectEmptyPages(input, this.actionValues.whiteThreashold);
            // Progress must only ever move forward: 0 -> 0.3 -> 0.6 -> 1.
            progressCallback({ curFileProgress: 0.3, operationProgress: index/max });
            const pagesToKeep = invertSelection(emptyPages, pageCount);
            progressCallback({ curFileProgress: 0.6, operationProgress: index/max });

            const result = await getPages(input, pagesToKeep);
            progressCallback({ curFileProgress: 1, operationProgress: index/max });

            result.filename += "_removedBlanks";
            return result;
        });
    }
}
|
||||
/**
 * Removes the pages that `detectEmptyPages` reports as empty.
 *
 * @param params `file` is the PDF to process; `whiteThreashold` is forwarded
 *   to `detectEmptyPages` as-is.
 * @returns The file produced by `getPages` for the page indexes that were not
 *   reported as empty, with "_removedBlanks" appended to its filename.
 */
export async function removeBlankPages(params: RemoveBlankPagesParamsType) {
    const sourceFile = params.file;
    const threshold = params.whiteThreashold;

    // The document is resolved before detection runs, so a failure to load it surfaces first.
    const totalPages = (await sourceFile.pdfLibDocument).getPageCount();

    const blankIndexes = await detectEmptyPages(sourceFile, threshold);
    console.debug("Empty Pages: ", blankIndexes);

    const keptIndexes = invertSelection(blankIndexes, totalPages);
    const trimmedFile = await getPages(sourceFile, keptIndexes);
    trimmedFile.filename += "_removedBlanks";
    return trimmedFile;
}
|
||||
@@ -6,6 +6,9 @@
|
||||
"baseUrl": "./src", /* Specify the base directory to resolve non-relative module names. */
|
||||
"paths": {
|
||||
"#pdfcpu": ["../../shared-operations/src/wasm/pdfcpu/pdfcpu-wrapper.server"],
|
||||
}
|
||||
},
|
||||
"types": [
|
||||
"vite/client"
|
||||
],
|
||||
} /* Specify a set of entries that re-map imports to additional lookup locations. */
|
||||
}
|
||||
Reference in New Issue
Block a user