Pdf to image custom page selection (#2576)

# Description

Implemented custom page selection for the pdf-to-image feature, allowing
users to specify which PDF pages to convert to images.

1. Variable Renaming: Changed singleOrMultiple to imageResultType
because it supports three options: single, multiple, and custom.
2. New Field: Added pageNumbers to accept user-defined page selections.
This field appears only when custom is selected in the UI.
3. New Method: Added getPageIndicesToConvert to process and validate the
specified page numbers.
4. Method Update: Updated convertFromPdf to handle custom page numbers,
ensuring only selected pages are converted.
5. Translation Properties: Added two new translation properties,
custom and customPageNumber, to all language files, using the English
text as a placeholder value. These will need to be translated into each
locale's language in the future.

Note: If an invalid page number is provided (zero, negative, or greater
than the page count), a single image containing all PDF pages is generated.

Closes #918 

## Checklist

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have performed a self-review of my own code
- [x] I have attached images of the change if it is UI based
- [x] I have commented my code, particularly in hard-to-understand areas
- [x] If my code has heavily changed functionality I have updated
relevant docs on [Stirling-PDFs doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
- [x] My changes generate no new warnings
- [x] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

![Screenshot 2025-01-02 at 12 31
29 AM](https://github.com/user-attachments/assets/c4ba3f31-5dd6-4a17-991e-51b86c2eb466)
![Screenshot 2025-01-02 at 12 31
49 AM](https://github.com/user-attachments/assets/3e800a95-2088-4f69-8a01-bd03d7b9e471)

---------

Co-authored-by: Sai Kumar J <saikumar@Sais-MacBook-Air.local>
Co-authored-by: Ludy <Ludy87@users.noreply.github.com>
Co-authored-by: saikumar <saikumar.jetti@gmail.com>
Co-authored-by: Anthony Stirling <77850077+Frooodle@users.noreply.github.com>
This commit is contained in:
Sai Kumar
2025-01-04 23:31:13 +05:30
committed by GitHub
parent 5ba98e4411
commit b8303e3860
41 changed files with 106 additions and 8 deletions

View File

@@ -13,6 +13,9 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.ImageType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.http.MediaType;
@@ -31,11 +34,8 @@ import lombok.extern.slf4j.Slf4j;
import stirling.software.SPDF.model.api.converters.ConvertToImageRequest;
import stirling.software.SPDF.model.api.converters.ConvertToPdfRequest;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.CheckProgramInstall;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.*;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/convert")
@@ -62,14 +62,20 @@ public class ConvertImgPDFController {
String singleOrMultiple = request.getSingleOrMultiple();
String colorType = request.getColorType();
String dpi = request.getDpi();
String pageNumbers = request.getPageNumbers();
Path tempFile = null;
Path tempOutputDir = null;
Path tempPdfPath = null;
byte[] result = null;
String[] pageOrderArr =
(pageNumbers != null && !pageNumbers.trim().isEmpty())
? pageNumbers.split(",")
: new String[] {"all"};
;
try {
byte[] pdfBytes = file.getBytes();
// Load the input PDF
byte[] newPdfBytes = rearrangePdfPages(file.getBytes(), pageOrderArr);
ImageType colorTypeResult = ImageType.RGB;
if ("greyscale".equals(colorType)) {
colorTypeResult = ImageType.GRAY;
@@ -84,7 +90,7 @@ public class ConvertImgPDFController {
result =
PdfUtils.convertFromPdf(
pdfBytes,
newPdfBytes,
"webp".equalsIgnoreCase(imageFormat)
? "png"
: imageFormat.toUpperCase(),
@@ -227,4 +233,46 @@ public class ConvertImgPDFController {
String mimeType = URLConnection.guessContentTypeFromName("." + imageFormat);
return "null".equals(mimeType) ? "application/octet-stream" : mimeType;
}
/**
 * Builds a new PDF containing only the requested pages of the input, in the requested order.
 *
 * <p>The page selection is resolved by {@code GeneralUtils.parsePageList}; the indices it
 * returns are passed directly to {@code PDDocument.getPage}. All pages are then removed from
 * the loaded document and the selected pages are appended back in the resolved order before
 * the document is serialized.
 *
 * @param pdfBytes The byte array of the original PDF file.
 * @param pageOrderArr The page selection tokens (the caller passes {@code {"all"}} when no
 *     selection was supplied). The accepted token syntax is defined by {@code
 *     GeneralUtils.parsePageList}.
 * @return A byte array of the rearranged PDF.
 * @throws IOException If an error occurs while loading or saving the PDF.
 */
private byte[] rearrangePdfPages(byte[] pdfBytes, String[] pageOrderArr) throws IOException {
    // try-with-resources guarantees the document is closed even when page parsing or page
    // lookup fails, not only when saving fails.
    try (PDDocument document = Loader.loadPDF(pdfBytes);
            ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
        int totalPages = document.getNumberOfPages();
        List<Integer> newPageOrder =
                GeneralUtils.parsePageList(pageOrderArr, totalPages, false);

        // Capture the selected page objects before the document is emptied below.
        List<PDPage> newPages = new ArrayList<>(newPageOrder.size());
        for (int pageIndex : newPageOrder) {
            newPages.add(document.getPage(pageIndex));
        }

        // Remove all pages, walking backwards so the remaining indices stay valid.
        for (int i = document.getNumberOfPages() - 1; i >= 0; i--) {
            document.removePage(i);
        }

        // Re-add the selected pages in the new order.
        for (PDPage page : newPages) {
            document.addPage(page);
        }

        document.save(baos);
        return baos.toByteArray();
    }
}
}

View File

@@ -21,6 +21,11 @@ public class ConvertToImageRequest extends PDFFile {
allowableValues = {"single", "multiple"})
private String singleOrMultiple;
// Optional page selection; the controller falls back to "all" when this is null or blank.
@Schema(
        description =
                "The pages to select. Supports ranges (e.g., '1,3,5-9'), 'all', or functions in the format 'an+b' where 'a' is the multiplier of the page number 'n', and 'b' is a constant (e.g., '2n+1', '3n', '6n-5')")
private String pageNumbers;
@Schema(
description = "The color type of the output image(s)",
allowableValues = {"color", "greyscale", "blackwhite"})