Add: Convert PDF to WebP (#1666)

* Add PDF to WebP

* add swagger param

* back

* creates a custom image for Docker from pymupdf

* Converting with pdf2image and Pillow instead of pymupdf

* webp remove to pdf-to-img

* remove mupdf
This commit is contained in:
Ludy
2024-08-20 17:17:54 +02:00
committed by GitHub
parent 4a4c7faf47
commit 58618b3a21
12 changed files with 296 additions and 28 deletions

View File

@@ -166,6 +166,7 @@ public class EndpointConfiguration {
addEndpointToGroup("Python", REMOVE_BLANKS);
addEndpointToGroup("Python", "html-to-pdf");
addEndpointToGroup("Python", "url-to-pdf");
addEndpointToGroup("Python", "pdf-to-img");
// openCV
addEndpointToGroup("OpenCV", "extract-image-scans");

View File

@@ -1,11 +1,23 @@
package stirling.software.SPDF.controller.api.converters;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.rendering.ImageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
@@ -21,6 +33,8 @@ import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.converters.ConvertToImageRequest;
import stirling.software.SPDF.model.api.converters.ConvertToPdfRequest;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@@ -60,15 +74,92 @@ public class ConvertImgPDFController {
result =
PdfUtils.convertFromPdf(
pdfBytes,
imageFormat.toUpperCase(),
imageFormat.equalsIgnoreCase("webp") ? "png" : imageFormat.toUpperCase(),
colorTypeResult,
singleImage,
Integer.valueOf(dpi),
filename);
if (result == null || result.length == 0) {
logger.error("resultant bytes for {} is null, error converting ", filename);
}
if (imageFormat.equalsIgnoreCase("webp")) {
// Write the output stream to a temp file
Path tempFile = Files.createTempFile("temp_png", ".png");
try (FileOutputStream fos = new FileOutputStream(tempFile.toFile())) {
fos.write(result);
fos.flush();
}
String pythonVersion = "python3";
try {
ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV)
.runCommandWithOutputHandling(Arrays.asList("python3", "--version"));
} catch (IOException e) {
ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV)
.runCommandWithOutputHandling(Arrays.asList("python", "--version"));
pythonVersion = "python";
}
List<String> command = new ArrayList<>();
command.add(pythonVersion);
command.add("./scripts/png_to_webp.py"); // Python script to handle the conversion
// Create a temporary directory for the output WebP files
Path tempOutputDir = Files.createTempDirectory("webp_output");
if (singleImage) {
// Run the Python script to convert PNG to WebP
command.add(tempFile.toString());
command.add(tempOutputDir.toString());
command.add("--single");
} else {
// Save the uploaded PDF to a temporary file
Path tempPdfPath = Files.createTempFile("temp_pdf", ".pdf");
file.transferTo(tempPdfPath.toFile());
// Run the Python script to convert PDF to WebP
command.add(tempPdfPath.toString());
command.add(tempOutputDir.toString());
}
command.add("--dpi");
command.add(dpi);
ProcessExecutorResult resultProcess =
ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV)
.runCommandWithOutputHandling(command);
// Find all WebP files in the output directory
List<Path> webpFiles =
Files.walk(tempOutputDir)
.filter(path -> path.toString().endsWith(".webp"))
.collect(Collectors.toList());
if (webpFiles.isEmpty()) {
logger.error("No WebP files were created in: {}", tempOutputDir.toString());
throw new IOException("No WebP files were created. " + resultProcess.getMessages());
}
byte[] bodyBytes = new byte[0];
if (webpFiles.size() == 1) {
// Return the single WebP file directly
Path webpFilePath = webpFiles.get(0);
bodyBytes = Files.readAllBytes(webpFilePath);
} else {
// Create a ZIP file containing all WebP images
ByteArrayOutputStream zipOutputStream = new ByteArrayOutputStream();
try (ZipOutputStream zos = new ZipOutputStream(zipOutputStream)) {
for (Path webpFile : webpFiles) {
zos.putNextEntry(new ZipEntry(webpFile.getFileName().toString()));
Files.copy(webpFile, zos);
zos.closeEntry();
}
}
bodyBytes = zipOutputStream.toByteArray();
}
// Clean up the temporary files
Files.deleteIfExists(tempFile);
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
result = bodyBytes;
}
if (singleImage) {
String docName = filename + "." + imageFormat;
MediaType mediaType = MediaType.parseMediaType(getMediaType(imageFormat));

View File

@@ -21,14 +21,6 @@ public class ConverterWebController {
return "convert/book-to-pdf";
}
@ConditionalOnExpression("#{bookAndHtmlFormatsInstalled}")
@GetMapping("/pdf-to-book")
@Hidden
public String convertPdfToBookForm(Model model) {
model.addAttribute("currentPage", "pdf-to-book");
return "convert/pdf-to-book";
}
@GetMapping("/img-to-pdf")
@Hidden
public String convertImgToPdfForm(Model model) {
@@ -57,13 +49,6 @@ public class ConverterWebController {
return "convert/url-to-pdf";
}
@GetMapping("/pdf-to-img")
@Hidden
public String pdfToimgForm(Model model) {
model.addAttribute("currentPage", "pdf-to-img");
return "convert/pdf-to-img";
}
@GetMapping("/file-to-pdf")
@Hidden
public String convertToPdfForm(Model model) {
@@ -73,6 +58,21 @@ public class ConverterWebController {
// PDF TO......
@ConditionalOnExpression("#{bookAndHtmlFormatsInstalled}")
@GetMapping("/pdf-to-book")
@Hidden
public String convertPdfToBookForm(Model model) {
model.addAttribute("currentPage", "pdf-to-book");
return "convert/pdf-to-book";
}
@GetMapping("/pdf-to-img")
@Hidden
public String pdfToimgForm(Model model) {
model.addAttribute("currentPage", "pdf-to-img");
return "convert/pdf-to-img";
}
@GetMapping("/pdf-to-html")
@Hidden
public ModelAndView pdfToHTML() {

View File

@@ -12,7 +12,7 @@ public class ConvertToImageRequest extends PDFFile {
@Schema(
description = "The output image format",
allowableValues = {"png", "jpeg", "jpg", "gif"})
allowableValues = {"png", "jpeg", "jpg", "gif", "webp"})
private String imageFormat;
@Schema(

View File

@@ -1147,4 +1147,4 @@ error.discordSubmit=Discord - Submit Support post
removeImage.title=Remove image
removeImage.header=Remove image
removeImage.removeImage=Remove image
removeImage.submit=Remove image
removeImage.submit=Remove image

View File

@@ -28,6 +28,7 @@
<option value="gif">GIF</option>
<option value="tiff">TIFF</option>
<option value="bmp">BMP</option>
<option value="webp">WEPB</option>
</select>
</div>
<div class="mb-3">

View File

@@ -335,7 +335,7 @@
</div>
</div>
</li>
<li class="nav-item dropdown">
<a class="nav-link" href="#" id="searchDropdown" role="button" data-bs-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
<span class="material-symbols-rounded">