Convert PDF to Docx, powerpoint and others (#90)
This commit is contained in:
@@ -1,7 +1,5 @@
|
||||
package stirling.software.SPDF.controller;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@@ -11,8 +9,6 @@ import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageTree;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.core.io.InputStreamResource;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
|
||||
@@ -5,12 +5,12 @@ import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
@@ -28,9 +28,6 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
//import com.spire.pdf.*;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.regex.Pattern;
|
||||
@Controller
|
||||
public class OCRController {
|
||||
|
||||
|
||||
@@ -3,18 +3,14 @@ package stirling.software.SPDF.controller;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.net.URI;
|
||||
import java.nio.file.FileSystem;
|
||||
import java.nio.file.FileSystems;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
@@ -31,8 +27,6 @@ import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
@Controller
|
||||
public class SplitPDFController {
|
||||
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.ui.Model;
|
||||
@@ -15,7 +16,7 @@ import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
@Controller
|
||||
@@ -75,4 +76,5 @@ private boolean isValidFileExtension(String fileExtension) {
|
||||
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
|
||||
return fileExtension.matches(extensionPattern);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.stereotype.Controller;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.web.servlet.ModelAndView;
|
||||
|
||||
import stirling.software.SPDF.utils.PDFToFile;
|
||||
|
||||
@Controller
|
||||
public class ConvertPDFToOffice {
|
||||
|
||||
|
||||
|
||||
@GetMapping("/pdf-to-word")
|
||||
public ModelAndView pdfToWord() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
|
||||
modelAndView.addObject("currentPage", "pdf-to-word");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-presentation")
|
||||
public ModelAndView pdfToPresentation() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-presentation");
|
||||
modelAndView.addObject("currentPage", "pdf-to-presentation");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-text")
|
||||
public ModelAndView pdfToText() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-text");
|
||||
modelAndView.addObject("currentPage", "pdf-to-text");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-html")
|
||||
public ModelAndView pdfToHTML() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
|
||||
modelAndView.addObject("currentPage", "pdf-to-html");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
@GetMapping("/pdf-to-xml")
|
||||
public ModelAndView pdfToXML() {
|
||||
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-xml");
|
||||
modelAndView.addObject("currentPage", "pdf-to-xml");
|
||||
return modelAndView;
|
||||
}
|
||||
|
||||
|
||||
@PostMapping("/pdf-to-word")
|
||||
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-presentation")
|
||||
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-text")
|
||||
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile,
|
||||
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
|
||||
}
|
||||
|
||||
|
||||
@PostMapping("/pdf-to-html")
|
||||
public ResponseEntity<byte[]> processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import");
|
||||
}
|
||||
|
||||
@PostMapping("/pdf-to-xml")
|
||||
public ResponseEntity<byte[]> processPdfToXML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
|
||||
PDFToFile pdfToFile = new PDFToFile();
|
||||
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
@@ -1,11 +1,9 @@
|
||||
package stirling.software.SPDF.controller.converters;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.http.HttpHeaders;
|
||||
@@ -18,9 +16,6 @@ import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.itextpdf.xmp.XMPException;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.ProcessExecutor;
|
||||
@Controller
|
||||
public class ConvertPDFToPDFA {
|
||||
|
||||
@@ -11,6 +11,7 @@ import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
|
||||
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup;
|
||||
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
|
||||
@@ -26,7 +27,6 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.WatermarkRemover;
|
||||
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
|
||||
|
||||
@Controller
|
||||
public class WatermarkController {
|
||||
|
||||
101
src/main/java/stirling/software/SPDF/utils/PDFToFile.java
Normal file
101
src/main/java/stirling/software/SPDF/utils/PDFToFile.java
Normal file
@@ -0,0 +1,101 @@
|
||||
package stirling.software.SPDF.utils;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipOutputStream;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
public class PDFToFile {
|
||||
public ResponseEntity<byte[]> processPdfToOfficeFormat(MultipartFile inputFile, String outputFormat, String libreOfficeFilter)
|
||||
throws IOException, InterruptedException {
|
||||
|
||||
if (!"application/pdf".equals(inputFile.getContentType())) {
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
// Get the original PDF file name without the extension
|
||||
String originalPdfFileName = inputFile.getOriginalFilename();
|
||||
String pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
||||
|
||||
// Validate output format
|
||||
List<String> allowedFormats = Arrays.asList("doc", "docx", "odt", "ppt", "pptx", "odp", "rtf", "html","xml","txt:Text");
|
||||
if (!allowedFormats.contains(outputFormat)) {
|
||||
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
Path tempInputFile = null;
|
||||
Path tempOutputDir = null;
|
||||
byte[] fileBytes;
|
||||
// Prepare response
|
||||
HttpHeaders headers = new HttpHeaders();
|
||||
|
||||
try {
|
||||
// Save the uploaded file to a temporary location
|
||||
tempInputFile = Files.createTempFile("input_", ".pdf");
|
||||
Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
// Prepare the output directory
|
||||
tempOutputDir = Files.createTempDirectory("output_");
|
||||
|
||||
// Run the LibreOffice command
|
||||
List<String> command = new ArrayList<>(Arrays.asList(
|
||||
"soffice", "--infilter=" + libreOfficeFilter, "--convert-to", outputFormat, "--outdir", tempOutputDir.toString(), tempInputFile.toString()
|
||||
));
|
||||
int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE).runCommandWithOutputHandling(command);
|
||||
|
||||
// Get output files
|
||||
List<File> outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles());
|
||||
|
||||
if (outputFiles.size() == 1) {
|
||||
// Return single output file
|
||||
File outputFile = outputFiles.get(0);
|
||||
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
|
||||
if(outputFormat.equals("txt:Text")) {
|
||||
outputFormat="txt";
|
||||
}
|
||||
headers.setContentDispositionFormData("attachment", pdfBaseName + "." + outputFormat);
|
||||
fileBytes = FileUtils.readFileToByteArray(outputFile);
|
||||
} else {
|
||||
// Return output files in a ZIP archive
|
||||
headers.setContentType(MediaType.APPLICATION_OCTET_STREAM);
|
||||
headers.setContentDispositionFormData("attachment", pdfBaseName + "To" + outputFormat + ".zip");
|
||||
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
||||
ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream);
|
||||
|
||||
for (File outputFile : outputFiles) {
|
||||
ZipEntry entry = new ZipEntry(outputFile.getName());
|
||||
zipOutputStream.putNextEntry(entry);
|
||||
FileInputStream fis = new FileInputStream(outputFile);
|
||||
IOUtils.copy(fis, zipOutputStream);
|
||||
fis.close();
|
||||
zipOutputStream.closeEntry();
|
||||
}
|
||||
|
||||
zipOutputStream.close();
|
||||
fileBytes = byteArrayOutputStream.toByteArray();
|
||||
}
|
||||
|
||||
} finally {
|
||||
// Clean up the temporary files
|
||||
if (tempInputFile != null)
|
||||
Files.delete(tempInputFile);
|
||||
if (tempOutputDir != null)
|
||||
FileUtils.deleteDirectory(tempOutputDir.toFile());
|
||||
}
|
||||
return new ResponseEntity<>(fileBytes, headers, HttpStatus.OK);
|
||||
}
|
||||
}
|
||||
@@ -1,13 +1,13 @@
|
||||
package stirling.software.SPDF.utils;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.Semaphore;
|
||||
public class ProcessExecutor {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user