Convert PDF to Docx, powerpoint and others (#90)

This commit is contained in:
Anthony Stirling
2023-04-16 22:03:30 +01:00
committed by GitHub
parent 0a7517ecdd
commit c311f9a4ed
23 changed files with 633 additions and 47 deletions

View File

@@ -1,7 +1,5 @@
package stirling.software.SPDF.controller;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -11,8 +9,6 @@ import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.InputStreamResource;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;

View File

@@ -5,12 +5,12 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@@ -28,9 +28,6 @@ import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.ProcessExecutor;
//import com.spire.pdf.*;
import java.util.concurrent.Semaphore;
import java.util.regex.Pattern;
@Controller
public class OCRController {

View File

@@ -3,18 +3,14 @@ package stirling.software.SPDF.controller;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
@@ -31,8 +27,6 @@ import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
@Controller
public class SplitPDFController {

View File

@@ -1,13 +1,14 @@
package stirling.software.SPDF.controller.converters;
import java.io.IOException;
import java.nio.file.StandardCopyOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
@@ -15,7 +16,7 @@ import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.apache.commons.io.FilenameUtils;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
@@ -75,4 +76,5 @@ private boolean isValidFileExtension(String fileExtension) {
String extensionPattern = "^(?i)[a-z0-9]{2,4}$";
return fileExtension.matches(extensionPattern);
}
}

View File

@@ -0,0 +1,97 @@
package stirling.software.SPDF.controller.converters;
import java.io.IOException;
import org.springframework.http.ResponseEntity;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import org.springframework.web.servlet.ModelAndView;
import stirling.software.SPDF.utils.PDFToFile;
@Controller
public class ConvertPDFToOffice {
@GetMapping("/pdf-to-word")
public ModelAndView pdfToWord() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word");
modelAndView.addObject("currentPage", "pdf-to-word");
return modelAndView;
}
@GetMapping("/pdf-to-presentation")
public ModelAndView pdfToPresentation() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-presentation");
modelAndView.addObject("currentPage", "pdf-to-presentation");
return modelAndView;
}
@GetMapping("/pdf-to-text")
public ModelAndView pdfToText() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-text");
modelAndView.addObject("currentPage", "pdf-to-text");
return modelAndView;
}
@GetMapping("/pdf-to-html")
public ModelAndView pdfToHTML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html");
modelAndView.addObject("currentPage", "pdf-to-html");
return modelAndView;
}
@GetMapping("/pdf-to-xml")
public ModelAndView pdfToXML() {
ModelAndView modelAndView = new ModelAndView("convert/pdf-to-xml");
modelAndView.addObject("currentPage", "pdf-to-xml");
return modelAndView;
}
@PostMapping("/pdf-to-word")
public ResponseEntity<byte[]> processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-presentation")
public ResponseEntity<byte[]> processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import");
}
@PostMapping("/pdf-to-text")
public ResponseEntity<byte[]> processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile,
@RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import");
}
@PostMapping("/pdf-to-html")
public ResponseEntity<byte[]> processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import");
}
@PostMapping("/pdf-to-xml")
public ResponseEntity<byte[]> processPdfToXML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException {
PDFToFile pdfToFile = new PDFToFile();
return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import");
}
}

View File

@@ -1,11 +1,9 @@
package stirling.software.SPDF.controller.converters;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.springframework.http.HttpHeaders;
@@ -18,9 +16,6 @@ import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.multipart.MultipartFile;
import com.itextpdf.xmp.XMPException;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.ProcessExecutor;
@Controller
public class ConvertPDFToPDFA {

View File

@@ -11,6 +11,7 @@ import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
@@ -26,7 +27,6 @@ import org.springframework.web.multipart.MultipartFile;
import stirling.software.SPDF.utils.PdfUtils;
import stirling.software.SPDF.utils.WatermarkRemover;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
@Controller
public class WatermarkController {