# Description of Changes This pull request includes several changes primarily focused on improving configuration management, removing deprecated methods, and updating paths for external dependencies. The most important changes are summarized below: ### Configuration Management Improvements: * Added a new `RuntimePathConfig` class to manage dynamic paths for operations and pipeline configurations (`src/main/java/stirling/software/SPDF/config/RuntimePathConfig.java`). * Removed the `bookAndHtmlFormatsInstalled` bean and its associated logic from `AppConfig` and `EndpointConfiguration` (`src/main/java/stirling/software/SPDF/config/AppConfig.java`, `src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java`). [[1]](diffhunk://#diff-4d774ec79aa55750c0a4739bee971b68877078b73654e863fd40ee924347e143L130-L138) [[2]](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dL12-L35) [[3]](diffhunk://#diff-750f31f6ecbd64b025567108a33775cad339e835a04360affff82a09410b697dL275-L280) ### External Dependency Path Updates: * Updated paths for `weasyprint` and `unoconvert` in `ExternalAppDepConfig` to use values from `RuntimePathConfig` (`src/main/java/stirling/software/SPDF/config/ExternalAppDepConfig.java`). [[1]](diffhunk://#diff-c47af298c07c2622aa98b038b78822c56bdb002de71081e102d344794e7832a6R12-L33) [[2]](diffhunk://#diff-c47af298c07c2622aa98b038b78822c56bdb002de71081e102d344794e7832a6L104-R115) ### Minor Adjustments: * Corrected a typo from "Unoconv" to "Unoconvert" in `EndpointConfiguration` (`src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java`). --- ## Checklist ### General - [ ] I have read the [Contribution Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md) - [ ] I have read the [Stirling-PDF Developer Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md) (if applicable) - [ ] I have read the [How to add new languages to Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md) (if applicable) - [ ] I have performed a self-review of my own code - [ ] My changes generate no new warnings ### Documentation - [ ] I have updated relevant docs on [Stirling-PDF's doc repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/) (if functionality has heavily changed) - [ ] I have read the section [Add New Translation Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags) (for new translation tags only) ### UI Changes (if applicable) - [ ] Screenshots or videos demonstrating the UI changes are attached (e.g., as comments or direct attachments in the PR) ### Testing (if applicable) - [ ] I have tested my changes locally. Refer to the [Testing Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing) for more details.
306 lines
14 KiB
Java
306 lines
14 KiB
Java
package stirling.software.SPDF.utils;
|
|
|
|
import java.io.*;
|
|
import java.nio.charset.StandardCharsets;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.concurrent.ConcurrentHashMap;
|
|
import java.util.concurrent.Semaphore;
|
|
import java.util.concurrent.TimeUnit;
|
|
|
|
import io.github.pixee.security.BoundedLineReader;
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
import stirling.software.SPDF.model.ApplicationProperties;
|
|
|
|
@Slf4j
|
|
public class ProcessExecutor {
|
|
|
|
private static final Map<Processes, ProcessExecutor> instances = new ConcurrentHashMap<>();
|
|
private static ApplicationProperties applicationProperties = new ApplicationProperties();
|
|
private final Semaphore semaphore;
|
|
private final boolean liveUpdates;
|
|
private long timeoutDuration;
|
|
|
|
private ProcessExecutor(int semaphoreLimit, boolean liveUpdates, long timeout) {
|
|
this.semaphore = new Semaphore(semaphoreLimit);
|
|
this.liveUpdates = liveUpdates;
|
|
this.timeoutDuration = timeout;
|
|
}
|
|
|
|
public static ProcessExecutor getInstance(Processes processType) {
|
|
return getInstance(processType, true);
|
|
}
|
|
|
|
public static ProcessExecutor getInstance(Processes processType, boolean liveUpdates) {
|
|
return instances.computeIfAbsent(
|
|
processType,
|
|
key -> {
|
|
int semaphoreLimit =
|
|
switch (key) {
|
|
case LIBRE_OFFICE ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getLibreOfficeSessionLimit();
|
|
case PDFTOHTML ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getPdfToHtmlSessionLimit();
|
|
case PYTHON_OPENCV ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getPythonOpenCvSessionLimit();
|
|
case WEASYPRINT ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getWeasyPrintSessionLimit();
|
|
case INSTALL_APP ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getInstallAppSessionLimit();
|
|
case TESSERACT ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getTesseractSessionLimit();
|
|
case QPDF ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getQpdfSessionLimit();
|
|
case CALIBRE ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getSessionLimit()
|
|
.getCalibreSessionLimit();
|
|
};
|
|
|
|
long timeoutMinutes =
|
|
switch (key) {
|
|
case LIBRE_OFFICE ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getLibreOfficeTimeoutMinutes();
|
|
case PDFTOHTML ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getPdfToHtmlTimeoutMinutes();
|
|
case PYTHON_OPENCV ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getPythonOpenCvTimeoutMinutes();
|
|
case WEASYPRINT ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getWeasyPrintTimeoutMinutes();
|
|
case INSTALL_APP ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getInstallAppTimeoutMinutes();
|
|
case TESSERACT ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getTesseractTimeoutMinutes();
|
|
case QPDF ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getQpdfTimeoutMinutes();
|
|
case CALIBRE ->
|
|
applicationProperties
|
|
.getProcessExecutor()
|
|
.getTimeoutMinutes()
|
|
.getCalibreTimeoutMinutes();
|
|
};
|
|
return new ProcessExecutor(semaphoreLimit, liveUpdates, timeoutMinutes);
|
|
});
|
|
}
|
|
|
|
public ProcessExecutorResult runCommandWithOutputHandling(List<String> command)
|
|
throws IOException, InterruptedException {
|
|
return runCommandWithOutputHandling(command, null);
|
|
}
|
|
|
|
public ProcessExecutorResult runCommandWithOutputHandling(
|
|
List<String> command, File workingDirectory) throws IOException, InterruptedException {
|
|
String messages = "";
|
|
int exitCode = 1;
|
|
semaphore.acquire();
|
|
try {
|
|
|
|
log.info("Running command: " + String.join(" ", command));
|
|
ProcessBuilder processBuilder = new ProcessBuilder(command);
|
|
|
|
// Use the working directory if it's set
|
|
if (workingDirectory != null) {
|
|
processBuilder.directory(workingDirectory);
|
|
}
|
|
Process process = processBuilder.start();
|
|
|
|
// Read the error stream and standard output stream concurrently
|
|
List<String> errorLines = new ArrayList<>();
|
|
List<String> outputLines = new ArrayList<>();
|
|
|
|
Thread errorReaderThread =
|
|
new Thread(
|
|
() -> {
|
|
try (BufferedReader errorReader =
|
|
new BufferedReader(
|
|
new InputStreamReader(
|
|
process.getErrorStream(),
|
|
StandardCharsets.UTF_8))) {
|
|
String line;
|
|
while ((line =
|
|
BoundedLineReader.readLine(
|
|
errorReader, 5_000_000))
|
|
!= null) {
|
|
errorLines.add(line);
|
|
if (liveUpdates) log.info(line);
|
|
}
|
|
} catch (InterruptedIOException e) {
|
|
log.warn("Error reader thread was interrupted due to timeout.");
|
|
} catch (IOException e) {
|
|
log.error("exception", e);
|
|
}
|
|
});
|
|
|
|
Thread outputReaderThread =
|
|
new Thread(
|
|
() -> {
|
|
try (BufferedReader outputReader =
|
|
new BufferedReader(
|
|
new InputStreamReader(
|
|
process.getInputStream(),
|
|
StandardCharsets.UTF_8))) {
|
|
String line;
|
|
while ((line =
|
|
BoundedLineReader.readLine(
|
|
outputReader, 5_000_000))
|
|
!= null) {
|
|
outputLines.add(line);
|
|
if (liveUpdates) log.info(line);
|
|
}
|
|
} catch (InterruptedIOException e) {
|
|
log.warn("Error reader thread was interrupted due to timeout.");
|
|
} catch (IOException e) {
|
|
log.error("exception", e);
|
|
}
|
|
});
|
|
|
|
errorReaderThread.start();
|
|
outputReaderThread.start();
|
|
|
|
// Wait for the conversion process to complete
|
|
boolean finished = process.waitFor(timeoutDuration, TimeUnit.MINUTES);
|
|
|
|
if (!finished) {
|
|
// Terminate the process
|
|
process.destroy();
|
|
// Interrupt the reader threads
|
|
errorReaderThread.interrupt();
|
|
outputReaderThread.interrupt();
|
|
throw new IOException("Process timeout exceeded.");
|
|
}
|
|
exitCode = process.exitValue();
|
|
// Wait for the reader threads to finish
|
|
errorReaderThread.join();
|
|
outputReaderThread.join();
|
|
|
|
boolean isQpdf =
|
|
command != null && !command.isEmpty() && command.get(0).contains("qpdf");
|
|
|
|
if (outputLines.size() > 0) {
|
|
String outputMessage = String.join("\n", outputLines);
|
|
messages += outputMessage;
|
|
if (!liveUpdates) {
|
|
log.info("Command output:\n" + outputMessage);
|
|
}
|
|
}
|
|
|
|
if (errorLines.size() > 0) {
|
|
String errorMessage = String.join("\n", errorLines);
|
|
messages += errorMessage;
|
|
if (!liveUpdates) {
|
|
log.warn("Command error output:\n" + errorMessage);
|
|
}
|
|
if (exitCode != 0) {
|
|
if (isQpdf && exitCode == 3) {
|
|
log.warn("qpdf succeeded with warnings: {}", messages);
|
|
} else {
|
|
throw new IOException(
|
|
"Command process failed with exit code "
|
|
+ exitCode
|
|
+ ". Error message: "
|
|
+ errorMessage);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (exitCode != 0) {
|
|
if (isQpdf && exitCode == 3) {
|
|
log.warn("qpdf succeeded with warnings: {}", messages);
|
|
} else {
|
|
throw new IOException(
|
|
"Command process failed with exit code "
|
|
+ exitCode
|
|
+ "\nLogs: "
|
|
+ messages);
|
|
}
|
|
}
|
|
} finally {
|
|
semaphore.release();
|
|
}
|
|
return new ProcessExecutorResult(exitCode, messages);
|
|
}
|
|
|
|
public enum Processes {
|
|
LIBRE_OFFICE,
|
|
PDFTOHTML,
|
|
PYTHON_OPENCV,
|
|
WEASYPRINT,
|
|
INSTALL_APP,
|
|
CALIBRE,
|
|
TESSERACT,
|
|
QPDF
|
|
}
|
|
|
|
public class ProcessExecutorResult {
|
|
int rc;
|
|
String messages;
|
|
|
|
public ProcessExecutorResult(int rc, String messages) {
|
|
this.rc = rc;
|
|
this.messages = messages;
|
|
}
|
|
|
|
public int getRc() {
|
|
return rc;
|
|
}
|
|
|
|
public void setRc(int rc) {
|
|
this.rc = rc;
|
|
}
|
|
|
|
public String getMessages() {
|
|
return messages;
|
|
}
|
|
|
|
public void setMessages(String messages) {
|
|
this.messages = messages;
|
|
}
|
|
}
|
|
}
|