This commit is contained in:
Anthony Stirling
2024-05-27 16:31:00 +01:00
parent b93bff5cad
commit 6ffa80c386
21 changed files with 526 additions and 147 deletions

View File

@@ -1,5 +1,7 @@
@example
Feature: API Validation
@positive @password
Scenario: Remove password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
@@ -12,7 +14,8 @@ Feature: API Validation
And the response file should have size greater than 0
And the response PDF is not passworded
And the response status code should be 200
@negative @password
Scenario: Remove password wrong password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
@@ -24,6 +27,7 @@ Feature: API Validation
Then the response status code should be 500
And the response should contain error message "Internal Server Error"
@positive @info
Scenario: Get info
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
@@ -31,6 +35,7 @@ Feature: API Validation
And the response file should have size greater than 100
And the response status code should be 200
@positive @password
Scenario: Add password
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
@@ -43,6 +48,7 @@ Feature: API Validation
And the response PDF is passworded
And the response status code should be 200
@positive @password
Scenario: Add password with other params
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
@@ -59,7 +65,7 @@ Feature: API Validation
And the response PDF is passworded
And the response status code should be 200
@positive @watermark
Scenario: Add watermark
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages
@@ -76,18 +82,8 @@ Feature: API Validation
Then the response content type should be "application/pdf"
And the response file should have size greater than 100
And the response status code should be 200
Scenario: Repair PDF
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/misc/repair"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@positive
Scenario: Remove blank pages
Given I generate a PDF file as "fileInput"
And the pdf contains 3 blank pages
@@ -100,80 +96,8 @@ Feature: API Validation
And the response file should have size greater than 0
And the response PDF should contain 0 pages
And the response status code should be 200
@ocr
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr
Scenario: Process PDF with text and OCR with type normal
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response status code should be 500
@ocr
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr
Scenario: Process PDF with OCR with sidecar
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | true |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response file should have size greater than 0
And the response status code should be 200
@positive @flatten
Scenario: Flatten PDF
Given I generate a PDF file as "fileInput"
And the request data includes
@@ -184,6 +108,7 @@ Feature: API Validation
And the response file should have size greater than 0
And the response status code should be 200
@positive @metadata
Scenario: Update metadata
Given I generate a PDF file as "fileInput"
And the request data includes
@@ -202,41 +127,4 @@ Feature: API Validation
And the response PDF metadata should include "Title" as "Sample Title"
And the response status code should be 200
@libre
Scenario: Convert PDF to DOCX
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | docx |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".docx"
# And the response DOCX should contain 3 pages
@libre
Scenario: Convert PDF to ODT
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | odt |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".odt"
# And the response ODT should contain 3 pages
@libre
Scenario: Convert PDF to DOC
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | doc |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have extension ".doc"
And the response file should have size greater than 100
# And the response DOC should contain 3 pages

View File

@@ -0,0 +1,184 @@
Feature: API Validation
@libre @positive
Scenario: Repair PDF
Given I generate a PDF file as "fileInput"
When I send the API request to the endpoint "/api/v1/misc/repair"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with text and OCR with type normal
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Normal |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response status code should be 500
@ocr @positive
Scenario: Process PDF with OCR
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | false |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/pdf"
And the response file should have size greater than 0
And the response status code should be 200
@ocr @positive
Scenario: Process PDF with OCR with sidecar
Given I generate a PDF file as "fileInput"
And the request data includes
| parameter | value |
| languages | eng |
| sidecar | true |
| deskew | true |
| clean | true |
| cleanFinal | true |
| ocrType | Force |
| ocrRenderType | hocr |
| removeImagesAfter| false |
When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
Then the response content type should be "application/octet-stream"
And the response file should have extension ".zip"
And the response file should have size greater than 0
And the response status code should be 200
@libre @positive
Scenario Outline: Convert PDF to various word formats
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/word"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
Examples:
| format | extension |
| docx | .docx |
| odt | .odt |
| doc | .doc |
@compress @ghostscript @positive
Scenario: Compress
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| optimizeLevel | 4 |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @ghostscript @positive
Scenario: Compress
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@compress @ghostscript @positive
Scenario: Compress
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| optimizeLevel | 1 |
| expectedOutputSize | 5KB |
When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
Then the response status code should be 200
And the response file should have extension ".pdf"
And the response file should have size greater than 100
@libre @positive
Scenario Outline: Convert PDF to various types
Given I generate a PDF file as "fileInput"
And the pdf contains 3 pages with random text
And the request data includes
| parameter | value |
| outputFormat | <format> |
When I send the API request to the endpoint "/api/v1/convert/pdf/<type>"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension "<extension>"
Examples:
| type | format | extension |
| text | rtf | .rtf |
| text | txt | .txt |
| presentation | ppt | .ppt |
| presentation | pptx | .pptx |
| presentation | odp | .odp |
| html | html | .zip |
@libre @positive @topdf
Scenario Outline: Convert PDF to various types
Given I use an example file at "exampleFiles/example<extension>" as parameter "fileInput"
When I send the API request to the endpoint "/api/v1/convert/file/pdf"
Then the response status code should be 200
And the response file should have size greater than 100
And the response file should have extension ".pdf"
Examples:
| extension |
| .docx |
| .odp |
| .odt |
| .pptx |
| .rtf |

View File

@@ -0,0 +1,95 @@
@general
Feature: API Validation
@split-pdf-by-sections
Scenario Outline: split-pdf-by-sections with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 2 pages
And the request data includes
| parameter | value |
| horizontalDivisions | <horizontalDivisions> |
| verticalDivisions | <verticalDivisions> |
| merge | true |
When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
Then the response content type should be "application/pdf"
And the response file should have size greater than 200
And the response status code should be 200
And the response PDF should contain <page_count> pages
Examples:
| horizontalDivisions | verticalDivisions | page_count |
| 0 | 1 | 4 |
| 1 | 1 | 8 |
| 1 | 2 | 12 |
| 2 | 2 | 18 |
@split-pdf-by-sections
Scenario Outline: split-pdf-by-sections with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 2 pages
And the request data includes
| parameter | value |
| horizontalDivisions | <horizontalDivisions> |
| verticalDivisions | <verticalDivisions> |
| merge | true |
When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
Then the response content type should be "application/pdf"
And the response file should have size greater than 200
And the response status code should be 200
And the response PDF should contain <page_count> pages
Examples:
| horizontalDivisions | verticalDivisions | page_count |
| 0 | 1 | 4 |
| 1 | 1 | 8 |
| 1 | 2 | 12 |
| 2 | 2 | 18 |
@split-pdf-by-pages
Scenario Outline: split-pdf-by-pages with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages
And the request data includes
| parameter | value |
| fileInput | fileInput |
| pageNumbers | <pageNumbers> |
When I send the API request to the endpoint "/api/v1/general/split-pages"
Then the response content type should be "application/octet-stream"
And the response status code should be 200
And the response file should have size greater than 200
And the response ZIP should contain <file_count> files
Examples:
| pageNumbers | file_count |
| 1,3,5-9 | 8 |
| all | 20 |
| 2n+1 | 11 |
| 3n | 7 |
@split-pdf-by-size-or-count
Scenario Outline: split-pdf-by-size-or-count with different parameters
Given I generate a PDF file as "fileInput"
And the pdf contains 20 pages
And the request data includes
| parameter | value |
| fileInput | fileInput |
| splitType | <splitType> |
| splitValue | <splitValue> |
When I send the API request to the endpoint "/api/v1/general/split-by-size-or-count"
Then the response content type should be "application/octet-stream"
And the response status code should be 200
And the response file should have size greater than 200
And the response ZIP file should contain <doc_count> documents each having <pages_per_doc> pages
Examples:
| splitType | splitValue | doc_count | pages_per_doc |
| 1 | 5 | 4 | 5 |
| 2 | 2 | 2 | 10 |
| 2 | 4 | 4 | 5 |
| 1 | 10 | 2 | 10 |

View File

@@ -9,6 +9,8 @@ from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
import mimetypes
import requests
import zipfile
import shutil
#########
# GIVEN #
@@ -26,6 +28,23 @@ def step_generate_pdf(context, fileInput):
context.files = {}
context.files[context.param_name] = open(context.file_name, 'rb')
@given('I use an example file at "{filePath}" as parameter "{fileInput}"')
def step_use_example_file(context, filePath, fileInput):
context.param_name = fileInput
context.file_name = filePath.split('/')[-1]
if not hasattr(context, 'files'):
context.files = {}
# Ensure the file exists before opening
try:
example_file = open(filePath, 'rb')
context.files[context.param_name] = example_file
except FileNotFoundError:
raise FileNotFoundError(f"The example file '{filePath}' does not exist.")
@given('the pdf contains {page_count:d} pages')
def step_pdf_contains_pages(context, page_count):
writer = PdfWriter()
@@ -129,7 +148,7 @@ def step_send_api_request(context, endpoint):
for key, file in files.items():
mime_type, _ = mimetypes.guess_type(file.name)
mime_type = mime_type or 'application/octet-stream'
print("form_data " + file.name + " with " + mime_type)
print(f"form_data {file.name} with {mime_type}")
form_data.append((key, (file.name, file, mime_type)))
response = requests.post(url, files=form_data)
@@ -205,3 +224,31 @@ def step_save_response_file(context, filename):
with open(filename, 'wb') as f:
f.write(context.response.content)
print(f"Saved response content to {filename}")
@then('the response PDF should contain {page_count:d} pages')
def step_check_response_pdf_page_count(context, page_count):
response_file = io.BytesIO(context.response.content)
reader = PdfReader(io.BytesIO(response_file.getvalue()))
actual_page_count = len(reader.pages)
assert actual_page_count == page_count, f"Expected {page_count} pages but got {actual_page_count} pages"
@then('the response ZIP should contain {file_count:d} files')
def step_check_response_zip_file_count(context, file_count):
response_file = io.BytesIO(context.response.content)
with zipfile.ZipFile(io.BytesIO(response_file.getvalue())) as zip_file:
actual_file_count = len(zip_file.namelist())
assert actual_file_count == file_count, f"Expected {file_count} files but got {actual_file_count} files"
@then('the response ZIP file should contain {doc_count:d} documents each having {pages_per_doc:d} pages')
def step_check_response_zip_doc_page_count(context, doc_count, pages_per_doc):
response_file = io.BytesIO(context.response.content)
with zipfile.ZipFile(io.BytesIO(response_file.getvalue())) as zip_file:
actual_doc_count = len(zip_file.namelist())
assert actual_doc_count == doc_count, f"Expected {doc_count} documents but got {actual_doc_count} documents"
for file_name in zip_file.namelist():
with zip_file.open(file_name) as pdf_file:
reader = PdfReader(pdf_file)
actual_pages_per_doc = len(reader.pages)
assert actual_pages_per_doc == pages_per_doc, f"Expected {pages_per_doc} pages per document but got {actual_pages_per_doc} pages in document {file_name}"