changes

2024-05-27 16:31:00 +01:00
parent b93bff5cad
commit 6ffa80c386
21 changed files with 526 additions and 147 deletions
--- a/cucumber/features/examples.feature
+++ b/cucumber/features/examples.feature
@@ -1,5 +1,7 @@
+@example
 Feature: API Validation

+  @positive @password
  Scenario: Remove password 
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
@@ -12,7 +14,8 @@ Feature: API Validation
    And the response file should have size greater than 0
    And the response PDF is not passworded
 	And the response status code should be 200
-	
+
+  @negative @password
  Scenario: Remove password wrong password
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
@@ -24,6 +27,7 @@ Feature: API Validation
    Then the response status code should be 500
    And the response should contain error message "Internal Server Error"

+  @positive @info
  Scenario: Get info
    Given I generate a PDF file as "fileInput"
    When I send the API request to the endpoint "/api/v1/security/get-info-on-pdf"
@@ -31,6 +35,7 @@ Feature: API Validation
    And the response file should have size greater than 100
 	And the response status code should be 200

+  @positive @password
  Scenario: Add password
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
@@ -43,6 +48,7 @@ Feature: API Validation
    And the response PDF is passworded
 	And the response status code should be 200
 	
+  @positive @password
  Scenario: Add password with other params 
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
@@ -59,7 +65,7 @@ Feature: API Validation
    And the response PDF is passworded
 	And the response status code should be 200
 	
-	
+  @positive @watermark
  Scenario: Add watermark
    Given I generate a PDF file as "fileInput"
    And the pdf contains 3 pages
@@ -76,18 +82,8 @@ Feature: API Validation
    Then the response content type should be "application/pdf"
    And the response file should have size greater than 100
 	And the response status code should be 200
-	
-
-
-  Scenario: Repair PDF
-    Given I generate a PDF file as "fileInput"
-    When I send the API request to the endpoint "/api/v1/misc/repair"
-    Then the response content type should be "application/pdf"
-    And the response file should have size greater than 0
-	And the response status code should be 200
-	
-

+  @positive
  Scenario: Remove blank pages
    Given I generate a PDF file as "fileInput"
 	And the pdf contains 3 blank pages
@@ -100,80 +96,8 @@ Feature: API Validation
    And the response file should have size greater than 0
    And the response PDF should contain 0 pages
 	And the response status code should be 200
-	
-  @ocr
-  Scenario: Process PDF with OCR
-    Given I generate a PDF file as "fileInput"
-    And the request data includes
-      | parameter        | value       |
-      | languages        | eng         |
-      | sidecar          | false        |
-      | deskew           | true        |
-      | clean            | true        |
-      | cleanFinal       | true        |
-      | ocrType          | Normal      |
-      | ocrRenderType    | hocr        |
-      | removeImagesAfter| false       |
-    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
-    Then the response content type should be "application/pdf"
-    And the response file should have size greater than 0
-	And the response status code should be 200

-  @ocr
-  Scenario: Process PDF with text and OCR with type normal 
-    Given I generate a PDF file as "fileInput"
-    And the pdf contains 3 pages with random text
-    And the request data includes
-      | parameter        | value       |
-      | languages        | eng         |
-      | sidecar          | false        |
-      | deskew           | true        |
-      | clean            | true        |
-      | cleanFinal       | true        |
-      | ocrType          | Normal      |
-      | ocrRenderType    | hocr        |
-      | removeImagesAfter| false       |
-    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
-	Then the response status code should be 500
-	
-  @ocr
-  Scenario: Process PDF with OCR
-    Given I generate a PDF file as "fileInput"
-    And the request data includes
-      | parameter        | value       |
-      | languages        | eng         |
-      | sidecar          | false        |
-      | deskew           | true        |
-      | clean            | true        |
-      | cleanFinal       | true        |
-      | ocrType          | Force      |
-      | ocrRenderType    | hocr        |
-      | removeImagesAfter| false       |
-    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
-    Then the response content type should be "application/pdf"
-    And the response file should have size greater than 0
-	And the response status code should be 200
-	
-  @ocr
-  Scenario: Process PDF with OCR with sidecar
-    Given I generate a PDF file as "fileInput"
-    And the request data includes
-      | parameter        | value       |
-      | languages        | eng         |
-      | sidecar          | true        |
-      | deskew           | true        |
-      | clean            | true        |
-      | cleanFinal       | true        |
-      | ocrType          | Force      |
-      | ocrRenderType    | hocr        |
-      | removeImagesAfter| false       |
-    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
-    Then the response content type should be "application/octet-stream"
-	And the response file should have extension ".zip"
-    And the response file should have size greater than 0
-	And the response status code should be 200
-
-	
+  @positive @flatten
  Scenario: Flatten PDF
    Given I generate a PDF file as "fileInput"
    And the request data includes
@@ -184,6 +108,7 @@ Feature: API Validation
    And the response file should have size greater than 0
 	And the response status code should be 200
 	
+  @positive @metadata
  Scenario: Update metadata
    Given I generate a PDF file as "fileInput"
    And the request data includes
@@ -202,41 +127,4 @@ Feature: API Validation
 	And the response PDF metadata should include "Title" as "Sample Title"
 	And the response status code should be 200

-  @libre
-  Scenario: Convert PDF to DOCX
-    Given I generate a PDF file as "fileInput"
-    And the pdf contains 3 pages with random text
-	And the request data includes
-      | parameter        | value      |
-      | outputFormat     | docx       |
-    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
-	Then the response status code should be 200
-    And the response file should have size greater than 100
-    And the response file should have extension ".docx"
-#    And the response DOCX should contain 3 pages
-
-  @libre
-  Scenario: Convert PDF to ODT
-    Given I generate a PDF file as "fileInput"
-    And the pdf contains 3 pages with random text
-	And the request data includes
-      | parameter        | value     |
-      | outputFormat     | odt       |
-    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
-	Then the response status code should be 200
-    And the response file should have size greater than 100
-    And the response file should have extension ".odt"
-#   And the response ODT should contain 3 pages
-
-  @libre
-  Scenario: Convert PDF to DOC
-    Given I generate a PDF file as "fileInput"
-    And the pdf contains 3 pages with random text
-	And the request data includes
-      | parameter        | value     |
-      | outputFormat     | doc       |
-    When I send the API request to the endpoint "/api/v1/convert/pdf/word"
-	Then the response status code should be 200
-    And the response file should have extension ".doc"
-    And the response file should have size greater than 100
-#    And the response DOC should contain 3 pages
+  
--- a/cucumber/features/external.feature
+++ b/cucumber/features/external.feature
@@ -0,0 +1,184 @@
+Feature: API Validation
+
+
+  @libre @positive
+  Scenario: Repair PDF
+    Given I generate a PDF file as "fileInput"
+    When I send the API request to the endpoint "/api/v1/misc/repair"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+	
+
+  @ocr @positive
+  Scenario: Process PDF with OCR
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | false        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Normal      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+
+  @ocr @negative
+  Scenario: Process PDF with text and OCR with type normal 
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | false        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Normal      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+	Then the response status code should be 500
+	
+  @ocr @positive
+  Scenario: Process PDF with OCR with type force
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | false        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Force      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+	
+  @ocr @positive
+  Scenario: Process PDF with OCR with sidecar
+    Given I generate a PDF file as "fileInput"
+    And the request data includes
+      | parameter        | value       |
+      | languages        | eng         |
+      | sidecar          | true        |
+      | deskew           | true        |
+      | clean            | true        |
+      | cleanFinal       | true        |
+      | ocrType          | Force      |
+      | ocrRenderType    | hocr        |
+      | removeImagesAfter| false       |
+    When I send the API request to the endpoint "/api/v1/misc/ocr-pdf"
+    Then the response content type should be "application/octet-stream"
+	And the response file should have extension ".zip"
+    And the response file should have size greater than 0
+	And the response status code should be 200
+
+
+  @libre @positive
+  Scenario Outline: Convert PDF to various word formats
+  Given I generate a PDF file as "fileInput"
+  And the pdf contains 3 pages with random text
+  And the request data includes
+    | parameter    | value       |
+    | outputFormat | <format>    |
+  When I send the API request to the endpoint "/api/v1/convert/pdf/word"
+  Then the response status code should be 200
+  And the response file should have size greater than 100
+  And the response file should have extension "<extension>"
+
+  Examples:
+    | format | extension |
+    | docx   | .docx     |
+    | odt    | .odt      |
+    | doc    | .doc      |
+
+
+
+  @compress @ghostscript @positive
+  Scenario: Compress with optimize level 4
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value     |
+      | optimizeLevel     | 4       |
+    When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
+	Then the response status code should be 200
+    And the response file should have extension ".pdf"
+    And the response file should have size greater than 100
+	
+  @compress @ghostscript @positive
+  Scenario: Compress with optimize level 1 and expected output size
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value     |
+      | optimizeLevel     | 1       |
+	  | expectedOutputSize | 5KB |
+    When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
+	Then the response status code should be 200
+    And the response file should have extension ".pdf"
+    And the response file should have size greater than 100
+	
+	
+  @compress @ghostscript @positive
+  Scenario: Compress
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 3 pages with random text
+	And the request data includes
+      | parameter        | value     |
+      | optimizeLevel     | 1       |
+	  | expectedOutputSize | 5KB |
+    When I send the API request to the endpoint "/api/v1/misc/compress-pdf"
+	Then the response status code should be 200
+    And the response file should have extension ".pdf"
+    And the response file should have size greater than 100	
+	
+  @libre @positive
+  Scenario Outline: Convert PDF to various types
+  Given I generate a PDF file as "fileInput"
+  And the pdf contains 3 pages with random text
+  And the request data includes
+    | parameter    | value       |
+    | outputFormat | <format>    |
+  When I send the API request to the endpoint "/api/v1/convert/pdf/<type>"
+  Then the response status code should be 200
+  And the response file should have size greater than 100
+  And the response file should have extension "<extension>"
+
+  Examples:
+   | type | format | extension |
+   |  text   | rtf   | .rtf     |
+   |  text   | txt    | .txt      |
+   |  presentation   | ppt   | .ppt     |
+   |  presentation   | pptx    | .pptx      |
+   |  presentation   | odp   | .odp     |
+   |  html   | html    | .zip      |
+
+	
+  @libre @positive @topdf
+  Scenario Outline: Convert various file types to PDF
+  Given I use an example file at "exampleFiles/example<extension>" as parameter "fileInput"
+  When I send the API request to the endpoint "/api/v1/convert/file/pdf"
+  Then the response status code should be 200
+  And the response file should have size greater than 100
+  And the response file should have extension ".pdf"
+
+  Examples:
+   | extension | 
+   |   .docx  |
+   |  .odp   |
+   |  .odt   | 
+   |  .pptx   | 
+   |  .rtf   | 
+
+
+		
--- a/cucumber/features/general.feature
+++ b/cucumber/features/general.feature
@@ -0,0 +1,95 @@
+@general
+Feature: API Validation
+
+  @split-pdf-by-sections
+  Scenario Outline: split-pdf-by-sections with different parameters
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 2 pages
+    And the request data includes
+      | parameter           | value       |
+      | horizontalDivisions | <horizontalDivisions> |
+      | verticalDivisions   | <verticalDivisions> |
+      | merge               | true |
+    When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 200
+    And the response status code should be 200
+    And the response PDF should contain <page_count> pages
+
+  Examples:
+    | horizontalDivisions | verticalDivisions | page_count |
+    | 0                   | 1                 | 4          |
+    | 1                   | 1                 | 8          |
+    | 1                   | 2                 | 12          |
+    | 2                   | 2                 | 18          |
+
+  @split-pdf-by-sections
+  Scenario Outline: split-pdf-by-sections with different parameters
+    Given I generate a PDF file as "fileInput"
+    And the pdf contains 2 pages
+    And the request data includes
+      | parameter           | value       |
+      | horizontalDivisions | <horizontalDivisions> |
+      | verticalDivisions   | <verticalDivisions> |
+      | merge               | true |
+    When I send the API request to the endpoint "/api/v1/general/split-pdf-by-sections"
+    Then the response content type should be "application/pdf"
+    And the response file should have size greater than 200
+    And the response status code should be 200
+    And the response PDF should contain <page_count> pages
+
+  Examples:
+    | horizontalDivisions | verticalDivisions | page_count |
+    | 0                   | 1                 | 4          |
+    | 1                   | 1                 | 8          |
+    | 1                   | 2                 | 12          |
+    | 2                   | 2                 | 18          |
+
+
+
+  @split-pdf-by-pages
+  Scenario Outline: split-pdf-by-pages with different parameters
+  Given I generate a PDF file as "fileInput"
+  And the pdf contains 20 pages
+  And the request data includes
+    | parameter     | value         |
+    | fileInput     | fileInput     |
+    | pageNumbers   | <pageNumbers> |
+  When I send the API request to the endpoint "/api/v1/general/split-pages"
+  Then the response content type should be "application/octet-stream"
+  And the response status code should be 200
+  And the response file should have size greater than 200
+  And the response ZIP should contain <file_count> files
+
+  Examples:
+    | pageNumbers | file_count |
+    | 1,3,5-9     | 8          |
+    | all         | 20         |
+    | 2n+1        | 11         |
+    | 3n          | 7          |
+
+
+
+  @split-pdf-by-size-or-count
+  Scenario Outline: split-pdf-by-size-or-count with different parameters
+  Given I generate a PDF file as "fileInput"
+  And the pdf contains 20 pages
+  And the request data includes
+    | parameter  | value          |
+    | fileInput  | fileInput      |
+    | splitType  | <splitType>    |
+    | splitValue | <splitValue>   |
+  When I send the API request to the endpoint "/api/v1/general/split-by-size-or-count"
+  Then the response content type should be "application/octet-stream"
+  And the response status code should be 200
+  And the response file should have size greater than 200
+  And the response ZIP file should contain <doc_count> documents each having <pages_per_doc> pages
+
+  Examples:
+    | splitType | splitValue | doc_count | pages_per_doc |
+    | 1         | 5          | 4         | 5             |
+    | 2         | 2          | 2         | 10            |
+    | 2         | 4          | 4         | 5             |
+    | 1         | 10         | 2         | 10            |
+
+
--- a/cucumber/features/steps/step_definitions.py
+++ b/cucumber/features/steps/step_definitions.py
@@ -9,6 +9,8 @@ from reportlab.lib.pagesizes import letter
 from reportlab.pdfgen import canvas
 import mimetypes
 import requests
+import zipfile
+import shutil

 #########
 # GIVEN #
@@ -26,6 +28,23 @@ def step_generate_pdf(context, fileInput):
        context.files = {}
    context.files[context.param_name] = open(context.file_name, 'rb')

+
+@given('I use an example file at "{filePath}" as parameter "{fileInput}"')
+def step_use_example_file(context, filePath, fileInput):
+    context.param_name = fileInput
+    context.file_name = filePath.split('/')[-1]
+    if not hasattr(context, 'files'):
+        context.files = {}
+    
+    # Ensure the file exists before opening
+    try:
+        example_file = open(filePath, 'rb')
+        context.files[context.param_name] = example_file
+    except FileNotFoundError:
+        raise FileNotFoundError(f"The example file '{filePath}' does not exist.")
+
+        
+
@given('the pdf contains {page_count:d} pages')
 def step_pdf_contains_pages(context, page_count):
    writer = PdfWriter()
@@ -129,7 +148,7 @@ def step_send_api_request(context, endpoint):
    for key, file in files.items():
        mime_type, _ = mimetypes.guess_type(file.name)
        mime_type = mime_type or 'application/octet-stream'
-        print("form_data " + file.name + " with " + mime_type)
+        print(f"form_data {file.name} with {mime_type}")
        form_data.append((key, (file.name, file, mime_type)))

    response = requests.post(url, files=form_data)
@@ -205,3 +224,31 @@ def step_save_response_file(context, filename):
    with open(filename, 'wb') as f:
        f.write(context.response.content)
    print(f"Saved response content to {filename}")
+
+
+@then('the response PDF should contain {page_count:d} pages')
+def step_check_response_pdf_page_count(context, page_count):
+    response_file = io.BytesIO(context.response.content)
+    reader = PdfReader(io.BytesIO(response_file.getvalue()))
+    actual_page_count = len(reader.pages)
+    assert actual_page_count == page_count, f"Expected {page_count} pages but got {actual_page_count} pages"
+
+@then('the response ZIP should contain {file_count:d} files')
+def step_check_response_zip_file_count(context, file_count):
+    response_file = io.BytesIO(context.response.content)
+    with zipfile.ZipFile(io.BytesIO(response_file.getvalue())) as zip_file:
+      actual_file_count = len(zip_file.namelist())
+    assert actual_file_count == file_count, f"Expected {file_count} files but got {actual_file_count} files"
+
+@then('the response ZIP file should contain {doc_count:d} documents each having {pages_per_doc:d} pages')
+def step_check_response_zip_doc_page_count(context, doc_count, pages_per_doc):
+    response_file = io.BytesIO(context.response.content)
+    with zipfile.ZipFile(io.BytesIO(response_file.getvalue())) as zip_file:
+        actual_doc_count = len(zip_file.namelist())
+        assert actual_doc_count == doc_count, f"Expected {doc_count} documents but got {actual_doc_count} documents"
+        
+        for file_name in zip_file.namelist():
+            with zip_file.open(file_name) as pdf_file:
+                reader = PdfReader(pdf_file)
+                actual_pages_per_doc = len(reader.pages)
+                assert actual_pages_per_doc == pages_per_doc, f"Expected {pages_per_doc} pages per document but got {actual_pages_per_doc} pages in document {file_name}"