pdf: add fill-form endpoint (#73544)
This commit is contained in:
parent
91fa126653
commit
0d9e35cc2f
|
@ -0,0 +1,28 @@
|
|||
# Generated by Django 2.2.26 on 2023-02-01 17:19
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
import passerelle.apps.pdf.models
|
||||
import passerelle.utils.models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Add the optional ``fill_form_file`` file field to the ``resource`` model."""

    dependencies = [('pdf', '0001_initial')]

    operations = [
        migrations.AddField(
            model_name='resource',
            name='fill_form_file',
            field=models.FileField(
                # human-readable metadata
                verbose_name='Fill Form default input file',
                help_text='PDF file, used if not input-form in fill-form payload',
                # storage location and content check
                upload_to=passerelle.utils.models.resource_file_upload_to,
                validators=[passerelle.apps.pdf.models.validate_pdf],
                # the field may be left empty
                blank=True,
                null=True,
            ),
        ),
    ]
|
|
@ -18,15 +18,19 @@ import base64
|
|||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import xml.etree.ElementTree as ET
|
||||
from collections import OrderedDict
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.db import models
|
||||
from django.http.response import HttpResponse
|
||||
from django.utils.translation import gettext_lazy as _
|
||||
|
||||
from passerelle.base.models import BaseResource
|
||||
from passerelle.utils.api import endpoint
|
||||
from passerelle.utils.jsonresponse import APIError
|
||||
from passerelle.utils.models import resource_file_upload_to
|
||||
|
||||
PDF_FILE_OBJECT = {
|
||||
'type': 'object',
|
||||
|
@ -76,10 +80,50 @@ ASSEMBLE_SCHEMA = {
|
|||
),
|
||||
}
|
||||
|
||||
# JSON schema (draft-04) of the fill-form payload: 'filename' and 'fields' are
# mandatory, 'input-form' is an optional PDF file object.
# NOTE(review): 'unflatten' presumably asks the endpoint machinery to expand
# 'a/b' style keys into nested objects before validation -- confirm.
FILL_FORM_SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'title': '',
    'description': '',
    'type': 'object',
    'required': ['filename', 'fields'],
    'unflatten': True,
    'properties': OrderedDict(
        [
            (
                'filename',
                {
                    'description': _('output PDF filename'),
                    'type': 'string',
                },
            ),
            ('input-form', PDF_FILE_OBJECT),
            (
                'fields',
                {
                    'description': _('hierarchical dictionary of fields'),
                    'type': 'object',
                },
            ),
        ]
    ),
}
|
||||
|
||||
|
||||
def validate_pdf(fieldfile):
    """Check that *fieldfile* starts with the PDF magic bytes ``%PDF-``.

    The file is opened, its first five bytes are inspected, and the read
    position is then put back at the start so that the check does not
    disturb whoever reads the same handle next.

    Raises:
        ValidationError: if the content does not start with ``%PDF-``.
    """
    fieldfile.open()
    try:
        header = fieldfile.read(5)
    finally:
        # Sniffing the header must not leave the handle at offset 5.
        fieldfile.seek(0)
    if header != b'%PDF-':
        raise ValidationError(
            _('%(value)s is not a PDF file'),
            params={'value': fieldfile},
        )
|
||||
|
||||
|
||||
class Resource(BaseResource):
|
||||
category = _('Misc')
|
||||
|
||||
fill_form_file = models.FileField(
|
||||
_('Fill Form default input file'),
|
||||
upload_to=resource_file_upload_to,
|
||||
help_text=_('PDF file, used if not input-form in fill-form payload'),
|
||||
validators=[validate_pdf],
|
||||
null=True,
|
||||
blank=True,
|
||||
)
|
||||
|
||||
class Meta:
|
||||
verbose_name = _('PDF')
|
||||
|
||||
|
@ -137,3 +181,78 @@ class Resource(BaseResource):
|
|||
response = HttpResponse(pdf_content, content_type='application/pdf')
|
||||
response['Content-Disposition'] = 'attachment; filename="%s"' % filename
|
||||
return response
|
||||
|
||||
@endpoint(
    name='fill-form',
    description=_('Fills the input PDF form with fields'),
    perm='can_access',
    methods=['post'],
    post={
        'request_body': {'schema': {'application/json': FILL_FORM_SCHEMA}},
        'input_example': {
            'filename': 'filled.pdf',
            'fields/Page1[0]/FirstName[0]': 'John',
            'fields/Page1[0]/LastName[0]': 'Doe',
            'fields/Page2[0]/Checkbox[0]': '0',
            'fields/Page2[0]/Checkbox[1]': '1',
        },
    },
)
def fill_form(self, request, post_data):
    """Fill a PDF form with the posted fields and return the resulting PDF.

    The form is taken from the base64 ``input-form/content`` property of the
    payload when present, otherwise from the resource's ``fill_form_file``.
    The ``fields`` dictionary is serialized as an XFDF document (nested
    dictionaries become nested ``<field>`` elements, any other value becomes
    a ``<value>`` holding ``str(value)``) which is handed to pdftk's
    ``fill_form`` operation.

    Returns:
        An ``application/pdf`` HttpResponse served as an attachment named
        after the ``filename`` property.

    Raises:
        APIError: (HTTP 400) when no usable input form is available, i.e.
            no payload content and no default file, or when the payload
            content is not valid base64.
    """
    filename = post_data.pop('filename')
    fields = post_data.pop('fields')

    xfdf_root = ET.Element('xfdf')
    xfdf_root.attrib['xmlns'] = 'http://ns.adobe.com/xfdf/'
    xfdf_root.attrib['xml:space'] = 'preserve'
    xfdf_f = ET.SubElement(xfdf_root, 'f')
    xfdf_fields = ET.SubElement(xfdf_root, 'fields')

    def add_fields(element, node):
        # dictionaries map to nested <field name="..."> elements,
        # anything else is a leaf rendered as a <value>
        if isinstance(node, dict):
            for key in node:
                field = ET.SubElement(element, 'field')
                field.attrib['name'] = key
                add_fields(field, node[key])
        else:
            value = ET.SubElement(element, 'value')
            value.text = str(node)

    add_fields(xfdf_fields, fields)

    with tempfile.TemporaryDirectory(prefix='passerelle-pdftk-%s-fill-form-' % self.id) as tmpdir:
        input_form = post_data.get('input-form')
        if isinstance(input_form, dict) and input_form.get('content'):
            try:
                form_content = base64.b64decode(input_form['content'])
            except (TypeError, ValueError):
                # binascii.Error is a ValueError: malformed base64 is a
                # caller error, report it like any other bad input-form
                raise APIError("missing or bad 'input-form' property", http_status=400)
            input_filename = os.path.join(tmpdir, 'input-form.pdf')
            with open(input_filename, mode='wb') as fd:
                fd.write(form_content)
        elif self.fill_form_file:
            input_filename = self.fill_form_file.path
        else:
            raise APIError("missing or bad 'input-form' property", http_status=400)

        # create xfdf
        xfdf_filename = os.path.join(tmpdir, 'fields.xfdf')
        xfdf_f.attrib['href'] = input_filename
        ET.indent(xfdf_root)
        with open(xfdf_filename, mode='wb') as fd:
            ET.ElementTree(xfdf_root).write(fd, encoding='UTF-8', xml_declaration=True)

        # call pdftk fill_form while the temporary files still exist
        pdf_content = self.run_pdftk(args=[input_filename, 'fill_form', xfdf_filename])

    response = HttpResponse(pdf_content, content_type='application/pdf')
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    return response
|
||||
|
||||
def pdftk_dump_data_fields_utf8(self):
    """Return pdftk's ``dump_data_fields_utf8`` output for the default form as HTML.

    Each output line is prefixed with ``<br>``; lines starting with
    ``FieldName: `` are followed by the matching ``fields/...`` payload key
    (dots of the field name replaced by slashes) in bold.

    The text comes from an uploaded PDF and the result is meant to be
    rendered as raw HTML, so every piece of pdftk output is HTML-escaped
    before being wrapped in markup.

    Returns:
        None when no default form file is set, an escaped ``Error: ...``
        string when pdftk fails, the HTML fragment otherwise.
    """
    import html  # local import keeps this block self-contained

    if not self.fill_form_file:
        return None
    try:
        dump = self.run_pdftk(args=[self.fill_form_file.path, 'dump_data_fields_utf8']).decode()
    except APIError as apierror:
        return html.escape('Error: %r' % apierror)

    chunks = []
    for line in dump.splitlines():
        chunks.append('<br>%s' % html.escape(line))
        if line.startswith('FieldName: '):
            # 11 == len('FieldName: ')
            flat_key = line[11:].replace('.', '/')
            chunks.append(' → <b>fields/%s</b>' % html.escape(flat_key))
    return ''.join(chunks)
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
{% extends "passerelle/manage/service_view.html" %}
|
||||
{% load i18n passerelle %}
|
||||
|
||||
{% block extra-tab-buttons %}
|
||||
{% if user.is_staff and object.fill_form_file %}
|
||||
<button role="tab" aria-selected="false" aria-controls="panel-dumpfields" id="tab-dumpfields"
|
||||
tabindex="-1">{% trans "Fill Form default PDF Fields" %}</button>
|
||||
{% endif %}
|
||||
{% endblock %}
|
||||
|
||||
{% block extra-tab-panels %}
|
||||
{% if user.is_staff and object.fill_form_file %}
|
||||
<div id="panel-dumpfields" role="tabpanel" tabindex="-1" aria-labelledby="tab-dumpfields" hidden>
|
||||
<div>
|
||||
<p>{% blocktrans with file=object.fill_form_file %}PDFtk {{ file }} dump_data_fields_utf8 output{% endblocktrans %}</p>
|
||||
<p>{{ object.pdftk_dump_data_fields_utf8|safe }}</p>
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endblock %}
|
|
@ -17,28 +17,38 @@
|
|||
import base64
|
||||
import os
|
||||
import subprocess
|
||||
import xml.etree.ElementTree as ET
|
||||
from io import BytesIO
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
from django.core.exceptions import ValidationError
|
||||
from django.core.files import File
|
||||
from django.urls import reverse
|
||||
from pdfrw import PdfReader
|
||||
|
||||
from passerelle.apps.pdf.models import Resource
|
||||
from tests.test_manager import login
|
||||
from tests.utils import generic_endpoint_url, setup_access_rights
|
||||
|
||||
with open(os.path.join(os.path.dirname(__file__), 'data', 'minimal.pdf'), 'rb') as fd:
|
||||
pdf_content = base64.b64encode(fd.read()).decode()
|
||||
pdf_content = fd.read()
|
||||
pdf_b64content = base64.b64encode(pdf_content).decode()
|
||||
with open(os.path.join(os.path.dirname(__file__), 'data', 'pdf-form.pdf'), 'rb') as fd:
|
||||
acroform_content = fd.read()
|
||||
acroform_b64content = base64.b64encode(acroform_content).decode()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def pdf(db):
|
||||
return setup_access_rights(Resource.objects.create(slug='test'))
|
||||
return setup_access_rights(Resource.objects.create(slug='test', title='test', description='test'))
|
||||
|
||||
|
||||
@mock.patch('subprocess.check_output')
|
||||
def test_pdf_assemble(mocked_check_output, app, pdf):
|
||||
endpoint = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)
|
||||
|
||||
payload = {'filename': 'foo.pdf', 'files/0': {'content': pdf_content}}
|
||||
payload = {'filename': 'foo.pdf', 'files/0': {'content': pdf_b64content}}
|
||||
resp = app.post_json(endpoint, params=payload, status=200)
|
||||
assert resp.headers['content-type'] == 'application/pdf'
|
||||
assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
|
||||
|
@ -55,9 +65,9 @@ def test_pdf_assemble(mocked_check_output, app, pdf):
|
|||
payload = {
|
||||
'filename': 'bar.pdf',
|
||||
'files/0': {'content': ''},
|
||||
'files/1': {'content': pdf_content},
|
||||
'files/1': {'content': pdf_b64content},
|
||||
'files/2': None,
|
||||
'files/3': pdf_content,
|
||||
'files/3': pdf_b64content,
|
||||
'files/4': '',
|
||||
}
|
||||
mocked_check_output.reset_mock()
|
||||
|
@ -72,7 +82,7 @@ def test_pdf_assemble(mocked_check_output, app, pdf):
|
|||
assert pdftk_call[2].endswith('/pdf-3.pdf') # file 2
|
||||
|
||||
# pdftk errors (faked)
|
||||
payload = {'filename': 'out.pdf', 'files/0': {'content': pdf_content}}
|
||||
payload = {'filename': 'out.pdf', 'files/0': {'content': pdf_b64content}}
|
||||
mocked_check_output.reset_mock()
|
||||
mocked_check_output.side_effect = subprocess.TimeoutExpired(cmd=[], timeout=20)
|
||||
resp = app.post_json(endpoint, params=payload, status=200)
|
||||
|
@ -112,18 +122,172 @@ def test_pdf_assemble(mocked_check_output, app, pdf):
|
|||
resp = app.get(endpoint, status=405)
|
||||
|
||||
|
||||
def test_pdf_real_pdftk_call(app, pdf, settings):
|
||||
def test_pdf_real_pdftk_assemble(app, pdf, settings):
|
||||
if not os.path.exists(settings.PDFTK_PATH):
|
||||
pytest.skip('pdftk (%s) not found' % settings.PDFTK_PATH)
|
||||
|
||||
endpoint = generic_endpoint_url('pdf', 'assemble', slug=pdf.slug)
|
||||
payload = {
|
||||
'filename': 'twopages.pdf',
|
||||
'files/0': {'content': pdf_content},
|
||||
'files/1': {'content': pdf_content},
|
||||
'files/0': {'content': pdf_b64content},
|
||||
'files/1': {'content': pdf_b64content},
|
||||
}
|
||||
resp = app.post_json(endpoint, params=payload, status=200)
|
||||
assert resp.headers['content-type'] == 'application/pdf'
|
||||
assert resp.headers['content-disposition'] == 'attachment; filename="twopages.pdf"'
|
||||
assert resp.content[:5] == b'%PDF-'
|
||||
assert PdfReader(fdata=resp.content).numPages == 2
|
||||
|
||||
|
||||
@mock.patch('subprocess.check_output')
def test_pdf_fill_form(mocked_check_output, app, pdf):
    """Exercise the fill-form endpoint with a mocked pdftk subprocess."""
    endpoint = generic_endpoint_url('pdf', 'fill-form', slug=pdf.slug)
    xfdf_ns = '{http://ns.adobe.com/xfdf/}'

    def check_xml(args, **kwargs):
        # stands in for pdftk: inspect the XFDF file it is given
        root = ET.parse(args[3]).getroot()
        assert root.tag == xfdf_ns + 'xfdf'
        assert root.find(xfdf_ns + 'f').attrib['href'].endswith('.pdf')
        field = root.find(xfdf_ns + 'fields').find(xfdf_ns + 'field')
        assert field.attrib['name'] == 'fname'
        assert field.find(xfdf_ns + 'value').text == 'John'

    def assert_single_pdftk_call(input_suffix):
        # exactly one pdftk invocation, with the expected command line
        assert mocked_check_output.call_count == 1
        pdftk_call = mocked_check_output.call_args.args[0]
        assert len(pdftk_call) == 6
        assert pdftk_call[0] == '/usr/bin/pdftk'
        assert pdftk_call[1].endswith(input_suffix)
        assert pdftk_call[2] == 'fill_form'
        assert pdftk_call[3].endswith('/fields.xfdf')
        assert pdftk_call[4] == 'output'
        assert pdftk_call[5] == '-'
        assert mocked_check_output.call_args.kwargs['timeout'] == 20

    # form sent in the payload
    mocked_check_output.side_effect = check_xml
    resp = app.post_json(
        endpoint,
        params={
            'filename': 'foo.pdf',
            'fields/fname': 'John',
            'input-form': {'content': acroform_b64content},
        },
        status=200,
    )
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="foo.pdf"'
    assert_single_pdftk_call('/input-form.pdf')

    # no form in the payload: the resource default file is used
    pdf.fill_form_file = File(BytesIO(acroform_content), 'default.pdf')
    pdf.save()
    mocked_check_output.reset_mock()
    resp = app.post_json(
        endpoint,
        params={
            'filename': 'bar.pdf',
            'fields/fname': 'John',
        },
        status=200,
    )
    assert resp.headers['content-type'] == 'application/pdf'
    assert resp.headers['content-disposition'] == 'attachment; filename="bar.pdf"'
    assert_single_pdftk_call('media/pdf/test/default.pdf')

    # pdftk errors (faked)
    payload = {
        'filename': 'foo.pdf',
        'fields/fname': 'Bill',
        'input-form': {'content': acroform_b64content},
    }
    mocked_check_output.reset_mock()
    mocked_check_output.side_effect = subprocess.TimeoutExpired(cmd=[], timeout=20)
    resp = app.post_json(endpoint, params=payload, status=200)
    assert mocked_check_output.call_count == 1
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('pdftk timed out after 20 seconds')

    mocked_check_output.reset_mock()
    mocked_check_output.side_effect = subprocess.CalledProcessError(cmd=[], returncode=42, output='ooops')
    resp = app.post_json(endpoint, params=payload, status=200)
    assert mocked_check_output.call_count == 1
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('pdftk returned non-zero exit status 42')
    assert 'ooops' in resp.json['err_desc']

    # bad calls errors
    resp = app.post(endpoint, status=400)
    assert resp.headers['content-type'].startswith('application/json')
    assert resp.json['err'] == 1
    assert resp.json['err_desc'].startswith('could not decode body to json')

    resp = app.post_json(endpoint, params={}, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "'filename' is a required property"

    resp = app.post_json(endpoint, params={'filename': 'out.pdf'}, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "'fields' is a required property"

    resp = app.post_json(endpoint, params={'filename': 'out.pdf', 'fields': 'not-a-dict'}, status=400)
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "fields: 'not-a-dict' is not of type 'object'"

    # neither a payload form nor a default one
    pdf.fill_form_file = None
    pdf.save()
    resp = app.post_json(
        endpoint,
        params={
            'filename': 'bar.pdf',
            'fields/fname': 'Alice',
        },
        status=400,
    )
    assert resp.json['err'] == 1
    assert resp.json['err_desc'] == "missing or bad 'input-form' property"

    # only POST is allowed
    resp = app.get(endpoint, status=405)
|
||||
|
||||
|
||||
def test_pdf_real_pdftk_fillform(admin_user, app, pdf, settings):
    """Run fill-form against the real pdftk binary, then check the manager fields dump."""
    pdftk_path = settings.PDFTK_PATH
    if not os.path.exists(pdftk_path):
        pytest.skip('pdftk (%s) not found' % pdftk_path)

    endpoint = generic_endpoint_url('pdf', 'fill-form', slug=pdf.slug)
    resp = app.post_json(
        endpoint,
        params={
            'filename': 'filled.pdf',
            'fields/fname': 'ThisIsMyFirstName',
            'input-form': {'content': acroform_b64content},
        },
        status=200,
    )
    headers = resp.headers
    assert headers['content-type'] == 'application/pdf'
    assert headers['content-disposition'] == 'attachment; filename="filled.pdf"'
    assert PdfReader(fdata=resp.content).numPages == 1
    assert resp.content[:5] == b'%PDF-'
    # TODO: find an easy way to verify 'ThisIsMyFirstName' in resp.content

    # dump fields in manager view
    pdf.fill_form_file = File(BytesIO(acroform_content), 'pdf-form.pdf')
    pdf.save()
    manage_url = reverse('view-connector', kwargs={'connector': 'pdf', 'slug': pdf.slug})

    # before login the fields panel is not rendered
    resp = app.get(manage_url)
    for marker in ('panel-dumpfields', '<b>fields/fname</b>'):
        assert marker not in resp.text

    # once logged in, the panel and the pdftk dump are shown
    app = login(app)
    resp = app.get(manage_url)
    for marker in ('panel-dumpfields', '<b>fields/fname</b>'):
        assert marker in resp.text
|
||||
|
||||
|
||||
def test_pdf_validator(pdf):
    """full_clean() accepts real PDF content and rejects anything else."""
    # both sample documents pass validation
    for valid_content in (pdf_content, acroform_content):
        pdf.fill_form_file = File(BytesIO(valid_content), 'default.pdf')
        pdf.save()
        pdf.full_clean()

    # arbitrary bytes are refused
    pdf.fill_form_file = File(BytesIO(b'not a pdf'), 'test.txt')
    pdf.save()
    with pytest.raises(ValidationError):
        pdf.full_clean()
|
||||
|
|
Loading…
Reference in New Issue