# passerelle - uniform access to multiple data sources and services
# Copyright (C) 2016 Entr'ouvert
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import base64
import binascii
import functools
import re
from contextlib import contextmanager
from io import BytesIO
from urllib import error as urllib2

import httplib2
from cmislib import CmisClient
from cmislib.exceptions import (
    CmisException,
    InvalidArgumentException,
    ObjectNotFoundException,
    PermissionDeniedException,
    UpdateConflictException,
)
from django.db import models
from django.http import HttpResponse
from django.utils.functional import cached_property
from django.utils.http import urlencode
from django.utils.translation import gettext_lazy as _

from passerelle.base.models import BaseResource
from passerelle.utils.api import endpoint
from passerelle.utils.jsonresponse import APIError
from passerelle.utils.logging import ignore_loggers

# Punctuation accepted, in addition to word characters, in file names and in
# path segments.
SPECIAL_CHARS = '!#$%&+-^_`~;[]{}+=~'
# Either a lone "/" or one or more "/segment" groups (so no trailing slash).
FILE_PATH_PATTERN = r'^(/|(/[\w%s]+)+)$' % re.escape(SPECIAL_CHARS)
# NOTE(review): this pattern is anchored at the end only. If the schema
# validator applies it with "search" semantics, only the tail of the file name
# is constrained -- confirm whether that is intended before adding a "^".
FILE_NAME_PATTERN = r'[\w%s\.]+$' % re.escape(SPECIAL_CHARS)


# JSON schema of the body accepted by the "uploadfile" endpoint.
UPLOAD_SCHEMA = {
    'type': 'object',
    'title': _('CMIS file upload'),
    'properties': {
        'file': {
            'title': _('File object'),
            'type': 'object',
            'properties': {
                'filename': {
                    'type': 'string',
                    'description': _('Filename'),
                    'pattern': FILE_NAME_PATTERN,
                    'pattern_description': _('Numbers, letters and special caracters "%s" are allowed.')
                    % SPECIAL_CHARS,
                },
                'content': {
                    'type': 'string',
                    'description': _('Content'),
                },
                'content_type': {
                    'type': 'string',
                    'description': _('Content type'),
                },
            },
            'required': ['content'],
        },
        'filename': {
            'type': 'string',
            'description': _('Filename (takes precendence over filename in "file" object)'),
            'pattern': FILE_NAME_PATTERN,
            'pattern_description': _('Numbers, letters and special caracters "%s" are allowed.') % SPECIAL_CHARS,
        },
        'path': {
            'type': 'string',
            'description': _('File path'),
            'pattern': FILE_PATH_PATTERN,
            'pattern_description': _('Must include leading but not trailing slash.'),
        },
        'object_type': {
            'type': 'string',
            'description': _('CMIS object type'),
        },
        'properties': {
            'type': 'object',
            'title': _('CMIS properties (dictionary with string keys)'),
            'additionalProperties': {'type': 'string'},
        },
    },
    'required': ['file', 'path'],
    'unflatten': True,
}


class CmisConnector(BaseResource):
    """Connector exposing upload, download and metadata endpoints backed by a
    CMIS repository reached through its Atom endpoint."""

    cmis_endpoint = models.URLField(
        max_length=400, verbose_name=_('CMIS Atom endpoint'), help_text=_('URL of the CMIS Atom endpoint')
    )
    username = models.CharField(max_length=128, verbose_name=_('Service username'))
    password = models.CharField(max_length=128, verbose_name=_('Service password'))
    category = _('File Storage')

    class Meta:
        verbose_name = _('CMIS connector')

    def check_status(self):
        """Access the gateway's default repository.

        Nothing is returned; any exception raised by that access propagates
        to the caller.
        """
        with self.get_cmis_gateway() as cmis_gateway:
            cmis_gateway.repo  # pylint: disable=pointless-statement

    @endpoint(
        description=_('File upload'),
        perm='can_access',
        post={
            'request_body': {
                'schema': {
                    'application/json': UPLOAD_SCHEMA,
                }
            }
        },
    )
    def uploadfile(self, request, post_data):
        """Create a document in the repository from the posted payload.

        The top-level "filename" takes precedence over the one inside "file".
        Returns ``{'data': {'properties': ...}}`` with the properties of the
        created document. Raises APIError (HTTP 400) when the payload does not
        pass ``_validate_inputs``.
        """
        error, error_msg, data = self._validate_inputs(post_data)
        if error:
            self.logger.debug("received invalid data: %s", error_msg)
            raise APIError(error_msg, http_status=400)
        filename = data.get('filename') or data['file']['filename']
        self.logger.info("received file_name: '%s', file_path: '%s'", filename, data["path"])
        with self.get_cmis_gateway() as cmis_gateway:
            doc = cmis_gateway.create_doc(
                filename,
                data['path'],
                data['file_byte_content'],
                content_type=data['file'].get('content_type'),
                object_type=data.get('object_type'),
                properties=data.get('properties'),
            )
            return {'data': {'properties': doc.properties}}

    @contextmanager
    def get_cmis_gateway(self):
        """Yield a CMISGateway configured with this connector's credentials.

        While the context is active, ``cmislib.atompub.binding.Rest`` is
        replaced by a factory returning a ``RESTService`` bound to this
        connector; the previous value is restored on exit, even on error.
        """
        with ignore_loggers('cmislib', 'cmislib.atompub.binding'):
            import cmislib.atompub.binding as atompub_binding

            # NOTE(review): this swaps a module-level attribute, so the
            # replacement is visible to every user of the module for the
            # duration of the context -- confirm this is acceptable if
            # requests can be served concurrently.
            old_Rest = atompub_binding.Rest
            atompub_binding.Rest = lambda: RESTService(self)
            try:
                yield CMISGateway(self.cmis_endpoint, self.username, self.password, self.logger)
            finally:
                atompub_binding.Rest = old_Rest

    def _validate_inputs(self, data):
        """Check the upload payload and decode the file content.

        Returns a tuple ``(error, error_msg, data)``. On success ``error`` is
        False, ``error_msg`` is empty and ``data`` is the input dict with an
        added ``'file_byte_content'`` key holding the decoded bytes. On
        failure ``error`` is True and ``data`` is None.
        """
        file_ = data['file']
        if 'filename' not in file_ and 'filename' not in data:
            return True, '"filename" or "file[\'filename\']" is required', None
        try:
            data['file_byte_content'] = base64.b64decode(file_['content'])
        except (TypeError, binascii.Error, ValueError):
            # ValueError is listed explicitly: b64decode() raises a plain
            # ValueError (not binascii.Error) for a str holding non-ASCII
            # characters.
            return True, '"file[\'content\']" must be a valid base64 string', None
        return False, '', data

    @staticmethod
    def _get_document(cmis_gateway, object_id):
        """Fetch an object by path when *object_id* contains a "/", else by id."""
        if '/' in object_id:
            return cmis_gateway.get_object_by_path(object_id)
        return cmis_gateway.get_object(object_id)

    @endpoint(
        description=_('Get file'),
        perm='can_access',
        parameters={
            'object_id': {
                'description': _('Object ID of file (can also be a path)'),
            }
        },
    )
    def getfile(self, request, object_id):
        """Return the content stream of a document as an HttpResponse.

        The content type is the document's ``cmis:contentStreamMimeType``
        property, or ``application/octet-stream`` when that key is missing.
        """
        with self.get_cmis_gateway() as cmis_gateway:
            doc = self._get_document(cmis_gateway, object_id)
            try:
                mime_type = doc.properties['cmis:contentStreamMimeType']
            except KeyError:
                mime_type = 'application/octet-stream'
            bytes_io = doc.getContentStream()
            return HttpResponse(bytes_io, content_type=mime_type)

    @endpoint(
        description=_('Get file metadata'),
        perm='can_access',
        parameters={
            'object_id': {
                'description': _('Object ID of file (can also be a path)'),
            }
        },
    )
    def getmetadata(self, request, object_id):
        """Return the document properties as a nested dictionary.

        Property names are split on ":" and each prefix becomes a nesting
        level, e.g. ``'cmis:name'`` is stored as ``{'cmis': {'name': ...}}``.
        The result is wrapped as ``{'data': metadata}``.
        """
        with self.get_cmis_gateway() as cmis_gateway:
            doc = self._get_document(cmis_gateway, object_id)
            metadata = {}
            for key, value in doc.properties.items():
                *parents, leaf = key.split(':')
                node = metadata
                for part in parents:
                    node = node.setdefault(part, {})
                node[leaf] = value
            return {'data': metadata}


def wrap_cmis_error(f):
    """Decorator converting connection and cmislib errors raised by *f* into
    APIError, keeping the original exception as the cause."""

    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except (urllib2.URLError, httplib2.HttpLib2Error) as e:
            # FIXME urllib2 still used for cmislib 0.5 compat
            raise APIError("connection error: %s" % e) from e
        except PermissionDeniedException as e:
            raise APIError("permission denied: %s" % e) from e
        except UpdateConflictException as e:
            raise APIError("update conflict: %s" % e) from e
        except InvalidArgumentException as e:
            raise APIError("invalid property name: %s" % e) from e
        except CmisException as e:
            # Must stay last: the more specific handlers above are tried first.
            raise APIError("cmis binding error: %s" % e) from e

    return wrapper


class CMISGateway:
    """Small facade over a cmislib client's default repository."""

    def __init__(self, cmis_endpoint, username, password, logger):
        self._cmis_client = CmisClient(cmis_endpoint, username, password)
        self._logger = logger

    @cached_property
    def repo(self):
        """Default repository of the client (looked up once per gateway)."""
        return self._cmis_client.defaultRepository

    def _get_or_create_folder(self, file_path):
        """Return the folder at *file_path*, creating missing levels.

        The full path is tried first; only when it is not found is the path
        walked segment by segment from the root folder, creating each segment
        that does not exist yet.
        """
        try:
            self._logger.debug("searching '%s'", file_path)
            res = self.repo.getObjectByPath(file_path)
            self._logger.debug("'%s' found", file_path)
            return res
        except ObjectNotFoundException:
            self._logger.debug("'%s' not found", file_path)
            basepath = ""
            folder = self.repo.rootFolder
            for path_part in file_path.strip('/').split('/'):
                basepath += '/%s' % path_part
                try:
                    self._logger.debug("searching '%s'", basepath)
                    folder = self.repo.getObjectByPath(basepath)
                    self._logger.debug("'%s' found", basepath)
                except ObjectNotFoundException:
                    self._logger.debug("'%s' not found", basepath)
                    folder = folder.createFolder(path_part)
                    self._logger.debug("create folder '%s'", basepath)
            return folder

    @wrap_cmis_error
    def create_doc(
        self, file_name, file_path, file_byte_content, content_type=None, object_type=None, properties=None
    ):
        """Create document *file_name* in folder *file_path* and return it.

        The folder is created when missing. When *object_type* is given it is
        sent as the ``cmis:objectTypeId`` property. Errors are converted to
        APIError by ``wrap_cmis_error``.
        """
        folder = self._get_or_create_folder(file_path)
        # Work on a copy so the caller's dictionary is never modified.
        properties = dict(properties or {})
        if object_type:
            properties['cmis:objectTypeId'] = object_type
        return folder.createDocument(
            file_name, contentFile=BytesIO(file_byte_content), contentType=content_type, properties=properties
        )

    @wrap_cmis_error
    def get_object_by_path(self, file_path):
        """Return the repository object located at *file_path*."""
        return self.repo.getObjectByPath(file_path)

    @wrap_cmis_error
    def get_object(self, object_id):
        """Return the repository object identified by *object_id*."""
        return self.repo.getObject(object_id)


# Mock API from cmislib.net.RESTService
class RESTService:
    """Stand-in for cmislib's REST service that sends every request through
    the ``requests`` attribute of the given resource."""

    def __init__(self, resource):
        self.resource = resource

    def request(self, method, url, username, password, body=None, content_type=None, **kwargs):
        """Send one HTTP request and return ``({'status': '<code>'}, content)``.

        A ``headers`` keyword, if present, is used as the request headers; all
        remaining keyword arguments are appended to *url* as query parameters.
        Basic credentials are sent only when a username or password is given.
        """
        if username or password:
            auth = (username, password)
        else:
            auth = None
        # Copy so the caller's dictionary is never modified; also accepts None.
        headers = dict(kwargs.pop('headers', None) or {})
        if kwargs:
            url = url + ('&' if '?' in url else '?') + urlencode(kwargs)
        if content_type:
            headers['Content-Type'] = content_type
        response = self.resource.requests.request(
            method=method, url=url, auth=auth, headers=headers, data=body
        )
        return {'status': str(response.status_code)}, response.content

    def get(self, url, username=None, password=None, **kwargs):
        """Send a GET request; see ``request`` for the return value."""
        return self.request('GET', url, username, password, **kwargs)

    def delete(self, url, username=None, password=None, **kwargs):
        """Send a DELETE request; see ``request`` for the return value."""
        return self.request('DELETE', url, username, password, **kwargs)

    def put(self, url, payload, contentType, username=None, password=None, **kwargs):
        """Send a PUT request with *payload* as body and *contentType* as its type."""
        return self.request('PUT', url, username, password, body=payload, content_type=contentType, **kwargs)

    def post(self, url, payload, contentType, username=None, password=None, **kwargs):
        """Send a POST request with *payload* as body and *contentType* as its type."""
        return self.request('POST', url, username, password, body=payload, content_type=contentType, **kwargs)