# wcs/wcs/statistics/views.py

# 705 lines
# 29 KiB
# Python

# w.c.s. - web application for online forms
# Copyright (C) 2005-2021 Entr'ouvert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
import collections
import time
from django.http import HttpResponseBadRequest, HttpResponseForbidden, JsonResponse
from django.urls import reverse
from django.views.generic import View
from wcs import sql
from wcs.api_utils import is_url_signed
from wcs.backoffice.data_management import CardPage
from wcs.backoffice.management import FormPage
from wcs.carddef import CardDef
from wcs.categories import Category
from wcs.formdata import FormData
from wcs.formdef import FormDef
from wcs.qommon import _, misc, pgettext_lazy
from wcs.qommon.storage import Contains, Equal, GreaterOrEqual, Less, Null, Or, StrictNotEqual
class RestrictedView(View):
    """Base view that refuses any request for which ``is_url_signed()`` is false."""

    def dispatch(self, *args, **kwargs):
        """Run the regular dispatch for signed URLs, answer 403 Forbidden otherwise."""
        if is_url_signed():
            return super().dispatch(*args, **kwargs)
        return HttpResponseForbidden()
# JSON index of the statistics exposed by this module: for each statistic it
# gives a name, an absolute URL, an id and the filters it accepts.
class IndexView(RestrictedView):
def get(self, request, *args, **kwargs):
    """Return the JSON index of the statistics served by this module.

    The response is ``{'data': [...]}`` with one entry per statistic (forms
    count, cards count, and the resolution time of forms and of cards).
    Each entry carries its name, absolute URL, id and the list of filters
    it accepts.
    """
    categories = Category.select()
    categories.sort(key=lambda x: misc.simplify(x.name))
    category_options = [{'id': '_all', 'label': pgettext_lazy('categories', 'All')}] + [
        {'id': x.url_name, 'label': x.name} for x in categories
    ]
    channel_options = [{'id': '_all', 'label': pgettext_lazy('channel', 'All')}] + [
        {'id': key, 'label': label} for key, label in FormData.get_submission_channels().items()
    ]

    def absolute_url(url_name):
        # statistics URLs are advertised in absolute form
        return request.build_absolute_uri(reverse(url_name))

    def time_interval_filter():
        # forms and cards counts accept the exact same interval filter;
        # a fresh dictionary is built for each of them.
        return {
            'id': 'time_interval',
            'label': _('Interval'),
            'options': [
                {'id': 'month', 'label': _('Month')},
                {'id': 'year', 'label': _('Year')},
                {'id': 'weekday', 'label': _('Week day')},
                {'id': 'hour', 'label': _('Hour')},
                {'id': 'none', 'label': _('None')},
            ],
            'required': True,
            'default': 'month',
        }

    # the same card options are used by two statistics, compute them once
    card_options = self.get_form_options(CardDef, include_all_option=False)

    forms_count = {
        'name': _('Forms Count'),
        'url': absolute_url('api-statistics-forms-count'),
        'id': 'forms_counts',
        'filters': [
            time_interval_filter(),
            {
                'id': 'channel',
                'label': _('Channel'),
                'options': channel_options,
                'required': True,
                'default': '_all',
            },
            {
                'id': 'category',
                'label': _('Category'),
                'options': category_options,
                'required': True,
                'default': '_all',
                'deprecated': True,
                'deprecation_hint': _('Category should now be selected using the Form field below.'),
            },
            {
                'id': 'form',
                'label': _('Form'),
                'options': self.get_form_options(FormDef),
                'required': True,
                'default': '_all',
                'has_subfilters': True,
            },
        ],
    }
    cards_count = {
        'name': _('Cards Count'),
        'url': absolute_url('api-statistics-cards-count'),
        'id': 'cards_counts',
        'filters': [
            time_interval_filter(),
            {
                'id': 'form',
                'label': _('Card'),
                'options': card_options,
                'required': True,
                'has_subfilters': True,
            },
        ],
    }
    forms_resolution_time = {
        'name': _('Time between two statuses (forms)'),
        'url': absolute_url('api-statistics-resolution-time'),
        'id': 'resolution_time',
        'data_type': 'seconds',
        'filters': [
            {
                'id': 'form',
                'label': _('Form'),
                'options': self.get_form_options(FormDef, include_all_option=False),
                'required': True,
                'has_subfilters': True,
            },
        ],
    }
    cards_resolution_time = {
        'name': _('Time between two statuses (cards)'),
        'url': absolute_url('api-statistics-resolution-time-cards'),
        'id': 'resolution_time_cards',
        'data_type': 'seconds',
        'filters': [
            {
                'id': 'form',
                'label': _('Card'),
                'options': card_options,
                'required': True,
                'has_subfilters': True,
            },
        ],
    }
    return JsonResponse(
        {'data': [forms_count, cards_count, forms_resolution_time, cards_resolution_time]}
    )
@staticmethod
def get_form_options(formdef_class, include_all_option=True):
    """Build the options of a "form" filter for ``formdef_class``.

    When no definition has a category, a flat list of ``{'id', 'label'}``
    options is returned. Otherwise the result is a list of
    ``(group label, options)`` pairs, one per category sorted by simplified
    name, followed by a "Misc" group for definitions without category.

    With ``include_all_option`` an "All Forms" option is put first and each
    category group starts with an option selecting the whole category.
    """
    all_forms_option = [{'id': '_all', 'label': _('All Forms')}]

    forms = formdef_class.select(lightweight=True)
    forms.sort(key=lambda x: misc.simplify(x.name))

    categorised = [formdef for formdef in forms if formdef.category]
    if not categorised:
        flat_options = [{'id': formdef.url_name, 'label': formdef.name} for formdef in forms]
        if include_all_option:
            return all_forms_option + flat_options
        return flat_options

    options_by_category = collections.defaultdict(list)
    for formdef in categorised:
        category = formdef.category
        if include_all_option and category.name not in options_by_category:
            # first form seen for this category: open the group with the
            # option selecting every form of the category
            options_by_category[category.name] = [
                {
                    'id': 'category:' + category.url_name,
                    'label': _('All forms of category %s') % category.name,
                }
            ]
        options_by_category[category.name].append({'id': formdef.url_name, 'label': formdef.name})

    grouped_options = sorted(options_by_category.items(), key=lambda item: misc.simplify(item[0]))

    uncategorised = [
        {'id': formdef.url_name, 'label': formdef.name} for formdef in forms if not formdef.category
    ]
    if uncategorised:
        grouped_options.append((_('Misc'), uncategorised))

    if include_all_option:
        grouped_options.insert(0, (None, all_forms_option))
    return grouped_options
# Statistics endpoint returning submission counts, optionally restricted to a
# form or a category, filtered by channel and grouped by a field.
class FormsCountView(RestrictedView):
# definition class used to look up the "form" slug, and backoffice page class
# instantiated on the selected definition (see get())
formdef_class = FormDef
formpage_class = FormPage
# when true a missing "form" parameter defaults to '_all', otherwise to
# '_nothing' (see get())
has_global_count_support = True
# label of the series returned when results are not grouped over time
label = _('Forms Count')
# Build the keyword arguments given to the sql.get_*_totals() function
# selected by the "time_interval" parameter, then return its result as
# {'data': {'x_labels': ..., 'series': ..., 'subfilters': ...}, 'err': 0}.
# A 400 response is returned for an unknown form, category or time_interval.
def get(self, request, *args, **kwargs):
time_interval = request.GET.get('time_interval', 'month')
totals_kwargs = {
'period_start': request.GET.get('start'),
'period_end': request.GET.get('end'),
'criterias': [],
}
category_slug = request.GET.get('category', '_all')
# without global count support the default slug is '_nothing', which is then
# looked up like any other form slug (presumably ending in "invalid form")
formdef_slug = request.GET.get('form', '_all' if self.has_global_count_support else '_nothing')
group_by = request.GET.get('group-by')
group_labels = {}
subfilters = []
# a specific form was requested: add its criterias, grouping and subfilters
if formdef_slug != '_all' and not formdef_slug.startswith('category:'):
try:
formdef = self.formdef_class.get_by_urlname(formdef_slug, ignore_migration=True)
except KeyError:
return HttpResponseBadRequest('invalid form')
form_page = self.formpage_class(formdef=formdef, update_breadcrumbs=False)
self.set_formdef_parameters(totals_kwargs, formdef)
totals_kwargs['criterias'].extend(self.get_filters_criterias(formdef, form_page))
self.set_group_by_parameters(group_by, formdef, form_page, totals_kwargs, group_labels)
subfilters = self.get_subfilters(form_page, group_by)
else:
# all forms, or all forms of a category: drafts are excluded
totals_kwargs['criterias'].append(StrictNotEqual('status', 'draft'))
# a "category:<slug>" form value takes precedence over the "category" parameter
if formdef_slug.startswith('category:'):
category_slug = formdef_slug.split(':', 1)[1]
# NOTE(review): indentation is lost in this copy, so it cannot be told from
# here whether the category filter below applies only in the else branch
# above or to every request - confirm against the repository.
if category_slug != '_all':
try:
category = Category.get_by_urlname(category_slug)
except KeyError:
if category_slug.isdigit(): # legacy
totals_kwargs['criterias'].append(Equal('category_id', category_slug))
else:
return HttpResponseBadRequest('invalid category')
else:
totals_kwargs['criterias'].append(Equal('category_id', category.id))
channel = request.GET.get('channel', '_all')
# the 'web' channel also matches empty and NULL submission_channel values
if channel == 'web':
totals_kwargs['criterias'].append(
Or(
[
Equal('submission_channel', 'web'),
Equal('submission_channel', ''),
Null('submission_channel'),
]
)
)
elif channel != '_all':
totals_kwargs['criterias'].append(Equal('submission_channel', channel))
# totals function to call for each accepted time_interval value
time_interval_methods = {
'month': sql.get_monthly_totals,
'year': sql.get_yearly_totals,
'weekday': sql.get_weekday_totals,
'hour': sql.get_hour_totals,
'none': sql.get_global_totals,
}
if time_interval in time_interval_methods:
totals = time_interval_methods[time_interval](**totals_kwargs)
else:
return HttpResponseBadRequest('invalid time_interval parameter')
# 'group_by' is only present when set_group_by_parameters() added it
if 'group_by' not in totals_kwargs:
# no grouping: each totals row is used as (x label, value)
x_labels = [x[0] for x in totals]
series = [{'label': self.label, 'data': [x[1] for x in totals]}]
elif time_interval == 'none':
x_labels, series = self.get_grouped_data(totals, group_labels)
else:
x_labels, series = self.get_grouped_time_data(totals, group_labels)
return JsonResponse(
{'data': {'x_labels': x_labels, 'series': series, 'subfilters': subfilters}, 'err': 0}
)
def set_formdef_parameters(self, totals_kwargs, formdef):
    """Append to ``totals_kwargs['criterias']`` the criterias selecting ``formdef``."""
    totals_kwargs['criterias'].extend(
        [
            # set formdef_klass to None to deactivate switching to formdef specific table
            Equal('formdef_klass', None),
            Equal('formdef_id', formdef.id),
        ]
    )
def transform_criteria(self, criteria):
    """Turn a field criteria into a lookup in the ``statistics_data`` column.

    Criterias without a ``field`` attribute are returned untouched. For the
    others an ``sql.ArrayContains`` on ``statistics_data->'<varname>'`` is
    returned; booleans are written ``true``/``false`` and any other value is
    wrapped in double quotes.
    """
    if hasattr(criteria, 'field'):
        attribute = "statistics_data->'%s'" % criteria.field.varname
        value = (
            str(criteria.value).lower() if isinstance(criteria.value, bool) else '"%s"' % criteria.value
        )
        return sql.ArrayContains(attribute, value)
    return criteria
def get_filters_criterias(self, formdef, form_page):
    """Return the criterias matching the field and status filters of the request.

    Field criterias come from ``form_page.get_criterias_from_query()`` and go
    through ``transform_criteria()``. The ``filter-status`` parameter accepts
    ``pending``, ``done`` or a status id of the workflow; when it selects
    nothing, drafts are excluded instead.
    """
    criterias = [
        self.transform_criteria(criteria)
        for criteria in form_page.get_criterias_from_query(statistics_fields_only=True)
    ]

    applied_filters = None
    selected_status = self.request.GET.get('filter-status')
    if selected_status and selected_status != '_all':
        if selected_status == 'pending':
            applied_filters = ['wf-%s' % x.id for x in formdef.workflow.get_not_endpoint_status()]
        elif selected_status == 'done':
            applied_filters = ['wf-%s' % x.id for x in formdef.workflow.get_endpoint_status()]
        else:
            try:
                formdef.workflow.get_status(selected_status)
            except KeyError:
                # unknown status id: behave as if no status was selected
                pass
            else:
                applied_filters = ['wf-%s' % selected_status]

    if not applied_filters:
        return [StrictNotEqual('status', 'draft')] + criterias
    criterias.append(Contains('status', applied_filters))
    return criterias
def get_subfilters(self, form_page, group_by):
    """Describe the filters available once a form has been selected.

    One filter is produced per field flagged ``include_in_statistics`` that
    has a ``contextual_varname`` and is of type status, item, items or bool.
    When at least one of them is not a block field, a "group-by" selector is
    put first, followed by a "hide_none_label" toggle if ``group_by``
    designates a field returned by ``get_group_by_field()``.
    """
    field_filters = []
    group_by_field_options = []
    for field in form_page.get_formdef_fields():
        if not (getattr(field, 'include_in_statistics', False) and field.contextual_varname):
            continue

        field.required = False
        if field.type == 'status':
            waitpoint_status = form_page.formdef.workflow.get_waitpoint_status()
            if not waitpoint_status:
                continue
            field.required = True
            field.default_filter_value = '_all'
            options = [
                ('_all', _('All')),
                ('pending', pgettext_lazy('statistics', 'Open')),
                ('done', pgettext_lazy('statistics', 'Done')),
            ]
            options.extend((status.id, status.name) for status in waitpoint_status)
        elif field.type in ('item', 'items'):
            options = form_page.get_item_filter_options(field, selected_filter='all', anonymised=True)
            if not options:
                continue
        elif field.type == 'bool':
            options = [('true', _('Yes')), ('false', _('No'))]
        else:
            continue

        description = {
            'id': 'filter-%s' % field.contextual_varname,
            'label': field.label,
            # options are indexed rather than unpacked: only their first
            # two items are used
            'options': [{'id': option[0], 'label': option[1]} for option in options],
            'required': field.required,
        }
        if hasattr(field, 'default_filter_value'):
            description['default'] = field.default_filter_value
        field_filters.append(description)

        if not hasattr(field, 'block_field'):
            group_by_field_options.append({'id': field.contextual_varname, 'label': field.label})

    if not group_by_field_options:
        return field_filters

    leading_filters = [
        {
            'id': 'group-by',
            'label': _('Group by'),
            'options': [
                {'id': 'channel', 'label': _('Channel')},
                {'id': 'simple-status', 'label': _('Simplified status')},
            ]
            + group_by_field_options,
            'has_subfilters': True,
        }
    ]
    if group_by not in (None, 'channel', 'simple-status', 'status'):
        group_by_field = self.get_group_by_field(form_page, group_by)
        if group_by_field:
            leading_filters.append(
                {
                    'id': 'hide_none_label',
                    'label': _('Ignore forms where "%s" is empty.') % group_by_field.label,
                    'options': [{'id': 'true', 'label': _('Yes')}, {'id': 'false', 'label': _('No')}],
                    'required': True,
                    'default': 'false',
                }
            )
    return leading_filters + field_filters
def get_group_by_field(self, form_page, group_by):
    """Return the statistics field whose ``contextual_varname`` is ``group_by``.

    Only the first matching field flagged ``include_in_statistics`` is
    considered. ``None`` is returned when there is no such field or when it
    is a block field.
    """
    for field in form_page.get_formdef_fields():
        if getattr(field, 'contextual_varname', None) != group_by:
            continue
        if not getattr(field, 'include_in_statistics', False):
            continue
        # block fields are not supported
        return None if hasattr(field, 'block_field') else field
    return None
# Return a {group value: display label} mapping for the requested grouping:
# workflow statuses (plain or simplified), booleans, or the options of an
# item/items field.
def get_group_labels(self, group_by_field, formdef, form_page, group_by):
group_labels = {}
if group_by == 'status':
# keys use the 'wf-<status id>' form
group_labels = {'wf-%s' % status.id: status.name for status in formdef.workflow.possible_status}
elif group_by == 'simple-status':
# the first possible status is labelled "New"; the others are "Done" when
# they are endpoints and "In progress" otherwise
group_labels['wf-%s' % formdef.workflow.possible_status[0].id] = _('New')
for status in formdef.workflow.possible_status[1:]:
if status.is_endpoint():
group_labels['wf-%s' % status.id] = _('Done')
else:
group_labels['wf-%s' % status.id] = _('In progress')
elif group_by_field.type == 'bool':
group_labels = {True: _('Yes'), False: _('No')}
elif group_by_field.type in ('item', 'items'):
options = form_page.get_item_filter_options(
group_by_field, selected_filter='all', anonymised=True
)
# item 0 of each option is used as group value, item 1 as its label
group_labels = {option[0]: option[1] for option in options}
# NOTE(review): indentation is lost in this copy, so it cannot be told from
# here whether the None label below is added for every grouping or only for
# item/items fields - confirm against the repository.
group_labels[None] = _('None')
return group_labels
# Translate the "group-by" request value into totals_kwargs['group_by'] (plus
# related entries) and fill group_labels in place. Nothing is changed when
# group_by is empty or does not designate a usable field.
def set_group_by_parameters(self, group_by, formdef, form_page, totals_kwargs, group_labels):
if not group_by:
return
if group_by == 'channel':
totals_kwargs['group_by'] = 'submission_channel'
totals_kwargs['null_values'] = ('web', '')
group_labels.update(FormData.get_submission_channels())
# missing and empty channels are both labelled as web
group_labels[None] = _('Web')
group_labels[''] = _('Web')
return
elif group_by == 'simple-status':
# simplified status grouping relies on the regular status field
group_by_field = self.get_group_by_field(form_page, 'status')
else:
group_by_field = self.get_group_by_field(form_page, group_by)
if not group_by_field:
return
if group_by_field.type == 'status':
totals_kwargs['group_by'] = 'status'
else:
totals_kwargs['group_by'] = "statistics_data->'%s'" % group_by_field.varname
# NOTE(review): indentation is lost in this copy, so it cannot be told from
# here whether the hide_none_label test below belongs to the else branch
# above or applies to status grouping too - confirm against the repository.
if self.request.GET.get('hide_none_label') == 'true':
# rows whose group value equals '[]' are left out
totals_kwargs['criterias'].append(StrictNotEqual(totals_kwargs['group_by'], '[]'))
group_labels.update(self.get_group_labels(group_by_field, formdef, form_page, group_by))
def get_grouped_time_data(self, totals, group_labels):
    """Turn grouped totals over time into x labels and one series per label.

    ``totals`` rows are ``(time, group, total)``; rows of length two only
    declare a time slot. ``group`` may be a single value or a list of
    values, an empty list being counted under ``None``. Each series holds
    one value per time slot, ``None`` where the group is absent.
    """
    # time1: {group1: total_11, group2: total_12}, time2: {group1: total_21}
    counters_by_time = collections.OrderedDict()
    seen_groups = set()

    for row in totals:
        counter = counters_by_time.setdefault(row[0], collections.Counter())
        if len(row) == 2:
            # ignore empty value used to fill time gaps
            continue

        group_value = row[1]
        if isinstance(group_value, list):
            row_groups = group_value or [None]
        else:
            row_groups = [group_value]

        for group in row_groups:
            counter[group] += row[2]
            seen_groups.add(group)

    # group1: [total_11, total_21], group2: [total_12, None]
    series_by_group = {
        group: [counter.get(group) for counter in counters_by_time.values()] for group in seen_groups
    }

    totals_by_label = self.get_totals_by_label(series_by_group, group_labels)
    x_labels = list(counters_by_time)
    series = [{'label': label, 'data': data} for label, data in totals_by_label.items()]
    return x_labels, series
def get_grouped_data(self, totals, group_labels):
    """Turn grouped global totals into x labels and a single series.

    ``totals`` rows are ``(group, total)`` where ``group`` is a single value
    or a list of values, an empty list being counted under ``None``. Labels
    become the x axis and the only series is named after ``self.label``.
    """
    counts = collections.Counter()
    for group_value, count in totals:
        if isinstance(group_value, list):
            row_groups = group_value or [None]
        else:
            row_groups = [group_value]
        for group in row_groups:
            counts[group] += count

    totals_by_label = self.get_totals_by_label(counts, group_labels)
    series = [{'label': self.label, 'data': list(totals_by_label.values())}]
    return list(totals_by_label), series
def get_totals_by_label(self, totals_by_group, group_labels):
    """Re-key totals by display label, merging groups that share a label.

    ``totals_by_group`` maps a group value to either a number or a list of
    numbers/``None``. The result maps labels to totals, ordered like
    ``group_labels``, then groups without a label (kept under their own
    value), then the ``None`` group. When several groups share a label
    their totals are added, a merged total of zero being replaced by
    ``None``.
    """
    group_label_indexes = {group: i for i, group in enumerate(group_labels)}

    def get_group_order(group):
        if group is None:
            # None choice should always be last
            return len(group_label_indexes) + 1
        if group not in group_label_indexes:
            # unknown group should be last but before none
            return len(group_label_indexes)
        return group_label_indexes[group]

    totals_by_label = {}
    for group in sorted(totals_by_group, key=get_group_order):
        label = group_labels.get(group, group)
        group_totals = totals_by_group[group]
        if label not in totals_by_label:
            totals_by_label[label] = group_totals
            continue

        merged = totals_by_label[label]
        if isinstance(merged, list):
            # element-wise sum; the list stored for the label is updated in place
            for i, (x, y) in enumerate(zip(group_totals, merged)):
                merged[i] = ((x or 0) + (y or 0)) or None
        else:
            totals_by_label[label] = ((merged or 0) + (group_totals or 0)) or None
    return totals_by_label
class CardsCountView(FormsCountView):
    """Counts for cards; a card definition must always be given (no global count)."""

    label = _('Cards Count')
    formdef_class = CardDef
    formpage_class = CardPage
    has_global_count_support = False

    def set_formdef_parameters(self, totals_kwargs, formdef):
        """Append to ``totals_kwargs['criterias']`` the criterias selecting the card definition."""
        totals_kwargs['criterias'].extend(
            [
                # formdef_klass is a fake criteria, it will be used in time interval functions
                # to switch to appropriate class, it must appear before formdef_id.
                Equal('formdef_klass', CardDef),
                Equal('formdef_id', formdef.id),
            ]
        )
# Statistics endpoint returning minimum, maximum, mean and median time spent
# between two workflow statuses for the data of one form.
class ResolutionTimeView(RestrictedView):
# definition class used to look up the "form" slug (see get())
formdef_class = FormDef
label = _('Time between two statuses (forms)')
def get(self, request, *args, **kwargs):
    """Return the resolution time statistics of the form given by the ``form`` parameter.

    Answers 400 ("invalid form") when the slug is unknown. Otherwise the
    payload holds the statistic names as x labels, a single series with
    their values and the status subfilters of the form.
    """
    formdef_slug = request.GET.get('form', '_nothing')
    try:
        formdef = self.formdef_class.get_by_urlname(formdef_slug, ignore_migration=True)
    except KeyError:
        return HttpResponseBadRequest('invalid form')

    results = self.get_statistics(formdef)
    payload = {
        'x_labels': [row[0] for row in results],
        'series': [{'label': _('Time between two statuses'), 'data': [row[1] for row in results]}],
        'subfilters': self.get_subfilters(formdef),
    }
    return JsonResponse({'data': payload, 'err': 0})
@staticmethod
def get_subfilters(formdef):
    """Describe the start and end status filters for ``formdef``.

    Both list the possible statuses of the workflow. The start filter
    defaults to the first one; the end filter leaves the first one out and
    offers (and defaults to) "any final status" instead.
    """
    status_options = [
        {'id': status.id, 'label': status.name} for status in formdef.workflow.possible_status
    ]

    start_filter = {
        'id': 'start_status',
        'label': _('Start status'),
        'options': status_options,
        'required': True,
        'default': status_options[0]['id'],
    }

    end_options = [{'id': 'done', 'label': _('Any final status')}]
    end_options.extend(status_options[1:])
    end_filter = {
        'id': 'end_status',
        'label': _('End status'),
        'options': end_options,
        'required': True,
        'default': 'done',
    }
    return [start_filter, end_filter]
def get_statistics(self, formdef):
    """Compute time statistics between two statuses over the data of ``formdef``.

    Request parameters: ``start``/``end`` bound ``receipt_time``;
    ``start_status`` falls back to the first possible status and
    ``end_status`` to every endpoint status when missing or unknown.

    Returns ``[]`` when no data reached an end status after the start
    status, else a list of ``(label, seconds)`` pairs for the minimum,
    maximum, mean and median durations.
    """
    params = self.request.GET

    criterias = [StrictNotEqual('status', 'draft')]
    if params.get('start'):
        criterias.append(GreaterOrEqual('receipt_time', params['start']))
    if params.get('end'):
        criterias.append(Less('receipt_time', params['end']))
    values = formdef.data_class().select(criterias)
    # load all evolutions in a single batch, to avoid as many query as
    # there are formdata when computing resolution times statistics.
    formdef.data_class().load_all_evolutions(values)

    start_status = params.get('start_status', formdef.workflow.possible_status[0].id)
    end_status = params.get('end_status', 'done')
    try:
        start_status = formdef.workflow.get_status(start_status)
    except KeyError:
        start_status = formdef.workflow.possible_status[0]

    end_statuses = None
    if end_status != 'done':
        try:
            end_statuses = {'wf-%s' % formdef.workflow.get_status(end_status).id}
        except KeyError:
            pass
    if not end_statuses:
        end_statuses = {'wf-%s' % status.id for status in formdef.workflow.get_endpoint_status()}

    durations = []
    for filled in values:
        start_time = None
        for evo in filled.evolution or []:
            if start_status and evo.status == 'wf-%s' % start_status.id:
                start_time = time.mktime(evo.time)
                continue
            if evo.status not in end_statuses:
                continue
            # first end status met: record a duration unless the start
            # status was expected and has not been seen before it
            if start_time or not start_status:
                start_time = start_time or time.mktime(filled.receipt_time)
                durations.append(time.mktime(evo.time) - start_time)
            break

    if not durations:
        return []

    durations.sort()
    count = len(durations)
    middle, is_odd = divmod(count, 2)
    mean = sum(durations) // count
    if is_odd:
        median = durations[middle]
    else:
        median = (durations[middle - 1] + durations[middle]) // 2

    return [
        (_('Minimum time'), durations[0]),
        (_('Maximum time'), durations[-1]),
        (_('Mean'), mean),
        (_('Median'), median),
    ]
class CardsResolutionTimeView(ResolutionTimeView):
    """Resolution time statistics looked up on card definitions."""

    formdef_class = CardDef
    label = _('Time between two statuses (cards)')