misc: normalize phone numbers for full text search (#72773) #123

Merged
fpeters merged 1 commit from wip/72773-fts-phone-numbers into main 2023-02-24 07:00:48 +01:00
7 changed files with 82 additions and 10 deletions

View File

@ -22,7 +22,7 @@ from wcs.formdef import FormDef
from wcs.qommon import force_str
from wcs.qommon.http_request import HTTPRequest
from wcs.qommon.misc import file_digest
from wcs.qommon.storage import atomic_write
from wcs.qommon.storage import FtsMatch, atomic_write
from wcs.qommon.substitution import CompatibilityNamesDict
from wcs.qommon.template import Template
from wcs.qommon.upload_storage import PicklableUpload
@ -4413,3 +4413,38 @@ def test_merged_roles_dict_compat(pub, local_user):
formdata.just_created()
formdata.workflow_roles = {'_receiver': 2}
formdata.store()
def test_fts_phone(pub):
    """Check full text search on a phone field, using several spellings of the number."""
    FormDef.wipe()
    formdef = FormDef()
    formdef.name = 'test'
    formdef.fields = [
        fields.StringField(id='1', label='phone', validation={'type': 'phone'}),
        fields.StringField(id='2', label='other'),
    ]
    formdef.store()

    def count_matches(query):
        # number of stored formdata matched by a full text search on `query`
        return formdef.data_class().count([FtsMatch(query)])

    formdata = formdef.data_class()()
    formdata.data = {'1': '01 23 45 67 89', '2': 'foo'}
    formdata.just_created()
    formdata.store()

    # (search string, expected number of matching formdata)
    expectations = [
        ('01 23 45 67 89', 1),
        ('0123456789', 1),
        ('+33123456789', 1),
        ('+33(0)123456789', 1),
        ('+33(0)123456789 foo', 1),
        ('+33(0)123456789 bar', 0),
        ('foo +33(0)123456789', 1),
        ('bar +33(0)123456789', 0),
    ]
    for query, expected in expectations:
        assert count_matches(query) == expected, query

    # store a number written with a +32 prefix; its national spelling is not
    # found until the default country code of the site is switched to BE.
    formdata.data = {'1': '+32 2 345 67 89', '2': 'foo'}
    formdata.store()
    assert count_matches('023456789') == 0

    pub.load_site_options()
    if not pub.site_options.has_section('options'):
        pub.site_options.add_section('options')
    pub.site_options.set('options', 'default-country-code', 'BE')
    assert count_matches('023456789') == 1

View File

@ -72,7 +72,7 @@ class I18nDirectory(Directory):
criterias.append(Equal('translatable', not (bool(get_request().form.get('non_translatable')))))
if get_request().form.get('q'):
search_term = get_request().form.get('q')
criterias.append(Or([ILike('string', search_term), FtsMatch(search_term)]))
criterias.append(Or([ILike('string', search_term), FtsMatch(search_term, extra_normalize=False)]))
offset = misc.get_int_or_400(get_request().form.get('offset', 0))
limit = misc.get_int_or_400(get_request().form.get('limit', 20))
@ -299,7 +299,7 @@ class ExportAfterJob(AfterJob):
criterias = []
if self.q:
criterias.append(Or([ILike('string', self.q), FtsMatch(self.q)]))
criterias.append(Or([ILike('string', self.q), FtsMatch(self.q, extra_normalize=False)]))
self.total_count = TranslatableMessage.count(criterias)

View File

@ -1059,6 +1059,11 @@ class WidgetField(Field):
return [self.convert_value_to_str(element)]
return [element]
def get_fts_value(self, data, **kwargs):
    """Return the entry of *data* to index for full text search.

    The ``<id>_display`` entry is used when the field stores a display
    value, otherwise the entry stored under the field id; None is returned
    when the key is missing from *data*.
    """
    key = ('%s_display' % self.id) if self.store_display_value else str(self.id)
    return data.get(key)
field_classes = []
field_types = []
@ -1348,6 +1353,17 @@ class StringField(WidgetField):
return None
return str(value)
def get_fts_value(self, data, **kwargs):
    """Return the text to index for full text search for this field.

    Starts from the value returned by the parent class. When the field has
    a validation type whose entry in ``ValidationWidget.validation_methods``
    provides a ``normalize_for_fts`` callable, the normalized form is
    appended to the original value (space separated) so both get indexed.
    """
    value = super().get_fts_value(data, **kwargs)
    if not value or not self.validation:
        return value
    # .get() so a validation dict that has no 'type' key is simply ignored
    # instead of raising KeyError while indexing.
    validation_type = self.validation.get('type')
    if not validation_type:
        return value
    validation_method = ValidationWidget.validation_methods.get(validation_type)
    normalize = validation_method.get('normalize_for_fts') if validation_method else None
    if normalize:
        # index both original and normalized value
        # in the case of phone numbers this makes sure the "international/E164"
        # format (ex: +33199001234) is indexed.
        value = '%s %s' % (value, normalize(value))
    return value
def migrate(self):
changed = super().migrate()
if isinstance(self.validation, str): # 2019-08-10

View File

@ -1112,6 +1112,7 @@ class ValidationWidget(CompositeWidget):
'regex': r'\+?[-\(\)\d\.\s/]+',
'error_message': _('Invalid phone number'),
'html_input_type': 'tel',
'normalize_for_fts': misc.normalize_phone_number_for_fts,
},
),
(
@ -1121,6 +1122,7 @@ class ValidationWidget(CompositeWidget):
'function': 'validate_phone_fr',
'error_message': _('Invalid phone number'),
'html_input_type': 'tel',
'normalize_for_fts': misc.normalize_phone_number_for_fts,
},
),
(

View File

@ -857,6 +857,18 @@ def get_formatted_phone(number, country_code=None):
return number
def normalize_phone_number_for_fts(value):
    """Return *value* formatted as an E164 phone number, for full text search.

    The value is first parsed as is (no region), then with the
    ``default-country-code`` site option ('FR' when unset) as region. When
    neither attempt parses, *value* is returned unchanged.
    """
    fallback_region = get_publisher().get_site_option('default-country-code') or 'FR'
    for region in (None, fallback_region):
        try:
            parsed = phonenumbers.parse(value, region)
        except phonenumbers.NumberParseException:
            continue
        return phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.E164)
    return value
def validate_siren(string_value):
    """Return the result of validate_luhn() on *string_value* with a length of 9."""
    return validate_luhn(string_value, length=9)

View File

@ -326,8 +326,9 @@ class ILike(Criteria):
class FtsMatch(Criteria):
def __init__(self, value):
def __init__(self, value, extra_normalize=True):
    """Keep the search string (as returned by get_fts_value) and the extra_normalize flag."""
    self.value = self.get_fts_value(value)
    # only recorded here; will be handled in sql.FtsMatch
    self.extra_normalize = extra_normalize
@classmethod
def get_fts_value(cls, value):

View File

@ -62,7 +62,7 @@ from . import qommon
from .carddef import CardDef
from .formdef import FormDef
from .publisher import UnpicklerClass
from .qommon import _, get_cfg
from .qommon import _, get_cfg, misc
from .qommon.misc import JSONEncoder, strftime
from .qommon.storage import NothingToUpdate, _take, classonlymethod
from .qommon.storage import parse_clause as parse_storage_clause
@ -436,8 +436,16 @@ class ILike(Criteria):
class FtsMatch(Criteria):
def __init__(self, value):
def __init__(self, value, extra_normalize=True):
    """Build the criteria, normalizing a phone-like part of the search string.

    *value* goes through get_fts_value(). When *extra_normalize* is true
    and the result contains something that looks like a phone number, that
    part is replaced by what misc.normalize_phone_number_for_fts() returns
    for it.
    """
    self.value = self.get_fts_value(value)
    if not extra_normalize:
        return
    # The second alternative captures a "+" found at the start of the string
    # or after a non-word character: \b alone cannot match there (both sides
    # are non-word), so the "+" used to be left out of the captured number,
    # which was then normalized as a national number.
    phone_match = re.match(
        r'.*?(?P<phone>(?:\b\+?|(?<!\w)\+)[-\(\)\d\.\s/]{6,20}\b)', self.value
    )
    if not phone_match:
        return
    # if it looks like a phone number, normalize it to its
    # "international/E164" format to match what's stored in the
    # database.
    phone = phone_match.group('phone').strip()
    self.value = self.value.replace(phone, misc.normalize_phone_number_for_fts(phone))
@classmethod
def get_fts_value(cls, value):
@ -2814,10 +2822,8 @@ class SqlDataMixin(SqlMixin):
if not data.get(field.id):
continue
value = None
if field.key in ('string', 'text', 'email'):
value = data.get(field.id)
elif field.key in ('item', 'items'):
value = data.get('%s_display' % field.id)
if field.key in ('string', 'text', 'email', 'item', 'items'):
value = field.get_fts_value(data)
if value:
weight = 'C'
if field.include_in_listing: