Skip to content

Commit 8c5d91b

Browse files
authored
Improve performance of name normalization (#533)
2 parents 1738b20 + 27169dc commit 8c5d91b

File tree

3 files changed

+50
-1
lines changed

3 files changed

+50
-1
lines changed

exercises.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,13 @@ def entrypoint_regexp_perf():
4545
input = '0' + ' ' * 2**10 + '0' # end warmup
4646

4747
re.match(importlib_metadata.EntryPoint.pattern, input)
48+
49+
50+
def normalize_perf():
    # Performance exercise for ``Prepared.normalize``; background in
    # python/cpython#143658
    # NOTE(review): the ``# end warmup`` marker below is presumably read by
    # the pytest-perf harness to separate setup from the measured code --
    # keep it on the import line; confirm against pytest-perf docs.
    import importlib_metadata  # end warmup

    # operation completes in < 1ms, so repeat it to get visibility
    # https://github.com/jaraco/pytest-perf/issues/12
    for _ in range(1000):
        importlib_metadata.Prepared.normalize('sample')

importlib_metadata/__init__.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -945,8 +945,15 @@ def __init__(self, name: str | None):
945945
def normalize(name):
    """
    PEP 503 normalization plus dashes as underscores.

    The name is lowercased and every run of ``-``, ``_`` and ``.``
    characters is reduced to a single underscore.

    Plain ``str`` methods are used instead of ``re.sub`` for
    performance benefits (see python/cpython#143658).
    """
    result = name.lower()
    # Map the alternative separators onto the underscore first so that
    # only one kind of run remains to be collapsed.
    for separator in ("-", "."):
        result = result.replace(separator, "_")
    # A single pass only merges adjacent pairs, so longer runs need
    # repeated passes until no doubled underscore is left.
    while "__" in result:
        result = result.replace("__", "_")
    return result
950957

951958
@staticmethod
952959
def legacy_normalize(name):

tests/test_api.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from importlib_metadata import (
77
Distribution,
88
PackageNotFoundError,
9+
Prepared,
910
distribution,
1011
entry_points,
1112
files,
@@ -317,3 +318,34 @@ class InvalidateCache(unittest.TestCase):
317318
def test_invalidate_cache(self):
318319
# No externally observable behavior, but ensures test coverage...
319320
importlib.invalidate_caches()
321+
322+
323+
class PreparedTests(unittest.TestCase):
    """Checks for ``Prepared.normalize`` over a table of input names."""

    @fixtures.parameterize(
        # Simple
        dict(input='sample', expected='sample'),
        # Mixed case
        dict(input='Sample', expected='sample'),
        dict(input='SAMPLE', expected='sample'),
        dict(input='SaMpLe', expected='sample'),
        # Separator conversions
        dict(input='sample-pkg', expected='sample_pkg'),
        dict(input='sample.pkg', expected='sample_pkg'),
        dict(input='sample_pkg', expected='sample_pkg'),
        # Multiple separators
        dict(input='sample---pkg', expected='sample_pkg'),
        dict(input='sample___pkg', expected='sample_pkg'),
        dict(input='sample...pkg', expected='sample_pkg'),
        # Mixed separators
        dict(input='sample-._pkg', expected='sample_pkg'),
        dict(input='sample_.-pkg', expected='sample_pkg'),
        # Complex
        dict(input='Sample__Pkg-name.foo', expected='sample_pkg_name_foo'),
        dict(input='Sample__Pkg.name__foo', expected='sample_pkg_name_foo'),
        # Uppercase with separators
        dict(input='SAMPLE-PKG', expected='sample_pkg'),
        dict(input='Sample.Pkg', expected='sample_pkg'),
        dict(input='SAMPLE_PKG', expected='sample_pkg'),
        # Boundaries: empty name and separators at either end
        dict(input='', expected=''),
        dict(input='-sample', expected='_sample'),
        dict(input='sample.', expected='sample_'),
        dict(input='--sample..', expected='_sample_'),
        # Long runs that cannot be condensed in a single pass
        dict(input='sample_____pkg', expected='sample_pkg'),
        dict(input='sample.-_.-_.pkg', expected='sample_pkg'),
    )
    def test_normalize(self, input, expected):
        """``Prepared.normalize(input)`` yields ``expected``."""
        self.assertEqual(Prepared.normalize(input), expected)

0 commit comments

Comments
 (0)