Skip to content

Commit 5a92b2a

Browse files
medmunds and bitdancer committed
Move adjacent ew detection to get_phrase
Switch to @bitdancer's fix from review feedback. Recharacterize space between ews as fws after parsing in get_phrase (rather than peeking ahead after first ew in get_atom). Co-authored-by: R David Murray <rdmurray@bitdance.com>
1 parent 704c03f commit 5a92b2a

2 files changed

Lines changed: 12 additions & 15 deletions

File tree

Lib/email/_header_value_parser.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,18 +1352,6 @@ def get_atom(value):
13521352
atom.append(token)
13531353
if value and value[0] in CFWS_LEADER:
13541354
token, value = get_cfws(value)
1355-
# Peek ahead to ignore linear-white-space between adjacent encoded-words.
1356-
if (
1357-
atom[-1].token_type == 'encoded-word'
1358-
and value.startswith('=?')
1359-
and all(ws.token_type == 'fws' for ws in token) # not comments
1360-
):
1361-
try:
1362-
get_encoded_word(value)
1363-
except errors.HeaderParseError:
1364-
pass
1365-
else:
1366-
token = EWWhiteSpaceTerminal(token, 'fws')
13671355
atom.append(token)
13681356
return atom, value
13691357

@@ -1473,6 +1461,16 @@ def get_phrase(value):
14731461
else:
14741462
try:
14751463
token, value = get_word(value)
1464+
if (token[0].token_type == 'encoded-word'
1465+
and phrase
1466+
and phrase[-1].token_type == 'atom'
1467+
and len(phrase[-1]) > 1
1468+
and phrase[-1][-2].token_type == 'encoded-word'
1469+
and phrase[-1][-1].token_type == 'cfws'
1470+
and not phrase[-1][-1].comments
1471+
):
1472+
# linear ws between ews needs special handling...
1473+
phrase[-1][-1] = EWWhiteSpaceTerminal(phrase[-1], 'fws')
14761474
except errors.HeaderParseError:
14771475
if value[0] in CFWS_LEADER:
14781476
token, value = get_cfws(value)

Lib/test/test_email/test__header_value_parser.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1061,9 +1061,8 @@ def get_phrase_cfws_only_raises(self):
10611061
parser.get_phrase(' (foo) ')
10621062

10631063
def test_get_phrase_adjacent_ew(self):
1064-
# In structured headers, the requirement to ignore linear-white-space
1065-
# between adjacent encoded-words is actually implemented by get_atom.
1066-
# But it's easier to see the results by testing get_phrase.
1064+
# "'linear-white-space' that separates a pair of adjacent
1065+
# 'encoded-word's is ignored" (rfc2047 section 6.2)
10671066
self._test_get_x(parser.get_phrase, '=?ascii?q?Joi?= \t =?ascii?q?ned?=', 'Joined', 'Joined', [], '')
10681067

10691068
def test_get_phrase_adjacent_ew_different_encodings(self):

0 commit comments

Comments
 (0)