Skip to content

Commit bd4ab52

Browse files
serhiy-storchaka, miss-islington
authored and committed
[3.14] gh-145264: Do not ignore excess Base64 data after the first padded quad (GH-145267) (GH-146326)
Base64 decoder (see binascii.a2b_base64(), base64.b64decode(), etc) no longer ignores excess data after the first padded quad in non-strict (default) mode. Instead, in conformance with RFC 4648, it ignores the pad character, "=", if it is present before the end of the encoded data. (cherry picked from commit 4561f64) (cherry picked from commit e31c551) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
1 parent 43fe06b commit bd4ab52

File tree

3 files changed

+74
-60
lines changed

3 files changed

+74
-60
lines changed

Lib/test/test_binascii.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -143,17 +143,16 @@ def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
143143
_assertRegexTemplate(r'(?i)Excess padding', data, non_strict_mode_expected_result)
144144

145145
# Test excess data exceptions
146-
assertExcessData(b'ab==a', b'i')
147-
assertExcessData(b'ab===', b'i')
148-
assertExcessData(b'ab====', b'i')
149-
assertExcessData(b'ab==:', b'i')
150-
assertExcessData(b'abc=a', b'i\xb7')
151-
assertExcessData(b'abc=:', b'i\xb7')
152-
assertExcessData(b'ab==\n', b'i')
153-
assertExcessData(b'abc==', b'i\xb7')
154-
assertExcessData(b'abc===', b'i\xb7')
155-
assertExcessData(b'abc====', b'i\xb7')
156-
assertExcessData(b'abc=====', b'i\xb7')
146+
assertExcessPadding(b'ab===', b'i')
147+
assertExcessPadding(b'ab====', b'i')
148+
assertNonBase64Data(b'ab==:', b'i')
149+
assertExcessData(b'abc=a', b'i\xb7\x1a')
150+
assertNonBase64Data(b'abc=:', b'i\xb7')
151+
assertNonBase64Data(b'ab==\n', b'i')
152+
assertExcessPadding(b'abc==', b'i\xb7')
153+
assertExcessPadding(b'abc===', b'i\xb7')
154+
assertExcessPadding(b'abc====', b'i\xb7')
155+
assertExcessPadding(b'abc=====', b'i\xb7')
157156

158157
# Test non-base64 data exceptions
159158
assertNonBase64Data(b'\nab==', b'i')
@@ -175,6 +174,20 @@ def assertExcessPadding(data, non_strict_mode_expected_result: bytes):
175174
assertExcessPadding(b'abcd====', b'i\xb7\x1d')
176175
assertExcessPadding(b'abcd=====', b'i\xb7\x1d')
177176

177+
def test_base64_excess_data(self):
178+
# Test excess data exceptions
179+
def assertExcessData(data, expected):
180+
assert_regex = r'(?i)Excess data'
181+
data = self.type2test(data)
182+
with self.assertRaisesRegex(binascii.Error, assert_regex):
183+
binascii.a2b_base64(data, strict_mode=True)
184+
self.assertEqual(binascii.a2b_base64(data, strict_mode=False),
185+
expected)
186+
self.assertEqual(binascii.a2b_base64(data), expected)
187+
188+
assertExcessData(b'ab==c=', b'i\xb7')
189+
assertExcessData(b'ab==cd', b'i\xb7\x1d')
190+
assertExcessData(b'abc=d', b'i\xb7\x1d')
178191

179192
def test_base64errors(self):
180193
# Test base64 with invalid padding
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Base64 decoder (see :func:`binascii.a2b_base64`, :func:`base64.b64decode`, etc.) no
2+
longer ignores excess data after the first padded quad in non-strict
3+
(default) mode. Instead, in conformance with :rfc:`4648`, section 3.3, it now ignores
4+
the pad character, "=", if it is present before the end of the encoded data.

Modules/binascii.c

Lines changed: 46 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
383383
const unsigned char *ascii_data = data->buf;
384384
size_t ascii_len = data->len;
385385
binascii_state *state = NULL;
386-
char padding_started = 0;
387386

388387
/* Allocate the buffer */
389388
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
@@ -394,14 +393,6 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
394393
return NULL;
395394
unsigned char *bin_data_start = bin_data;
396395

397-
if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
398-
state = get_binascii_state(module);
399-
if (state) {
400-
PyErr_SetString(state->Error, "Leading padding not allowed");
401-
}
402-
goto error_end;
403-
}
404-
405396
int quad_pos = 0;
406397
unsigned char leftchar = 0;
407398
int pads = 0;
@@ -412,35 +403,34 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
412403
** the invalid ones.
413404
*/
414405
if (this_ch == BASE64_PAD) {
415-
padding_started = 1;
416-
417-
if (strict_mode && quad_pos == 0) {
418-
state = get_binascii_state(module);
419-
if (state) {
420-
PyErr_SetString(state->Error, "Excess padding not allowed");
421-
}
422-
goto error_end;
406+
pads++;
407+
if (quad_pos >= 2 && quad_pos + pads <= 4) {
408+
continue;
423409
}
424-
if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
425-
/* A pad sequence means we should not parse more input.
426-
** We've already interpreted the data from the quad at this point.
427-
** in strict mode, an error should raise if there's excess data after the padding.
428-
*/
429-
if (strict_mode && i + 1 < ascii_len) {
430-
state = get_binascii_state(module);
431-
if (state) {
432-
PyErr_SetString(state->Error, "Excess data after padding");
433-
}
434-
goto error_end;
435-
}
436-
437-
goto done;
410+
// See RFC 4648, section-3.3: "specifications MAY ignore the
411+
// pad character, "=", treating it as non-alphabet data, if
412+
// it is present before the end of the encoded data" and
413+
// "the excess pad characters MAY also be ignored."
414+
if (!strict_mode) {
415+
continue;
438416
}
439-
continue;
417+
if (quad_pos == 1) {
418+
/* Set an error below. */
419+
break;
420+
}
421+
state = get_binascii_state(module);
422+
if (state) {
423+
PyErr_SetString(state->Error,
424+
(quad_pos == 0 && i == 0)
425+
? "Leading padding not allowed"
426+
: "Excess padding not allowed");
427+
}
428+
goto error_end;
440429
}
441430

442431
this_ch = table_a2b_base64[this_ch];
443432
if (this_ch >= 64) {
433+
// See RFC 4648, section-3.3.
444434
if (strict_mode) {
445435
state = get_binascii_state(module);
446436
if (state) {
@@ -451,11 +441,14 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
451441
continue;
452442
}
453443

454-
// Characters that are not '=', in the middle of the padding, are not allowed
455-
if (strict_mode && padding_started) {
444+
// Characters that are not '=', in the middle of the padding, are
445+
// not allowed (except when they are). See RFC 4648, section-3.3.
446+
if (pads && strict_mode) {
456447
state = get_binascii_state(module);
457448
if (state) {
458-
PyErr_SetString(state->Error, "Discontinuous padding not allowed");
449+
PyErr_SetString(state->Error, (quad_pos + pads == 4)
450+
? "Excess data after padding"
451+
: "Discontinuous padding not allowed");
459452
}
460453
goto error_end;
461454
}
@@ -484,31 +477,35 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
484477
}
485478
}
486479

487-
if (quad_pos != 0) {
480+
if (quad_pos == 1) {
481+
/* There is exactly one extra valid, non-padding, base64 character.
482+
** This is an invalid length, as there is no possible input that
483+
** could be encoded into such a base64 string.
484+
*/
488485
state = get_binascii_state(module);
489-
if (state == NULL) {
490-
/* error already set, from get_binascii_state */
491-
} else if (quad_pos == 1) {
492-
/*
493-
** There is exactly one extra valid, non-padding, base64 character.
494-
** This is an invalid length, as there is no possible input that
495-
** could encoded into such a base64 string.
496-
*/
486+
if (state) {
497487
PyErr_Format(state->Error,
498488
"Invalid base64-encoded string: "
499489
"number of data characters (%zd) cannot be 1 more "
500490
"than a multiple of 4",
501491
(bin_data - bin_data_start) / 3 * 4 + 1);
502-
} else {
492+
}
493+
goto error_end;
494+
}
495+
496+
if (quad_pos != 0 && quad_pos + pads < 4) {
497+
state = get_binascii_state(module);
498+
if (state) {
503499
PyErr_SetString(state->Error, "Incorrect padding");
504500
}
505-
error_end:
506-
_PyBytesWriter_Dealloc(&writer);
507-
return NULL;
501+
goto error_end;
508502
}
509503

510-
done:
511504
return _PyBytesWriter_Finish(&writer, bin_data);
505+
506+
error_end:
507+
_PyBytesWriter_Dealloc(&writer);
508+
return NULL;
512509
}
513510

514511

0 commit comments

Comments
 (0)