Skip to content

Commit 89d8bbe

Browse files
author
Rémy Voet (ryv)
committed
gh-140746: Fix Thread.start() can hang forever
When a new thread is started (`Thread.start()`), the current thread waits for the new thread's `_started` signal (`self._started.wait()`). If the new thread doesn't have enough memory, it might crash before signaling its start to the parent thread, causing the parent thread to wait indefinitely (still in `Thread.start()`). To fix the issue, remove the _started python attribute from the Thread class and converted the logic at C level (PyEvent). A flaw of this method, it that the threading module will still contains the zombie thread into the _limbo dictionnary. We also change `Thread._delete()` to use `pop` to remove the thread from `_active`, as there is no guarantee that the thread exists in `_active[get_ident()]`, thus avoiding a potential `KeyError`. This can happen if `_bootstrap_inner` crashes before `_active[self._ident] = self` executes. We use `self._ident` because we know `set_ident()` has already been called. Moreover, remove the old comment in `_delete` because `_active_limbo_lock` became reentrant in commit. 243fd01.
1 parent 909f76d commit 89d8bbe

File tree

4 files changed

+60
-24
lines changed

4 files changed

+60
-24
lines changed

Lib/test/test_threading.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -215,11 +215,6 @@ def test_various_ops(self):
215215
self.assertRegex(repr(t), r'^<TestThread\(.*, initial\)>$')
216216
t.start()
217217

218-
if hasattr(threading, 'get_native_id'):
219-
native_ids = set(t.native_id for t in threads) | {threading.get_native_id()}
220-
self.assertNotIn(None, native_ids)
221-
self.assertEqual(len(native_ids), NUMTASKS + 1)
222-
223218
if verbose:
224219
print('waiting for all tasks to complete')
225220
for t in threads:
@@ -228,6 +223,12 @@ def test_various_ops(self):
228223
self.assertNotEqual(t.ident, 0)
229224
self.assertIsNotNone(t.ident)
230225
self.assertRegex(repr(t), r'^<TestThread\(.*, stopped -?\d+\)>$')
226+
227+
if hasattr(threading, 'get_native_id'):
228+
native_ids = set(t.native_id for t in threads) | {threading.get_native_id()}
229+
self.assertNotIn(None, native_ids)
230+
self.assertEqual(len(native_ids), NUMTASKS + 1)
231+
231232
if verbose:
232233
print('all tasks done')
233234
self.assertEqual(numrunning.get(), 0)
@@ -307,7 +308,7 @@ def f(mutex):
307308
# Issue gh-106236:
308309
with self.assertRaises(RuntimeError):
309310
dummy_thread.join()
310-
dummy_thread._started.clear()
311+
dummy_thread._os_thread_handle._set_done()
311312
with self.assertRaises(RuntimeError):
312313
dummy_thread.is_alive()
313314
# Busy wait for the following condition: after the thread dies, the
@@ -1457,6 +1458,32 @@ def run_in_bg():
14571458
self.assertEqual(err, b"")
14581459
self.assertEqual(out.strip(), b"Exiting...")
14591460

1461+
def test_memory_error_bootstrap(self):
1462+
# gh-140746: Test that Thread.start() doesn't hang indefinitely if
1463+
# the new thread fails (MemoryError) during its initialization
1464+
1465+
def serving_thread():
1466+
1467+
def nothing():
1468+
pass
1469+
1470+
def _set_ident_memory_error():
1471+
raise MemoryError()
1472+
1473+
thread = threading.Thread(target=nothing)
1474+
with (
1475+
support.catch_unraisable_exception(),
1476+
mock.patch.object(thread, '_set_ident', _set_ident_memory_error)
1477+
):
1478+
thread.start()
1479+
self.assertFalse(thread.is_alive())
1480+
1481+
serving_thread = threading.Thread(target=serving_thread)
1482+
serving_thread.start()
1483+
serving_thread.join(0.1)
1484+
self.assertFalse(serving_thread.is_alive())
1485+
1486+
14601487
class ThreadJoinOnShutdown(BaseTestCase):
14611488

14621489
def _run_and_join(self, script):

Lib/threading.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,6 @@ class is implemented.
930930
if _HAVE_THREAD_NATIVE_ID:
931931
self._native_id = None
932932
self._os_thread_handle = _ThreadHandle()
933-
self._started = Event()
934933
self._initialized = True
935934
# Copy of sys.stderr used by self._invoke_excepthook()
936935
self._stderr = _sys.stderr
@@ -940,7 +939,6 @@ class is implemented.
940939

941940
def _after_fork(self, new_ident=None):
942941
# Private! Called by threading._after_fork().
943-
self._started._at_fork_reinit()
944942
if new_ident is not None:
945943
# This thread is alive.
946944
self._ident = new_ident
@@ -955,7 +953,7 @@ def _after_fork(self, new_ident=None):
955953
def __repr__(self):
956954
assert self._initialized, "Thread.__init__() was not called"
957955
status = "initial"
958-
if self._started.is_set():
956+
if self._os_thread_handle.is_running():
959957
status = "started"
960958
if self._os_thread_handle.is_done():
961959
status = "stopped"
@@ -978,7 +976,7 @@ def start(self):
978976
if not self._initialized:
979977
raise RuntimeError("thread.__init__() not called")
980978

981-
if self._started.is_set():
979+
if self._os_thread_handle.is_running():
982980
raise RuntimeError("threads can only be started once")
983981

984982
with _active_limbo_lock:
@@ -1001,7 +999,6 @@ def start(self):
1001999
with _active_limbo_lock:
10021000
del _limbo[self]
10031001
raise
1004-
self._started.wait() # Will set ident and native_id
10051002

10061003
def run(self):
10071004
"""Method representing the thread's activity.
@@ -1061,7 +1058,6 @@ def _bootstrap_inner(self):
10611058
if _HAVE_THREAD_NATIVE_ID:
10621059
self._set_native_id()
10631060
self._set_os_name()
1064-
self._started.set()
10651061
with _active_limbo_lock:
10661062
_active[self._ident] = self
10671063
del _limbo[self]
@@ -1081,11 +1077,7 @@ def _bootstrap_inner(self):
10811077
def _delete(self):
10821078
"Remove current thread from the dict of currently running threads."
10831079
with _active_limbo_lock:
1084-
del _active[get_ident()]
1085-
# There must not be any python code between the previous line
1086-
# and after the lock is released. Otherwise a tracing function
1087-
# could try to acquire the lock again in the same thread, (in
1088-
# current_thread()), and would block.
1080+
_active.pop(self._ident, None)
10891081

10901082
def join(self, timeout=None):
10911083
"""Wait until the thread terminates.
@@ -1113,7 +1105,7 @@ def join(self, timeout=None):
11131105
"""
11141106
if not self._initialized:
11151107
raise RuntimeError("Thread.__init__() not called")
1116-
if not self._started.is_set():
1108+
if not self._os_thread_handle.is_done() and not self._os_thread_handle.is_running():
11171109
raise RuntimeError("cannot join thread before it is started")
11181110
if self is current_thread():
11191111
raise RuntimeError("cannot join current thread")
@@ -1176,7 +1168,7 @@ def is_alive(self):
11761168
11771169
"""
11781170
assert self._initialized, "Thread.__init__() not called"
1179-
return self._started.is_set() and not self._os_thread_handle.is_done()
1171+
return self._os_thread_handle.is_running() and not self._os_thread_handle.is_done()
11801172

11811173
@property
11821174
def daemon(self):
@@ -1199,7 +1191,7 @@ def daemon(self, daemonic):
11991191
raise RuntimeError("Thread.__init__() not called")
12001192
if daemonic and not _daemon_threads_allowed():
12011193
raise RuntimeError('daemon threads are disabled in this interpreter')
1202-
if self._started.is_set():
1194+
if self._os_thread_handle.is_running():
12031195
raise RuntimeError("cannot set daemon status of active thread")
12041196
self._daemonic = daemonic
12051197

@@ -1385,7 +1377,6 @@ class _MainThread(Thread):
13851377

13861378
def __init__(self):
13871379
Thread.__init__(self, name="MainThread", daemon=False)
1388-
self._started.set()
13891380
self._ident = _get_main_thread_ident()
13901381
self._os_thread_handle = _make_thread_handle(self._ident)
13911382
if _HAVE_THREAD_NATIVE_ID:
@@ -1433,7 +1424,6 @@ class _DummyThread(Thread):
14331424
def __init__(self):
14341425
Thread.__init__(self, name=_newname("Dummy-%d"),
14351426
daemon=_daemon_threads_allowed())
1436-
self._started.set()
14371427
self._set_ident()
14381428
self._os_thread_handle = _make_thread_handle(self._ident)
14391429
if _HAVE_THREAD_NATIVE_ID:
@@ -1443,7 +1433,7 @@ def __init__(self):
14431433
_DeleteDummyThreadOnDel(self)
14441434

14451435
def is_alive(self):
1446-
if not self._os_thread_handle.is_done() and self._started.is_set():
1436+
if self._os_thread_handle.is_running() and not self._os_thread_handle.is_done():
14471437
return True
14481438
raise RuntimeError("thread is not alive")
14491439

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix :func:`threading.Thread.start` that can hang indefinitely in case of heap memory exhaustion during initialization of the new thread.

Modules/_threadmodule.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ struct bootstate {
337337
PyObject *kwargs;
338338
ThreadHandle *handle;
339339
PyEvent handle_ready;
340+
PyEvent handle_running;
340341
};
341342

342343
static void
@@ -359,6 +360,8 @@ thread_run(void *boot_raw)
359360

360361
// Wait until the handle is marked as running
361362
PyEvent_Wait(&boot->handle_ready);
363+
// Signal that this Thread is running
364+
_PyEvent_Notify(&boot->handle_running);
362365

363366
// `handle` needs to be manipulated after bootstate has been freed
364367
ThreadHandle *handle = boot->handle;
@@ -468,6 +471,7 @@ ThreadHandle_start(ThreadHandle *self, PyObject *func, PyObject *args,
468471
boot->handle = self;
469472
ThreadHandle_incref(self);
470473
boot->handle_ready = (PyEvent){0};
474+
boot->handle_running = (PyEvent){0};
471475

472476
PyThread_ident_t ident;
473477
PyThread_handle_t os_handle;
@@ -488,8 +492,9 @@ ThreadHandle_start(ThreadHandle *self, PyObject *func, PyObject *args,
488492
self->state = THREAD_HANDLE_RUNNING;
489493
PyMutex_Unlock(&self->mutex);
490494

491-
// Unblock the thread
495+
// Unblock the thread and wait the running signal
492496
_PyEvent_Notify(&boot->handle_ready);
497+
PyEvent_Wait(&boot->handle_running);
493498

494499
return 0;
495500

@@ -707,6 +712,18 @@ PyThreadHandleObject_join(PyObject *op, PyObject *args)
707712
Py_RETURN_NONE;
708713
}
709714

715+
static PyObject *
716+
PyThreadHandleObject_is_running(PyObject *op, PyObject *Py_UNUSED(dummy))
717+
{
718+
PyThreadHandleObject *self = PyThreadHandleObject_CAST(op);
719+
if (get_thread_handle_state(self->handle) == THREAD_HANDLE_RUNNING) {
720+
Py_RETURN_TRUE;
721+
}
722+
else {
723+
Py_RETURN_FALSE;
724+
}
725+
}
726+
710727
static PyObject *
711728
PyThreadHandleObject_is_done(PyObject *op, PyObject *Py_UNUSED(dummy))
712729
{
@@ -740,6 +757,7 @@ static PyGetSetDef ThreadHandle_getsetlist[] = {
740757
static PyMethodDef ThreadHandle_methods[] = {
741758
{"join", PyThreadHandleObject_join, METH_VARARGS, NULL},
742759
{"_set_done", PyThreadHandleObject_set_done, METH_NOARGS, NULL},
760+
{"is_running", PyThreadHandleObject_is_running, METH_NOARGS, NULL},
743761
{"is_done", PyThreadHandleObject_is_done, METH_NOARGS, NULL},
744762
{0, 0}
745763
};

0 commit comments

Comments
 (0)