Skip to content

Commit 2d28830

Browse files
committed
(arigo, plan_rich) a signal interrupting a thread will now always record the stack trace of the same thread (not others)
1 parent 16bcfcb commit 2d28830

4 files changed

Lines changed: 28 additions & 47 deletions

File tree

src/vmprof_common.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -143,13 +143,3 @@ void *volatile _PyThreadState_Current;
143143
# define _Py_atomic_load_relaxed(pp) (*(pp))
144144
#endif
145145

146-
PyThreadState * get_current_thread_state(void)
147-
{
148-
#if PY_MAJOR_VERSION < 3
149-
return _PyThreadState_Current;
150-
#elif PY_VERSION_HEX < 0x03050200
151-
return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current);
152-
#else
153-
return _PyThreadState_UncheckedGet();
154-
#endif
155-
}

src/vmprof_main.h

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -98,36 +98,14 @@ static char atfork_hook_installed = 0;
9898
* *************************************************************
9999
*/
100100

101-
static int get_stack_trace(void** result, int max_depth, ucontext_t *ucontext)
101+
static int get_stack_trace(PyThreadState * current, void** result, int max_depth, ucontext_t *ucontext)
102102
{
103-
PyThreadState* current = get_current_thread_state();
104-
105103
if (!current)
106104
return 0;
107105
PyFrameObject *frame = current->frame;
108106
return read_trace_from_cpy_frame(current->frame, result, max_depth);
109107
}
110108

111-
static void *get_current_thread_id(void)
112-
{
113-
/* xxx This function is a hack on two fronts:
114-
115-
- It assumes that pthread_self() is async-signal-safe. This
116-
should be true on Linux and OS X. I hope it is also true elsewhere.
117-
118-
- It abuses pthread_self() by assuming it just returns an
119-
integer. According to comments in CPython's source code, the
120-
platforms where it is not the case are rare nowadays.
121-
122-
An alternative would be to try to look if the information is
123-
available in the ucontext_t in the caller.
124-
*/
125-
#ifdef __APPLE__
126-
return (void *)get_current_thread_state();
127-
#else
128-
return (void *)pthread_self();
129-
#endif
130-
}
131109

132110

133111
/* *************************************************************
@@ -148,31 +126,32 @@ static void segfault_handler(int arg)
148126

149127
static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext)
150128
{
151-
#ifdef __APPLE__
129+
PyThreadState * tstate = NULL;
130+
void (*prevhandler)(int);
152131
// TERRIBLE HACK AHEAD
153132
// on OS X, the thread local storage is sometimes uninitialized
154133
// when the signal handler runs - it means it's impossible to read errno
155134
// or call any syscall or read _PyThreadState_Current or pthread_self. Additionally,
156135
// it seems impossible to read the register gs.
157136
// here we register segfault handler (all guarded by a spinlock) and call
158137
// longjmp in case segfault happens while reading a thread local
138+
//
139+
// We do the same error detection for Linux to ensure that
140+
// PyGILState_GetThisThreadState returns a sane result
159141
while (__sync_lock_test_and_set(&spinlock, 1)) {
160142
}
161-
signal(SIGSEGV, &segfault_handler);
143+
prevhandler = signal(SIGSEGV, &segfault_handler);
162144
int fault_code = setjmp(restore_point);
163145
if (fault_code == 0) {
164146
pthread_self();
165-
get_current_thread_state();
147+
tstate = PyGILState_GetThisThreadState();
166148
} else {
167-
signal(SIGSEGV, SIG_DFL);
168-
__sync_synchronize();
169-
spinlock = 0;
149+
signal(SIGSEGV, prevhandler);
150+
__sync_lock_release(&spinlock);
170151
return;
171152
}
172-
signal(SIGSEGV, SIG_DFL);
173-
__sync_synchronize();
174-
spinlock = 0;
175-
#endif
153+
signal(SIGSEGV, prevhandler);
154+
__sync_lock_release(&spinlock);
176155
long val = __sync_fetch_and_add(&signal_handler_value, 2L);
177156

178157
if ((val & 1) == 0) {
@@ -189,11 +168,11 @@ static void sigprof_handler(int sig_nr, siginfo_t* info, void *ucontext)
189168
struct prof_stacktrace_s *st = (struct prof_stacktrace_s *)p->data;
190169
st->marker = MARKER_STACKTRACE;
191170
st->count = 1;
192-
depth = get_stack_trace(st->stack, MAX_STACK_DEPTH-1, ucontext);
171+
depth = get_stack_trace(tstate, st->stack, MAX_STACK_DEPTH-1, ucontext);
193172
//st->stack[0] = GetPC((ucontext_t*)ucontext);
194173
// we gonna need that for pypy
195174
st->depth = depth;
196-
st->stack[depth++] = get_current_thread_id();
175+
st->stack[depth++] = tstate;
197176
long rss = get_current_proc_rss();
198177
if (rss >= 0)
199178
st->stack[depth++] = (void*)rss;

src/vmprof_main_win32.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,18 @@ int vmprof_snapshot_thread(DWORD thread_id, PyThreadState *tstate, prof_stacktra
7676
return depth;
7777
}
7878

79+
static
80+
PyThreadState * get_current_thread_state(void)
81+
{
82+
#if PY_MAJOR_VERSION < 3
83+
return _PyThreadState_Current;
84+
#elif PY_VERSION_HEX < 0x03050200
85+
return (PyThreadState*) _Py_atomic_load_relaxed(&_PyThreadState_Current);
86+
#else
87+
return _PyThreadState_UncheckedGet();
88+
#endif
89+
}
90+
7991
long __stdcall vmprof_mainloop(void *arg)
8092
{
8193
prof_stacktrace_s *stack = (prof_stacktrace_s*)malloc(SINGLE_BUF_SIZE);

vmprof/test/test_run.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ def test_nested_call():
112112
assert len(t[''].children) == 0
113113

114114
def test_multithreaded():
115-
if '__pypy__' in sys.builtin_module_names or PY3K:
116-
py.test.skip("not supported on pypy and python3 just yet")
115+
if '__pypy__' in sys.builtin_module_names:
116+
py.test.skip("not supported on pypy just yet")
117117
import threading
118118
finished = []
119119

0 commit comments

Comments
 (0)