Skip to content
Merged
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
Fix an issue with thread identifiers being sign-extended on some platforms.
30 changes: 21 additions & 9 deletions Python/thread_pthread.h
Original file line numberDiff line numberDiff line change
Expand Up@@ -306,16 +306,33 @@ do_start_joinable_thread(void (*func)(void *), void *arg, pthread_t* out_id)
return 0;
}

/* Convert a pthread_t into a PyThread_ident_t.

   POSIX permits pthread_t to be a non-arithmetic type; musl, for example,
   typedefs it as a pointer.  The value is therefore first cast to an unsigned
   integer type of exactly the same size, so that the conversion to the
   return type cannot sign-extend it.  Configurations whose pthread_t size
   matches none of the candidates below are rejected at compile time. */
static PyThread_ident_t
_pthread_t_to_ident(pthread_t value){
#if SIZEOF_PTHREAD_T == SIZEOF_VOID_P
    uintptr_t same_width = (uintptr_t) value;
#elif SIZEOF_PTHREAD_T == SIZEOF_LONG
    unsigned long same_width = (unsigned long) value;
#elif SIZEOF_PTHREAD_T == SIZEOF_INT
    unsigned int same_width = (unsigned int) value;
#elif SIZEOF_PTHREAD_T == SIZEOF_LONG_LONG
    unsigned long long same_width = (unsigned long long) value;
#else
#error "Unsupported SIZEOF_PTHREAD_T value"
#endif
    return same_width;
}

/* Start a thread through do_start_joinable_thread(func, arg, ...) and report
   it to the caller.

   On success, stores the thread's identifier in *ident and its handle in
   *handle, then returns 0.  If do_start_joinable_thread() returns non-zero,
   returns -1 and leaves *ident and *handle untouched. */
int
PyThread_start_joinable_thread(void (*func)(void *), void *arg,
                               PyThread_ident_t* ident, PyThread_handle_t* handle){
    pthread_t th = (pthread_t) 0;
    if (do_start_joinable_thread(func, arg, &th)){
        return -1;
    }
    /* Derive the identifier only through _pthread_t_to_ident(): a direct
       (PyThread_ident_t) cast of th is the conversion that can sign-extend,
       and storing it first would just be overwritten anyway. */
    *ident = _pthread_t_to_ident(th);
    *handle = (PyThread_handle_t) th;
    /* Both outputs must convert back to the original pthread_t. */
    assert(th == (pthread_t) *ident);
    assert(th == (pthread_t) *handle);
    return 0;
}
Expand All@@ -328,11 +345,7 @@ PyThread_start_new_thread(void (*func)(void *), void *arg)
return PYTHREAD_INVALID_THREAD_ID;
}
pthread_detach(th);
#if SIZEOF_PTHREAD_T <= SIZEOF_LONG
return (unsigned long) th;
#else
return (unsigned long) *(unsigned long *) &th;
#endif
return (unsigned long) _pthread_t_to_ident(th);
}

int
Expand All@@ -357,8 +370,7 @@ PyThread_get_thread_ident_ex(void){
if (!initialized)
PyThread_init_thread();
threadid = pthread_self();
assert(threadid == (pthread_t) (PyThread_ident_t) threadid);
return (PyThread_ident_t) threadid;
return _pthread_t_to_ident(threadid);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is potentially lossy compared to the previous version if SIZEOF_PTHREAD_T > SIZEOF_LONG. Previously we would cast directly to a PyThread_ident_t (an unsigned long long), whereas we now cast through an unsigned long.

Copy link
ContributorAuthor

@vfazio (vfazio), Mar 31, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I understand your concern.

This was an edge case in PyThread_start_new_thread. My guess is that it was there for situations where pthread_t was not an unsigned long and was, instead, a pointer or a struct.

#if SIZEOF_PTHREAD_T <= SIZEOF_LONG
    return (unsigned long) th;
#else
    return (unsigned long) *(unsigned long *) &th;
#endif

It's probably fine to drop the condition in _pthread_t_to_ident and let the caller here truncate it since this API is specifically returning a ULONG and not a PyThread_ident_t.

Looking at the history:

2565bff

635f6fb

The cast through unsigned long * was for Alpha OSF, which was dropped from PEP 11 a few years ago (CPython 3.3): https://bugs.python.org/issue8606. My guess is that pthread_t was also defined as a pointer or struct type there, and this was a way to work around it by returning at least sizeof(long) bytes of it.

If we find other platforms where we need to support this workaround, I think they can be chained onto the musl #ifdef. The only time it might be a problem is if sizeof(uintptr_t) < sizeof(unsigned long).

If others agree, I can drop the condition so the function looks like so:

static PyThread_ident_t
_pthread_t_to_ident(pthread_t value) {
    PyThread_ident_t ident;
#if defined(__linux__) && !defined(__GLIBC__)
    ident = (PyThread_ident_t) (uintptr_t) value;
    assert(pthread_equal(value, (pthread_t) (uintptr_t) ident));
#else
    ident = (PyThread_ident_t) value;
    assert(pthread_equal(value, (pthread_t) ident));
#endif
    return ident;
}

I do not suggest this as a long term solution; I do think we need to work towards making this opaque. I'm just trying to find something that is a stop-gap that can be ported back with relative ease that doesn't cause a regression.

}

unsigned long
Expand Down
Loading