Skip to content

Support thread local storage - Partially Fixes #291 #1019

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/rp2_common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ if (NOT PICO_BARE_METAL)
pico_add_subdirectory(pico_mem_ops)
pico_add_subdirectory(pico_malloc)
pico_add_subdirectory(pico_printf)
pico_add_subdirectory(pico_tls)

pico_add_subdirectory(pico_stdio)
pico_add_subdirectory(pico_stdio_semihosting)
Expand Down
6 changes: 6 additions & 0 deletions src/rp2_common/pico_standard_link/memmap_blocked_ram.ld
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,14 @@ SECTIONS
*(.text*)
. = ALIGN(4);
*(.rodata*)

. = ALIGN(4);
/* emutls objects */
PROVIDE_HIDDEN (__emutls_array_start = .);
*(.*.__emutls_v.*)
PROVIDE_HIDDEN (__emutls_array_end = .);

. = ALIGN(4);
*(.data*)

. = ALIGN(4);
Expand Down
6 changes: 6 additions & 0 deletions src/rp2_common/pico_standard_link/memmap_copy_to_ram.ld
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,14 @@ SECTIONS

. = ALIGN(4);
*(.rodata*)

. = ALIGN(4);
/* emutls objects */
PROVIDE_HIDDEN (__emutls_array_start = .);
*(.*.__emutls_v.*)
PROVIDE_HIDDEN (__emutls_array_end = .);

. = ALIGN(4);
*(.data*)

. = ALIGN(4);
Expand Down
6 changes: 6 additions & 0 deletions src/rp2_common/pico_standard_link/memmap_default.ld
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,14 @@ SECTIONS
*(.text*)
. = ALIGN(4);
*(.rodata*)

. = ALIGN(4);
/* emutls objects */
PROVIDE_HIDDEN (__emutls_array_start = .);
*(.*.__emutls_v.*)
PROVIDE_HIDDEN (__emutls_array_end = .);

. = ALIGN(4);
*(.data*)

. = ALIGN(4);
Expand Down
9 changes: 9 additions & 0 deletions src/rp2_common/pico_standard_link/memmap_no_flash.ld
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ SECTIONS
__etext = .;
__data_start__ = .;
*(vtable)


. = ALIGN(4);
/* emutls objects */
PROVIDE_HIDDEN (__emutls_array_start = .);
*(.*.__emutls_v.*)
PROVIDE_HIDDEN (__emutls_array_end = .);

. = ALIGN(4);
*(.data*)

. = ALIGN(4);
Expand Down
38 changes: 38 additions & 0 deletions src/rp2_common/pico_tls/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
if (NOT TARGET pico_tls)
    # Library to be depended on. The implementation it pulls in is chosen per
    # consuming target: the generator expression below uses that target's
    # PICO_TARGET_TLS_IMPL property when it is set, and PICO_DEFAULT_TLS_IMPL
    # otherwise.
    pico_add_impl_library(pico_tls)

    # No custom implementation; falls through to the compiler.
    pico_add_impl_library(pico_tls_compiler)

    # Alias "default", which is just core_thread.
    add_library(pico_tls_default INTERFACE)
    target_link_libraries(pico_tls_default INTERFACE pico_tls_core_thread)

    set(PICO_DEFAULT_TLS_IMPL pico_tls_default)

    target_link_libraries(pico_tls INTERFACE
            $<IF:$<BOOL:$<TARGET_PROPERTY:PICO_TARGET_TLS_IMPL>>,$<TARGET_PROPERTY:PICO_TARGET_TLS_IMPL>,${PICO_DEFAULT_TLS_IMPL}>)

    # Sources of the per-core TLS implementation, kept in their own INTERFACE
    # library so they can be linked explicitly.
    add_library(pico_tls_core_thread_explicit INTERFACE)
    target_sources(pico_tls_core_thread_explicit INTERFACE
            ${CMAKE_CURRENT_LIST_DIR}/tls.c
            ${CMAKE_CURRENT_LIST_DIR}/tls.S
            )

    pico_add_impl_library(pico_tls_core_thread)

    target_link_libraries(pico_tls_core_thread INTERFACE pico_tls_core_thread_explicit)

    # tls.c supplies __wrap___emutls_get_address, so route calls to
    # __emutls_get_address through the wrapper when this implementation is linked.
    pico_wrap_function(pico_tls_core_thread __emutls_get_address)

    # Call this to substitute an alternate implementation, e.g. if using an RTOS.
    #
    #   TARGET - an executable target; any other target type is a fatal error.
    #   IMPL   - implementation suffix; the library selected is pico_tls_<IMPL>.
    #
    # Example: pico_set_tls_implementation(my_app compiler)
    #
    # Written as a function (not a macro) so the target_type scratch variable
    # does not leak into the caller's scope.
    function(pico_set_tls_implementation TARGET IMPL)
        get_target_property(target_type ${TARGET} TYPE)
        if ("EXECUTABLE" STREQUAL "${target_type}")
            set_target_properties(${TARGET} PROPERTIES PICO_TARGET_TLS_IMPL "pico_tls_${IMPL}")
        else()
            message(FATAL_ERROR "tls implementation must be set on executable not library")
        endif()
    endfunction()
endif()
10 changes: 10 additions & 0 deletions src/rp2_common/pico_tls/tls.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#include "pico/asm_helper.S"

// Register tls_init through the __pre_init helper so that it is run during startup.
// This has to happen after memcpy() and memset() are available, because tls_init
// uses both to populate each core's thread local storage.
// NOTE(review): 10000 is presumably the ordering key that places tls_init late enough
// for that to hold - confirm against the __pre_init definition in pico/asm_helper.S.
__pre_init tls_init, 10000
101 changes: 101 additions & 0 deletions src/rp2_common/pico_tls/tls.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#include "pico.h"

#include <assert.h>
#include <malloc.h>
#include <stdlib.h>
#include <string.h>

// From emutls.c:
// 'For every TLS variable xyz, there is one __emutls_control variable named __emutls_v.xyz. If xyz has
// non-zero initial value, __emutls_v.xyz's "value" will point to __emutls_t.xyz, which has the initial value.'
//
// The linker script groups all the __emutls_v.xyz variables into a single array and provides symbols
// __emutls_array_start and __emutls_array_end, which can be used to iterate over the array. This allows
// the storage for each core's thread local variables to be pre-allocated and pre-initialized, which leaves
// minimal work for __wrap___emutls_get_address.
//
// This array is available to other TLS implementations too, such as a TLS implementation for an RTOS.

// Same layout as libgcc __emutls_object. Unfortunately, __emutls_object doesn't appear in any header files.
typedef struct {
uint size;
uint align;
union {
struct {
uint offset;
void *template;
} s;
void* lookup[NUM_CORES];
} u;
} tls_object;

// First element of the array of tls_objects; the symbol is provided by the linker script.
extern tls_object __emutls_array_start;
// One-past-the-end marker of the same array (loops run while object < &__emutls_array_end).
extern tls_object __emutls_array_end;

// Allocates and initializes the storage for every core's thread local variables.
void tls_init(void);
// Returns the calling core's instance of the thread local variable described by the given object.
void* __wrap___emutls_get_address(tls_object*);

// Must be called after it is safe to call memcpy & memset.
//
// Pre-allocates and pre-initializes one block of thread local storage per core, then rewrites every
// tls_object between __emutls_array_start and __emutls_array_end into a per-core lookup table so that
// __wrap___emutls_get_address() only has to index it. Does nothing if there are no thread local
// variables. Aborts if the storage for a core cannot be allocated.
void tls_init(void) {
    // Three passes:
    // 1) Calculate the offset of each thread local variable and the total storage to be allocated for each thread.
    uint storage_size = 0;
    uint max_align = 1;
    for (tls_object* object = &__emutls_array_start; object < &__emutls_array_end; ++object) {
        // The alignment must be a non-zero power of two. Zero has to be rejected explicitly: it passes
        // the power-of-two test but turns the rounding mask below into 0.
        assert(object->align != 0 && (object->align & (object->align - 1)) == 0);

        if (object->align > max_align) {
            max_align = object->align;
        }

        // Round the running offset up to this variable's alignment before placing it.
        storage_size = (storage_size + object->align - 1) & ~(object->align - 1);
        object->u.s.offset = storage_size;
        storage_size += object->size;
    }

    if (storage_size == 0) {
        return;
    }

    // 2) Allocate storage for each thread and initialize the thread local variables to their initial value.
    char* stores[NUM_CORES];
    for (uint i = 0; i < NUM_CORES; ++i) {
        // TODO: tls_init is invoked before pico_malloc's auto-initialized mutex has been initialized.
        // However, aligned_alloc and memalign are not wrapped by pico_malloc, so they neither acquire
        // nor release the mutex. This works, though not for a satisfying reason.
        //
        // What I would like to do here, since malloc and friends ought not to be called at this point in
        // initialization, is decrement the heap limit by the TLS storage size. At time of writing, the
        // heap limit is &__StackLimit, i.e. static. It could be dynamic though.
        char* storage = stores[i] = (char*) memalign(max_align, storage_size);
        if (!storage) {
            // Without storage every thread local access on this core would go through a bogus
            // pointer, so stop here rather than writing through NULL below.
            abort();
        }

        for (tls_object* object = &__emutls_array_start; object < &__emutls_array_end; ++object) {
            if (object->u.s.template) {
                memcpy(storage + object->u.s.offset, object->u.s.template, object->size);
            } else {
                memset(storage + object->u.s.offset, 0, object->size);
            }
        }
    }

    // 3) Repurpose the tls_objects so each contains a lookup table mapping from core index to pointer to
    // thread local variable.
    for (tls_object* object = &__emutls_array_start; object < &__emutls_array_end; ++object) {
        // Read the offset before writing lookup[]: both live in the same union storage.
        uint object_offset = object->u.s.offset;
        for (uint i = 0; i < NUM_CORES; ++i) {
            object->u.lookup[i] = stores[i] + object_offset;
        }
    }
}

// Replacement for __emutls_get_address: looks up the calling core's instance of a thread local
// variable in the table that tls_init() stored in its control object.
void* __wrap___emutls_get_address(tls_object* object) {
    // No TLS storage is allocated for exception context, so this must not be called from there.
    assert(!__get_current_exception());

    void** const per_core_table = object->u.lookup;
    return per_core_table[get_core_num()];
}
1 change: 1 addition & 0 deletions test/kitchen_sink/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ target_link_libraries(kitchen_sink_libs INTERFACE
pico_platform
pico_stdlib
pico_sync
pico_tls
pico_time
pico_unique_id
pico_util
Expand Down
16 changes: 16 additions & 0 deletions test/kitchen_sink/kitchen_sink.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,15 @@ __force_inline int something_inlined(int x) {
auto_init_mutex(mutex);
auto_init_recursive_mutex(recursive_mutex);

#ifndef __cplusplus
#define thread_local __thread
#endif

thread_local int initialized_tls_var = 7;
thread_local int __attribute__((section("other_section"))) other_section_tls_var = 7;
thread_local int uninitialized_tls_var;
thread_local int garbage_collected_tls_var;

int main(void) {
spiggle();

Expand All @@ -129,6 +138,13 @@ int main(void) {
hard_assert(!mutex_try_enter(&mutex, NULL));
hard_assert(recursive_mutex_try_enter(&recursive_mutex, NULL));
hard_assert(recursive_mutex_try_enter(&recursive_mutex, NULL));

hard_assert(initialized_tls_var == 7);
hard_assert(other_section_tls_var == 7);
hard_assert(uninitialized_tls_var == 0);
initialized_tls_var = 8;
hard_assert(initialized_tls_var == 8);

// this should compile as we are Cortex M0+
__asm volatile("SVC #3");

Expand Down