restructure thread_local! for better codegen (especially on macos)
This commit is contained in:
parent
7acfb99adc
commit
dfe51a7249
4 changed files with 195 additions and 106 deletions
|
@ -2,10 +2,7 @@
|
|||
|
||||
#![unstable(feature = "thread_local_internals", issue = "0")]
|
||||
|
||||
use crate::cell::UnsafeCell;
|
||||
use crate::fmt;
|
||||
use crate::hint;
|
||||
use crate::mem;
|
||||
|
||||
/// A thread local storage key which owns its contents.
|
||||
///
|
||||
|
@ -92,10 +89,7 @@ pub struct LocalKey<T: 'static> {
|
|||
// trivially devirtualizable by LLVM because the value of `inner` never
|
||||
// changes and the constant should be readonly within a crate. This mainly
|
||||
// only runs into problems when TLS statics are exported across crates.
|
||||
inner: unsafe fn() -> Option<&'static UnsafeCell<Option<T>>>,
|
||||
|
||||
// initialization routine to invoke to create a value
|
||||
init: fn() -> T,
|
||||
inner: unsafe fn() -> Option<&'static T>,
|
||||
}
|
||||
|
||||
#[stable(feature = "std_debug", since = "1.16.0")]
|
||||
|
@ -159,10 +153,7 @@ macro_rules! __thread_local_inner {
|
|||
#[inline]
|
||||
fn __init() -> $t { $init }
|
||||
|
||||
unsafe fn __getit() -> $crate::option::Option<
|
||||
&'static $crate::cell::UnsafeCell<
|
||||
$crate::option::Option<$t>>>
|
||||
{
|
||||
unsafe fn __getit() -> $crate::option::Option<&'static $t> {
|
||||
#[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))]
|
||||
static __KEY: $crate::thread::__StaticLocalKeyInner<$t> =
|
||||
$crate::thread::__StaticLocalKeyInner::new();
|
||||
|
@ -182,11 +173,11 @@ macro_rules! __thread_local_inner {
|
|||
static __KEY: $crate::thread::__OsLocalKeyInner<$t> =
|
||||
$crate::thread::__OsLocalKeyInner::new();
|
||||
|
||||
__KEY.get()
|
||||
__KEY.get(__init)
|
||||
}
|
||||
|
||||
unsafe {
|
||||
$crate::thread::LocalKey::new(__getit, __init)
|
||||
$crate::thread::LocalKey::new(__getit)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -221,11 +212,9 @@ impl<T: 'static> LocalKey<T> {
|
|||
#[unstable(feature = "thread_local_internals",
|
||||
reason = "recently added to create a key",
|
||||
issue = "0")]
|
||||
pub const unsafe fn new(inner: unsafe fn() -> Option<&'static UnsafeCell<Option<T>>>,
|
||||
init: fn() -> T) -> LocalKey<T> {
|
||||
pub const unsafe fn new(inner: unsafe fn() -> Option<&'static T>) -> LocalKey<T> {
|
||||
LocalKey {
|
||||
inner,
|
||||
init,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -246,37 +235,6 @@ impl<T: 'static> LocalKey<T> {
|
|||
after it is destroyed")
|
||||
}
|
||||
|
||||
unsafe fn init(&self, slot: &UnsafeCell<Option<T>>) -> &T {
|
||||
// Execute the initialization up front, *then* move it into our slot,
|
||||
// just in case initialization fails.
|
||||
let value = (self.init)();
|
||||
let ptr = slot.get();
|
||||
|
||||
// note that this can in theory just be `*ptr = Some(value)`, but due to
|
||||
// the compiler will currently codegen that pattern with something like:
|
||||
//
|
||||
// ptr::drop_in_place(ptr)
|
||||
// ptr::write(ptr, Some(value))
|
||||
//
|
||||
// Due to this pattern it's possible for the destructor of the value in
|
||||
// `ptr` (e.g., if this is being recursively initialized) to re-access
|
||||
// TLS, in which case there will be a `&` and `&mut` pointer to the same
|
||||
// value (an aliasing violation). To avoid setting the "I'm running a
|
||||
// destructor" flag we just use `mem::replace` which should sequence the
|
||||
// operations a little differently and make this safe to call.
|
||||
mem::replace(&mut *ptr, Some(value));
|
||||
|
||||
// After storing `Some` we want to get a reference to the contents of
|
||||
// what we just stored. While we could use `unwrap` here and it should
|
||||
// always work it empirically doesn't seem to always get optimized away,
|
||||
// which means that using something like `try_with` can pull in
|
||||
// panicking code and cause a large size bloat.
|
||||
match *ptr {
|
||||
Some(ref x) => x,
|
||||
None => hint::unreachable_unchecked(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquires a reference to the value in this TLS key.
|
||||
///
|
||||
/// This will lazily initialize the value if this thread has not referenced
|
||||
|
@ -293,13 +251,68 @@ impl<T: 'static> LocalKey<T> {
|
|||
F: FnOnce(&T) -> R,
|
||||
{
|
||||
unsafe {
|
||||
let slot = (self.inner)().ok_or(AccessError {
|
||||
let thread_local = (self.inner)().ok_or(AccessError {
|
||||
_private: (),
|
||||
})?;
|
||||
Ok(f(match *slot.get() {
|
||||
Some(ref inner) => inner,
|
||||
None => self.init(slot),
|
||||
}))
|
||||
Ok(f(thread_local))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mod lazy {
|
||||
use crate::cell::UnsafeCell;
|
||||
use crate::mem;
|
||||
use crate::hint;
|
||||
|
||||
pub struct LazyKeyInner<T> {
|
||||
inner: UnsafeCell<Option<T>>,
|
||||
}
|
||||
|
||||
impl<T> LazyKeyInner<T> {
|
||||
pub const fn new() -> LazyKeyInner<T> {
|
||||
LazyKeyInner {
|
||||
inner: UnsafeCell::new(None),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub unsafe fn get(&self) -> Option<&'static T> {
|
||||
(*self.inner.get()).as_ref()
|
||||
}
|
||||
|
||||
pub unsafe fn initialize<F: FnOnce() -> T>(&self, init: F) -> &'static T {
|
||||
// Execute the initialization up front, *then* move it into our slot,
|
||||
// just in case initialization fails.
|
||||
let value = init();
|
||||
let ptr = self.inner.get();
|
||||
|
||||
// note that this can in theory just be `*ptr = Some(value)`, but due to
|
||||
// the compiler will currently codegen that pattern with something like:
|
||||
//
|
||||
// ptr::drop_in_place(ptr)
|
||||
// ptr::write(ptr, Some(value))
|
||||
//
|
||||
// Due to this pattern it's possible for the destructor of the value in
|
||||
// `ptr` (e.g., if this is being recursively initialized) to re-access
|
||||
// TLS, in which case there will be a `&` and `&mut` pointer to the same
|
||||
// value (an aliasing violation). To avoid setting the "I'm running a
|
||||
// destructor" flag we just use `mem::replace` which should sequence the
|
||||
// operations a little differently and make this safe to call.
|
||||
mem::replace(&mut *ptr, Some(value));
|
||||
|
||||
// After storing `Some` we want to get a reference to the contents of
|
||||
// what we just stored. While we could use `unwrap` here and it should
|
||||
// always work it empirically doesn't seem to always get optimized away,
|
||||
// which means that using something like `try_with` can pull in
|
||||
// panicking code and cause a large size bloat.
|
||||
match *ptr {
|
||||
Some(ref x) => x,
|
||||
None => hint::unreachable_unchecked(),
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn take(&mut self) -> Option<T> {
|
||||
(*self.inner.get()).take()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -309,11 +322,12 @@ impl<T: 'static> LocalKey<T> {
|
|||
#[doc(hidden)]
|
||||
#[cfg(all(target_arch = "wasm32", not(target_feature = "atomics")))]
|
||||
pub mod statik {
|
||||
use super::lazy::LazyKeyInner;
|
||||
use crate::cell::UnsafeCell;
|
||||
use crate::fmt;
|
||||
|
||||
pub struct Key<T> {
|
||||
inner: UnsafeCell<Option<T>>,
|
||||
inner: LazyKeyInner<T>,
|
||||
}
|
||||
|
||||
unsafe impl<T> Sync for Key<T> { }
|
||||
|
@ -327,12 +341,17 @@ pub mod statik {
|
|||
impl<T> Key<T> {
|
||||
pub const fn new() -> Key<T> {
|
||||
Key {
|
||||
inner: UnsafeCell::new(None),
|
||||
inner: LazyKeyInner::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn get(&self) -> Option<&'static UnsafeCell<Option<T>>> {
|
||||
Some(&*(&self.inner as *const _))
|
||||
#[inline]
|
||||
pub unsafe fn get(&self, init: fn() -> T) -> Option<&'static T> {
|
||||
let value = match self.inner.get() {
|
||||
Some(ref value) => value,
|
||||
None => self.inner.initialize(init),
|
||||
};
|
||||
Some(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -340,19 +359,33 @@ pub mod statik {
|
|||
#[doc(hidden)]
|
||||
#[cfg(target_thread_local)]
|
||||
pub mod fast {
|
||||
use crate::cell::{Cell, UnsafeCell};
|
||||
use super::lazy::LazyKeyInner;
|
||||
use crate::cell::Cell;
|
||||
use crate::fmt;
|
||||
use crate::mem;
|
||||
use crate::ptr;
|
||||
use crate::sys::fast_thread_local::register_dtor;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
enum DtorState {
|
||||
Unregistered,
|
||||
Registered,
|
||||
RunningOrHasRun,
|
||||
}
|
||||
|
||||
pub struct Key<T> {
|
||||
inner: UnsafeCell<Option<T>>,
|
||||
// If `LazyKeyInner::get` returns `None`, that indicates either:
|
||||
// * The value has never been initialized
|
||||
// * The value is being recursively initialized
|
||||
// * The value has already been destroyed or is being destroyed
|
||||
// To determine which kind of `None`, check `dtor_state`.
|
||||
//
|
||||
// This is very optimizer friendly for the fast path - initialized but
|
||||
// not yet dropped.
|
||||
inner: LazyKeyInner<T>,
|
||||
|
||||
// Metadata to keep track of the state of the destructor. Remember that
|
||||
// these variables are thread-local, not global.
|
||||
dtor_registered: Cell<bool>,
|
||||
dtor_running: Cell<bool>,
|
||||
// this variable is thread-local, not global.
|
||||
dtor_state: Cell<DtorState>,
|
||||
}
|
||||
|
||||
impl<T> fmt::Debug for Key<T> {
|
||||
|
@ -364,45 +397,84 @@ pub mod fast {
|
|||
impl<T> Key<T> {
|
||||
pub const fn new() -> Key<T> {
|
||||
Key {
|
||||
inner: UnsafeCell::new(None),
|
||||
dtor_registered: Cell::new(false),
|
||||
dtor_running: Cell::new(false)
|
||||
inner: LazyKeyInner::new(),
|
||||
dtor_state: Cell::new(DtorState::Unregistered),
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn get(&self) -> Option<&'static UnsafeCell<Option<T>>> {
|
||||
if mem::needs_drop::<T>() && self.dtor_running.get() {
|
||||
return None
|
||||
#[inline]
|
||||
pub unsafe fn get<F: FnOnce() -> T>(&self, init: F) -> Option<&'static T> {
|
||||
match self.inner.get() {
|
||||
Some(val) => Some(val),
|
||||
None => {
|
||||
if mem::needs_drop::<T>() {
|
||||
self.try_initialize_drop(init)
|
||||
} else {
|
||||
Some(self.try_initialize_nodrop(init))
|
||||
}
|
||||
}
|
||||
}
|
||||
self.register_dtor();
|
||||
Some(&*(&self.inner as *const _))
|
||||
}
|
||||
|
||||
unsafe fn register_dtor(&self) {
|
||||
if !mem::needs_drop::<T>() || self.dtor_registered.get() {
|
||||
return
|
||||
// `try_initialize_nodrop` is only called once per fast thread local
|
||||
// variable, except in corner cases where it is being recursively
|
||||
// initialized.
|
||||
//
|
||||
// Macos: Inlining this function causes two `tlv_get_addr` calls to be
|
||||
// performed for every call to `Key::get`.
|
||||
// LLVM issue: https://bugs.llvm.org/show_bug.cgi?id=41722
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
unsafe fn try_initialize_nodrop<F: FnOnce() -> T>(&self, init: F) -> &'static T {
|
||||
self.inner.initialize(init)
|
||||
}
|
||||
|
||||
// `try_initialize_drop` is only called once per fast thread local
|
||||
// variable, except in corner cases where thread_local dtors reference
|
||||
// other thread_local's, or it is being recursively initialized.
|
||||
#[inline(never)]
|
||||
#[cold]
|
||||
unsafe fn try_initialize_drop<F: FnOnce() -> T>(&self, init: F) -> Option<&'static T> {
|
||||
// We don't put a `needs_drop` check around this and call it a day
|
||||
// because this function is not inlined. Unwrapping code gets
|
||||
// generated for callers of `LocalKey::with` even if we always
|
||||
// return `Some` here.
|
||||
match self.dtor_state.get() {
|
||||
DtorState::Unregistered => {
|
||||
// dtor registration happens before initialization.
|
||||
register_dtor(self as *const _ as *mut u8,
|
||||
destroy_value::<T>);
|
||||
self.dtor_state.set(DtorState::Registered);
|
||||
}
|
||||
DtorState::Registered => {
|
||||
// recursively initialized
|
||||
}
|
||||
DtorState::RunningOrHasRun => {
|
||||
return None
|
||||
}
|
||||
}
|
||||
|
||||
register_dtor(self as *const _ as *mut u8,
|
||||
destroy_value::<T>);
|
||||
self.dtor_registered.set(true);
|
||||
Some(self.inner.initialize(init))
|
||||
}
|
||||
}
|
||||
|
||||
unsafe extern fn destroy_value<T>(ptr: *mut u8) {
|
||||
let ptr = ptr as *mut Key<T>;
|
||||
// Right before we run the user destructor be sure to flag the
|
||||
// destructor as running for this thread so calls to `get` will return
|
||||
// `None`.
|
||||
(*ptr).dtor_running.set(true);
|
||||
|
||||
ptr::drop_in_place((*ptr).inner.get());
|
||||
|
||||
// Right before we run the user destructor be sure to set the
|
||||
// `Option<T>` to `None`, and `dtor_state` to `RunningOrHasRun`. This
|
||||
// causes future calls to `get` to run `try_initialize_drop` again,
|
||||
// which will now fail, and return `None`.
|
||||
let value = (*ptr).inner.take();
|
||||
(*ptr).dtor_state.set(DtorState::RunningOrHasRun);
|
||||
drop(value);
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub mod os {
|
||||
use crate::cell::{Cell, UnsafeCell};
|
||||
use super::lazy::LazyKeyInner;
|
||||
use crate::cell::Cell;
|
||||
use crate::fmt;
|
||||
use crate::marker;
|
||||
use crate::ptr;
|
||||
|
@ -423,8 +495,8 @@ pub mod os {
|
|||
unsafe impl<T> Sync for Key<T> { }
|
||||
|
||||
struct Value<T: 'static> {
|
||||
inner: LazyKeyInner<T>,
|
||||
key: &'static Key<T>,
|
||||
value: UnsafeCell<Option<T>>,
|
||||
}
|
||||
|
||||
impl<T: 'static> Key<T> {
|
||||
|
@ -435,24 +507,43 @@ pub mod os {
|
|||
}
|
||||
}
|
||||
|
||||
pub unsafe fn get(&'static self) -> Option<&'static UnsafeCell<Option<T>>> {
|
||||
pub unsafe fn get(&'static self, init: fn() -> T) -> Option<&'static T> {
|
||||
let ptr = self.os.get() as *mut Value<T>;
|
||||
if !ptr.is_null() {
|
||||
if ptr as usize == 1 {
|
||||
return None
|
||||
if ptr as usize > 1 {
|
||||
match (*ptr).inner.get() {
|
||||
Some(ref value) => return Some(value),
|
||||
None => {},
|
||||
}
|
||||
return Some(&(*ptr).value);
|
||||
}
|
||||
self.try_initialize(init)
|
||||
}
|
||||
|
||||
// `try_initialize` is only called once per os thread local variable,
|
||||
// except in corner cases where thread_local dtors reference other
|
||||
// thread_local's, or it is being recursively initialized.
|
||||
unsafe fn try_initialize(&'static self, init: fn() -> T) -> Option<&'static T> {
|
||||
let ptr = self.os.get() as *mut Value<T>;
|
||||
if ptr as usize == 1 {
|
||||
// destructor is running
|
||||
return None
|
||||
}
|
||||
|
||||
// If the lookup returned null, we haven't initialized our own
|
||||
// local copy, so do that now.
|
||||
let ptr: Box<Value<T>> = box Value {
|
||||
key: self,
|
||||
value: UnsafeCell::new(None),
|
||||
let ptr = if ptr.is_null() {
|
||||
// If the lookup returned null, we haven't initialized our own
|
||||
// local copy, so do that now.
|
||||
let ptr: Box<Value<T>> = box Value {
|
||||
inner: LazyKeyInner::new(),
|
||||
key: self,
|
||||
};
|
||||
let ptr = Box::into_raw(ptr);
|
||||
self.os.set(ptr as *mut u8);
|
||||
ptr
|
||||
} else {
|
||||
// recursive initialization
|
||||
ptr
|
||||
};
|
||||
let ptr = Box::into_raw(ptr);
|
||||
self.os.set(ptr as *mut u8);
|
||||
Some(&(*ptr).value)
|
||||
|
||||
Some((*ptr).inner.initialize(init))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue