Skip to content

replace the system allocator in executables #18915

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mk/clean.mk
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ clean$(1)_T_$(2)_H_$(3): \
$$(foreach tool,$$(TOOLS),clean$(1)_T_$(2)_H_$(3)-tool-$$(tool))
$$(Q)rm -f $$(TLIB$(1)_T_$(2)_H_$(3))/libmorestack.a
$$(Q)rm -f $$(TLIB$(1)_T_$(2)_H_$(3))/libcompiler-rt.a
$$(Q)rm -f $$(TLIB$(1)_T_$(2)_H_$(3))/librust_malloc.a
$(Q)rm -f $$(TLIB$(1)_T_$(2)_H_$(3))/librun_pass_stage* # For unix
$(Q)rm -f $$(TLIB$(1)_T_$(2)_H_$(3))/run_pass_stage* # For windows

Expand Down
3 changes: 2 additions & 1 deletion mk/main.mk
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,8 @@ endif
TSREQ$(1)_T_$(2)_H_$(3) = \
$$(HSREQ$(1)_H_$(3)) \
$$(TLIB$(1)_T_$(2)_H_$(3))/libmorestack.a \
$$(TLIB$(1)_T_$(2)_H_$(3))/libcompiler-rt.a
$$(TLIB$(1)_T_$(2)_H_$(3))/libcompiler-rt.a \
$$(TLIB$(1)_T_$(2)_H_$(3))/librust_malloc.a

# Prerequisites for a working stageN compiler and libraries, for a specific
# target
Expand Down
3 changes: 2 additions & 1 deletion mk/rt.mk
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
# target.
################################################################################
NATIVE_LIBS := rust_builtin hoedown morestack miniz context_switch \
rustrt_native rust_test_helpers
rustrt_native rust_test_helpers rust_malloc

# $(1) is the target triple
define NATIVE_LIBRARIES
Expand All @@ -58,6 +58,7 @@ NATIVE_DEPS_rustrt_native_$(1) := \
arch/$$(HOST_$(1))/record_sp.S
NATIVE_DEPS_rust_test_helpers_$(1) := rust_test_helpers.c
NATIVE_DEPS_morestack_$(1) := arch/$$(HOST_$(1))/morestack.S
NATIVE_DEPS_rust_malloc_$(1) := rust_malloc.c
NATIVE_DEPS_context_switch_$(1) := \
arch/$$(HOST_$(1))/_context.S

Expand Down
6 changes: 6 additions & 0 deletions mk/target.mk
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ $$(TLIB$(1)_T_$(2)_H_$(3))/libmorestack.a: \
| $$(TLIB$(1)_T_$(2)_H_$(3))/ $$(SNAPSHOT_RUSTC_POST_CLEANUP)
@$$(call E, cp: $$@)
$$(Q)cp $$< $$@

$$(TLIB$(1)_T_$(2)_H_$(3))/librust_malloc.a: \
$$(RT_OUTPUT_DIR_$(2))/$$(call CFG_STATIC_LIB_NAME_$(2),rust_malloc) \
| $$(TLIB$(1)_T_$(2)_H_$(3))/ $$(SNAPSHOT_RUSTC_POST_CLEANUP)
@$$(call E, cp: $$@)
$$(Q)cp $$< $$@
endef

$(foreach source,$(CFG_HOST), \
Expand Down
9 changes: 9 additions & 0 deletions src/librustc/session/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,13 @@ macro_rules! cgoptions(
}
)*

/// Parses the value of an optional on/off codegen flag.
///
/// When a value is supplied and `from_str` turns it into a `bool`, that
/// value is stored in `slot` and `true` is returned. A missing or
/// unparsable value leaves `slot` untouched and yields `false`.
fn parse_opt_toggle(slot: &mut Option<bool>, v: Option<&str>) -> bool {
    let parsed: Option<bool> = v.and_then(from_str);
    if parsed.is_none() {
        return false;
    }
    *slot = parsed;
    true
}

fn parse_bool(slot: &mut bool, v: Option<&str>) -> bool {
match v {
Some(..) => false,
Expand Down Expand Up @@ -447,6 +454,8 @@ cgoptions!(
"print remarks for these optimization passes (space separated, or \"all\")"),
no_stack_check: bool = (false, parse_bool,
"disable checks for stack exhaustion (a memory-safety hazard!)"),
replace_allocator: Option<bool> = (None, parse_opt_toggle,
"attempt to replace the system allocator with jemalloc"),
)

pub fn build_codegen_options(matches: &getopts::Matches) -> CodegenOptions
Expand Down
2 changes: 2 additions & 0 deletions src/librustc/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub struct Session {
pub crate_types: RefCell<Vec<config::CrateType>>,
pub crate_metadata: RefCell<Vec<String>>,
pub features: RefCell<feature_gate::Features>,
pub use_std: Cell<bool>,

/// The maximum recursion limit for potentially infinitely recursive
/// operations such as auto-dereference and monomorphization.
Expand Down Expand Up @@ -252,6 +253,7 @@ pub fn build_session_(sopts: config::Options,
crate_types: RefCell::new(Vec::new()),
crate_metadata: RefCell::new(Vec::new()),
features: RefCell::new(feature_gate::Features::new()),
use_std: Cell::new(false),
recursion_limit: Cell::new(64),
};

Expand Down
1 change: 1 addition & 0 deletions src/librustc_back/target/linux_base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ pub fn opts() -> TargetOptions {
"-Wl,--as-needed".to_string(),
),
position_independent_executables: true,
weak_malloc: true,
.. Default::default()
}
}
3 changes: 3 additions & 0 deletions src/librustc_back/target/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ pub struct TargetOptions {
/// advantage of ASLR, as otherwise the functions in the executable are not randomized and can
/// be used during an exploit of a vulnerability in any code.
pub position_independent_executables: bool,
/// The platform allocator can be replaced via weak symbols
pub weak_malloc: bool
}

impl Default for TargetOptions {
Expand Down Expand Up @@ -191,6 +193,7 @@ impl Default for TargetOptions {
has_rpath: false,
no_compiler_rt: false,
position_independent_executables: false,
weak_malloc: false
}
}
}
Expand Down
13 changes: 13 additions & 0 deletions src/librustc_trans/back/link.rs
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,19 @@ fn link_args(cmd: &mut Command,
}
}

if !dylib && t.options.weak_malloc &&
sess.opts.cg.replace_allocator.unwrap_or(sess.use_std.get()) {
if t.options.is_like_osx {
let rust_malloc = lib_path.join("librust_malloc.a");

let mut v = b"-Wl,-force_load,".to_vec();
v.push_all(rust_malloc.as_vec());
cmd.arg(v.as_slice());
} else {
cmd.args(["-Wl,--whole-archive", "-lrust_malloc", "-Wl,--no-whole-archive"]);
}
}

// When linking a dynamic library, we put the metadata into a section of the
// executable. This metadata is in a separate object file from the main
// object file, so we link that in here.
Expand Down
1 change: 1 addition & 0 deletions src/librustc_trans/driver/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ pub fn phase_2_configure_and_expand(sess: &Session,
collect_crate_types(sess, krate.attrs.as_slice());
*sess.crate_metadata.borrow_mut() =
collect_crate_metadata(sess, krate.attrs.as_slice());
sess.use_std.set(syntax::std_inject::use_std(&krate));

time(time_passes, "gated feature checking", (), |_| {
let (features, unknown_features) =
Expand Down
2 changes: 1 addition & 1 deletion src/libsyntax/std_inject.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ pub fn maybe_inject_prelude(krate: ast::Crate) -> ast::Crate {
}
}

fn use_std(krate: &ast::Crate) -> bool {
/// Reports whether the crate uses std, i.e. whether its crate attributes
/// do not contain `no_std`.
pub fn use_std(krate: &ast::Crate) -> bool {
    let opted_out = attr::contains_name(krate.attrs.as_slice(), "no_std");
    !opted_out
}

Expand Down
111 changes: 111 additions & 0 deletions src/rt/rust_malloc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#include <stddef.h>

/*
 * Prototypes for the je_-prefixed allocator entry points that the wrappers
 * in this file forward to. They are only declared here, not defined.
 */

/* Counterparts of the standard C / POSIX allocation functions. */
void *je_malloc(size_t size);
void *je_calloc(size_t num, size_t size);
int je_posix_memalign(void **memptr, size_t alignment, size_t size);
void *je_aligned_alloc(size_t alignment, size_t size);
void *je_realloc(void *ptr, size_t size);
void je_free(void *ptr);

/* The flag-taking *allocx family. */
void *je_mallocx(size_t size, int flags);
void *je_rallocx(void *ptr, size_t size, int flags);
size_t je_xallocx(void *ptr, size_t size, size_t extra, int flags);
size_t je_sallocx(const void *ptr, int flags);
void je_dallocx(void *ptr, int flags);
void je_sdallocx(void *ptr, size_t size, int flags);
size_t je_nallocx(size_t size, int flags);

/* mallctl*, statistics printing and usable-size query. */
int je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen);
int je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp);
int je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp,
size_t newlen);
void je_malloc_stats_print(void (*write_cb)(void *, const char *), void *je_cbopaque,
const char *opts);
size_t je_malloc_usable_size(const void *ptr);

/* Additional allocators; je_valloc is only declared when __ANDROID__ is not defined. */
void *je_memalign(size_t alignment, size_t size);
#if !defined(__ANDROID__)
void *je_valloc(size_t size);
#endif

/* Definition of malloc that forwards the request, unchanged, to je_malloc. */
void *malloc(size_t size) {
    void *block = je_malloc(size);
    return block;
}

/* Definition of calloc that forwards both arguments, unchanged, to je_calloc. */
void *calloc(size_t num, size_t size) {
    void *block = je_calloc(num, size);
    return block;
}

/*
 * Definition of posix_memalign that forwards to je_posix_memalign and
 * returns its status code as-is.
 */
int posix_memalign(void **memptr, size_t alignment, size_t size) {
    int status = je_posix_memalign(memptr, alignment, size);
    return status;
}

/* Definition of aligned_alloc that forwards, unchanged, to je_aligned_alloc. */
void *aligned_alloc(size_t alignment, size_t size) {
    void *block = je_aligned_alloc(alignment, size);
    return block;
}

/* Definition of realloc that forwards the pointer and new size to je_realloc. */
void *realloc(void *ptr, size_t size) {
    void *resized = je_realloc(ptr, size);
    return resized;
}

/* Definition of free that hands the pointer to je_free. */
void free(void *ptr) {
    je_free(ptr);
    return;
}

/* Unprefixed mallocx: forwards size and flags, unchanged, to je_mallocx. */
void *mallocx(size_t size, int flags) {
    void *block = je_mallocx(size, flags);
    return block;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How come this reexports a number of jemalloc symbols without the je_ prefix? I would expect the standard libc weak symbols to be exposed, but the jemalloc symbols aren't able to be overridden, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None of the public symbols defined by jemalloc are weak symbols. I'm exporting these to address the demand that mallocx be usable as it is in vanilla jemalloc with no prefix.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you elaborate on this "demand" a little more? This is basically one of the possible shims rustc can inject, and the purpose is to override the system malloc/free, and I am unaware of the desire to export jemalloc-specific symbols as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's necessary for Rust's jemalloc to satisfy the needs of third party code calling into jemalloc. That was the primary argument against the last pull request...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If Rust doesn't do this, then third party code using jemalloc cannot be used. C libraries don't usually have versions in the symbol names, so you can't just have multiple copies living side-by-side without problems.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was under the impression that this "third party code" was primarily code in other processes that Rust itself was linked into. Either via a staticlib, dylib, or dlopen()'d dylib. Within a Rust executable itself (which this PR is focused on), however, I don't think that this would help too much. Libraries should be written knowing that the allocator is not their decision, and should plan appropriately (not relying on an upstream definition of jemalloc). Native code linked into an executable cannot rely on the existence of these symbols as the compiler is the one choosing whether to link in jemalloc or not, not the code itself.

Note that I'm just looking at this from the perspective of having this shim be as small as possible. I'd rather stick to well-known standardized apis like malloc than duplicate the nonstandard apis of jemalloc. If these were to fall out of sync with the jemalloc definitions, then I imagine badness could ensue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> I was under the impression that this "third party code" was primarily code in other processes that Rust itself was linked into.

If that code depends on jemalloc, then it will need to be using Rust's jemalloc.

> I was under the impression that this "third party code" was primarily code in other processes that Rust itself was linked into. Either via a staticlib, dylib, or dlopen()'d dylib. Within a Rust executable itself (which this PR is focused on), however, I don't think that this would help too much. Libraries should be written knowing that the allocator is not their decision, and should plan appropriately (not relying on an upstream definition of jemalloc). Native code linked into an executable cannot rely on the existence of these symbols as the compiler is the one choosing whether to link in jemalloc or not, not the code itself.

The only argument against the previous one was that it would break code relying on mixing mallocx and free. The previous pull request was simpler and didn't have the added overhead of these wrapper functions. I'll just reopen it in favour of this one if that dubious argument has been abandoned.

> I'd rather stick to well-known standardized apis like malloc than duplicate the nonstandard apis of jemalloc.

They are not "duplicated" in any way. It is manually removing the prefix because you rejected my pull request doing this the easy and low-overhead way by using the default configuration.

> If these were to fall out of sync with the jemalloc definitions, then I imagine badness could ensue.

It's a stable API. There was a long deprecation period for the old experimental API before the shift to this one.


/* Unprefixed rallocx: forwards pointer, size and flags to je_rallocx. */
void *rallocx(void *ptr, size_t size, int flags) {
    void *resized = je_rallocx(ptr, size, flags);
    return resized;
}

/* Unprefixed xallocx: forwards all four arguments to je_xallocx and returns its result. */
size_t xallocx(void *ptr, size_t size, size_t extra, int flags) {
    size_t result = je_xallocx(ptr, size, extra, flags);
    return result;
}

/* Unprefixed sallocx: forwards pointer and flags to je_sallocx and returns its result. */
size_t sallocx(const void *ptr, int flags) {
    size_t result = je_sallocx(ptr, flags);
    return result;
}

/* Unprefixed dallocx: hands pointer and flags to je_dallocx. */
void dallocx(void *ptr, int flags) {
    je_dallocx(ptr, flags);
    return;
}

/* Unprefixed sdallocx: hands pointer, size and flags to je_sdallocx. */
void sdallocx(void *ptr, size_t size, int flags) {
    je_sdallocx(ptr, size, flags);
    return;
}

/* Unprefixed nallocx: forwards size and flags to je_nallocx and returns its result. */
size_t nallocx(size_t size, int flags) {
    size_t result = je_nallocx(size, flags);
    return result;
}

/* Unprefixed mallctl: forwards every argument to je_mallctl and returns its status. */
int mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) {
    int status = je_mallctl(name, oldp, oldlenp, newp, newlen);
    return status;
}

/*
 * Unprefixed mallctlnametomib: forwards every argument to
 * je_mallctlnametomib and returns its status.
 */
int mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) {
    int status = je_mallctlnametomib(name, mibp, miblenp);
    return status;
}

/* Unprefixed mallctlbymib: forwards every argument to je_mallctlbymib and returns its status. */
int mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp,
                 size_t newlen) {
    int status = je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen);
    return status;
}

/*
 * Unprefixed malloc_stats_print: forwards the write callback, its opaque
 * argument and the option string to je_malloc_stats_print.
 *
 * Both functions return void, so the forwarding call is a plain statement;
 * `return <expression>;` inside a void function is not permitted by ISO C.
 */
void malloc_stats_print(void (*write_cb)(void *, const char *), void *je_cbopaque,
                        const char *opts) {
    je_malloc_stats_print(write_cb, je_cbopaque, opts);
}

/* Unprefixed malloc_usable_size: forwards the pointer to je_malloc_usable_size. */
size_t malloc_usable_size(const void *ptr) {
    size_t usable = je_malloc_usable_size(ptr);
    return usable;
}

/* Definition of memalign that forwards alignment and size to je_memalign. */
void *memalign(size_t alignment, size_t size) {
    void *block = je_memalign(alignment, size);
    return block;
}

/*
 * Definition of valloc that forwards the request to je_valloc.
 *
 * The je_valloc prototype in this file is only visible when __ANDROID__ is
 * not defined, so the wrapper is compiled under the same condition. Without
 * the guard, an Android build would call je_valloc through an implicit
 * declaration (assumed to return int) instead of the real prototype.
 */
#if !defined(__ANDROID__)
void *valloc(size_t size) {
    return je_valloc(size);
}
#endif