Skip to content

rustc has wrong signature for C function with 16-byte aligned stack argument in x86_64 Linux #80127

Closed
@bobbobbio

Description

@bobbobbio

We were writing some code which interacts with a C shared library, but when we were calling a function in the library from Rust, we found our program was crashing. Upon inspection in the debugger, it seems that some of the arguments were getting corrupted. The Rust function signature looked correct so it was surprising. I have managed to reduce the issue down to a minimal repro.

/// Rust mirror of the C `struct bar` declared in foo.h.
///
/// `repr(C)` together with `repr(align(16))` corresponds to the C side's
/// `__attribute__((aligned(16)))`: two `unsigned long` fields, with the whole
/// struct aligned to 16 bytes. This over-aligned struct is passed by value to
/// `foo_func` and is the argument at which the caller and callee disagree.
#[repr(C)]
#[repr(align(16))]
#[derive(Debug, Copy, Clone)]
pub struct bar {
    pub a: ::std::os::raw::c_ulong,
    pub b: ::std::os::raw::c_ulong,
}

/// Rust mirror of the C `struct baz` declared in foo.h (a `bool` followed by
/// an `unsigned`).
///
/// Passed by value to `foo_func`; per the gdb output in this report it arrives
/// intact, so it is not the source of the corruption.
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct baz {
    pub a: bool,
    pub b: ::std::os::raw::c_uint,
}

// Foreign declarations for the C shared library built from foo.c / foo.h.
extern "C" {
    /// Binding for `foo_func` from foo.h; the parameter list mirrors the C
    /// prototype one-to-one.
    ///
    /// `h` is the 16-byte-aligned `bar` passed by value. It and every
    /// argument after it are the ones observed to be corrupted on the C side.
    pub fn foo_func(
        a: *mut ::std::os::raw::c_void,
        b: *mut ::std::os::raw::c_void,
        c: *const ::std::os::raw::c_char,
        d: ::std::os::raw::c_ulong,
        e: bool,
        f: baz,
        g: *mut ::std::os::raw::c_void,
        // Over-aligned struct passed by value: the argument where things go wrong.
        h: bar,
        i: *mut ::std::os::raw::c_void,
        j: *mut ::std::os::raw::c_void,
        k: *mut ::std::os::raw::c_void,
        l: *mut ::std::os::raw::c_void,
        // The only argument the C implementation actually reads (it prints it).
        m: *const ::std::os::raw::c_char,
    ) -> ::std::os::raw::c_int;
}

/// Minimal reproduction: builds recognizable argument values and calls the C
/// function `foo_func` once, discarding its return value.
fn main() {
    // Distinctive field values so they are easy to spot in the debugger.
    let f = baz { a: true, b: 67 };
    let h = bar { a: 0, b: 99 };
    // Owned NUL-terminated string; `m` stays alive until the end of `main`,
    // so the pointer taken below is valid for the whole call.
    let m = std::ffi::CString::new("Hello, world").unwrap();
    // SAFETY (intended): every pointer argument is null except `m`, which
    // points into the live `CString` above, and the C side only reads `m`.
    // The crash in this report comes from the arguments being placed where the
    // callee does not expect them, not from these values.
    unsafe {
        foo_func(
            std::ptr::null_mut(),
            std::ptr::null_mut(),
            std::ptr::null_mut(),
            12,
            true,
            f,
            std::ptr::null_mut(),
            h,
            std::ptr::null_mut(),
            std::ptr::null_mut(),
            std::ptr::null_mut(),
            std::ptr::null_mut(),
            m.as_ptr(),
        )
    };
}

foo.h

#include <stdbool.h>

/* Two unsigned longs, with the struct as a whole over-aligned to 16 bytes.
 * The aligned(16) attribute is what the Rust side mirrors with
 * #[repr(align(16))]. */
struct bar {
    unsigned long a;
    unsigned long b;
}  __attribute__((aligned(16)));

/* Small struct passed by value to foo_func: a bool followed by an unsigned. */
struct baz {
    bool a;
    unsigned b;
};

/* Prototype of the function called from Rust.
 * `f` and `h` are structs passed by value; `h` is the 16-byte-aligned
 * struct bar. The implementation in foo.c only reads `m`. */
int
foo_func(
    void *a,
    void *b,
    const char *c,
    unsigned long d,
    bool e,
    struct baz f,
    void *g,
    struct bar h,
    void *i,
    void *j,
    void *k,
    void *l,
    const char *m);

foo.c

#include <foo.h>
#include <stdio.h>

/* Prints the string `m` and returns 0. Every other parameter is unused; they
 * exist only to reproduce the argument layout that triggers the mismatch.
 * `m` is read as a NUL-terminated string, so a corrupted pointer faults
 * inside printf. */
int
foo_func(
    void *a,
    void *b,
    const char *c,
    unsigned long d,
    bool e,
    struct baz f,
    void *g,
    struct bar h,
    void *i,
    void *j,
    void *k,
    void *l,
    const char *m)
{
    /* No trailing newline in the format string. */
    printf("m = %s", m);
    return 0;
}

I compiled the C code into a shared library using clang
clang -shared foo.c -o libfoo.so -I. -g

here is the version of clang

clang version 11.0.0
Target: x86_64-unknown-linux-gnu
Thread model: posix

I compiled the Rust code using cargo and rustc 1.46.0 (04488afe3 2020-08-24), but told it to link against libfoo.so
cargo:rustc-link-lib=dylib=foo

I set an rpath on the binary so it can find the library and when I run it, it crashes with a stack overflow

thread 'main' has overflowed its stack
fatal runtime error: stack overflow
fish: “target/debug/bug_repro” terminated by signal SIGABRT (Abort)

Opening it up in the debugger we can see the argument corruption

In this case the stack overflow happens in printf

#0  __strlen_avx2 () at ../sysdeps/x86_64/multiarch/strlen-avx2.S:65
#1  0x00007ffff7bf0d8e in __vfprintf_internal (s=0x7ffff7d5d760 <_IO_2_1_stdout_>, format=0x7ffff7dca5f5 "m = %s", ap=ap@entry=0x7fffffffe590, mode_flags=mode_flags@entry=0) at vfprintf-internal.c:1645
#2  0x00007ffff7bda8d8 in __printf (format=<optimized out>) at printf.c:33
#3  0x00007ffff7dca5d0 in foo_func (a=0x0, b=0x0, c=0x0, d=12, e=true, f=..., g=0x0, h=..., i=0x0, j=0x0, k=0x0, l=0x555555815b40, m=0x7fffff7fe000 <error: Cannot access memory at address 0x7fffff7fe000>) at foo.c:20
#4  0x00005555555597b9 in bug_repro::main () at src/main.rs:39

Comparing the local variables and arguments of the two frames

(gdb) info locals
m = std::ffi::c_str::CString {inner: alloc::boxed::Box<[u8]> {data_ptr: 0x555555815b40 "Hello, world\000", length: 13}}
h = bug_repro::bar {a: 0, b: 99}
f = bug_repro::baz {a: true, b: 67}
(gdb) frame 3
#3  0x00007ffff7dca5d0 in foo_func (a=0x0, b=0x0, c=0x0, d=12, e=true, f=..., g=0x0, h=..., i=0x0, j=0x0, k=0x0, l=0x555555815b40, m=0x7fffff7fe000 <error: Cannot access memory at address 0x7fffff7fe000>) at foo.c:20
20          printf("m = %s", m);
(gdb) info args
a = 0x0
b = 0x0
c = 0x0
d = 12
e = true
f = {a = true, b = 67}
g = 0x0
h = {a = 99, b = 0}
i = 0x0
j = 0x0
k = 0x0
l = 0x555555815b40
m = 0x7fffff7fe000 <error: Cannot access memory at address 0x7fffff7fe000>
(gdb)

You can see things start going off the rails at argument h, which doesn't have the right value, and every argument after that is wrong, including m, which holds some garbage stack data. So it's no surprise the stack overflowed when printf was reading at m.

If we take a look at the llvm-ir we can see the problem

The Rust declaration of the function looks like this

; Function Attrs: nounwind nonlazybind uwtable
declare i32 @foo_func(i8*, i8*, i8*, i64, i1 zeroext, i64, i8*, %bar* noalias nocapture byval(%bar) dereferenceable(16), i8*, i8*, i8*, i8*, i8*) unnamed_addr #3

and the C definition of the function looks like this

; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @foo_func(i8* %0, i8* %1, i8* %2, i64 %3, i1 zeroext %4, i64 %5, i8* %6, %struct.bar* byval(%struct.bar) align 16 %7, i8* %8, i8* %9, i8* %10, i8* %11, i8* %12) #0 {

In my limited understanding of this, the only difference is the align 16 for the byval argument which Rust is missing. I can find this mentioned in the LLVM reference

The byval attribute also supports specifying an alignment with the align attribute. It indicates the alignment of the stack slot to form and the known alignment of the pointer specified to the call site. If the alignment is not specified, then the code generator makes a target-specific assumption.

So, since the argument is being passed via the stack, it seems that the alignment of the stack storage can be specified. I wasn't sure at first if this mattered but going back and looking at the assembly produced I was able to determine that it does.

On the C side of the function call we can see that h has this address

(gdb) print &h
$2 = (struct bar *) 0x7fffffffe720

if we subtract 8 from this address, we can see we then get the right value

(gdb) print *(struct bar *)(((void *)&h) - 8)
$5 = {a = 0, b = 99}

The function we are calling is expecting the argument to be at $rbp + 0x20, but the Rust call-site is putting it at $rbp + 0x18 (which is not a 16-byte aligned address). So I assume this missing alignment is causing issues because the caller and callee don't agree on where to look on the stack for the argument.

This issue has been assigned to @pcwalton via this comment.

Metadata

Metadata

Assignees

Labels

- A-FFI — Area: Foreign function interface (FFI)
- C-bug — Category: This is a bug.
- I-unsound — Issue: A soundness hole (worst kind of bug), see: https://en.wikipedia.org/wiki/Soundness
- P-high — High priority
- T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions