Skip to content

Commit 711f531

Browse files
committed
doc: Touch up the unsafe guide
* Change ~ references to Box * Rewrite examples so they can be compiled and run * Mention libcore * Update wording about compiler-required functions
1 parent 1ccc51c commit 711f531

File tree

1 file changed

+144
-52
lines changed

1 file changed

+144
-52
lines changed

src/doc/guide-unsafe.md

Lines changed: 144 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,11 @@ Raw pointers have much fewer guarantees than other pointer types
8989
offered by the Rust language and libraries. For example, they
9090

9191
- are not guaranteed to point to valid memory and are not even
92-
guaranteed to be non-null (unlike both `~` and `&`);
93-
- do not have any automatic clean-up, unlike `~`, and so require
92+
guaranteed to be non-null (unlike both `Box` and `&`);
93+
- do not have any automatic clean-up, unlike `Box`, and so require
9494
manual resource management;
9595
- are plain-old-data, that is, they don't move ownership, again unlike
96-
`~`, hence the Rust compiler cannot protect against bugs like
96+
`Box`, hence the Rust compiler cannot protect against bugs like
9797
use-after-free;
9898
- are considered sendable (if their contents are considered sendable),
9999
so the compiler offers no assistance with ensuring their use is
@@ -189,7 +189,7 @@ code:
189189

190190
As an example, we give a reimplementation of owned boxes by wrapping
191191
`malloc` and `free`. Rust's move semantics and lifetimes mean this
192-
reimplementation is as safe as the built-in `~` type.
192+
reimplementation is as safe as the `Box` type.
193193

194194
```
195195
extern crate libc;
@@ -198,13 +198,14 @@ use std::mem;
198198
use std::ptr;
199199
200200
// Define a wrapper around the handle returned by the foreign code.
201-
// Unique<T> has the same semantics as ~T
201+
// Unique<T> has the same semantics as Box<T>
202202
pub struct Unique<T> {
203203
// It contains a single raw, mutable pointer to the object in question.
204204
ptr: *mut T
205205
}
206206
207207
// Implement methods for creating and using the values in the box.
208+
208209
// NB: For simplicity and correctness, we require that T has kind Send
209210
// (owned boxes relax this restriction, and can contain managed (GC) boxes).
210211
// This is because, as implemented, the garbage collector would not know
@@ -215,23 +216,26 @@ impl<T: Send> Unique<T> {
215216
let ptr = malloc(std::mem::size_of::<T>() as size_t) as *mut T;
216217
// we *need* a valid pointer.
217218
assert!(!ptr.is_null());
218-
// `*ptr` is uninitialized, and `*ptr = value` would attempt to destroy it
219-
// `overwrite` moves a value into this memory without
220-
// attempting to drop the original value.
219+
// `*ptr` is uninitialized, and `*ptr = value` would
220+
// attempt to destroy it. `overwrite` moves a value into
221+
// this memory without attempting to drop the original
222+
// value.
221223
mem::overwrite(&mut *ptr, value);
222224
Unique{ptr: ptr}
223225
}
224226
}
225227
226-
// the 'r lifetime results in the same semantics as `&*x` with ~T
228+
// the 'r lifetime results in the same semantics as `&*x` with
229+
// Box<T>
227230
pub fn borrow<'r>(&'r self) -> &'r T {
228231
// By construction, self.ptr is valid
229232
unsafe { &*self.ptr }
230233
}
231234
232-
// the 'r lifetime results in the same semantics as `&mut *x` with ~T
235+
// the 'r lifetime results in the same semantics as `&mut *x` with
236+
// Box<T>
233237
pub fn borrow_mut<'r>(&'r mut self) -> &'r mut T {
234-
unsafe { &mut*self.ptr }
238+
unsafe { &mut *self.ptr }
235239
}
236240
}
237241
@@ -246,7 +250,6 @@ impl<T: Send> Unique<T> {
246250
impl<T: Send> Drop for Unique<T> {
247251
fn drop(&mut self) {
248252
unsafe {
249-
250253
// Copy the object out from the pointer onto the stack,
251254
// where it is covered by normal Rust destructor semantics
252255
// and cleans itself up, if necessary
@@ -428,11 +431,9 @@ this is undesirable, and can be avoided with the `#![no_std]`
428431
attribute attached to the crate.
429432

430433
```ignore
431-
# // FIXME #12903: linking failures due to no_std
432-
// the minimal library
434+
// a minimal library
433435
#![crate_type="lib"]
434436
#![no_std]
435-
436437
# // fn main() {} tricked you, rustdoc!
437438
```
438439

@@ -444,20 +445,23 @@ default shim for the C `main` function with your own.
444445
The function marked `#[start]` is passed the command line parameters
445446
in the same format as in C:
446447

447-
```ignore
448-
# // FIXME #12903: linking failures due to no_std
448+
```
449449
#![no_std]
450450
451-
extern "rust-intrinsic" { fn abort() -> !; }
452-
#[no_mangle] pub extern fn rust_stack_exhausted() {
453-
unsafe { abort() }
454-
}
451+
// Pull in the system libc library for what crt0.o likely requires
452+
extern crate libc;
455453
454+
// Entry point for this program
456455
#[start]
457456
fn start(_argc: int, _argv: **u8) -> int {
458457
0
459458
}
460459
460+
// These functions are invoked by the compiler, but not
461+
// for a bare-bones hello world. These are normally
462+
// provided by libstd.
463+
#[lang = "stack_exhausted"] extern fn stack_exhausted() {}
464+
#[lang = "eh_personality"] extern fn eh_personality() {}
461465
# // fn main() {} tricked you, rustdoc!
462466
```
463467

@@ -467,29 +471,115 @@ correct ABI and the correct name, which requires overriding the
467471
compiler's name mangling too:
468472

469473
```ignore
470-
# // FIXME #12903: linking failures due to no_std
471474
#![no_std]
472475
#![no_main]
473476
474-
extern "rust-intrinsic" { fn abort() -> !; }
475-
#[no_mangle] pub extern fn rust_stack_exhausted() {
476-
unsafe { abort() }
477-
}
477+
extern crate libc;
478478
479479
#[no_mangle] // ensure that this symbol is called `main` in the output
480-
extern "C" fn main(_argc: int, _argv: **u8) -> int {
480+
pub extern fn main(argc: int, argv: **u8) -> int {
481481
0
482482
}
483483
484+
#[lang = "stack_exhausted"] extern fn stack_exhausted() {}
485+
#[lang = "eh_personality"] extern fn eh_personality() {}
484486
# // fn main() {} tricked you, rustdoc!
485487
```
486488

487489

488-
Unfortunately the Rust compiler assumes that symbols with certain
489-
names exist; and these have to be defined (or linked in). This is the
490-
purpose of the `rust_stack_exhausted`: it is called when a function
491-
detects that it will overflow its stack. The example above uses the
492-
`abort` intrinsic which ensures that execution halts.
490+
The compiler currently makes a few assumptions about symbols which are available
491+
in the executable to call. Normally these functions are provided by the standard
492+
library, but without it you must define your own.
493+
494+
The first of these two functions, `stack_exhausted`, is invoked whenever stack
495+
overflow is detected. This function has a number of restrictions about how it
496+
can be called and what it must do, but if the stack limit register is not being
497+
maintained then a task always has an "infinite stack" and this function
498+
shouldn't get triggered.
499+
500+
The second of these two functions, `eh_personality`, is used by the failure
501+
mechanisms of the compiler. This is often mapped to GCC's personality function
502+
(see the [libstd implementation](../std/rt/unwind/) for more information), but
503+
crates which do not trigger failure can be assured that this function is never
504+
called.
505+
506+
## Using libcore
507+
508+
> **Note**: the core library's structure is unstable, and it is recommended to
509+
> use the standard library instead wherever possible.
510+
511+
With the above techniques, we've got a bare-metal executable running some Rust
512+
code. There is a good deal of functionality provided by the standard library,
513+
however, that is necessary to be productive in Rust. If the standard library is
514+
not sufficient, then [libcore](../core/) is designed to be used instead.
515+
516+
The core library has very few dependencies and is much more portable than the
517+
standard library itself. Additionally, the core library has most of the
518+
necessary functionality for writing idiomatic and effective Rust code.
519+
520+
As an example, here is a program that will calculate the dot product of two
521+
vectors provided from C, using idiomatic Rust practices.
522+
523+
```
524+
#![no_std]
525+
526+
# extern crate libc;
527+
extern crate core;
528+
529+
use core::prelude::*;
530+
531+
use core::mem;
532+
use core::raw::Slice;
533+
534+
#[no_mangle]
535+
pub extern fn dot_product(a: *u32, a_len: u32,
536+
b: *u32, b_len: u32) -> u32 {
537+
// Convert the provided arrays into Rust slices.
538+
// The core::raw module guarantees that the Slice
539+
// structure has the same memory layout as a &[T]
540+
// slice.
541+
//
542+
// This is an unsafe operation because the compiler
543+
// cannot tell the pointers are valid.
544+
let (a_slice, b_slice): (&[u32], &[u32]) = unsafe {
545+
mem::transmute((
546+
Slice { data: a, len: a_len as uint },
547+
Slice { data: b, len: b_len as uint },
548+
))
549+
};
550+
551+
// Iterate over the slices, collecting the result
552+
let mut ret = 0;
553+
for (i, j) in a_slice.iter().zip(b_slice.iter()) {
554+
ret += (*i) * (*j);
555+
}
556+
return ret;
557+
}
558+
559+
#[lang = "begin_unwind"]
560+
extern fn begin_unwind(args: &core::fmt::Arguments,
561+
file: &str,
562+
line: uint) -> ! {
563+
loop {}
564+
}
565+
566+
#[lang = "stack_exhausted"] extern fn stack_exhausted() {}
567+
#[lang = "eh_personality"] extern fn eh_personality() {}
568+
# #[start] fn start(argc: int, argv: **u8) -> int { 0 }
569+
# fn main() {}
570+
```
571+
572+
Note that there is one extra lang item here which differs from the examples
573+
above, `begin_unwind`. This must be defined by consumers of libcore because the
574+
core library declares failure, but it does not define it. The `begin_unwind`
575+
lang item is this crate's definition of failure, and it must be guaranteed to
576+
never return.
577+
578+
As can be seen in this example, the core library is intended to provide the
579+
power of Rust in all circumstances, regardless of platform requirements. Further
580+
libraries, such as liballoc, add functionality to libcore which make other
581+
platform-specific assumptions, but continue to be more portable than the
582+
standard library itself.
493583

494584
# Interacting with the compiler internals
495585

@@ -512,6 +602,10 @@ libraries to interact directly with the compiler and vice versa:
512602

513603
## Intrinsics
514604

605+
> **Note**: intrinsics will forever have an unstable interface; it is
606+
> recommended to use the stable interfaces of libcore rather than intrinsics
607+
> directly.
608+
515609
These are imported as if they were FFI functions, with the special
516610
`rust-intrinsic` ABI. For example, if one was in a freestanding
517611
context, but wished to be able to `transmute` between types, and
@@ -530,36 +624,33 @@ As with any other FFI functions, these are always `unsafe` to call.
530624

531625
## Lang items
532626

627+
> **Note**: lang items are often provided by crates in the Rust distribution,
628+
> and lang items themselves have an unstable interface. It is recommended to use
629+
> officially distributed crates instead of defining your own lang items.
630+
533631
The `rustc` compiler has certain pluggable operations, that is,
534632
functionality that isn't hard-coded into the language, but is
535633
implemented in libraries, with a special marker to tell the compiler
536634
it exists. The marker is the attribute `#[lang="..."]` and there are
537635
various different values of `...`, i.e. various different "lang
538636
items".
539637

540-
For example, `~` pointers require two lang items, one for allocation
541-
and one for deallocation. A freestanding program that uses the `~`
638+
For example, `Box` pointers require two lang items, one for allocation
639+
and one for deallocation. A freestanding program that uses the `Box`
542640
sugar for dynamic allocations via `malloc` and `free`:
543641

544-
```ignore
545-
# // FIXME #12903: linking failures due to no_std
642+
```
546643
#![no_std]
547644
548-
#[allow(ctypes)] // `uint` == `size_t` on Rust's platforms
549-
extern {
550-
fn malloc(size: uint) -> *mut u8;
551-
fn free(ptr: *mut u8);
645+
extern crate libc;
552646
647+
extern {
553648
fn abort() -> !;
554649
}
555650
556-
#[no_mangle] pub extern fn rust_stack_exhausted() {
557-
unsafe { abort() }
558-
}
559-
560651
#[lang="exchange_malloc"]
561-
unsafe fn allocate(size: uint) -> *mut u8 {
562-
let p = malloc(size);
652+
unsafe fn allocate(size: uint, _align: uint) -> *mut u8 {
653+
let p = libc::malloc(size as libc::size_t) as *mut u8;
563654
564655
// malloc failed
565656
if p as uint == 0 {
@@ -569,18 +660,19 @@ unsafe fn allocate(size: uint) -> *mut u8 {
569660
p
570661
}
571662
#[lang="exchange_free"]
572-
unsafe fn deallocate(ptr: *mut u8) {
573-
free(ptr)
663+
unsafe fn deallocate(ptr: *mut u8, _size: uint, _align: uint) {
664+
libc::free(ptr as *mut libc::c_void)
574665
}
575666
576667
#[start]
577-
fn main(_argc: int, _argv: **u8) -> int {
578-
let _x = ~1;
668+
fn main(argc: int, argv: **u8) -> int {
669+
let x = box 1;
579670
580671
0
581672
}
582673
583-
# // fn main() {} tricked you, rustdoc!
674+
#[lang = "stack_exhausted"] extern fn stack_exhausted() {}
675+
#[lang = "eh_personality"] extern fn eh_personality() {}
584676
```
585677

586678
Note the use of `abort`: the `exchange_malloc` lang item is assumed to
@@ -602,6 +694,6 @@ Other features provided by lang items include:
602694
`contravariant_lifetime`, `no_share_bound`, etc.
603695

604696
Lang items are loaded lazily by the compiler; e.g. if one never uses
605-
`~` then there is no need to define functions for `exchange_malloc`
697+
`Box` then there is no need to define functions for `exchange_malloc`
606698
and `exchange_free`. `rustc` will emit an error when an item is needed
607699
but not found in the current crate or any that it depends on.

0 commit comments

Comments
 (0)