Skip to content

Commit 0d687bd

Browse files
authored
Merge pull request rust-lang#190 from rust-lang/feature/more-simd
Feature/more simd
2 parents 14e0924 + 5f4777e commit 0d687bd

14 files changed

+226
-23
lines changed

Cargo.lock

Lines changed: 9 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ gccjit = { git = "https://github.com/antoyo/gccjit.rs" }
2727
# Local copy.
2828
#gccjit = { path = "../gccjit.rs" }
2929

30+
smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }
3031
target-lexicon = "0.10.0"
3132

3233
ar = "0.8.0"
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
From c3821e02fbd6cb5ad6e06d759fccdc9073712375 Mon Sep 17 00:00:00 2001
2+
From: Antoni Boucher <[email protected]>
3+
Date: Tue, 7 Jun 2022 21:40:13 -0400
4+
Subject: [PATCH] Add stdarch Cargo.toml for testing
5+
6+
---
7+
library/stdarch/Cargo.toml | 20 ++++++++++++++++++++
8+
1 file changed, 20 insertions(+)
9+
create mode 100644 library/stdarch/Cargo.toml
10+
11+
diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml
12+
new file mode 100644
13+
index 0000000..fbe0a95
14+
--- /dev/null
15+
+++ b/library/stdarch/Cargo.toml
16+
@@ -0,0 +1,20 @@
17+
+[workspace]
18+
+members = [
19+
+ "crates/core_arch",
20+
+ "crates/std_detect",
21+
+ "crates/stdarch-gen",
22+
+ "examples/"
23+
+]
24+
+exclude = [
25+
+ "crates/wasm-assert-instr-tests"
26+
+]
27+
+
28+
+[profile.release]
29+
+debug = true
30+
+opt-level = 3
31+
+incremental = true
32+
+
33+
+[profile.bench]
34+
+debug = 1
35+
+opt-level = 3
36+
+incremental = true
37+
--
38+
2.26.2.7.g19db9cfb68.dirty
39+

src/attributes.rs

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
#[cfg_attr(not(feature="master"), allow(unused_imports))]
2+
use gccjit::FnAttribute;
3+
use gccjit::Function;
4+
use rustc_attr::InstructionSetAttr;
5+
use rustc_codegen_ssa::target_features::tied_target_features;
6+
use rustc_data_structures::fx::FxHashMap;
7+
use rustc_middle::ty;
8+
use rustc_session::Session;
9+
use rustc_span::symbol::sym;
10+
use smallvec::{smallvec, SmallVec};
11+
12+
use crate::context::CodegenCx;
13+
14+
// Given a map from target_features to whether they are enabled or disabled,
15+
// ensure only valid combinations are allowed.
16+
pub fn check_tied_features(sess: &Session, features: &FxHashMap<&str, bool>) -> Option<&'static [&'static str]> {
17+
for tied in tied_target_features(sess) {
18+
// Tied features must be set to the same value, or not set at all
19+
let mut tied_iter = tied.iter();
20+
let enabled = features.get(tied_iter.next().unwrap());
21+
if tied_iter.any(|feature| enabled != features.get(feature)) {
22+
return Some(tied);
23+
}
24+
}
25+
None
26+
}
27+
28+
// TODO(antoyo): maybe move to a new module gcc_util.
29+
// To find a list of GCC's names, check https://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html
30+
fn to_gcc_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
31+
let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch };
32+
match (arch, s) {
33+
("x86", "sse4.2") => smallvec!["sse4.2", "crc32"],
34+
("x86", "pclmulqdq") => smallvec!["pclmul"],
35+
("x86", "rdrand") => smallvec!["rdrnd"],
36+
("x86", "bmi1") => smallvec!["bmi"],
37+
("x86", "cmpxchg16b") => smallvec!["cx16"],
38+
("x86", "avx512vaes") => smallvec!["vaes"],
39+
("x86", "avx512gfni") => smallvec!["gfni"],
40+
("x86", "avx512vpclmulqdq") => smallvec!["vpclmulqdq"],
41+
// NOTE: seems like GCC requires 'avx512bw' for 'avx512vbmi2'.
42+
("x86", "avx512vbmi2") => smallvec!["avx512vbmi2", "avx512bw"],
43+
// NOTE: seems like GCC requires 'avx512bw' for 'avx512bitalg'.
44+
("x86", "avx512bitalg") => smallvec!["avx512bitalg", "avx512bw"],
45+
("aarch64", "rcpc2") => smallvec!["rcpc-immo"],
46+
("aarch64", "dpb") => smallvec!["ccpp"],
47+
("aarch64", "dpb2") => smallvec!["ccdp"],
48+
("aarch64", "frintts") => smallvec!["fptoint"],
49+
("aarch64", "fcma") => smallvec!["complxnum"],
50+
("aarch64", "pmuv3") => smallvec!["perfmon"],
51+
("aarch64", "paca") => smallvec!["pauth"],
52+
("aarch64", "pacg") => smallvec!["pauth"],
53+
// Rust ties fp and neon together. In LLVM neon implicitly enables fp,
54+
// but we manually enable neon when a feature only implicitly enables fp
55+
("aarch64", "f32mm") => smallvec!["f32mm", "neon"],
56+
("aarch64", "f64mm") => smallvec!["f64mm", "neon"],
57+
("aarch64", "fhm") => smallvec!["fp16fml", "neon"],
58+
("aarch64", "fp16") => smallvec!["fullfp16", "neon"],
59+
("aarch64", "jsconv") => smallvec!["jsconv", "neon"],
60+
("aarch64", "sve") => smallvec!["sve", "neon"],
61+
("aarch64", "sve2") => smallvec!["sve2", "neon"],
62+
("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"],
63+
("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"],
64+
("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"],
65+
("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"],
66+
(_, s) => smallvec![s],
67+
}
68+
}
69+
70+
/// Composite function which sets GCC attributes for function depending on its AST (`#[attribute]`)
71+
/// attributes.
72+
pub fn from_fn_attrs<'gcc, 'tcx>(
73+
cx: &CodegenCx<'gcc, 'tcx>,
74+
#[cfg_attr(not(feature="master"), allow(unused_variables))]
75+
func: Function<'gcc>,
76+
instance: ty::Instance<'tcx>,
77+
) {
78+
let codegen_fn_attrs = cx.tcx.codegen_fn_attrs(instance.def_id());
79+
80+
let function_features =
81+
codegen_fn_attrs.target_features.iter().map(|features| features.as_str()).collect::<Vec<&str>>();
82+
83+
if let Some(features) = check_tied_features(cx.tcx.sess, &function_features.iter().map(|features| (*features, true)).collect()) {
84+
let span = cx.tcx
85+
.get_attr(instance.def_id(), sym::target_feature)
86+
.map_or_else(|| cx.tcx.def_span(instance.def_id()), |a| a.span);
87+
let msg = format!("the target features {} must all be either enabled or disabled together", features.join(", "));
88+
let mut err = cx.tcx.sess.struct_span_err(span, &msg);
89+
err.help("add the missing features in a `target_feature` attribute");
90+
err.emit();
91+
return;
92+
}
93+
94+
let mut function_features = function_features
95+
.iter()
96+
.flat_map(|feat| to_gcc_features(cx.tcx.sess, feat).into_iter())
97+
.chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
98+
InstructionSetAttr::ArmA32 => "-thumb-mode", // TODO(antoyo): support removing feature.
99+
InstructionSetAttr::ArmT32 => "thumb-mode",
100+
}))
101+
.collect::<Vec<_>>();
102+
103+
// TODO(antoyo): check if we really need global backend features. (Maybe they could be applied
104+
// globally?)
105+
let mut global_features = cx.tcx.global_backend_features(()).iter().map(|s| s.as_str());
106+
function_features.extend(&mut global_features);
107+
let target_features = function_features.join(",");
108+
if !target_features.is_empty() {
109+
#[cfg(feature="master")]
110+
func.add_attribute(FnAttribute::Target, &target_features);
111+
}
112+
}

src/base.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,15 +83,23 @@ pub fn compile_codegen_unit<'tcx>(tcx: TyCtxt<'tcx>, cgu_name: Symbol, supports_
8383
context.add_command_line_option("-mavx2");
8484
// FIXME(antoyo): the following causes an illegal instruction on vmovdqu64 in std_example on my CPU.
8585
// Only add if the CPU supports it.
86-
/*context.add_command_line_option("-mavx512f");
8786
context.add_command_line_option("-msha");
8887
context.add_command_line_option("-mpclmul");
8988
context.add_command_line_option("-mfma");
9089
context.add_command_line_option("-mfma4");
91-
context.add_command_line_option("-mavx512vpopcntdq");
92-
context.add_command_line_option("-mavx512vl");
9390
context.add_command_line_option("-m64");
94-
context.add_command_line_option("-mbmi");*/
91+
context.add_command_line_option("-mbmi");
92+
context.add_command_line_option("-mgfni");
93+
context.add_command_line_option("-mavxvnni");
94+
context.add_command_line_option("-mf16c");
95+
context.add_command_line_option("-maes");
96+
context.add_command_line_option("-mxsavec");
97+
context.add_command_line_option("-mbmi2");
98+
context.add_command_line_option("-mrtm");
99+
context.add_command_line_option("-mvaes");
100+
context.add_command_line_option("-mvpclmulqdq");
101+
context.add_command_line_option("-mavx");
102+
95103
for arg in &tcx.sess.opts.cg.llvm_args {
96104
context.add_command_line_option(arg);
97105
}

src/builder.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
213213

214214
let actual_ty = actual_val.get_type();
215215
if expected_ty != actual_ty {
216-
if !actual_ty.is_vector() && !expected_ty.is_vector() && actual_ty.is_integral() && expected_ty.is_integral() {
216+
if !actual_ty.is_vector() && !expected_ty.is_vector() && (actual_ty.is_integral() && expected_ty.is_integral()) || (actual_ty.get_pointee().is_some() && expected_ty.get_pointee().is_some()) {
217217
self.context.new_cast(None, actual_val, expected_ty)
218218
}
219219
else if on_stack_param_indices.contains(&index) {
@@ -1490,6 +1490,9 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
14901490
let zeros = self.context.new_rvalue_from_vector(None, cond_type, &zeros);
14911491

14921492
let masks = self.context.new_comparison(None, ComparisonOp::NotEquals, cond, zeros);
1493+
// NOTE: masks is a vector of integers, but the values can be vectors of floats, so use bitcast to make
1494+
// the & operation work.
1495+
let masks = self.bitcast_if_needed(masks, then_val.get_type());
14931496
let then_vals = masks & then_val;
14941497

14951498
let ones = vec![self.context.new_rvalue_one(element_type); num_units];
@@ -1509,6 +1512,16 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
15091512
/// Builds the expression `(a - b) & (b >= a)`.
///
/// The comparison result is bitcast to the type of `a` first when the two types differ, so
/// that the `&` is applied to operands of the same type.
fn difference_or_zero<'gcc>(a: RValue<'gcc>, b: RValue<'gcc>, context: &'gcc Context<'gcc>) -> RValue<'gcc> {
    let difference = a - b;
    let comparison = context.new_comparison(None, ComparisonOp::GreaterThanEquals, b, a);
    // NOTE: the comparison yields a vector of integers, but the values can be vectors of
    // floats, so use bitcast to make the & operation work.
    let value_type = a.get_type();
    let masks =
        if comparison.get_type() == value_type {
            comparison
        }
        else {
            context.new_bitcast(None, comparison, value_type)
        };
    difference & masks
}
15141527

src/callee.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use rustc_middle::ty::{self, Instance, TypeFoldable};
44
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
55

66
use crate::abi::FnAbiGccExt;
7+
use crate::attributes;
78
use crate::context::CodegenCx;
89

910
/// Codegens a reference to a fn/method item, monomorphizing and
@@ -67,8 +68,12 @@ pub fn get_fn<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, instance: Instance<'tcx>)
6768
cx.linkage.set(FunctionType::Extern);
6869
let func = cx.declare_fn(&sym, &fn_abi);
6970

71+
attributes::from_fn_attrs(cx, func, instance);
72+
7073
// TODO(antoyo): set linkage and attributes.
71-
func
74+
75+
// FIXME(antoyo): this is a wrong cast. That requires changing the compiler API.
76+
unsafe { std::mem::transmute(func) }
7277
};
7378

7479
cx.function_instances.borrow_mut().insert(instance, func);

src/common.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,18 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
177177
}
178178

179179
let value = self.const_uint_big(self.type_ix(bitsize), data);
180-
// TODO(bjorn3): assert size is correct
181-
self.const_bitcast(value, ty)
180+
let bytesize = layout.size(self).bytes();
181+
if bitsize > 1 && ty.is_integral() && bytesize as u32 == ty.get_size() {
182+
// NOTE: since the intrinsic _xabort is called with a bitcast, which
183+
// is non-const, but expects a constant, do a normal cast instead of a bitcast.
184+
// FIXME(antoyo): fix bitcast to work in constant contexts.
185+
// TODO(antoyo): perhaps only use bitcast for pointers?
186+
self.context.new_cast(None, value, ty)
187+
}
188+
else {
189+
// TODO(bjorn3): assert size is correct
190+
self.const_bitcast(value, ty)
191+
}
182192
}
183193
Scalar::Ptr(ptr, _size) => {
184194
let (alloc_id, offset) = ptr.into_parts();

src/declare.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,12 +79,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
7979
unsafe { std::mem::transmute(func) }
8080
}
8181

82-
pub fn declare_fn(&self, name: &str, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> RValue<'gcc> {
82+
pub fn declare_fn(&self, name: &str, fn_abi: &FnAbi<'tcx, Ty<'tcx>>) -> Function<'gcc> {
8383
let (return_type, params, variadic, on_stack_param_indices) = fn_abi.gcc_type(self);
8484
let func = declare_raw_fn(self, name, () /*fn_abi.llvm_cconv()*/, return_type, &params, variadic);
8585
self.on_stack_function_params.borrow_mut().insert(func, on_stack_param_indices);
86-
// FIXME(antoyo): this is a wrong cast. That requires changing the compiler API.
87-
unsafe { std::mem::transmute(func) }
86+
func
8887
}
8988

9089
pub fn define_global(&self, name: &str, ty: Type<'gcc>, is_tls: bool, link_section: Option<Symbol>) -> LValue<'gcc> {

src/intrinsic/llvm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc
287287
pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(builder: &Builder<'a, 'gcc, 'tcx>, mut return_value: RValue<'gcc>, func_name: &str, args: &[RValue<'gcc>], args_adjusted: bool) -> RValue<'gcc> {
288288
match func_name {
289289
"__builtin_ia32_vfmaddss3_round" | "__builtin_ia32_vfmaddsd3_round" => {
290-
let zero = builder.context.new_rvalue_zero(builder.int_type);
291290
#[cfg(feature="master")]
292291
{
292+
let zero = builder.context.new_rvalue_zero(builder.int_type);
293293
return_value = builder.context.new_vector_access(None, return_value, zero).to_rvalue();
294294
}
295295
},

src/intrinsic/simd.rs

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
use gccjit::{BinaryOp, RValue, Type, ToRValue, ComparisonOp, UnaryOp};
1+
#[cfg_attr(not(feature="master"), allow(unused_imports))]
2+
use gccjit::{ToRValue, ComparisonOp, UnaryOp};
3+
use gccjit::{BinaryOp, RValue, Type};
24
use rustc_codegen_ssa::base::compare_simd_types;
35
use rustc_codegen_ssa::common::{TypeKind, span_invalid_monomorphization_error};
46
use rustc_codegen_ssa::mir::operand::OperandRef;
@@ -216,7 +218,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
216218
let variable = bx.current_func().new_local(None, vector.get_type(), "new_vector");
217219
bx.llbb().add_assignment(None, variable, vector);
218220
let lvalue = bx.context.new_vector_access(None, variable.to_rvalue(), index);
219-
// TODO: if simd_insert is constant, use BIT_REF.
221+
// TODO(antoyo): if simd_insert is constant, use BIT_REF.
220222
bx.llbb().add_assignment(None, lvalue, value);
221223
return Ok(variable.to_rvalue());
222224
}
@@ -545,9 +547,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
545547
},
546548
(true, true) => {
547549
// Algorithm from: https://codereview.stackexchange.com/questions/115869/saturated-signed-addition
548-
// TODO: improve using conditional operators if possible.
550+
// TODO(antoyo): improve using conditional operators if possible.
549551
let arg_type = lhs.get_type();
550-
// TODO: convert lhs and rhs to unsigned.
552+
// TODO(antoyo): convert lhs and rhs to unsigned.
551553
let sum = lhs + rhs;
552554
let vector_type = arg_type.dyncast_vector().expect("vector type");
553555
let unit = vector_type.get_num_units();
@@ -581,7 +583,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(bx: &mut Builder<'a, 'gcc, 'tcx>,
581583
// negative of the right operand. Find a proper subtraction algorithm.
582584
let rhs = bx.context.new_unary_op(None, UnaryOp::Minus, arg_type, rhs);
583585

584-
// TODO: convert lhs and rhs to unsigned.
586+
// TODO(antoyo): convert lhs and rhs to unsigned.
585587
let sum = lhs + rhs;
586588
let vector_type = arg_type.dyncast_vector().expect("vector type");
587589
let unit = vector_type.get_num_units();

src/lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#![warn(unused_lifetimes)]
1414

1515
extern crate rustc_ast;
16+
extern crate rustc_attr;
1617
extern crate rustc_codegen_ssa;
1718
extern crate rustc_data_structures;
1819
extern crate rustc_errors;
@@ -32,6 +33,7 @@ mod abi;
3233
mod allocator;
3334
mod archive;
3435
mod asm;
36+
mod attributes;
3537
mod back;
3638
mod base;
3739
mod builder;
@@ -302,9 +304,11 @@ pub fn target_features(sess: &Session) -> Vec<Symbol> {
302304
.filter(|_feature| {
303305
// TODO(antoyo): implement a way to get enabled feature in libgccjit.
304306
// Probably using the equivalent of __builtin_cpu_supports.
307+
// TODO(antoyo): maybe use whatever outputs the following command:
308+
// gcc -march=native -Q --help=target
305309
#[cfg(feature="master")]
306310
{
307-
_feature.contains("sse") || _feature.contains("avx")
311+
(_feature.contains("sse") || _feature.contains("avx")) && !_feature.contains("avx512")
308312
}
309313
#[cfg(not(feature="master"))]
310314
{

0 commit comments

Comments
 (0)