|
41 | 41 | // no-pretty-expanded FIXME #15189
|
42 | 42 |
|
43 | 43 | #![allow(non_snake_case)]
|
| 44 | +#![feature(unboxed_closures, overloaded_calls)] |
44 | 45 |
|
45 |
| -use std::from_str::FromStr; |
46 |
| -use std::iter::count; |
47 |
| -use std::cmp::min; |
| 46 | +use std::iter::AdditiveIterator; |
| 47 | +use std::mem; |
48 | 48 | use std::os;
|
49 |
| -use std::sync::{Arc, RWLock}; |
| 49 | +use std::raw::Repr; |
| 50 | +use std::simd::f64x2; |
50 | 51 |
|
// Diff hunk: the removed rows are the old position of the `A(i, j)` helper;
// the added rows introduce the new `main`.
//
// New `main`: picks the problem size and prints `spectralnorm(size)` with
// nine digits after the decimal point. The size is 5500 when the RUST_BENCH
// environment variable is set, 2000 when no command-line argument is given,
// and otherwise the first command-line argument parsed with `from_str`.
51 |
| -fn A(i: uint, j: uint) -> f64 { |
52 |
| - ((i + j) * (i + j + 1) / 2 + i + 1) as f64 |
| 52 | +fn main() { |
| 53 | + let args = os::args(); |
| 54 | + let answer = spectralnorm(if os::getenv("RUST_BENCH").is_some() { |
| 55 | + 5500 |
| 56 | + } else if args.len() < 2 { |
| 57 | + 2000 |
| 58 | + } else { |
// `unwrap` panics when args[1] cannot be parsed as a number.
| 59 | + from_str(args[1].as_slice()).unwrap() |
| 60 | + }); |
| 61 | + println!("{:.9f}", answer); |
53 | 62 | }
|
54 | 63 |
|
// Diff hunk: the removed rows are the old loop-based `dot`; the added rows
// introduce `spectralnorm`.
//
// New `spectralnorm(n)`: asserts that `n` is even, builds three length-`n`
// vectors filled with 1.0, then runs ten rounds of two `mult_AtAv` calls
// (u -> v, then v -> u) that share `tmp` as scratch space. The result is
// sqrt(dot(u, v) / dot(v, v)).
// The even-length assertion matters because `mult` reads its input in chunks
// of two and indexes both elements of every chunk.
55 |
| -fn dot(v: &[f64], u: &[f64]) -> f64 { |
56 |
| - let mut sum = 0.0; |
57 |
| - for (&v_i, &u_i) in v.iter().zip(u.iter()) { |
58 |
| - sum += v_i * u_i; |
| 64 | +fn spectralnorm(n: uint) -> f64 { |
| 65 | + assert!(n % 2 == 0, "only even lengths are accepted"); |
| 66 | + let mut u = Vec::from_elem(n, 1.0); |
| 67 | + let mut v = Vec::from_elem(n, 1.0); |
| 68 | + let mut tmp = Vec::from_elem(n, 1.0); |
| 69 | + for _ in range(0u, 10) { |
| 70 | + mult_AtAv(u.as_slice(), v.as_mut_slice(), tmp.as_mut_slice()); |
| 71 | + mult_AtAv(v.as_slice(), u.as_mut_slice(), tmp.as_mut_slice()); |
59 | 72 | }
|
60 |
| - sum |
| 73 | + (dot(u.as_slice(), v.as_slice()) / dot(v.as_slice(), v.as_slice())).sqrt() |
61 | 74 | }
|
62 | 75 |
|
// Diff hunk: the removed rows are the old `mult`, which took its vectors as
// Arc<RWLock<Vec<f64>>>, split the output indices into chunks of
// `len / 20 + 1`, spawned one `proc` per chunk that read `v` and wrote `out`
// through the lock, and waited for completion by dropping its own sender and
// draining the receiver.
//
// The added rows introduce the new slice-based `mult_AtAv`: it writes
// `mult_Av(v)` into `tmp`, then writes `mult_Atv(tmp)` into `out`.
63 |
| -fn mult(v: Arc<RWLock<Vec<f64>>>, out: Arc<RWLock<Vec<f64>>>, |
64 |
| - f: fn(&Vec<f64>, uint) -> f64) { |
65 |
| - // We launch in different tasks the work to be done. To finish |
66 |
| - // this function, we need to wait for the completion of every |
67 |
| - // tasks. To do that, we give to each tasks a wait_chan that we |
68 |
| - // drop at the end of the work. At the end of this function, we |
69 |
| - // wait until the channel hang up. |
70 |
| - let (tx, rx) = channel(); |
71 |
| - |
72 |
| - let len = out.read().len(); |
73 |
| - let chunk = len / 20 + 1; |
74 |
| - for chk in count(0, chunk) { |
75 |
| - if chk >= len {break;} |
76 |
| - let tx = tx.clone(); |
77 |
| - let v = v.clone(); |
78 |
| - let out = out.clone(); |
79 |
| - spawn(proc() { |
80 |
| - for i in range(chk, min(len, chk + chunk)) { |
81 |
| - let val = f(&*v.read(), i); |
82 |
| - *out.write().get_mut(i) = val; |
83 |
| - } |
84 |
| - drop(tx) |
85 |
| - }); |
86 |
| - } |
87 |
| - |
88 |
| - // wait until the channel hang up (every task finished) |
89 |
| - drop(tx); |
90 |
| - for () in rx.iter() {} |
| 76 | +fn mult_AtAv(v: &[f64], out: &mut [f64], tmp: &mut [f64]) { |
| 77 | + mult_Av(v, tmp); |
| 78 | + mult_Atv(tmp, out); |
91 | 79 | }
|
92 | 80 |
|
// Diff hunk: the removed rows are the old per-index helper `mult_Av_impl`
// (sum over j of v[j] / A(i, j)); the added rows introduce the new `mult_Av`.
//
// New `mult_Av`: hands `out` to `parallel`, and for each sub-slice calls
// `mult` with that sub-slice's starting index and a divisor closure that
// returns A(i, j).
93 |
| -fn mult_Av_impl(v: &Vec<f64> , i: uint) -> f64 { |
94 |
| - let mut sum = 0.; |
95 |
| - for (j, &v_j) in v.iter().enumerate() { |
96 |
| - sum += v_j / A(i, j); |
97 |
| - } |
98 |
| - sum |
| 81 | +fn mult_Av(v: &[f64], out: &mut [f64]) { |
| 82 | + parallel(out, |&: start, out| mult(v, out, start, |i, j| A(i, j))); |
99 | 83 | }
|
100 | 84 |
|
// Diff hunk: the removed rows are the old Arc/RWLock-based `mult_Av` wrapper;
// the added rows introduce the new `mult_Atv`.
//
// New `mult_Atv`: same shape as `mult_Av`, but the divisor closure swaps its
// arguments and returns A(j, i).
101 |
| -fn mult_Av(v: Arc<RWLock<Vec<f64>>>, out: Arc<RWLock<Vec<f64>>>) { |
102 |
| - mult(v, out, mult_Av_impl); |
| 85 | +fn mult_Atv(v: &[f64], out: &mut [f64]) { |
| 86 | + parallel(out, |&: start, out| mult(v, out, start, |i, j| A(j, i))); |
103 | 87 | }
|
104 | 88 |
|
// Diff hunk: the removed rows are the old per-index helper `mult_Atv_impl`
// (sum over j of v[j] / A(j, i)); the added rows introduce the new `mult`.
//
// New `mult`: fills every slot of `out`. The slot at local position k gets
// row index i = k + start and receives the sum over j of v[j] / a(i, j).
// Columns are processed two at a time in an `f64x2`, and the two lanes are
// added together at the end.
105 |
| -fn mult_Atv_impl(v: &Vec<f64> , i: uint) -> f64 { |
106 |
| - let mut sum = 0.; |
107 |
| - for (j, &v_j) in v.iter().enumerate() { |
108 |
| - sum += v_j / A(j, i); |
| 89 | +fn mult(v: &[f64], out: &mut [f64], start: uint, a: |uint, uint| -> f64) { |
| 90 | + for (i, slot) in out.iter_mut().enumerate().map(|(i, s)| (i + start, s)) { |
| 91 | + let mut sum = f64x2(0.0, 0.0); |
// `j` is the column index of the first element of each two-element chunk.
// Indexing `chunk[1]` below is only in bounds when `v` has even length.
| 92 | + for (j, chunk) in v.chunks(2).enumerate().map(|(j, s)| (2 * j, s)) { |
| 93 | + let top = f64x2(chunk[0], chunk[1]); |
| 94 | + let bot = f64x2(a(i, j), a(i, j + 1)); |
| 95 | + sum += top / bot; |
| 96 | + } |
// This `a` is a lane of `sum`; it shadows the closure parameter `a` for the
// remainder of this loop body only.
| 97 | + let f64x2(a, b) = sum; |
| 98 | + *slot = a + b; |
109 | 99 | }
|
110 |
| - sum |
111 | 100 | }
|
112 | 101 |
|
// Diff hunk: the removed rows are the old Arc/RWLock-based `mult_Atv`
// wrapper; the added rows re-introduce `A` at its new position.
//
// `A(i, j)`: computes (i + j) * (i + j + 1) / 2 + i + 1 in `uint` arithmetic
// and converts the result to f64. Callers use this value as a divisor.
113 |
| -fn mult_Atv(v: Arc<RWLock<Vec<f64>>>, out: Arc<RWLock<Vec<f64>>>) { |
114 |
| - mult(v, out, mult_Atv_impl); |
| 102 | +fn A(i: uint, j: uint) -> f64 { |
| 103 | + ((i + j) * (i + j + 1) / 2 + i + 1) as f64 |
115 | 104 | }
|
116 | 105 |
|
// Diff hunk: the removed rows are the old Arc/RWLock-based `mult_AtAv`; the
// added rows introduce the iterator-based `dot`.
//
// New `dot`: pairs the elements of `v` and `u` with `zip`, multiplies each
// pair, and sums the products. `zip` stops at the end of the shorter slice.
117 |
| -fn mult_AtAv(v: Arc<RWLock<Vec<f64>>>, out: Arc<RWLock<Vec<f64>>>, |
118 |
| - tmp: Arc<RWLock<Vec<f64>>>) { |
119 |
| - mult_Av(v, tmp.clone()); |
120 |
| - mult_Atv(tmp, out); |
| 106 | +fn dot(v: &[f64], u: &[f64]) -> f64 { |
| 107 | + v.iter().zip(u.iter()).map(|(a, b)| *a * *b).sum() |
121 | 108 | }
|
122 | 109 |
|
// Diff hunk: the removed rows are the old `main`, which wrapped u, v and tmp
// in Arc<RWLock<..>>, ran the ten iteration rounds itself and printed the
// result. The added rows introduce the generic `parallel` helper.
//
// New `parallel(v, f)`: splits `v` into mutable chunks, spawns one task per
// chunk, and calls `f(start, chunk)` in each task, where `start` is the index
// of the chunk's first element within `v`. It returns only after the channel
// receiver has been drained.
123 |
| -fn main() { |
124 |
| - let args = os::args(); |
125 |
| - let args = args.as_slice(); |
126 |
| - let n = if os::getenv("RUST_BENCH").is_some() { |
127 |
| - 5500 |
128 |
| - } else if args.len() < 2 { |
129 |
| - 2000 |
130 |
| - } else { |
131 |
| - FromStr::from_str(args[1].as_slice()).unwrap() |
132 |
| - }; |
133 |
| - let u = Arc::new(RWLock::new(Vec::from_elem(n, 1f64))); |
134 |
| - let v = Arc::new(RWLock::new(Vec::from_elem(n, 1f64))); |
135 |
| - let tmp = Arc::new(RWLock::new(Vec::from_elem(n, 1f64))); |
136 |
| - for _ in range(0u8, 10) { |
137 |
| - mult_AtAv(u.clone(), v.clone(), tmp.clone()); |
138 |
| - mult_AtAv(v.clone(), u.clone(), tmp.clone()); |
139 |
| - } |
| 110 | +// Executes a closure in parallel over the given mutable slice. The closure `f` |
| 111 | +// is run in parallel and yielded the starting index within `v` as well as a |
| 112 | +// sub-slice of `v`. |
| 113 | +fn parallel<'a, T, F>(v: &'a mut [T], f: F) |
| 114 | + where T: Send + Sync, |
| 115 | + F: Fn(uint, &'a mut [T]) + Sync { |
| 116 | + let (tx, rx) = channel(); |
// Chunk length: the slice length divided by the CPU count, plus one, so the
// chunk size passed to `chunks_mut` is never zero.
| 117 | + let size = v.len() / os::num_cpus() + 1; |
140 | 118 |
|
141 |
| - let u = u.read(); |
142 |
| - let v = v.read(); |
143 |
| - println!("{:.9f}", (dot(u.as_slice(), v.as_slice()) / |
144 |
| - dot(v.as_slice(), v.as_slice())).sqrt()); |
| 119 | + for (i, chunk) in v.chunks_mut(size).enumerate() { |
| 120 | + let tx = tx.clone(); |
| 121 | + |
| 122 | + // Need to convert `f` and `chunk` to something that can cross the task |
| 123 | + // boundary. |
| 124 | + let f = &f as *const _ as *const uint; |
| 125 | + let raw = chunk.repr(); |
| 126 | + spawn(proc() { |
| 127 | + let f = f as *const F; |
// `i * size` is the index of this chunk's first element within `v`.
// NOTE(review): dereferencing `f` and transmuting `raw` back into a slice
// appears to rely on this function not returning until every task has
// dropped its `tx` (see the receiver loop below), which would keep `f` and
// `v` alive while the tasks run - confirm.
| 128 | + unsafe { (*f)(i * size, mem::transmute(raw)) } |
| 129 | + drop(tx) |
| 130 | + }); |
| 131 | + } |
// Drop the original sender so that `rx.iter()` ends once every task has
// dropped its clone; the loop below blocks until then.
| 132 | + drop(tx); |
| 133 | + for () in rx.iter() {} |
145 | 134 | }
|
0 commit comments