Skip to content

Commit 9481c53

Browse files
committed
fix division on SPARC
1 parent 63ccaf1 commit 9481c53

File tree

3 files changed

+183
-8
lines changed

3 files changed

+183
-8
lines changed

src/int/specialized_div_rem/delegate.rs

+130
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,133 @@ macro_rules! impl_delegate {
185185
}
186186
};
187187
}
188+
189+
/// Returns `n / d` and sets `*rem = n % d`.
///
/// This specialization exists because:
/// - The LLVM backend for 32-bit SPARC cannot compile functions that return `(u128, u128)`,
///   so we have to use an old fashioned `&mut u128` argument to return the remainder.
/// - 64-bit SPARC does not have u64 * u64 => u128 widening multiplication, which makes the
///   delegate algorithm strategy the only reasonably fast way to perform `u128` division.
///
/// NOTE(review): callers must not pass `div == 0`; the first match arm calls `zero_div_fn()`
/// (defined in the parent module) for that case.
#[doc(hidden)]
pub fn u128_divide_sparc(duo: u128, div: u128, rem: &mut u128) -> u128 {
    // Pulls in `zero_div_fn`, `u64_by_u64_div_rem`, and `u64_normalization_shift`
    // from the parent `specialized_div_rem` module.
    use super::*;
    // Split dividend (`duo`) and divisor (`div`) into 64-bit halves.
    let duo_lo = duo as u64;
    let duo_hi = (duo >> 64) as u64;
    let div_lo = div as u64;
    let div_hi = (div >> 64) as u64;

    // Case split on which halves are zero:
    // (div_lo == 0, div_hi == 0, duo_hi == 0)
    match (div_lo == 0, div_hi == 0, duo_hi == 0) {
        // Divisor is zero: both halves are 0.
        (true, true, _) => zero_div_fn(),
        // Divisor occupies the upper half but dividend fits in 64 bits,
        // so `duo < div`: quotient 0, remainder is the whole dividend.
        (_, false, true) => {
            *rem = duo;
            return 0;
        }
        // Both operands fit in 64 bits: one hardware (or delegated) 64-bit division.
        (false, true, true) => {
            let tmp = u64_by_u64_div_rem(duo_lo, div_lo);
            *rem = tmp.1 as u128;
            return tmp.0 as u128;
        }
        // Divisor fits in 64 bits, dividend does not.
        (false, true, false) => {
            if duo_hi < div_lo {
                // Quotient fits in 64 bits. Normalize the divisor up toward the
                // dividend, then run a restoring shift-subtract division loop.
                let norm_shift = u64_normalization_shift(div_lo, duo_hi, false);
                let shl = if norm_shift == 0 {
                    // `duo_hi` and `div_lo` share the same leading-bit position;
                    // shift by the maximum 63 bits.
                    64 - 1
                } else {
                    64 - norm_shift
                };

                let mut div: u128 = div << shl;
                // Bit of the quotient currently being decided.
                let mut pow_lo: u64 = 1 << shl;
                let mut quo_lo: u64 = 0;
                let mut duo = duo;
                loop {
                    let sub = duo.wrapping_sub(div);
                    // `sub as i128 >= 0` means no borrow occurred, i.e. `div <= duo`.
                    if 0 <= (sub as i128) {
                        duo = sub;
                        quo_lo |= pow_lo;
                        let duo_hi = (duo >> 64) as u64;
                        if duo_hi == 0 {
                            // Remainder now fits in 64 bits: finish with one
                            // 64-bit division and merge its quotient bits.
                            let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
                            *rem = tmp.1 as u128;
                            return (quo_lo | tmp.0) as u128;
                        }
                    }
                    div >>= 1;
                    pow_lo >>= 1;
                }
            } else if duo_hi == div_lo {
                // The upper-half division is exactly 1, so the quotient is
                // `(1 << 64) | (duo_lo / div_lo)` (note `div_hi == 0` here,
                // so `div as u64 == div_lo`).
                let tmp = u64_by_u64_div_rem(duo as u64, div as u64);
                *rem = tmp.1 as u128;
                return (1 << 64) | (tmp.0 as u128);
            } else {
                if (div_lo >> 32) == 0 {
                    // Divisor fits in 32 bits: do three chained 64-by-32 style
                    // divisions (schoolbook short division in base 2^32), feeding
                    // each partial remainder into the next digit.
                    let div_0 = div_lo as u32 as u64;
                    let (quo_hi, rem_3) = u64_by_u64_div_rem(duo_hi, div_0);

                    let duo_mid = ((duo >> 32) as u32 as u64) | (rem_3 << 32);
                    let (quo_1, rem_2) = u64_by_u64_div_rem(duo_mid, div_0);

                    let duo_lo = (duo as u32 as u64) | (rem_2 << 32);
                    let (quo_0, rem_1) = u64_by_u64_div_rem(duo_lo, div_0);

                    *rem = rem_1 as u128;
                    return (quo_0 as u128) | ((quo_1 as u128) << 32) | ((quo_hi as u128) << 64);
                }

                // `duo_hi > div_lo` with a full 64-bit divisor: first divide the
                // upper half to get the upper quotient word, leaving a reduced
                // dividend `(tmp.1 << 64) | duo_lo`.
                let duo_lo = duo as u64;
                let tmp = u64_by_u64_div_rem(duo_hi, div_lo);
                let quo_hi = tmp.0;
                let mut duo = (duo_lo as u128) | ((tmp.1 as u128) << 64);
                if duo < div {
                    // Nothing left to divide; lower quotient word is zero.
                    *rem = duo;
                    return (quo_hi as u128) << 64;
                }

                // Restoring shift-subtract loop for the lower quotient word.
                // The reduced dividend's high bit position bounds the shift at 63.
                let mut div: u128 = div << (64 - 1);
                let mut pow_lo: u64 = 1 << (64 - 1);
                let mut quo_lo: u64 = 0;
                loop {
                    let sub = duo.wrapping_sub(div);
                    if 0 <= (sub as i128) {
                        duo = sub;
                        quo_lo |= pow_lo;
                        let duo_hi = (duo >> 64) as u64;
                        if duo_hi == 0 {
                            // Remainder fits in 64 bits: one final division,
                            // then merge all three quotient contributions.
                            let tmp = u64_by_u64_div_rem(duo as u64, div_lo);
                            *rem = tmp.1 as u128;
                            return (tmp.0) as u128 | (quo_lo as u128) | ((quo_hi as u128) << 64);
                        }
                    }
                    div >>= 1;
                    pow_lo >>= 1;
                }
            }
        }
        // Both dividend and divisor occupy the upper half.
        (_, false, false) => {
            if duo < div {
                *rem = duo;
                return 0;
            }
            // Since `div_hi != 0`, the quotient fits in 64 bits; one normalized
            // restoring shift-subtract loop suffices.
            let div_original = div;
            let shl = u64_normalization_shift(duo_hi, div_hi, false);
            let mut duo = duo;
            let mut div: u128 = div << shl;
            let mut pow_lo: u64 = 1 << shl;
            let mut quo_lo: u64 = 0;
            loop {
                let sub = duo.wrapping_sub(div);
                if 0 <= (sub as i128) {
                    duo = sub;
                    quo_lo |= pow_lo;
                    if duo < div_original {
                        // Cannot subtract the divisor again: division complete.
                        *rem = duo;
                        return quo_lo as u128;
                    }
                }
                div >>= 1;
                pow_lo >>= 1;
            }
        }
    }
}

src/int/specialized_div_rem/mod.rs

+21-8
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ mod binary_long;
4646

4747
#[macro_use]
4848
mod delegate;
49+
pub use self::delegate::u128_divide_sparc;
4950

5051
#[macro_use]
5152
mod trifecta;
@@ -60,10 +61,6 @@ fn zero_div_fn() -> ! {
6061
unsafe { core::hint::unreachable_unchecked() }
6162
}
6263

63-
// The `B` extension on RISC-V determines if a CLZ assembly instruction exists
64-
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
65-
const USE_LZ: bool = cfg!(target_feature = "b");
66-
6764
#[cfg(target_arch = "arm")]
6865
const USE_LZ: bool = if cfg!(target_feature = "thumb-mode") {
6966
// ARM thumb targets have CLZ instructions if the instruction set of ARMv6T2 is supported. This
@@ -77,8 +74,22 @@ const USE_LZ: bool = if cfg!(target_feature = "thumb-mode") {
7774
cfg!(target_feature = "v5te")
7875
};
7976

80-
// All other targets Rust supports have CLZ instructions
81-
#[cfg(not(any(target_arch = "arm", target_arch = "riscv32", target_arch = "riscv64")))]
77+
// `USE_LZ` selects between normalization strategies in `impl_normalization_shift!`:
// `true` when the target has a usable count-leading-zeros instruction.

// LZD or LZCNT on SPARC only exists for the VIS 3 extension and later.
#[cfg(any(target_arch = "sparc", target_arch = "sparc64"))]
const USE_LZ: bool = cfg!(target_feature = "vis3");

// The `B` extension on RISC-V determines if a CLZ assembly instruction exists
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
const USE_LZ: bool = cfg!(target_feature = "b");

// All other common targets Rust supports should have CLZ instructions
// (ARM is handled by its own cfg'd definition above this one).
#[cfg(not(any(
    target_arch = "arm",
    target_arch = "sparc",
    target_arch = "sparc64",
    target_arch = "riscv32",
    target_arch = "riscv64"
)))]
const USE_LZ: bool = true;
8394

8495
impl_normalization_shift!(
@@ -115,8 +126,9 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
115126
// microarchitecture can multiply and divide. We decide to be optimistic and assume `trifecta` is
116127
// faster if the target pointer width is at least 64.
117128
#[cfg(all(
129+
not(any(target_pointer_width = "16", target_pointer_width = "32")),
118130
not(all(not(feature = "no-asm"), target_arch = "x86_64")),
119-
not(any(target_pointer_width = "16", target_pointer_width = "32"))
131+
not(any(target_arch = "sparc", target_arch = "sparc64"))
120132
))]
121133
impl_trifecta!(
122134
u128_div_rem,
@@ -131,8 +143,9 @@ impl_trifecta!(
131143
// If the pointer width less than 64, then the target architecture almost certainly does not have
132144
// the fast 64 to 128 bit widening multiplication needed for `trifecta` to be faster.
133145
#[cfg(all(
146+
any(target_pointer_width = "16", target_pointer_width = "32"),
134147
not(all(not(feature = "no-asm"), target_arch = "x86_64")),
135-
any(target_pointer_width = "16", target_pointer_width = "32")
148+
not(any(target_arch = "sparc", target_arch = "sparc64"))
136149
))]
137150
impl_delegate!(
138151
u128_div_rem,

src/int/udiv.rs

+32
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
pub use int::specialized_div_rem::u128_divide_sparc;
12
use int::specialized_div_rem::*;
23

34
intrinsics! {
@@ -45,7 +46,10 @@ intrinsics! {
4546
}
4647
quo_rem.0
4748
}
49+
}
4850

51+
#[cfg(not(any(target_arch = "sparc", target_arch = "sparc64")))]
52+
intrinsics! {
4953
#[win64_128bit_abi_hack]
5054
/// Returns `n / d`
5155
pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 {
@@ -68,3 +72,31 @@ intrinsics! {
6872
quo_rem.0
6973
}
7074
}
75+
76+
// On SPARC targets the generic `u128_div_rem` (which returns a `(u128, u128)`
// tuple) is not used; all three u128 unsigned-division intrinsics are instead
// routed through `u128_divide_sparc`, which returns the remainder via
// `&mut u128` to work around the 32-bit SPARC LLVM backend limitation.
#[cfg(any(target_arch = "sparc", target_arch = "sparc64"))]
intrinsics! {
    #[win64_128bit_abi_hack]
    /// Returns `n / d`
    pub extern "C" fn __udivti3(n: u128, d: u128) -> u128 {
        // Remainder is discarded; `&mut 0` is a throwaway temporary.
        u128_divide_sparc(n, d, &mut 0)
    }

    #[win64_128bit_abi_hack]
    /// Returns `n % d`
    pub extern "C" fn __umodti3(n: u128, d: u128) -> u128 {
        // Quotient is discarded; only the out-parameter is wanted.
        let mut rem = 0;
        u128_divide_sparc(n, d, &mut rem);
        rem
    }

    #[win64_128bit_abi_hack]
    /// Returns `n / d` and sets `*rem = n % d`
    pub extern "C" fn __udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 {
        // The remainder pointer is optional in the compiler-rt ABI; compute into
        // a local and copy out only when the caller supplied a destination.
        let mut tmp = 0;
        let quo = u128_divide_sparc(n, d, &mut tmp);
        if let Some(rem) = rem {
            *rem = tmp;
        }
        quo
    }
}

0 commit comments

Comments
 (0)