rustc_serialize: fix incorrect signed LEB128 decoding

tgnottingham · tgnottingham · commit f15fae822e3b · 2021-01-11T12:13:26.000-08:00
The signed LEB128 decoding function used a hardcoded constant of 64
instead of the number of bits in the type of integer being decoded,
which resulted in incorrect results for some inputs. Fix this, make the
decoding more consistent with the unsigned version, and increase the
LEB128 encoding and decoding test coverage.
diff --git a/compiler/rustc_serialize/src/leb128.rs b/compiler/rustc_serialize/src/leb128.rs
@@ -119,28 +119,38 @@ impl_write_signed_leb128!(write_i64_leb128, i64);
 impl_write_signed_leb128!(write_i128_leb128, i128);
 impl_write_signed_leb128!(write_isize_leb128, isize);
 
-#[inline]
-pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i128, usize) {
-    let mut result = 0;
-    let mut shift = 0;
-    let mut position = start_position;
-    let mut byte;
-
-    loop {
-        byte = data[position];
-        position += 1;
-        result |= i128::from(byte & 0x7F) << shift;
-        shift += 7;
-
-        if (byte & 0x80) == 0 {
-            break;
-        }
-    }
+macro_rules! impl_read_signed_leb128 {
+    ($fn_name:ident, $int_ty:ty) => {
+        #[inline]
+        pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) {
+            let mut result = 0;
+            let mut shift = 0;
+            let mut position = 0;
+            let mut byte;
+
+            loop {
+                byte = slice[position];
+                position += 1;
+                result |= <$int_ty>::from(byte & 0x7F) << shift;
+                shift += 7;
+
+                if (byte & 0x80) == 0 {
+                    break;
+                }
+            }
 
-    if (shift < 64) && ((byte & 0x40) != 0) {
-        // sign extend
-        result |= -(1 << shift);
-    }
+            if (shift < <$int_ty>::BITS) && ((byte & 0x40) != 0) {
+                // sign extend
+                result |= (!0 << shift);
+            }
 
-    (result, position - start_position)
+            (result, position)
+        }
+    };
 }
+
+impl_read_signed_leb128!(read_i16_leb128, i16);
+impl_read_signed_leb128!(read_i32_leb128, i32);
+impl_read_signed_leb128!(read_i64_leb128, i64);
+impl_read_signed_leb128!(read_i128_leb128, i128);
+impl_read_signed_leb128!(read_isize_leb128, isize);
diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs
@@ -17,6 +17,7 @@ Core encoding and decoding interfaces.
 #![feature(min_specialization)]
 #![feature(vec_spare_capacity)]
 #![feature(core_intrinsics)]
+#![feature(int_bits_const)]
 #![feature(maybe_uninit_slice)]
 #![feature(new_uninit)]
 #![cfg_attr(test, feature(test))]
diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs
@@ -1,4 +1,4 @@
-use crate::leb128::{self, max_leb128_len, read_signed_leb128};
+use crate::leb128::{self, max_leb128_len};
 use crate::serialize;
 use std::borrow::Cow;
 use std::fs::File;
@@ -561,22 +561,14 @@ impl<'a> Decoder<'a> {
     }
 }
 
-macro_rules! read_uleb128 {
+macro_rules! read_leb128 {
     ($dec:expr, $fun:ident) => {{
         let (value, bytes_read) = leb128::$fun(&$dec.data[$dec.position..]);
         $dec.position += bytes_read;
         Ok(value)
     }};
 }
 
-macro_rules! read_sleb128 {
-    ($dec:expr, $t:ty) => {{
-        let (value, bytes_read) = read_signed_leb128($dec.data, $dec.position);
-        $dec.position += bytes_read;
-        Ok(value as $t)
-    }};
-}
-
 impl<'a> serialize::Decoder for Decoder<'a> {
     type Error = String;
 
@@ -587,22 +579,22 @@ impl<'a> serialize::Decoder for Decoder<'a> {
 
     #[inline]
     fn read_u128(&mut self) -> Result<u128, Self::Error> {
-        read_uleb128!(self, read_u128_leb128)
+        read_leb128!(self, read_u128_leb128)
     }
 
     #[inline]
     fn read_u64(&mut self) -> Result<u64, Self::Error> {
-        read_uleb128!(self, read_u64_leb128)
+        read_leb128!(self, read_u64_leb128)
     }
 
     #[inline]
     fn read_u32(&mut self) -> Result<u32, Self::Error> {
-        read_uleb128!(self, read_u32_leb128)
+        read_leb128!(self, read_u32_leb128)
     }
 
     #[inline]
     fn read_u16(&mut self) -> Result<u16, Self::Error> {
-        read_uleb128!(self, read_u16_leb128)
+        read_leb128!(self, read_u16_leb128)
     }
 
     #[inline]
@@ -614,27 +606,27 @@ impl<'a> serialize::Decoder for Decoder<'a> {
 
     #[inline]
     fn read_usize(&mut self) -> Result<usize, Self::Error> {
-        read_uleb128!(self, read_usize_leb128)
+        read_leb128!(self, read_usize_leb128)
     }
 
     #[inline]
     fn read_i128(&mut self) -> Result<i128, Self::Error> {
-        read_sleb128!(self, i128)
+        read_leb128!(self, read_i128_leb128)
     }
 
     #[inline]
     fn read_i64(&mut self) -> Result<i64, Self::Error> {
-        read_sleb128!(self, i64)
+        read_leb128!(self, read_i64_leb128)
     }
 
     #[inline]
     fn read_i32(&mut self) -> Result<i32, Self::Error> {
-        read_sleb128!(self, i32)
+        read_leb128!(self, read_i32_leb128)
     }
 
     #[inline]
     fn read_i16(&mut self) -> Result<i16, Self::Error> {
-        read_sleb128!(self, i16)
+        read_leb128!(self, read_i16_leb128)
     }
 
     #[inline]
@@ -646,7 +638,7 @@ impl<'a> serialize::Decoder for Decoder<'a> {
 
     #[inline]
     fn read_isize(&mut self) -> Result<isize, Self::Error> {
-        read_sleb128!(self, isize)
+        read_leb128!(self, read_isize_leb128)
     }
 
     #[inline]
diff --git a/compiler/rustc_serialize/tests/leb128.rs b/compiler/rustc_serialize/tests/leb128.rs
@@ -1,3 +1,4 @@
+#![feature(int_bits_const)]
 #![feature(maybe_uninit_slice)]
 #![feature(maybe_uninit_uninit_array)]
 
@@ -8,16 +9,28 @@ macro_rules! impl_test_unsigned_leb128 {
     ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => {
         #[test]
         fn $test_name() {
+            // Test 256 evenly spaced values of integer range,
+            // integer max value, and some "random" numbers.
+            let mut values = Vec::new();
+
+            let increment = (1 as $int_ty) << ($int_ty::BITS - 8);
+            values.extend((0..256).map(|i| $int_ty::MIN + i * increment));
+
+            values.push($int_ty::MAX);
+
+            values.extend(
+                (-500..500).map(|i| (i as $int_ty).wrapping_mul(0x12345789ABCDEFu64 as $int_ty)),
+            );
+
             let mut stream = Vec::new();
 
-            for x in 0..62 {
+            for &x in &values {
                 let mut buf = MaybeUninit::uninit_array();
-                stream.extend($write_fn_name(&mut buf, (3u64 << x) as $int_ty));
+                stream.extend($write_fn_name(&mut buf, x));
             }
 
             let mut position = 0;
-            for x in 0..62 {
-                let expected = (3u64 << x) as $int_ty;
+            for &expected in &values {
                 let (actual, bytes_read) = $read_fn_name(&stream[position..]);
                 assert_eq!(expected, actual);
                 position += bytes_read;
@@ -34,12 +47,28 @@ impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128
 impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize);
 
 macro_rules! impl_test_signed_leb128 {
-    ($test_name:ident, $write_fn_name:ident, $int_ty:ident) => {
+    ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => {
         #[test]
         fn $test_name() {
-            let values: Vec<_> = (-500..500)
-                .map(|i| ((i as $int_ty).wrapping_mul(0x12345789ABCDEF_i64 as $int_ty)))
-                .collect();
+            // Test 256 evenly spaced values of integer range,
+            // integer max value, and some "random" numbers.
+            let mut values = Vec::new();
+
+            let mut value = $int_ty::MIN;
+            let increment = (1 as $int_ty) << ($int_ty::BITS - 8);
+
+            for _ in 0..256 {
+                values.push(value);
+                // The addition in the last loop iteration overflows.
+                value = value.wrapping_add(increment);
+            }
+
+            values.push($int_ty::MAX);
+
+            values.extend(
+                (-500..500).map(|i| (i as $int_ty).wrapping_mul(0x12345789ABCDEFi64 as $int_ty)),
+            );
+
             let mut stream = Vec::new();
 
             for &x in &values {
@@ -48,9 +77,8 @@ macro_rules! impl_test_signed_leb128 {
             }
 
             let mut position = 0;
-            for &x in &values {
-                let expected = x as i128;
-                let (actual, bytes_read) = read_signed_leb128(&mut stream, position);
+            for &expected in &values {
+                let (actual, bytes_read) = $read_fn_name(&stream[position..]);
                 assert_eq!(expected, actual);
                 position += bytes_read;
             }
@@ -59,8 +87,8 @@ macro_rules! impl_test_signed_leb128 {
     };
 }
 
-impl_test_signed_leb128!(test_i16_leb128, write_i16_leb128, i16);
-impl_test_signed_leb128!(test_i32_leb128, write_i32_leb128, i32);
-impl_test_signed_leb128!(test_i64_leb128, write_i64_leb128, i64);
-impl_test_signed_leb128!(test_i128_leb128, write_i128_leb128, i128);
-impl_test_signed_leb128!(test_isize_leb128, write_isize_leb128, isize);
+impl_test_signed_leb128!(test_i16_leb128, write_i16_leb128, read_i16_leb128, i16);
+impl_test_signed_leb128!(test_i32_leb128, write_i32_leb128, read_i32_leb128, i32);
+impl_test_signed_leb128!(test_i64_leb128, write_i64_leb128, read_i64_leb128, i64);
+impl_test_signed_leb128!(test_i128_leb128, write_i128_leb128, read_i128_leb128, i128);
+impl_test_signed_leb128!(test_isize_leb128, write_isize_leb128, read_isize_leb128, isize);