diff --git a/src/libextra/extra.rs b/src/libextra/extra.rs index 50c57b28d2235..c4f72d10a8ae5 100644 --- a/src/libextra/extra.rs +++ b/src/libextra/extra.rs @@ -74,7 +74,6 @@ pub mod deque; pub mod fun_treemap; pub mod list; pub mod priority_queue; -pub mod rope; pub mod smallintmap; pub mod sort; diff --git a/src/libextra/rope.rs b/src/libextra/rope.rs deleted file mode 100644 index dd3f08917fd48..0000000000000 --- a/src/libextra/rope.rs +++ /dev/null @@ -1,1442 +0,0 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -/*! - * High-level text containers. - * - * Ropes are a high-level representation of text that offers - * much better performance than strings for common operations, - * and generally reduce memory allocations and copies, while only - * entailing a small degradation of less common operations. - * - * More precisely, where a string is represented as a memory buffer, - * a rope is a tree structure whose leaves are slices of immutable - * strings. Therefore, concatenation, appending, prepending, substrings, - * etc. are operations that require only trivial tree manipulation, - * generally without having to copy memory. In addition, the tree - * structure of ropes makes them suitable as a form of index to speed-up - * access to Unicode characters by index in long chunks of text. - * - * The following operations are algorithmically faster in ropes: - * - * * extracting a subrope is logarithmic (linear in strings); - * * appending/prepending is near-constant time (linear in strings); - * * concatenation is near-constant time (linear in strings); - * * char length is constant-time (linear in strings); - * * access to a character by index is logarithmic (linear in strings); - */ - -#[allow(missing_doc)]; - - -use std::uint; -use std::vec; -use std::str; - -/// The type of ropes. -pub type Rope = node::Root; - -/* - Section: Creating a rope - */ - -/// Create an empty rope -pub fn empty() -> Rope { - return node::Empty; -} - -/** - * Adopt a string as a rope. - * - * # Arguments - * - * * str - A valid string. - * - * # Return value - * - * A rope representing the same string as `str`. Depending of the length - * of `str`, this rope may be empty, flat or complex. - * - * # Performance notes - * - * * this operation does not copy the string; - * * the function runs in linear time. - */ -pub fn of_str(str: @~str) -> Rope { - return of_substr(str, 0u, str.len()); -} - -/** - * As `of_str` but for a substring. - * - * # Arguments - * * byte_offset - The offset of `str` at which the rope starts. - * * byte_len - The number of bytes of `str` to use. - * - * # Return value - * - * A rope representing the same string as `str.slice(byte_offset, - * byte_offset + byte_len)`. Depending on `byte_len`, this rope may - * be empty, flat or complex. - * - * # Performance note - * - * This operation does not copy the substring. - * - * # Safety notes - * - * * this function does _not_ check the validity of the substring; - * * this function fails if `byte_offset` or `byte_len` do not match `str`. - */ -pub fn of_substr(str: @~str, byte_offset: uint, byte_len: uint) -> Rope { - if byte_len == 0u { return node::Empty; } - if byte_offset + byte_len > str.len() { fail!(); } - return node::Content(node::of_substr(str, byte_offset, byte_len)); -} - -/* -Section: Adding things to a rope - */ - -/** - * Add one char to the end of the rope - * - * # Performance note - * - * * this function executes in near-constant time - */ -pub fn append_char(rope: Rope, char: char) -> Rope { - return append_str(rope, @str::from_chars([char])); -} - -/** - * Add one string to the end of the rope - * - * # Performance note - * - * * this function executes in near-linear time - */ -pub fn append_str(rope: Rope, str: @~str) -> Rope { - return append_rope(rope, of_str(str)) -} - -/** - * Add one char to the beginning of the rope - * - * # Performance note - * * this function executes in near-constant time - */ -pub fn prepend_char(rope: Rope, char: char) -> Rope { - return prepend_str(rope, @str::from_chars([char])); -} - -/** - * Add one string to the beginning of the rope - * - * # Performance note - * * this function executes in near-linear time - */ -pub fn prepend_str(rope: Rope, str: @~str) -> Rope { - return append_rope(of_str(str), rope) -} - -/// Concatenate two ropes -pub fn append_rope(left: Rope, right: Rope) -> Rope { - match (left) { - node::Empty => return right, - node::Content(left_content) => { - match (right) { - node::Empty => return left, - node::Content(right_content) => { - return node::Content(node::concat2(left_content, right_content)); - } - } - } - } -} - -/** - * Concatenate many ropes. - * - * If the ropes are balanced initially and have the same height, the resulting - * rope remains balanced. However, this function does not take any further - * measure to ensure that the result is balanced. - */ -pub fn concat(v: ~[Rope]) -> Rope { - //Copy `v` into a mut vector - let mut len = v.len(); - if len == 0u { return node::Empty; } - let mut ropes = vec::from_elem(len, v[0]); - for uint::range(1u, len) |i| { - ropes[i] = v[i]; - } - - //Merge progresively - while len > 1u { - for uint::range(0u, len/2u) |i| { - ropes[i] = append_rope(ropes[2u*i], ropes[2u*i+1u]); - } - if len%2u != 0u { - ropes[len/2u] = ropes[len - 1u]; - len = len/2u + 1u; - } else { - len = len/2u; - } - } - - //Return final rope - return ropes[0]; -} - - -/* -Section: Keeping ropes healthy - */ - - -/** - * Balance a rope. - * - * # Return value - * - * A copy of the rope in which small nodes have been grouped in memory, - * and with a reduced height. - * - * If you perform numerous rope concatenations, it is generally a good idea - * to rebalance your rope at some point, before using it for other purposes. - */ -pub fn bal(rope:Rope) -> Rope { - match (rope) { - node::Empty => return rope, - node::Content(x) => match (node::bal(x)) { - None => rope, - Some(y) => node::Content(y) - } - } -} - -/* -Section: Transforming ropes - */ - - -/** - * Extract a subrope from a rope. - * - * # Performance note - * - * * on a balanced rope, this operation takes algorithmic time; - * * this operation does not involve any copying - * - * # Safety note - * - * * this function fails if char_offset/char_len do not represent - * valid positions in rope - */ -pub fn sub_chars(rope: Rope, char_offset: uint, char_len: uint) -> Rope { - if char_len == 0u { return node::Empty; } - match (rope) { - node::Empty => fail!(), - node::Content(node) => if char_len > node::char_len(node) { - fail!() - } else { - return node::Content(node::sub_chars(node, char_offset, char_len)) - } - } -} - -/** - * Extract a subrope from a rope. - * - * # Performance note - * - * * on a balanced rope, this operation takes algorithmic time; - * * this operation does not involve any copying - * - * # Safety note - * - * * this function fails if byte_offset/byte_len do not represent - * valid positions in rope - */ -pub fn sub_bytes(rope: Rope, byte_offset: uint, byte_len: uint) -> Rope { - if byte_len == 0u { return node::Empty; } - match (rope) { - node::Empty => fail!(), - node::Content(node) =>if byte_len > node::byte_len(node) { - fail!() - } else { - return node::Content(node::sub_bytes(node, byte_offset, byte_len)) - } - } -} - -/* -Section: Comparing ropes - */ - -/** - * Compare two ropes by Unicode lexicographical order. - * - * This function compares only the contents of the rope, not their structure. - * - * # Return value - * - * A negative value if `left < right`, 0 if eq(left, right) or a positive - * value if `left > right` - */ -pub fn cmp(left: Rope, right: Rope) -> int { - match ((left, right)) { - (node::Empty, node::Empty) => return 0, - (node::Empty, _) => return -1, - (_, node::Empty) => return 1, - (node::Content(a), node::Content(b)) => { - return node::cmp(a, b); - } - } -} - -/** - * Returns `true` if both ropes have the same content (regardless of - * their structure), `false` otherwise - */ -pub fn eq(left: Rope, right: Rope) -> bool { - return cmp(left, right) == 0; -} - -/** - * # Arguments - * - * * left - an arbitrary rope - * * right - an arbitrary rope - * - * # Return value - * - * `true` if `left <= right` in lexicographical order (regardless of their - * structure), `false` otherwise - */ -pub fn le(left: Rope, right: Rope) -> bool { - return cmp(left, right) <= 0; -} - -/** - * # Arguments - * - * * left - an arbitrary rope - * * right - an arbitrary rope - * - * # Return value - * - * `true` if `left < right` in lexicographical order (regardless of their - * structure), `false` otherwise - */ -pub fn lt(left: Rope, right: Rope) -> bool { - return cmp(left, right) < 0; -} - -/** - * # Arguments - * - * * left - an arbitrary rope - * * right - an arbitrary rope - * - * # Return value - * - * `true` if `left >= right` in lexicographical order (regardless of their - * structure), `false` otherwise - */ -pub fn ge(left: Rope, right: Rope) -> bool { - return cmp(left, right) >= 0; -} - -/** - * # Arguments - * - * * left - an arbitrary rope - * * right - an arbitrary rope - * - * # Return value - * - * `true` if `left > right` in lexicographical order (regardless of their - * structure), `false` otherwise - */ -pub fn gt(left: Rope, right: Rope) -> bool { - return cmp(left, right) > 0; -} - -/* -Section: Iterating - */ - -/** - * Loop through a rope, char by char - * - * While other mechanisms are available, this is generally the best manner - * of looping through the contents of a rope char by char. If you prefer a - * loop that iterates through the contents string by string (e.g. to print - * the contents of the rope or output it to the system), however, - * you should rather use `traverse_components`. - * - * # Arguments - * - * * rope - A rope to traverse. It may be empty. - * * it - A block to execute with each consecutive character of the rope. - * Return `true` to continue, `false` to stop. - * - * # Return value - * - * `true` If execution proceeded correctly, `false` if it was interrupted, - * that is if `it` returned `false` at any point. - */ -pub fn loop_chars(rope: Rope, it: &fn(c: char) -> bool) -> bool { - match (rope) { - node::Empty => return true, - node::Content(x) => return node::loop_chars(x, it) - } -} - -/** - * Loop through a rope, char by char, until the end. - * - * # Arguments - * * rope - A rope to traverse. It may be empty - * * it - A block to execute with each consecutive character of the rope. - */ -pub fn iter_chars(rope: Rope, it: &fn(char)) { - do loop_chars(rope) |x| { - it(x); - true - }; -} - -/** - * Loop through a rope, string by string - * - * While other mechanisms are available, this is generally the best manner of - * looping through the contents of a rope string by string, which may be - * useful e.g. to print strings as you see them (without having to copy their - * contents into a new string), to send them to then network, to write them to - * a file, etc.. If you prefer a loop that iterates through the contents - * char by char (e.g. to search for a char), however, you should rather - * use `traverse`. - * - * # Arguments - * - * * rope - A rope to traverse. It may be empty - * * it - A block to execute with each consecutive string component of the - * rope. Return `true` to continue, `false` to stop - * - * # Return value - * - * `true` If execution proceeded correctly, `false` if it was interrupted, - * that is if `it` returned `false` at any point. - */ -pub fn loop_leaves(rope: Rope, it: &fn(node::Leaf) -> bool) -> bool{ - match (rope) { - node::Empty => return true, - node::Content(x) => return node::loop_leaves(x, it) - } -} - -pub mod iterator { - pub mod leaf { - - use rope::{Rope, node}; - - pub fn start(rope: Rope) -> node::leaf_iterator::T { - match (rope) { - node::Empty => return node::leaf_iterator::empty(), - node::Content(x) => return node::leaf_iterator::start(x) - } - } - pub fn next(it: &mut node::leaf_iterator::T) -> Option { - return node::leaf_iterator::next(it); - } - } - pub mod char { - - use rope::{Rope, node}; - - pub fn start(rope: Rope) -> node::char_iterator::T { - match (rope) { - node::Empty => return node::char_iterator::empty(), - node::Content(x) => return node::char_iterator::start(x) - } - } - pub fn next(it: &mut node::char_iterator::T) -> Option { - return node::char_iterator::next(it) - } - } -} - -/* - Section: Rope properties - */ - -/** - * Returns the height of the rope. - * - * The height of the rope is a bound on the number of operations which - * must be performed during a character access before finding the leaf in - * which a character is contained. - * - * # Performance note - * - * Constant time. - */ -pub fn height(rope: Rope) -> uint { - match (rope) { - node::Empty => return 0u, - node::Content(x) => return node::height(x) - } -} - - - -/** - * The number of character in the rope - * - * # Performance note - * - * Constant time. - */ -pub fn char_len(rope: Rope) -> uint { - match (rope) { - node::Empty => return 0u, - node::Content(x) => return node::char_len(x) - } -} - -/** - * The number of bytes in the rope - * - * # Performance note - * - * Constant time. - */ -pub fn byte_len(rope: Rope) -> uint { - match (rope) { - node::Empty => return 0u, - node::Content(x) => return node::byte_len(x) - } -} - -/** - * The character at position `pos` - * - * # Arguments - * - * * pos - A position in the rope - * - * # Safety notes - * - * The function will fail if `pos` is not a valid position in the rope. - * - * # Performance note - * - * This function executes in a time proportional to the height of the - * rope + the (bounded) length of the largest leaf. - */ -pub fn char_at(rope: Rope, pos: uint) -> char { - match (rope) { - node::Empty => fail!(), - node::Content(x) => return node::char_at(x, pos) - } -} - - -/* - Section: Implementation -*/ -pub mod node { - - use rope::node; - - use std::cast; - use std::uint; - use std::vec; - - /// Implementation of type `rope` - pub enum Root { - /// An empty rope - Empty, - /// A non-empty rope - Content(@Node), - } - - /** - * A text component in a rope. - * - * This is actually a slice in a rope, so as to ensure maximal sharing. - * - * # Fields - * - * * byte_offset = The number of bytes skipped in `content` - * * byte_len - The number of bytes of `content` to use - * * char_len - The number of chars in the leaf. - * * content - Contents of the leaf. - * - * Note that we can have `char_len < content.char_len()`, if - * this leaf is only a subset of the string. Also note that the - * string can be shared between several ropes, e.g. for indexing - * purposes. - */ - pub struct Leaf { - byte_offset: uint, - byte_len: uint, - char_len: uint, - content: @~str, - } - - /** - * A node obtained from the concatenation of two other nodes - * - * # Fields - * - * * left - The node containing the beginning of the text. - * * right - The node containing the end of the text. - * * char_len - The number of chars contained in all leaves of this node. - * * byte_len - The number of bytes in the subrope. - * - * Used to pre-allocate the correct amount of storage for - * serialization. - * - * * height - Height of the subrope. - * - * Used for rebalancing and to allocate stacks for traversals. - */ - pub struct Concat { - //FIXME (#2744): Perhaps a `vec` instead of `left`/`right` - left: @Node, - right: @Node, - char_len: uint, - byte_len: uint, - height: uint, - } - - pub enum Node { - /// A leaf consisting in a `str` - Leaf(Leaf), - /// The concatenation of two ropes - Concat(Concat), - } - - /** - * The maximal number of chars that _should_ be permitted in a single node - * - * This is not a strict value - */ - pub static HINT_MAX_LEAF_CHAR_LEN: uint = 256u; - - /** - * The maximal height that _should_ be permitted in a tree. - * - * This is not a strict value - */ - pub static HINT_MAX_NODE_HEIGHT: uint = 16u; - - /** - * Adopt a string as a node. - * - * If the string is longer than `max_leaf_char_len`, it is - * logically split between as many leaves as necessary. Regardless, - * the string itself is not copied. - * - * Performance note: The complexity of this function is linear in - * the length of `str`. - */ - pub fn of_str(str: @~str) -> @Node { - return of_substr(str, 0u, str.len()); - } - - /** - * Adopt a slice of a string as a node. - * - * If the slice is longer than `max_leaf_char_len`, it is logically split - * between as many leaves as necessary. Regardless, the string itself - * is not copied - * - * # Arguments - * - * * byte_start - The byte offset where the slice of `str` starts. - * * byte_len - The number of bytes from `str` to use. - * - * # Safety note - * - * Behavior is undefined if `byte_start` or `byte_len` do not represent - * valid positions in `str` - */ - pub fn of_substr(str: @~str, byte_start: uint, byte_len: uint) -> @Node { - return of_substr_unsafer(str, byte_start, byte_len, - str.slice(byte_start, byte_start + byte_len).char_len()); - } - - /** - * Adopt a slice of a string as a node. - * - * If the slice is longer than `max_leaf_char_len`, it is logically split - * between as many leaves as necessary. Regardless, the string itself - * is not copied - * - * # Arguments - * - * * byte_start - The byte offset where the slice of `str` starts. - * * byte_len - The number of bytes from `str` to use. - * * char_len - The number of chars in `str` in the interval - * [byte_start, byte_start+byte_len) - * - * # Safety notes - * - * * Behavior is undefined if `byte_start` or `byte_len` do not represent - * valid positions in `str` - * * Behavior is undefined if `char_len` does not accurately represent the - * number of chars between byte_start and byte_start+byte_len - */ - pub fn of_substr_unsafer(str: @~str, byte_start: uint, byte_len: uint, - char_len: uint) -> @Node { - assert!((byte_start + byte_len <= str.len())); - let candidate = @Leaf(Leaf { - byte_offset: byte_start, - byte_len: byte_len, - char_len: char_len, - content: str, - }); - if char_len <= HINT_MAX_LEAF_CHAR_LEN { - return candidate; - } else { - //Firstly, split `str` in slices of HINT_MAX_LEAF_CHAR_LEN - let mut leaves = uint::div_ceil(char_len, HINT_MAX_LEAF_CHAR_LEN); - //Number of leaves - let mut nodes = vec::from_elem(leaves, candidate); - - let mut i = 0u; - let mut offset = byte_start; - let first_leaf_char_len = - if char_len%HINT_MAX_LEAF_CHAR_LEN == 0u { - HINT_MAX_LEAF_CHAR_LEN - } else { - char_len%HINT_MAX_LEAF_CHAR_LEN - }; - while i < leaves { - let chunk_char_len: uint = - if i == 0u { first_leaf_char_len } - else { HINT_MAX_LEAF_CHAR_LEN }; - let chunk_byte_len = - str.slice_from(offset).slice_chars(0, chunk_char_len).len(); - nodes[i] = @Leaf(Leaf { - byte_offset: offset, - byte_len: chunk_byte_len, - char_len: chunk_char_len, - content: str, - }); - - offset += chunk_byte_len; - i += 1u; - } - - //Then, build a tree from these slices by collapsing them - while leaves > 1u { - i = 0u; - while i < leaves - 1u {//Concat nodes 0 with 1, 2 with 3 etc. - nodes[i/2u] = concat2(nodes[i], nodes[i + 1u]); - i += 2u; - } - if i == leaves - 1u { - //And don't forget the last node if it is in even position - nodes[i/2u] = nodes[i]; - } - leaves = uint::div_ceil(leaves, 2u); - } - return nodes[0u]; - } - } - - pub fn byte_len(node: @Node) -> uint { - //FIXME (#2744): Could we do this without the pattern-matching? - match (*node) { - Leaf(y) => y.byte_len, - Concat(ref y) => y.byte_len - } - } - - pub fn char_len(node: @Node) -> uint { - match (*node) { - Leaf(y) => y.char_len, - Concat(ref y) => y.char_len - } - } - - /** - * Concatenate a forest of nodes into one tree. - * - * # Arguments - * - * * forest - The forest. This vector is progressively rewritten during - * execution and should be discarded as meaningless afterwards. - */ - pub fn tree_from_forest_destructive(forest: &mut [@Node]) -> @Node { - let mut i; - let mut len = forest.len(); - while len > 1u { - i = 0u; - while i < len - 1u {//Concat nodes 0 with 1, 2 with 3 etc. - let mut left = forest[i]; - let mut right = forest[i+1u]; - let left_len = char_len(left); - let right_len= char_len(right); - let mut left_height= height(left); - let mut right_height=height(right); - if left_len + right_len > HINT_MAX_LEAF_CHAR_LEN { - if left_len <= HINT_MAX_LEAF_CHAR_LEN { - left = flatten(left); - left_height = height(left); - } - if right_len <= HINT_MAX_LEAF_CHAR_LEN { - right = flatten(right); - right_height = height(right); - } - } - if left_height >= HINT_MAX_NODE_HEIGHT { - left = of_substr_unsafer(@serialize_node(left), - 0u,byte_len(left), - left_len); - } - if right_height >= HINT_MAX_NODE_HEIGHT { - right = of_substr_unsafer(@serialize_node(right), - 0u,byte_len(right), - right_len); - } - forest[i/2u] = concat2(left, right); - i += 2u; - } - if i == len - 1u { - //And don't forget the last node if it is in even position - forest[i/2u] = forest[i]; - } - len = uint::div_ceil(len, 2u); - } - return forest[0]; - } - - pub fn serialize_node(node: @Node) -> ~str { - unsafe { - let mut buf = vec::from_elem(byte_len(node), 0); - let mut offset = 0u;//Current position in the buffer - let mut it = leaf_iterator::start(node); - loop { - match leaf_iterator::next(&mut it) { - None => break, - Some(x) => { - //FIXME (#2744): Replace with memcpy or something similar - let local_buf: ~[u8] = cast::transmute(copy *x.content); - let mut i = x.byte_offset; - while i < x.byte_len { - buf[offset] = local_buf[i]; - offset += 1u; - i += 1u; - } - cast::forget(local_buf); - } - } - } - return cast::transmute(buf); - } - } - - /** - * Replace a subtree by a single leaf with the same contents. - * - * * Performance note - * - * This function executes in linear time. - */ - pub fn flatten(node: @Node) -> @Node { - match (*node) { - Leaf(_) => node, - Concat(ref x) => { - @Leaf(Leaf { - byte_offset: 0u, - byte_len: x.byte_len, - char_len: x.char_len, - content: @serialize_node(node), - }) - } - } - } - - /** - * Balance a node. - * - * # Algorithm - * - * * if the node height is smaller than `HINT_MAX_NODE_HEIGHT`, do nothing - * * otherwise, gather all leaves as a forest, rebuild a balanced node, - * concatenating small leaves along the way - * - * # Return value - * - * * `None` if no transformation happened - * * `Some(x)` otherwise, in which case `x` has the same contents - * as `node` bot lower height and/or fragmentation. - */ - pub fn bal(node: @Node) -> Option<@Node> { - if height(node) < HINT_MAX_NODE_HEIGHT { return None; } - //1. Gather all leaves as a forest - let mut forest = ~[]; - let mut it = leaf_iterator::start(node); - loop { - match leaf_iterator::next(&mut it) { - None => break, - Some(x) => forest.push(@Leaf(x)) - } - } - //2. Rebuild tree from forest - let root = @*tree_from_forest_destructive(forest); - return Some(root); - - } - - /** - * Compute the subnode of a node. - * - * # Arguments - * - * * node - A node - * * byte_offset - A byte offset in `node` - * * byte_len - The number of bytes to return - * - * # Performance notes - * - * * this function performs no copying; - * * this function executes in a time proportional to the height of `node` - * - * # Safety notes - * - * This function fails if `byte_offset` or `byte_len` do not represent - * valid positions in `node`. - */ - pub fn sub_bytes(node: @Node, byte_offset: uint, - byte_len: uint) -> @Node { - let mut node = node; - let mut byte_offset = byte_offset; - loop { - if byte_offset == 0u && byte_len == node::byte_len(node) { - return node; - } - match (*node) { - node::Leaf(x) => { - let char_len = - x.content.slice(byte_offset, byte_offset + byte_len).char_len(); - return @Leaf(Leaf { - byte_offset: byte_offset, - byte_len: byte_len, - char_len: char_len, - content: x.content, - }); - } - node::Concat(ref x) => { - let left_len: uint = node::byte_len(x.left); - if byte_offset <= left_len { - if byte_offset + byte_len <= left_len { - //Case 1: Everything fits in x.left, tail-call - node = x.left; - } else { - //Case 2: A (non-empty, possibly full) suffix - //of x.left and a (non-empty, possibly full) prefix - //of x.right - let left_result = - sub_bytes(x.left, byte_offset, left_len); - let right_result = - sub_bytes(x.right, 0u, left_len - byte_offset); - return concat2(left_result, right_result); - } - } else { - //Case 3: Everything fits in x.right - byte_offset -= left_len; - node = x.right; - } - } - } - }; - } - - /** - * Compute the subnode of a node. - * - * # Arguments - * - * * node - A node - * * char_offset - A char offset in `node` - * * char_len - The number of chars to return - * - * # Performance notes - * - * * this function performs no copying; - * * this function executes in a time proportional to the height of `node` - * - * # Safety notes - * - * This function fails if `char_offset` or `char_len` do not represent - * valid positions in `node`. - */ - pub fn sub_chars(node: @Node, char_offset: uint, - char_len: uint) -> @Node { - let mut node = node; - let mut char_offset = char_offset; - loop { - match (*node) { - node::Leaf(x) => { - if char_offset == 0u && char_len == x.char_len { - return node; - } - let byte_offset = - x.content.slice_chars(0, char_offset).len(); - let byte_len = - x.content.slice_from(byte_offset).slice_chars(0, char_len).len(); - return @Leaf(Leaf { - byte_offset: byte_offset, - byte_len: byte_len, - char_len: char_len, - content: x.content, - }); - } - node::Concat(ref x) => { - if char_offset == 0u && char_len == x.char_len {return node;} - let left_len : uint = node::char_len(x.left); - if char_offset <= left_len { - if char_offset + char_len <= left_len { - //Case 1: Everything fits in x.left, tail call - node = x.left; - } else { - //Case 2: A (non-empty, possibly full) suffix - //of x.left and a (non-empty, possibly full) prefix - //of x.right - let left_result = - sub_chars(x.left, char_offset, left_len); - let right_result = - sub_chars(x.right, 0u, left_len - char_offset); - return concat2(left_result, right_result); - } - } else { - //Case 3: Everything fits in x.right, tail call - node = x.right; - char_offset -= left_len; - } - } - } - }; - } - - pub fn concat2(left: @Node, right: @Node) -> @Node { - @Concat(Concat { - left: left, - right: right, - char_len: char_len(left) + char_len(right), - byte_len: byte_len(left) + byte_len(right), - height: uint::max(height(left), height(right)) + 1u, - }) - } - - pub fn height(node: @Node) -> uint { - match (*node) { - Leaf(_) => 0u, - Concat(ref x) => x.height, - } - } - - pub fn cmp(a: @Node, b: @Node) -> int { - let mut ita = char_iterator::start(a); - let mut itb = char_iterator::start(b); - let mut result = 0; - while result == 0 { - match (char_iterator::next(&mut ita), char_iterator::next(&mut itb)) - { - (None, None) => break, - (Some(chara), Some(charb)) => { - result = chara.cmp(&charb) as int; - } - (Some(_), _) => { - result = 1; - } - (_, Some(_)) => { - result = -1; - } - } - } - return result; - } - - pub fn loop_chars(node: @Node, it: &fn(c: char) -> bool) -> bool { - return loop_leaves(node,|leaf| { - leaf.content.slice(leaf.byte_offset, leaf.byte_len).iter().all(|c| it(c)) - }); - } - - /** - * Loop through a node, leaf by leaf - * - * # Arguments - * - * * rope - A node to traverse. - * * it - A block to execute with each consecutive leaf of the node. - * Return `true` to continue, `false` to stop - * - * # Arguments - * - * `true` If execution proceeded correctly, `false` if it was interrupted, - * that is if `it` returned `false` at any point. - */ - pub fn loop_leaves(node: @Node, it: &fn(Leaf) -> bool) -> bool{ - let mut current = node; - loop { - match (*current) { - Leaf(x) => return it(x), - Concat(ref x) => if loop_leaves(x.left, |l| it(l)) { //non tail call - current = x.right; //tail call - } else { - return false; - } - } - }; - } - - /** - * # Arguments - * - * * pos - A position in the rope - * - * # Return value - * - * The character at position `pos` - * - * # Safety notes - * - * The function will fail if `pos` is not a valid position in the rope. - * - * Performance note: This function executes in a time - * proportional to the height of the rope + the (bounded) - * length of the largest leaf. - */ - pub fn char_at(mut node: @Node, mut pos: uint) -> char { - loop { - match *node { - Leaf(x) => return x.content.char_at(pos), - Concat(Concat {left, right, _}) => { - let left_len = char_len(left); - node = if left_len > pos { left } - else { pos -= left_len; right }; - } - } - }; - } - - pub mod leaf_iterator { - - use rope::node::{Concat, Leaf, Node, height}; - - use std::vec; - - pub struct T { - stack: ~[@Node], - stackpos: int, - } - - pub fn empty() -> T { - let stack : ~[@Node] = ~[]; - T { stack: stack, stackpos: -1 } - } - - pub fn start(node: @Node) -> T { - let stack = vec::from_elem(height(node)+1u, node); - T { - stack: stack, - stackpos: 0, - } - } - - pub fn next(it: &mut T) -> Option { - if it.stackpos < 0 { return None; } - loop { - let current = it.stack[it.stackpos]; - it.stackpos -= 1; - match (*current) { - Concat(ref x) => { - it.stackpos += 1; - it.stack[it.stackpos] = x.right; - it.stackpos += 1; - it.stack[it.stackpos] = x.left; - } - Leaf(x) => return Some(x) - } - }; - } - } - - pub mod char_iterator { - - use rope::node::{Leaf, Node}; - use rope::node::leaf_iterator; - - pub struct T { - leaf_iterator: leaf_iterator::T, - leaf: Option, - leaf_byte_pos: uint, - } - - pub fn start(node: @Node) -> T { - T { - leaf_iterator: leaf_iterator::start(node), - leaf: None, - leaf_byte_pos: 0u, - } - } - - pub fn empty() -> T { - T { - leaf_iterator: leaf_iterator::empty(), - leaf: None, - leaf_byte_pos: 0u, - } - } - - pub fn next(it: &mut T) -> Option { - loop { - match get_current_or_next_leaf(it) { - None => return None, - Some(_) => { - let next_char = get_next_char_in_leaf(it); - match next_char { - None => loop, - Some(_) => return next_char - } - } - } - }; - } - - pub fn get_current_or_next_leaf(it: &mut T) -> Option { - match it.leaf { - Some(_) => return it.leaf, - None => { - let next = leaf_iterator::next(&mut it.leaf_iterator); - match next { - None => return None, - Some(_) => { - it.leaf = next; - it.leaf_byte_pos = 0u; - return next; - } - } - } - } - } - - pub fn get_next_char_in_leaf(it: &mut T) -> Option { - match copy it.leaf { - None => return None, - Some(aleaf) => { - if it.leaf_byte_pos >= aleaf.byte_len { - //We are actually past the end of the leaf - it.leaf = None; - return None - } else { - let range = - aleaf.content.char_range_at((*it).leaf_byte_pos + aleaf.byte_offset); - let ch = range.ch; - let next = range.next; - (*it).leaf_byte_pos = next - aleaf.byte_offset; - return Some(ch) - } - } - } - } - } -} - -#[cfg(test)] -mod tests { - - use rope::*; - - use std::uint; - use std::vec; - - //Utility function, used for sanity check - fn rope_to_string(r: Rope) -> ~str { - match (r) { - node::Empty => return ~"", - node::Content(x) => { - let mut str = ~""; - fn aux(str: &mut ~str, node: @node::Node) { - match (*node) { - node::Leaf(x) => { - str.push_str(x.content.slice(x.byte_offset, x.byte_offset + x.byte_len)); - } - node::Concat(ref x) => { - aux(str, x.left); - aux(str, x.right); - } - } - } - aux(&mut str, x); - return str - } - } - } - - - #[test] - fn trivial() { - assert_eq!(char_len(empty()), 0u); - assert_eq!(byte_len(empty()), 0u); - } - - #[test] - fn of_string1() { - let sample = @~"0123456789ABCDE"; - let r = of_str(sample); - - assert_eq!(char_len(r), sample.char_len()); - assert!(rope_to_string(r) == *sample); - } - - #[test] - fn of_string2() { - let buf = @ mut ~"1234567890"; - let mut i = 0; - while i < 10 { - let a = copy *buf; - let b = copy *buf; - *buf = a + b; - i+=1; - } - let sample = @copy *buf; - let r = of_str(sample); - assert_eq!(char_len(r), sample.char_len()); - assert!(rope_to_string(r) == *sample); - - let mut string_iter = 0u; - let string_len = sample.len(); - let mut rope_iter = iterator::char::start(r); - let mut equal = true; - while equal { - match (node::char_iterator::next(&mut rope_iter)) { - None => { - if string_iter < string_len { - equal = false; - } break; } - Some(c) => { - let range = sample.char_range_at(string_iter); - string_iter = range.next; - if range.ch != c { equal = false; break; } - } - } - } - - assert!(equal); - } - - #[test] - fn iter1() { - let buf = @ mut ~"1234567890"; - let mut i = 0; - while i < 10 { - let a = copy *buf; - let b = copy *buf; - *buf = a + b; - i+=1; - } - let sample = @copy *buf; - let r = of_str(sample); - - let mut len = 0u; - let mut it = iterator::char::start(r); - loop { - match (node::char_iterator::next(&mut it)) { - None => break, - Some(_) => len += 1u - } - } - - assert_eq!(len, sample.char_len()); - } - - #[test] - fn bal1() { - let init = @~"1234567890"; - let buf = @mut copy *init; - let mut i = 0; - while i < 8 { - let a = copy *buf; - let b = copy *buf; - *buf = a + b; - i+=1; - } - let sample = @copy *buf; - let r1 = of_str(sample); - let mut r2 = of_str(init); - i = 0; - while i < 8 { r2 = append_rope(r2, r2); i+= 1;} - - - assert!(eq(r1, r2)); - let r3 = bal(r2); - assert_eq!(char_len(r1), char_len(r3)); - - assert!(eq(r1, r3)); - } - - #[test] - #[ignore] - fn char_at1() { - //Generate a large rope - let mut r = of_str(@~"123456789"); - for uint::range(0u, 10u) |_i| { - r = append_rope(r, r); - } - - //Copy it in the slowest possible way - let mut r2 = empty(); - for uint::range(0u, char_len(r)) |i| { - r2 = append_char(r2, char_at(r, i)); - } - assert!(eq(r, r2)); - - let mut r3 = empty(); - for uint::range(0u, char_len(r)) |i| { - r3 = prepend_char(r3, char_at(r, char_len(r) - i - 1u)); - } - assert!(eq(r, r3)); - - //Additional sanity checks - let balr = bal(r); - let bal2 = bal(r2); - let bal3 = bal(r3); - assert!(eq(r, balr)); - assert!(eq(r, bal2)); - assert!(eq(r, bal3)); - assert!(eq(r2, r3)); - assert!(eq(bal2, bal3)); - } - - #[test] - fn concat1() { - //Generate a reasonable rope - let chunk = of_str(@~"123456789"); - let mut r = empty(); - for uint::range(0u, 10u) |_i| { - r = append_rope(r, chunk); - } - - //Same rope, obtained with rope::concat - let r2 = concat(vec::from_elem(10u, chunk)); - - assert!(eq(r, r2)); - } -} diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 564c58f7097ee..748d5f05aae0f 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -13,8 +13,7 @@ * * Strings are a packed UTF-8 representation of text, stored as null * terminated buffers of u8 bytes. Strings should be indexed in bytes, - * for efficiency, but UTF-8 unsafe operations should be avoided. For - * some heavy-duty uses, try extra::rope. + * for efficiency, but UTF-8 unsafe operations should be avoided. */ use at_vec;