changeset 6479:e03c4f2f58e0

Move rope grapheme iterators from core to stdx
author Michael Davis <mcarsondavis@gmail.com>
date Sun, 26 Jan 2025 20:58:27 -0500
parents 3fc2b4750c18
children 80f7a633de7f
files helix-core/src/doc_formatter.rs helix-core/src/graphemes.rs helix-core/src/indent.rs helix-core/src/lib.rs helix-core/src/position.rs helix-core/src/selection.rs helix-stdx/src/rope.rs helix-term/src/commands.rs
diffstat 8 files changed, 210 insertions(+), 178 deletions(-) [+]
line wrap: on
line diff
--- a/helix-core/src/doc_formatter.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-core/src/doc_formatter.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -19,10 +19,12 @@
 
 use unicode_segmentation::{Graphemes, UnicodeSegmentation};
 
+use helix_stdx::rope::{RopeGraphemes, RopeSliceExt};
+
 use crate::graphemes::{Grapheme, GraphemeStr};
 use crate::syntax::Highlight;
 use crate::text_annotations::TextAnnotations;
-use crate::{Position, RopeGraphemes, RopeSlice};
+use crate::{Position, RopeSlice};
 
 /// TODO make Highlight a u32 to reduce the size of this enum to a single word.
 #[derive(Debug, Clone, Copy)]
@@ -219,7 +221,7 @@
             text_fmt,
             annotations,
             visual_pos: Position { row: 0, col: 0 },
-            graphemes: RopeGraphemes::new(text.slice(block_char_idx..)),
+            graphemes: text.slice(block_char_idx..).graphemes(),
             char_pos: block_char_idx,
             exhausted: false,
             indent_level: None,
--- a/helix-core/src/graphemes.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-core/src/graphemes.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -1,7 +1,7 @@
 //! Utility functions to traverse the unicode graphemes of a `Rope`'s text contents.
 //!
 //! Based on <https://github.com/cessen/led/blob/c4fa72405f510b7fd16052f90a598c429b3104a6/src/graphemes.rs>
-use ropey::{iter::Chunks, str_utils::byte_to_char_idx, RopeSlice};
+use ropey::{str_utils::byte_to_char_idx, RopeSlice};
 use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
 use unicode_width::UnicodeWidthStr;
 
@@ -270,162 +270,6 @@
     }
 }
 
-/// An iterator over the graphemes of a `RopeSlice`.
-#[derive(Clone)]
-pub struct RopeGraphemes<'a> {
-    text: RopeSlice<'a>,
-    chunks: Chunks<'a>,
-    cur_chunk: &'a str,
-    cur_chunk_start: usize,
-    cursor: GraphemeCursor,
-}
-
-impl fmt::Debug for RopeGraphemes<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("RopeGraphemes")
-            .field("text", &self.text)
-            .field("chunks", &self.chunks)
-            .field("cur_chunk", &self.cur_chunk)
-            .field("cur_chunk_start", &self.cur_chunk_start)
-            // .field("cursor", &self.cursor)
-            .finish()
-    }
-}
-
-impl RopeGraphemes<'_> {
-    #[must_use]
-    pub fn new(slice: RopeSlice) -> RopeGraphemes {
-        let mut chunks = slice.chunks();
-        let first_chunk = chunks.next().unwrap_or("");
-        RopeGraphemes {
-            text: slice,
-            chunks,
-            cur_chunk: first_chunk,
-            cur_chunk_start: 0,
-            cursor: GraphemeCursor::new(0, slice.len_bytes(), true),
-        }
-    }
-}
-
-impl<'a> Iterator for RopeGraphemes<'a> {
-    type Item = RopeSlice<'a>;
-
-    fn next(&mut self) -> Option<RopeSlice<'a>> {
-        let a = self.cursor.cur_cursor();
-        let b;
-        loop {
-            match self
-                .cursor
-                .next_boundary(self.cur_chunk, self.cur_chunk_start)
-            {
-                Ok(None) => {
-                    return None;
-                }
-                Ok(Some(n)) => {
-                    b = n;
-                    break;
-                }
-                Err(GraphemeIncomplete::NextChunk) => {
-                    self.cur_chunk_start += self.cur_chunk.len();
-                    self.cur_chunk = self.chunks.next().unwrap_or("");
-                }
-                Err(GraphemeIncomplete::PreContext(idx)) => {
-                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
-                    self.cursor.provide_context(chunk, byte_idx);
-                }
-                _ => unreachable!(),
-            }
-        }
-
-        if a < self.cur_chunk_start {
-            Some(self.text.byte_slice(a..b))
-        } else {
-            let a2 = a - self.cur_chunk_start;
-            let b2 = b - self.cur_chunk_start;
-            Some((&self.cur_chunk[a2..b2]).into())
-        }
-    }
-}
-
-/// An iterator over the graphemes of a `RopeSlice` in reverse.
-#[derive(Clone)]
-pub struct RevRopeGraphemes<'a> {
-    text: RopeSlice<'a>,
-    chunks: Chunks<'a>,
-    cur_chunk: &'a str,
-    cur_chunk_start: usize,
-    cursor: GraphemeCursor,
-}
-
-impl fmt::Debug for RevRopeGraphemes<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("RevRopeGraphemes")
-            .field("text", &self.text)
-            .field("chunks", &self.chunks)
-            .field("cur_chunk", &self.cur_chunk)
-            .field("cur_chunk_start", &self.cur_chunk_start)
-            // .field("cursor", &self.cursor)
-            .finish()
-    }
-}
-
-impl RevRopeGraphemes<'_> {
-    #[must_use]
-    pub fn new(slice: RopeSlice) -> RevRopeGraphemes {
-        let (mut chunks, mut cur_chunk_start, _, _) = slice.chunks_at_byte(slice.len_bytes());
-        chunks.reverse();
-        let first_chunk = chunks.next().unwrap_or("");
-        cur_chunk_start -= first_chunk.len();
-        RevRopeGraphemes {
-            text: slice,
-            chunks,
-            cur_chunk: first_chunk,
-            cur_chunk_start,
-            cursor: GraphemeCursor::new(slice.len_bytes(), slice.len_bytes(), true),
-        }
-    }
-}
-
-impl<'a> Iterator for RevRopeGraphemes<'a> {
-    type Item = RopeSlice<'a>;
-
-    fn next(&mut self) -> Option<RopeSlice<'a>> {
-        let a = self.cursor.cur_cursor();
-        let b;
-        loop {
-            match self
-                .cursor
-                .prev_boundary(self.cur_chunk, self.cur_chunk_start)
-            {
-                Ok(None) => {
-                    return None;
-                }
-                Ok(Some(n)) => {
-                    b = n;
-                    break;
-                }
-                Err(GraphemeIncomplete::PrevChunk) => {
-                    self.cur_chunk = self.chunks.next().unwrap_or("");
-                    self.cur_chunk_start -= self.cur_chunk.len();
-                }
-                Err(GraphemeIncomplete::PreContext(idx)) => {
-                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
-                    self.cursor.provide_context(chunk, byte_idx);
-                }
-                _ => unreachable!(),
-            }
-        }
-
-        if a >= self.cur_chunk_start + self.cur_chunk.len() {
-            Some(self.text.byte_slice(b..a))
-        } else {
-            let a2 = a - self.cur_chunk_start;
-            let b2 = b - self.cur_chunk_start;
-            Some((&self.cur_chunk[b2..a2]).into())
-        }
-    }
-}
-
 /// A highly compressed Cow<'a, str> that holds
 /// atmost u31::MAX bytes and is readonly
 pub struct GraphemeStr<'a> {
--- a/helix-core/src/indent.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-core/src/indent.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -8,7 +8,7 @@
     graphemes::{grapheme_width, tab_width_at},
     syntax::{IndentationHeuristic, LanguageConfiguration, RopeProvider, Syntax},
     tree_sitter::Node,
-    Position, Rope, RopeGraphemes, RopeSlice, Tendril,
+    Position, Rope, RopeSlice, Tendril,
 };
 
 /// Enum representing indentation style.
@@ -200,7 +200,7 @@
 /// Create a string of tabs & spaces that has the same visual width as the given RopeSlice (independent of the tab width).
 fn whitespace_with_same_width(text: RopeSlice) -> String {
     let mut s = String::new();
-    for grapheme in RopeGraphemes::new(text) {
+    for grapheme in text.graphemes() {
         if grapheme == "\t" {
             s.push('\t');
         } else {
--- a/helix-core/src/lib.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-core/src/lib.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -54,7 +54,6 @@
 #[doc(inline)]
 pub use {regex, tree_sitter};
 
-pub use graphemes::RopeGraphemes;
 pub use position::{
     char_idx_at_visual_offset, coords_at_pos, pos_at_coords, softwrapped_dimensions,
     visual_offset_from_anchor, visual_offset_from_block, Position, VisualOffsetError,
--- a/helix-core/src/position.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-core/src/position.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -4,10 +4,12 @@
     ops::{Add, AddAssign, Sub, SubAssign},
 };
 
+use helix_stdx::rope::RopeSliceExt;
+
 use crate::{
     chars::char_is_line_ending,
     doc_formatter::{DocumentFormatter, TextFormat},
-    graphemes::{ensure_grapheme_boundary_prev, grapheme_width, RopeGraphemes},
+    graphemes::{ensure_grapheme_boundary_prev, grapheme_width},
     line_ending::line_end_char_index,
     text_annotations::TextAnnotations,
     RopeSlice,
@@ -101,7 +103,7 @@
 
     let line_start = text.line_to_char(line);
     let pos = ensure_grapheme_boundary_prev(text, pos);
-    let col = RopeGraphemes::new(text.slice(line_start..pos)).count();
+    let col = text.slice(line_start..pos).graphemes().count();
 
     Position::new(line, col)
 }
@@ -126,7 +128,7 @@
 
     let mut col = 0;
 
-    for grapheme in RopeGraphemes::new(text.slice(line_start..pos)) {
+    for grapheme in text.slice(line_start..pos).graphemes() {
         if grapheme == "\t" {
             col += tab_width - (col % tab_width);
         } else {
@@ -275,7 +277,7 @@
     };
 
     let mut col_char_offset = 0;
-    for (i, g) in RopeGraphemes::new(text.slice(line_start..line_end)).enumerate() {
+    for (i, g) in text.slice(line_start..line_end).graphemes().enumerate() {
         if i == col {
             break;
         }
@@ -306,7 +308,7 @@
 
     let mut col_char_offset = 0;
     let mut cols_remaining = col;
-    for grapheme in RopeGraphemes::new(text.slice(line_start..line_end)) {
+    for grapheme in text.slice(line_start..line_end).graphemes() {
         let grapheme_width = if grapheme == "\t" {
             tab_width - ((col - cols_remaining) % tab_width)
         } else {
--- a/helix-core/src/selection.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-core/src/selection.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -9,7 +9,7 @@
     },
     line_ending::get_line_ending,
     movement::Direction,
-    Assoc, ChangeSet, RopeGraphemes, RopeSlice,
+    Assoc, ChangeSet, RopeSlice,
 };
 use helix_stdx::range::is_subset;
 use helix_stdx::rope::{self, RopeSliceExt};
@@ -379,7 +379,7 @@
 
     /// Returns true if this Range covers a single grapheme in the given text
     pub fn is_single_grapheme(&self, doc: RopeSlice) -> bool {
-        let mut graphemes = RopeGraphemes::new(doc.slice(self.from()..self.to()));
+        let mut graphemes = doc.slice(self.from()..self.to()).graphemes();
         let first = graphemes.next();
         let second = graphemes.next();
         first.is_some() && second.is_none()
--- a/helix-stdx/src/rope.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-stdx/src/rope.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -1,8 +1,10 @@
+use std::fmt;
 use std::ops::{Bound, RangeBounds};
 
 pub use regex_cursor::engines::meta::{Builder as RegexBuilder, Regex};
 pub use regex_cursor::regex_automata::util::syntax::Config;
 use regex_cursor::{Input as RegexInput, RopeyCursor};
+use ropey::iter::Chunks;
 use ropey::RopeSlice;
 use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
 
@@ -122,6 +124,33 @@
     /// ```
     #[allow(clippy::wrong_self_convention)]
     fn is_grapheme_boundary(self, byte_idx: usize) -> bool;
+    /// Returns an iterator over the grapheme clusters in the slice.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use ropey::RopeSlice;
+    /// # use helix_stdx::rope::RopeSliceExt;
+    /// let text = RopeSlice::from("😶‍🌫️🏴‍☠️🖼️");
+    /// let graphemes: Vec<_> = text.graphemes().collect();
+    /// assert_eq!(graphemes.as_slice(), &["😶‍🌫️", "🏴‍☠️", "🖼️"]);
+    /// ```
+    fn graphemes(self) -> RopeGraphemes<'a>;
+    /// Returns an iterator over the grapheme clusters in the slice, reversed.
+    ///
+    /// The returned iterator starts at the end of the slice and ends at the beginning of the
+    /// slice.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use ropey::RopeSlice;
+    /// # use helix_stdx::rope::RopeSliceExt;
+    /// let text = RopeSlice::from("😶‍🌫️🏴‍☠️🖼️");
+    /// let graphemes: Vec<_> = text.graphemes_rev().collect();
+    /// assert_eq!(graphemes.as_slice(), &["🖼️", "🏴‍☠️", "😶‍🌫️"]);
+    /// ```
+    fn graphemes_rev(self) -> RevRopeGraphemes<'a>;
 }
 
 impl<'a> RopeSliceExt<'a> for RopeSlice<'a> {
@@ -305,6 +334,32 @@
             }
         }
     }
+
+    fn graphemes(self) -> RopeGraphemes<'a> {
+        let mut chunks = self.chunks();
+        let first_chunk = chunks.next().unwrap_or("");
+        RopeGraphemes {
+            text: self,
+            chunks,
+            cur_chunk: first_chunk,
+            cur_chunk_start: 0,
+            cursor: GraphemeCursor::new(0, self.len_bytes(), true),
+        }
+    }
+
+    fn graphemes_rev(self) -> RevRopeGraphemes<'a> {
+        let (mut chunks, mut cur_chunk_start, _, _) = self.chunks_at_byte(self.len_bytes());
+        chunks.reverse();
+        let first_chunk = chunks.next().unwrap_or("");
+        cur_chunk_start -= first_chunk.len();
+        RevRopeGraphemes {
+            text: self,
+            chunks,
+            cur_chunk: first_chunk,
+            cur_chunk_start,
+            cursor: GraphemeCursor::new(self.len_bytes(), self.len_bytes(), true),
+        }
+    }
 }
 
 // copied from std
@@ -314,6 +369,130 @@
     (b as i8) >= -0x40
 }
 
+/// An iterator over the graphemes of a `RopeSlice`.
+#[derive(Clone)]
+pub struct RopeGraphemes<'a> {
+    text: RopeSlice<'a>,
+    chunks: Chunks<'a>,
+    cur_chunk: &'a str,
+    cur_chunk_start: usize,
+    cursor: GraphemeCursor,
+}
+
+impl fmt::Debug for RopeGraphemes<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("RopeGraphemes")
+            .field("text", &self.text)
+            .field("chunks", &self.chunks)
+            .field("cur_chunk", &self.cur_chunk)
+            .field("cur_chunk_start", &self.cur_chunk_start)
+            // .field("cursor", &self.cursor)
+            .finish()
+    }
+}
+
+impl<'a> Iterator for RopeGraphemes<'a> {
+    type Item = RopeSlice<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let a = self.cursor.cur_cursor();
+        let b;
+        loop {
+            match self
+                .cursor
+                .next_boundary(self.cur_chunk, self.cur_chunk_start)
+            {
+                Ok(None) => {
+                    return None;
+                }
+                Ok(Some(n)) => {
+                    b = n;
+                    break;
+                }
+                Err(GraphemeIncomplete::NextChunk) => {
+                    self.cur_chunk_start += self.cur_chunk.len();
+                    self.cur_chunk = self.chunks.next().unwrap_or("");
+                }
+                Err(GraphemeIncomplete::PreContext(idx)) => {
+                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
+                    self.cursor.provide_context(chunk, byte_idx);
+                }
+                _ => unreachable!(),
+            }
+        }
+
+        if a < self.cur_chunk_start {
+            Some(self.text.byte_slice(a..b))
+        } else {
+            let a2 = a - self.cur_chunk_start;
+            let b2 = b - self.cur_chunk_start;
+            Some((&self.cur_chunk[a2..b2]).into())
+        }
+    }
+}
+
+/// An iterator over the graphemes of a `RopeSlice` in reverse.
+#[derive(Clone)]
+pub struct RevRopeGraphemes<'a> {
+    text: RopeSlice<'a>,
+    chunks: Chunks<'a>,
+    cur_chunk: &'a str,
+    cur_chunk_start: usize,
+    cursor: GraphemeCursor,
+}
+
+impl fmt::Debug for RevRopeGraphemes<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("RevRopeGraphemes")
+            .field("text", &self.text)
+            .field("chunks", &self.chunks)
+            .field("cur_chunk", &self.cur_chunk)
+            .field("cur_chunk_start", &self.cur_chunk_start)
+            // .field("cursor", &self.cursor)
+            .finish()
+    }
+}
+
+impl<'a> Iterator for RevRopeGraphemes<'a> {
+    type Item = RopeSlice<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let a = self.cursor.cur_cursor();
+        let b;
+        loop {
+            match self
+                .cursor
+                .prev_boundary(self.cur_chunk, self.cur_chunk_start)
+            {
+                Ok(None) => {
+                    return None;
+                }
+                Ok(Some(n)) => {
+                    b = n;
+                    break;
+                }
+                Err(GraphemeIncomplete::PrevChunk) => {
+                    self.cur_chunk = self.chunks.next().unwrap_or("");
+                    self.cur_chunk_start -= self.cur_chunk.len();
+                }
+                Err(GraphemeIncomplete::PreContext(idx)) => {
+                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
+                    self.cursor.provide_context(chunk, byte_idx);
+                }
+                _ => unreachable!(),
+            }
+        }
+
+        if a >= self.cur_chunk_start + self.cur_chunk.len() {
+            Some(self.text.byte_slice(b..a))
+        } else {
+            let a2 = a - self.cur_chunk_start;
+            let b2 = b - self.cur_chunk_start;
+            Some((&self.cur_chunk[b2..a2]).into())
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use ropey::RopeSlice;
--- a/helix-term/src/commands.rs	Sun Jan 26 19:16:02 2025 -0500
+++ b/helix-term/src/commands.rs	Sun Jan 26 20:58:27 2025 -0500
@@ -20,7 +20,7 @@
     comment,
     doc_formatter::TextFormat,
     encoding, find_workspace,
-    graphemes::{self, next_grapheme_boundary, RevRopeGraphemes},
+    graphemes::{self, next_grapheme_boundary},
     history::UndoKind,
     increment,
     indent::{self, IndentStyle},
@@ -35,8 +35,8 @@
     text_annotations::{Overlay, TextAnnotations},
     textobject,
     unicode::width::UnicodeWidthChar,
-    visual_offset_from_block, Deletion, LineEnding, Position, Range, Rope, RopeGraphemes,
-    RopeReader, RopeSlice, Selection, SmallVec, Syntax, Tendril, Transaction,
+    visual_offset_from_block, Deletion, LineEnding, Position, Range, Rope, RopeReader, RopeSlice,
+    Selection, SmallVec, Syntax, Tendril, Transaction,
 };
 use helix_view::{
     document::{FormatterError, Mode, SCRATCH_BUFFER_NAME},
@@ -1681,10 +1681,12 @@
         if let Some(ch) = ch {
             let transaction = Transaction::change_by_selection(doc.text(), selection, |range| {
                 if !range.is_empty() {
-                    let text: Tendril =
-                        RopeGraphemes::new(doc.text().slice(range.from()..range.to()))
-                            .map(|_g| ch)
-                            .collect();
+                    let text: Tendril = doc
+                        .text()
+                        .slice(range.from()..range.to())
+                        .graphemes()
+                        .map(|_g| ch)
+                        .collect();
                     (range.from(), range.to(), Some(text))
                 } else {
                     // No change.
@@ -6574,7 +6576,9 @@
             // madeup of word characters. The latter condition is needed because
             // move_next_word_end simply treats a sequence of characters from
             // the same char class as a word so `=<` would also count as a word.
-            let add_label = RevRopeGraphemes::new(text.slice(..cursor_fwd.head))
+            let add_label = text
+                .slice(..cursor_fwd.head)
+                .graphemes_rev()
                 .take(2)
                 .take_while(|g| g.chars().all(char_is_word))
                 .count()
@@ -6600,7 +6604,9 @@
             // madeup of word characters. The latter condition is needed because
             // move_prev_word_start simply treats a sequence of characters from
             // the same char class as a word so `=<` would also count as a word.
-            let add_label = RopeGraphemes::new(text.slice(cursor_rev.head..))
+            let add_label = text
+                .slice(cursor_rev.head..)
+                .graphemes()
                 .take(2)
                 .take_while(|g| g.chars().all(char_is_word))
                 .count()