changeset 6792:176f68aecad1

syntax: Fix language detection by shebang. The switch to tree-house accidentally dropped some shebang parsing code from the loader's function to detect by shebang. This change restores that. The new code is slightly different as it's using a `regex_cursor` regex on the Rope rather than eagerly converting the text to a `Cow<str>` and running a regular regex across it.
author Michael Davis <mcarsondavis@gmail.com>
date Wed, 14 May 2025 16:29:27 -0400
parents 4a705a983d38
children 53795d518b8b
files helix-core/src/syntax.rs
diffstat 1 files changed, 17 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/helix-core/src/syntax.rs	Wed May 14 10:52:00 2025 -0300
+++ b/helix-core/src/syntax.rs	Wed May 14 16:29:27 2025 -0400
@@ -312,7 +312,22 @@
     }
 
     pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> {
-        let shebang: Cow<str> = text.into();
+        // NOTE: this is slightly different than the one for injection markers in tree-house. It
+        // is anchored at the beginning.
+        use helix_stdx::rope::Regex;
+        use once_cell::sync::Lazy;
+        const SHEBANG: &str = r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
+        static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
+
+        let marker = SHEBANG_REGEX
+            .captures_iter(regex_cursor::Input::new(text))
+            .map(|cap| text.byte_slice(cap.get_group(1).unwrap().range()))
+            .next()?;
+        self.language_for_shebang_marker(marker)
+    }
+
+    fn language_for_shebang_marker(&self, marker: RopeSlice) -> Option<Language> {
+        let shebang: Cow<str> = marker.into();
         self.languages_by_shebang.get(shebang.as_ref()).copied()
     }
 
@@ -351,7 +366,7 @@
                 let path: Cow<str> = text.into();
                 self.language_for_filename(Path::new(path.as_ref()))
             }
-            InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text),
+            InjectionLanguageMarker::Shebang(text) => self.language_for_shebang_marker(text),
         }
     }