Mercurial > forks > helix
changeset 6792:176f68aecad1
syntax: Fix language detection by shebang
The switch to tree-house accidentally dropped some shebang parsing code
from the loader's function to detect by shebang. This change restores
that. The new code is slightly different as it's using a `regex_cursor`
regex on the Rope rather than eagerly converting the text to a
`Cow<str>` and running a regular regex across it.
author | Michael Davis <mcarsondavis@gmail.com> |
---|---|
date | Wed, 14 May 2025 16:29:27 -0400 |
parents | 4a705a983d38 |
children | 53795d518b8b |
files | helix-core/src/syntax.rs |
diffstat | 1 files changed, 17 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/helix-core/src/syntax.rs Wed May 14 10:52:00 2025 -0300
+++ b/helix-core/src/syntax.rs Wed May 14 16:29:27 2025 -0400
@@ -312,7 +312,22 @@
     }
 
     pub fn language_for_shebang(&self, text: RopeSlice) -> Option<Language> {
-        let shebang: Cow<str> = text.into();
+        // NOTE: this is slightly different than the one for injection markers in tree-house. It
+        // is anchored at the beginning.
+        use helix_stdx::rope::Regex;
+        use once_cell::sync::Lazy;
+        const SHEBANG: &str = r"^#!\s*(?:\S*[/\\](?:env\s+(?:\-\S+\s+)*)?)?([^\s\.\d]+)";
+        static SHEBANG_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(SHEBANG).unwrap());
+
+        let marker = SHEBANG_REGEX
+            .captures_iter(regex_cursor::Input::new(text))
+            .map(|cap| text.byte_slice(cap.get_group(1).unwrap().range()))
+            .next()?;
+        self.language_for_shebang_marker(marker)
+    }
+
+    fn language_for_shebang_marker(&self, marker: RopeSlice) -> Option<Language> {
+        let shebang: Cow<str> = marker.into();
         self.languages_by_shebang.get(shebang.as_ref()).copied()
     }
 
@@ -351,7 +366,7 @@
                 let path: Cow<str> = text.into();
                 self.language_for_filename(Path::new(path.as_ref()))
             }
-            InjectionLanguageMarker::Shebang(text) => self.language_for_shebang(text),
+            InjectionLanguageMarker::Shebang(text) => self.language_for_shebang_marker(text),
         }
     }
 