changeset 5800:749f00caf192

feat(cairo): update tree-sitter grammar and queries (#10919) * feat(cairo): update tree-sitter grammar and queries * fix suggestions
author Lucas @ StarkWare <70894690+LucasLvy@users.noreply.github.com>
date Wed, 12 Jun 2024 02:20:13 +0200
parents 2050725df3c5
children fe4a15d12cbc
files languages.toml runtime/queries/cairo/highlights.scm runtime/queries/cairo/indents.scm runtime/queries/cairo/injections.scm runtime/queries/cairo/locals.scm runtime/queries/cairo/textobjects.scm
diffstat 6 files changed, 583 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/languages.toml	Wed Jun 12 01:08:50 2024 +0900
+++ b/languages.toml	Wed Jun 12 02:20:13 2024 +0200
@@ -2074,9 +2074,12 @@
 comment-token = "//"
 indent = { tab-width = 4, unit = "    " }
 # auto-format = true
-grammar = "rust"
 language-servers = [ "cairo-language-server" ]
 
+[[grammar]]
+name = "cairo"
+source = { git = "https://github.com/starkware-libs/tree-sitter-cairo", rev = "0596baab741ffacdc65c761d5d5ffbbeae97f033" }
+
 [[language]]
 name = "cpon"
 scope = "scope.cpon"
--- a/runtime/queries/cairo/highlights.scm	Wed Jun 12 01:08:50 2024 +0900
+++ b/runtime/queries/cairo/highlights.scm	Wed Jun 12 02:20:13 2024 +0200
@@ -1,1 +1,362 @@
-; inherits: rust
+; -------
+; Tree-sitter doesn't allow overrides with regard to captures,
+; though it is possible to affect the child node of a captured
+; node. Thus, the approach here is to flip the order of the
+; patterns so that overrides are unnecessary.
+; -------
+
+; -------
+; Types
+; -------
+
+(type_parameters
+  (type_identifier) @type.parameter)
+(constrained_type_parameter
+  left: (type_identifier) @type.parameter)
+
+; ---
+; Primitives
+; ---
+
+(primitive_type) @type.builtin
+(boolean_literal) @constant.builtin.boolean
+(numeric_literal) @constant.numeric.integer
+[
+  (string_literal)
+  (shortstring_literal)
+] @string
+[
+  (line_comment)
+] @comment
+
+; ---
+; Extraneous
+; ---
+
+(enum_variant (identifier) @type.enum.variant)
+
+(field_initializer
+  (field_identifier) @variable.other.member)
+(shorthand_field_initializer
+  (identifier) @variable.other.member)
+(shorthand_field_identifier) @variable.other.member
+
+
+; ---
+; Punctuation
+; ---
+
+[
+  "::"
+  "."
+  ";"
+  ","
+] @punctuation.delimiter
+
+[
+  "("
+  ")"
+  "["
+  "]"
+  "{"
+  "}"
+] @punctuation.bracket
+(type_arguments
+  [
+    "<"
+    ">"
+  ] @punctuation.bracket)
+(type_parameters
+  [
+    "<"
+    ">"
+  ] @punctuation.bracket)
+
+; ---
+; Variables
+; ---
+
+(let_declaration
+  pattern: [
+    ((identifier) @variable)
+    ((tuple_pattern
+      (identifier) @variable))
+  ])
+  
+; The parent is matched with the wildcard `(_` so that this pattern does not conflict with `call_expression` further below.
+(_
+ value: (field_expression
+  value: (identifier)? @variable
+  field: (field_identifier) @variable.other.member))
+
+(parameter
+	pattern: (identifier) @variable.parameter)
+
+; -------
+; Keywords
+; -------
+[
+  "match"
+  "if"
+  "else"
+] @keyword.control.conditional
+
+[
+  "while"
+  "loop"
+] @keyword.control.repeat
+
+[
+  "break"
+  "continue"
+  "return"
+] @keyword.control.return
+
+"use" @keyword.control.import
+(mod_item "mod" @keyword.control.import !body)
+(use_as_clause "as" @keyword.control.import)
+
+
+[
+  (crate)
+  (super)
+  "as"
+  "pub"
+  "mod"
+  (extern)
+  (nopanic)
+
+  "impl"
+  "trait"
+  "of"
+
+  "default"
+] @keyword
+
+[
+  "struct"
+  "enum"
+  "type"
+] @keyword.storage.type
+
+"let" @keyword.storage
+"fn" @keyword.function
+
+(mutable_specifier) @keyword.storage.modifier.mut
+(ref_specifier) @keyword.storage.modifier.ref
+
+(snapshot_type "@" @keyword.storage.modifier.ref)
+
+[
+  "const"
+  "ref"
+] @keyword.storage.modifier
+
+; TODO: variable.mut to highlight mutable identifiers via locals.scm
+
+; -------
+; Constructors
+; -------
+; TODO: this is largely guesswork, remove it once we get actual info from locals.scm or the language server
+
+(struct_expression
+  name: (type_identifier) @constructor)
+
+(tuple_enum_pattern
+  type: [
+    (identifier) @constructor
+    (scoped_identifier
+      name: (identifier) @constructor)
+  ])
+(struct_pattern
+  type: [
+    ((type_identifier) @constructor)
+    (scoped_type_identifier
+      name: (type_identifier) @constructor)
+  ])
+(match_pattern
+  ((identifier) @constructor) (#match? @constructor "^[A-Z]"))
+(or_pattern
+  ((identifier) @constructor)
+  ((identifier) @constructor)
+  (#match? @constructor "^[A-Z]"))
+
+; -------
+; Guess Other Types
+; -------
+
+((identifier) @constant
+ (#match? @constant "^[A-Z][A-Z\\d_]*$"))
+
+; ---
+; PascalCase identifiers in call_expressions (e.g. `Ok()`)
+; are assumed to be enum constructors.
+; ---
+
+(call_expression
+  function: [
+    ((identifier) @constructor
+      (#match? @constructor "^[A-Z]"))
+    (scoped_identifier
+      name: ((identifier) @constructor
+        (#match? @constructor "^[A-Z]")))
+  ])
+
+; ---
+; PascalCase identifiers under a path which is also PascalCase
+; are assumed to be constructors if they have methods or fields.
+; ---
+
+(field_expression
+  value: (scoped_identifier
+    path: [
+      (identifier) @type
+      (scoped_identifier
+        name: (identifier) @type)
+    ]
+    name: (identifier) @constructor
+      (#match? @type "^[A-Z]")
+      (#match? @constructor "^[A-Z]")))
+
+; ---
+; Other PascalCase identifiers are assumed to be structs.
+; ---
+
+((identifier) @type
+  (#match? @type "^[A-Z]"))
+
+; -------
+; Functions
+; -------
+
+(call_expression
+  function: [
+    ((identifier) @function)
+    (scoped_identifier
+      name: (identifier) @function)
+    (field_expression
+      field: (field_identifier) @function)
+  ])
+(generic_function
+  function: [
+    ((identifier) @function)
+    (scoped_identifier
+      name: (identifier) @function)
+    (field_expression
+      field: (field_identifier) @function.method)
+  ])
+(function_item
+  (function
+    name: (identifier) @function))
+
+(function_signature_item
+  (function
+    name: (identifier) @function))
+
+(external_function_item
+  (function
+    name: (identifier) @function))
+
+; ---
+; Macros
+; ---
+
+(attribute
+  (identifier) @special
+  arguments: (token_tree (identifier) @type)
+  (#eq? @special "derive")
+)
+
+(attribute
+  (identifier) @function.macro)
+(attribute
+  [
+    (identifier) @function.macro
+    (scoped_identifier
+      name: (identifier) @function.macro)
+  ]
+  (token_tree (identifier) @function.macro)?)
+
+(inner_attribute_item) @attribute
+
+(macro_invocation
+  macro: [
+    ((identifier) @function.macro)
+    (scoped_identifier
+      name: (identifier) @function.macro)
+  ]
+  "!" @function.macro)
+
+
+; -------
+; Operators
+; -------
+
+[
+  "*"
+  "->"
+  "=>"
+  "<="
+  "="
+  "=="
+  "!"
+  "!="
+  "%"
+  "%="
+  "@"
+  "&&"
+  "|"
+  "||"
+  "^"
+  "*"
+  "*="
+  "-"
+  "-="
+  "+"
+  "+="
+  "/"
+  "/="
+  ">"
+  "<"
+  ">="
+  ">>"
+  "<<"
+] @operator
+
+; -------
+; Paths
+; -------
+
+(use_declaration
+  argument: (identifier) @namespace)
+(use_wildcard
+  (identifier) @namespace)
+(mod_item
+  name: (identifier) @namespace)
+(scoped_use_list
+  path: (identifier)? @namespace)
+(use_list
+  (identifier) @namespace)
+(use_as_clause
+  path: (identifier)? @namespace
+  alias: (identifier) @namespace)
+
+; ---
+; Remaining Paths
+; ---
+
+(scoped_identifier
+  path: (identifier)? @namespace
+  name: (identifier) @namespace)
+(scoped_type_identifier
+  path: (identifier) @namespace)
+
+; -------
+; Remaining Identifiers
+; -------
+
+"?" @special
+
+(type_identifier) @type
+(identifier) @variable
+(field_identifier) @variable.other.member
--- a/runtime/queries/cairo/indents.scm	Wed Jun 12 01:08:50 2024 +0900
+++ b/runtime/queries/cairo/indents.scm	Wed Jun 12 02:20:13 2024 +0200
@@ -1,1 +1,118 @@
-; inherits: rust
+[
+  (use_list)
+  (block)
+  (match_block)
+  (arguments)
+  (parameters)
+  (declaration_list)
+  (field_declaration_list)
+  (field_initializer_list)
+  (struct_pattern)
+  (tuple_pattern)
+  (unit_expression)
+  (enum_variant_list)
+  (call_expression)
+  (binary_expression)
+  (field_expression)
+  (tuple_expression)
+  (array_expression)
+
+  (token_tree)
+] @indent
+
+[
+  "}"
+  "]"
+  ")"
+] @outdent
+
+; Indent the right side of assignments.
+; The #not-same-line? predicate is required to prevent an extra indent for e.g.
+; an else-clause where the previous if-clause starts on the same line as the assignment.
+(assignment_expression
+  .
+  (_) @expr-start
+  right: (_) @indent
+  (#not-same-line? @indent @expr-start)
+  (#set! "scope" "all")
+)
+(compound_assignment_expr
+  .
+  (_) @expr-start
+  right: (_) @indent
+  (#not-same-line? @indent @expr-start)
+  (#set! "scope" "all")
+)
+(let_declaration
+  "let" @expr-start
+  value: (_) @indent
+  alternative: (_)? @indent
+  (#not-same-line? @indent @expr-start)
+  (#set! "scope" "all")
+)
+(let_condition
+  .
+  (_) @expr-start
+  value: (_) @indent
+  (#not-same-line? @indent @expr-start)
+  (#set! "scope" "all")
+)
+(if_expression
+  .
+  (_) @expr-start
+  condition: (_) @indent
+  (#not-same-line? @indent @expr-start)
+  (#set! "scope" "all")
+)
+(field_pattern
+  .
+  (_) @expr-start
+  pattern: (_) @indent
+  (#not-same-line? @indent @expr-start)
+  (#set! "scope" "all")
+)
+; Indent type aliases that span multiple lines, similar to
+; regular assignment expressions
+(type_item
+  .
+  (_) @expr-start
+  type: (_) @indent
+  (#not-same-line? @indent @expr-start)
+  (#set! "scope" "all")
+)
+
+; Some field expressions where the left part is a multiline expression are not
+; indented by the formatter (wording carried over from Rust's cargo fmt; TODO confirm for Cairo).
+; Because this multiline expression might be nested in an arbitrary number of
+; field expressions, this can only be matched using a regex.
+(field_expression
+  value: (_) @val
+  "." @outdent
+  ; Check whether the first line ends with `(`, `{` or `[` (up to whitespace).
+  (#match? @val "(\\A[^\\n\\r]+(\\(|\\{|\\[)[\\t ]*(\\n|\\r))")
+)
+; Same as above, but with an additional `call_expression`. This is required since otherwise
+; the arguments of the function call won't be outdented.
+(call_expression
+  function: (field_expression
+    value: (_) @val
+    "." @outdent
+    (#match? @val "(\\A[^\\n\\r]+(\\(|\\{|\\[)[\\t ]*(\\n|\\r))")
+  )
+  arguments: (_) @outdent
+)
+
+
+; Indent if guards in patterns.
+; Since the tree-sitter grammar doesn't create a node for the if expression,
+; it's not possible to do this correctly in all cases. Indenting the tail of the
+; whole pattern whenever it contains an `if` only fails if the `if` appears after
+; the second line of the pattern (which should only rarely be the case)
+(match_pattern
+  .
+  (_) @expr-start
+  "if" @pattern-guard
+  (#not-same-line? @expr-start @pattern-guard)
+) @indent
+
+  
--- a/runtime/queries/cairo/injections.scm	Wed Jun 12 01:08:50 2024 +0900
+++ b/runtime/queries/cairo/injections.scm	Wed Jun 12 02:20:13 2024 +0200
@@ -1,3 +1,3 @@
-([(line_comment) (block_comment)] @injection.content
+([(line_comment)] @injection.content
  (#set! injection.language "comment"))
 
--- a/runtime/queries/cairo/locals.scm	Wed Jun 12 01:08:50 2024 +0900
+++ b/runtime/queries/cairo/locals.scm	Wed Jun 12 02:20:13 2024 +0200
@@ -1,1 +1,25 @@
-; inherits: rust
+; Scopes
+
+[
+  (function_item)
+  (struct_item)
+  (enum_item)
+  (type_item)
+  (trait_item)
+  (impl_item)
+  (block)
+] @local.scope
+
+; Definitions
+
+(parameter
+  (identifier) @local.definition)
+
+(type_parameters
+  (type_identifier) @local.definition)
+(constrained_type_parameter
+  left: (type_identifier) @local.definition)
+
+; References
+(identifier) @local.reference
+(type_identifier) @local.reference
--- a/runtime/queries/cairo/textobjects.scm	Wed Jun 12 01:08:50 2024 +0900
+++ b/runtime/queries/cairo/textobjects.scm	Wed Jun 12 02:20:13 2024 +0200
@@ -1,1 +1,73 @@
-; inherits: rust
+(function_item
+  body: (_) @function.inside) @function.around
+
+(struct_item
+  body: (_) @class.inside) @class.around
+
+(enum_item
+  body: (_) @class.inside) @class.around
+
+(trait_item
+  body: (_) @class.inside) @class.around
+
+(impl_item
+  body: (_) @class.inside) @class.around
+
+(parameters 
+  ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(type_parameters
+  ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(type_arguments
+  ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(arguments
+  ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+(field_initializer_list  
+  ((_) @parameter.inside . ","? @parameter.around) @parameter.around)
+
+[
+  (line_comment)
+] @comment.inside
+
+(line_comment)+ @comment.around
+
+(; #[test]
+ (attribute_item
+   (attribute
+     (identifier) @_test_attribute))
+ ; allow other attributes like #[should_panic] and comments
+ [
+   (attribute_item)
+   (line_comment)
+ ]*
+ ; the test function
+ (function_item
+   body: (_) @test.inside) @test.around
+ (#eq? @_test_attribute "test"))
+
+(array_expression
+  (_) @entry.around)
+
+(tuple_expression
+  (_) @entry.around)
+
+(tuple_pattern
+  (_) @entry.around)
+
+; The commonly used `array!` macro initializer is special-cased
+(macro_invocation
+  (identifier) @_id (token_tree (_) @entry.around)
+  (#eq? @_id "array"))
+
+(enum_variant) @entry.around
+
+(field_declaration
+  (_) @entry.inside) @entry.around
+
+(field_initializer
+  (_) @entry.inside) @entry.around
+
+(shorthand_field_initializer) @entry.around