mirror of
https://github.com/Aider-AI/aider.git
synced 2025-05-31 09:44:59 +00:00
Merge branch 'sitter-map'
This commit is contained in:
commit
815fdaabc3
28 changed files with 1074 additions and 288 deletions
|
@ -8,7 +8,7 @@ GPT to edit the code and your own editor to make changes yourself.
|
|||
Aider makes sure edits from you and GPT are
|
||||
[committed to git](https://aider.chat/docs/faq.html#how-does-aider-use-git)
|
||||
with sensible commit messages.
|
||||
Aider is unique in that it [works well with pre-existing, larger codebases](https://aider.chat/docs/ctags.html).
|
||||
Aider is unique in that it [works well with pre-existing, larger codebases](https://aider.chat/docs/repomap.html).
|
||||
|
||||
<p align="center">
|
||||
<img src="assets/screencast.svg" alt="aider screencast">
|
||||
|
@ -61,7 +61,7 @@ Here are some example transcripts that show how you can chat with `aider` to wri
|
|||
* [**Complex Multi-file Change with Debugging**](https://aider.chat/examples/complex-change.html): GPT makes a complex code change that is coordinated across multiple source files, and resolves bugs by reviewing error output and doc snippets.
|
||||
|
||||
* [**Create a Black Box Test Case**](https://aider.chat/examples/add-test.html): GPT creates a "black box" test case without access to the source of the method being tested, using only a
|
||||
[high level map of the repository based on ctags](https://aider.chat/docs/ctags.html).
|
||||
[high level map of the repository based on tree-sitter](https://aider.chat/docs/repomap.html).
|
||||
|
||||
You can find more chat transcripts on the [examples page](https://aider.chat/examples/).
|
||||
|
||||
|
@ -73,7 +73,7 @@ You can find more chat transcripts on the [examples page](https://aider.chat/exa
|
|||
* Aider will apply the edits suggested by GPT directly to your source files.
|
||||
* Aider will [automatically commit each changeset to your local git repo](https://aider.chat/docs/faq.html#how-does-aider-use-git) with a descriptive commit message. These frequent, automatic commits provide a safety net. It's easy to undo changes or use standard git workflows to manage longer sequences of changes.
|
||||
* You can use aider with multiple source files at once, so GPT can make coordinated code changes across all of them in a single changeset/commit.
|
||||
* Aider can [give *GPT-4* a map of your entire git repo](https://aider.chat/docs/ctags.html), which helps it understand and modify large codebases.
|
||||
* Aider can [give *GPT-4* a map of your entire git repo](https://aider.chat/docs/repomap.html), which helps it understand and modify large codebases.
|
||||
* You can also edit files by hand using your editor while chatting with aider. Aider will notice these out-of-band edits and keep GPT up to date with the latest versions of your files. This lets you bounce back and forth between the aider chat and your editor, to collaboratively code with GPT.
|
||||
|
||||
|
||||
|
@ -134,7 +134,7 @@ Aider has some ability to help GPT figure out which files to edit all by itself,
|
|||
* Use Meta-ENTER (Esc+ENTER in some environments) to enter multiline chat messages. Or enter `{` alone on the first line to start a multiline message and `}` alone on the last line to end it.
|
||||
* If your code is throwing an error, share the error output with GPT using `/run` or by pasting it into the chat. Let GPT figure out and fix the bug.
|
||||
* GPT knows about a lot of standard tools and libraries, but may get some of the fine details wrong about APIs and function arguments. You can paste doc snippets into the chat to resolve these issues.
|
||||
* GPT can only see the content of the files you specifically "add to the chat". Aider also sends GPT-4 a [map of your entire git repo](https://aider.chat/docs/ctags.html). So GPT may ask to see additional files if it feels that's needed for your requests.
|
||||
* GPT can only see the content of the files you specifically "add to the chat". Aider also sends GPT-4 a [map of your entire git repo](https://aider.chat/docs/repomap.html). So GPT may ask to see additional files if it feels that's needed for your requests.
|
||||
* I also shared some general [GPT coding tips on Hacker News](https://news.ycombinator.com/item?id=36211879).
|
||||
|
||||
|
||||
|
|
|
@ -177,17 +177,10 @@ class Coder:
|
|||
self.verbose,
|
||||
)
|
||||
|
||||
if self.repo_map.use_ctags:
|
||||
self.io.tool_output(f"Repo-map: universal-ctags using {map_tokens} tokens")
|
||||
elif not self.repo_map.has_ctags and map_tokens > 0:
|
||||
self.io.tool_output(
|
||||
f"Repo-map: basic using {map_tokens} tokens"
|
||||
f" ({self.repo_map.ctags_disabled_reason})"
|
||||
)
|
||||
else:
|
||||
self.io.tool_output("Repo-map: disabled because map_tokens == 0")
|
||||
if map_tokens > 0:
|
||||
self.io.tool_output(f"Repo-map: using {map_tokens} tokens")
|
||||
else:
|
||||
self.io.tool_output("Repo-map: disabled")
|
||||
self.io.tool_output("Repo-map: disabled because map_tokens == 0")
|
||||
|
||||
for fname in self.get_inchat_relative_files():
|
||||
self.io.tool_output(f"Added {fname} to the chat.")
|
||||
|
|
46
aider/queries/tree-sitter-c-sharp-tags.scm
Normal file
46
aider/queries/tree-sitter-c-sharp-tags.scm
Normal file
|
@ -0,0 +1,46 @@
|
|||
(class_declaration
|
||||
name: (identifier) @name.definition.class
|
||||
) @definition.class
|
||||
|
||||
(class_declaration
|
||||
bases: (base_list (_) @name.reference.class)
|
||||
) @reference.class
|
||||
|
||||
(interface_declaration
|
||||
name: (identifier) @name.definition.interface
|
||||
) @definition.interface
|
||||
|
||||
(interface_declaration
|
||||
bases: (base_list (_) @name.reference.interface)
|
||||
) @reference.interface
|
||||
|
||||
(method_declaration
|
||||
name: (identifier) @name.definition.method
|
||||
) @definition.method
|
||||
|
||||
(object_creation_expression
|
||||
type: (identifier) @name.reference.class
|
||||
) @reference.class
|
||||
|
||||
(type_parameter_constraints_clause
|
||||
target: (identifier) @name.reference.class
|
||||
) @reference.class
|
||||
|
||||
(type_constraint
|
||||
type: (identifier) @name.reference.class
|
||||
) @reference.class
|
||||
|
||||
(variable_declaration
|
||||
type: (identifier) @name.reference.class
|
||||
) @reference.class
|
||||
|
||||
(invocation_expression
|
||||
function:
|
||||
(member_access_expression
|
||||
name: (identifier) @name.reference.send
|
||||
)
|
||||
) @reference.send
|
||||
|
||||
(namespace_declaration
|
||||
name: (identifier) @name.definition.module
|
||||
) @definition.module
|
9
aider/queries/tree-sitter-c-tags.scm
Normal file
9
aider/queries/tree-sitter-c-tags.scm
Normal file
|
@ -0,0 +1,9 @@
|
|||
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class
|
||||
|
||||
(declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class
|
||||
|
||||
(function_declarator declarator: (identifier) @name.definition.function) @definition.function
|
||||
|
||||
(type_definition declarator: (type_identifier) @name.definition.type) @definition.type
|
||||
|
||||
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type
|
15
aider/queries/tree-sitter-cpp-tags.scm
Normal file
15
aider/queries/tree-sitter-cpp-tags.scm
Normal file
|
@ -0,0 +1,15 @@
|
|||
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class
|
||||
|
||||
(declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class
|
||||
|
||||
(function_declarator declarator: (identifier) @name.definition.function) @definition.function
|
||||
|
||||
(function_declarator declarator: (field_identifier) @name.definition.function) @definition.function
|
||||
|
||||
(function_declarator declarator: (qualified_identifier scope: (namespace_identifier) @scope name: (identifier) @name.definition.method)) @definition.method
|
||||
|
||||
(type_definition declarator: (type_identifier) @name.definition.type) @definition.type
|
||||
|
||||
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type
|
||||
|
||||
(class_specifier name: (type_identifier) @name.definition.class) @definition.class
|
5
aider/queries/tree-sitter-elisp-tags.scm
Normal file
5
aider/queries/tree-sitter-elisp-tags.scm
Normal file
|
@ -0,0 +1,5 @@
|
|||
;; defun/defsubst
|
||||
(function_definition name: (symbol) @name.definition.function) @definition.function
|
||||
|
||||
;; Treat macros as function definitions for the sake of TAGS.
|
||||
(macro_definition name: (symbol) @name.definition.function) @definition.function
|
54
aider/queries/tree-sitter-elixir-tags.scm
Normal file
54
aider/queries/tree-sitter-elixir-tags.scm
Normal file
|
@ -0,0 +1,54 @@
|
|||
; Definitions
|
||||
|
||||
; * modules and protocols
|
||||
(call
|
||||
target: (identifier) @ignore
|
||||
(arguments (alias) @name.definition.module)
|
||||
(#match? @ignore "^(defmodule|defprotocol)$")) @definition.module
|
||||
|
||||
; * functions/macros
|
||||
(call
|
||||
target: (identifier) @ignore
|
||||
(arguments
|
||||
[
|
||||
; zero-arity functions with no parentheses
|
||||
(identifier) @name.definition.function
|
||||
; regular function clause
|
||||
(call target: (identifier) @name.definition.function)
|
||||
; function clause with a guard clause
|
||||
(binary_operator
|
||||
left: (call target: (identifier) @name.definition.function)
|
||||
operator: "when")
|
||||
])
|
||||
(#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp)$")) @definition.function
|
||||
|
||||
; References
|
||||
|
||||
; ignore calls to kernel/special-forms keywords
|
||||
(call
|
||||
target: (identifier) @ignore
|
||||
(#match? @ignore "^(def|defp|defdelegate|defguard|defguardp|defmacro|defmacrop|defn|defnp|defmodule|defprotocol|defimpl|defstruct|defexception|defoverridable|alias|case|cond|else|for|if|import|quote|raise|receive|require|reraise|super|throw|try|unless|unquote|unquote_splicing|use|with)$"))
|
||||
|
||||
; ignore module attributes
|
||||
(unary_operator
|
||||
operator: "@"
|
||||
operand: (call
|
||||
target: (identifier) @ignore))
|
||||
|
||||
; * function call
|
||||
(call
|
||||
target: [
|
||||
; local
|
||||
(identifier) @name.reference.call
|
||||
; remote
|
||||
(dot
|
||||
right: (identifier) @name.reference.call)
|
||||
]) @reference.call
|
||||
|
||||
; * pipe into function call
|
||||
(binary_operator
|
||||
operator: "|>"
|
||||
right: (identifier) @name.reference.call) @reference.call
|
||||
|
||||
; * modules
|
||||
(alias) @name.reference.module @reference.module
|
19
aider/queries/tree-sitter-elm-tags.scm
Normal file
19
aider/queries/tree-sitter-elm-tags.scm
Normal file
|
@ -0,0 +1,19 @@
|
|||
(value_declaration (function_declaration_left (lower_case_identifier) @name.definition.function)) @definition.function
|
||||
|
||||
(function_call_expr (value_expr (value_qid) @name.reference.function)) @reference.function
|
||||
; exposed_value: tag its lower-case identifier as a function reference.
(exposed_value (lower_case_identifier) @name.reference.function) @reference.function
|
||||
(type_annotation ((lower_case_identifier) @name.reference.function) (colon)) @reference.function
|
||||
|
||||
(type_declaration ((upper_case_identifier) @name.definition.type) ) @definition.type
|
||||
|
||||
(type_ref (upper_case_qid (upper_case_identifier) @name.reference.type)) @reference.type
|
||||
; exposed_type: tag its upper-case identifier as a type reference.
(exposed_type (upper_case_identifier) @name.reference.type) @reference.type
|
||||
|
||||
(type_declaration (union_variant (upper_case_identifier) @name.definition.union)) @definition.union
|
||||
|
||||
(value_expr (upper_case_qid (upper_case_identifier) @name.reference.union)) @reference.union
|
||||
|
||||
|
||||
(module_declaration
|
||||
(upper_case_qid (upper_case_identifier)) @name.definition.module
|
||||
) @definition.module
|
30
aider/queries/tree-sitter-go-tags.scm
Normal file
30
aider/queries/tree-sitter-go-tags.scm
Normal file
|
@ -0,0 +1,30 @@
|
|||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
(function_declaration
|
||||
name: (identifier) @name.definition.function) @definition.function
|
||||
(#strip! @doc "^//\\s*")
|
||||
(#set-adjacent! @doc @definition.function)
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
(method_declaration
|
||||
name: (field_identifier) @name.definition.method) @definition.method
|
||||
(#strip! @doc "^//\\s*")
|
||||
(#set-adjacent! @doc @definition.method)
|
||||
)
|
||||
|
||||
(call_expression
|
||||
function: [
|
||||
(identifier) @name.reference.call
|
||||
(parenthesized_expression (identifier) @name.reference.call)
|
||||
(selector_expression field: (field_identifier) @name.reference.call)
|
||||
(parenthesized_expression (selector_expression field: (field_identifier) @name.reference.call))
|
||||
]) @reference.call
|
||||
|
||||
(type_spec
|
||||
name: (type_identifier) @name.definition.type) @definition.type
|
||||
|
||||
(type_identifier) @name.reference.type @reference.type
|
20
aider/queries/tree-sitter-java-tags.scm
Normal file
20
aider/queries/tree-sitter-java-tags.scm
Normal file
|
@ -0,0 +1,20 @@
|
|||
(class_declaration
|
||||
name: (identifier) @name.definition.class) @definition.class
|
||||
|
||||
(method_declaration
|
||||
name: (identifier) @name.definition.method) @definition.method
|
||||
|
||||
(method_invocation
|
||||
name: (identifier) @name.reference.call
|
||||
arguments: (argument_list) @reference.call)
|
||||
|
||||
(interface_declaration
|
||||
name: (identifier) @name.definition.interface) @definition.interface
|
||||
|
||||
(type_list
|
||||
(type_identifier) @name.reference.implementation) @reference.implementation
|
||||
|
||||
(object_creation_expression
|
||||
type: (type_identifier) @name.reference.class) @reference.class
|
||||
|
||||
(superclass (type_identifier) @name.reference.class) @reference.class
|
88
aider/queries/tree-sitter-javascript-tags.scm
Normal file
88
aider/queries/tree-sitter-javascript-tags.scm
Normal file
|
@ -0,0 +1,88 @@
|
|||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
(method_definition
|
||||
name: (property_identifier) @name.definition.method) @definition.method
|
||||
(#not-eq? @name.definition.method "constructor")
|
||||
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
|
||||
(#select-adjacent! @doc @definition.method)
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
[
|
||||
(class
|
||||
name: (_) @name.definition.class)
|
||||
(class_declaration
|
||||
name: (_) @name.definition.class)
|
||||
] @definition.class
|
||||
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
|
||||
(#select-adjacent! @doc @definition.class)
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
[
|
||||
(function
|
||||
name: (identifier) @name.definition.function)
|
||||
(function_declaration
|
||||
name: (identifier) @name.definition.function)
|
||||
(generator_function
|
||||
name: (identifier) @name.definition.function)
|
||||
(generator_function_declaration
|
||||
name: (identifier) @name.definition.function)
|
||||
] @definition.function
|
||||
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
|
||||
(#select-adjacent! @doc @definition.function)
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
(lexical_declaration
|
||||
(variable_declarator
|
||||
name: (identifier) @name.definition.function
|
||||
value: [(arrow_function) (function)]) @definition.function)
|
||||
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
|
||||
(#select-adjacent! @doc @definition.function)
|
||||
)
|
||||
|
||||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
(variable_declaration
|
||||
(variable_declarator
|
||||
name: (identifier) @name.definition.function
|
||||
value: [(arrow_function) (function)]) @definition.function)
|
||||
(#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
|
||||
(#select-adjacent! @doc @definition.function)
|
||||
)
|
||||
|
||||
(assignment_expression
|
||||
left: [
|
||||
(identifier) @name.definition.function
|
||||
(member_expression
|
||||
property: (property_identifier) @name.definition.function)
|
||||
]
|
||||
right: [(arrow_function) (function)]
|
||||
) @definition.function
|
||||
|
||||
(pair
|
||||
key: (property_identifier) @name.definition.function
|
||||
value: [(arrow_function) (function)]) @definition.function
|
||||
|
||||
(
|
||||
(call_expression
|
||||
function: (identifier) @name.reference.call) @reference.call
|
||||
(#not-match? @name.reference.call "^(require)$")
|
||||
)
|
||||
|
||||
(call_expression
|
||||
function: (member_expression
|
||||
property: (property_identifier) @name.reference.call)
|
||||
arguments: (_) @reference.call)
|
||||
|
||||
(new_expression
|
||||
constructor: (_) @name.reference.class) @reference.class
|
116
aider/queries/tree-sitter-ocaml-tags.scm
Normal file
116
aider/queries/tree-sitter-ocaml-tags.scm
Normal file
|
@ -0,0 +1,116 @@
|
|||
; Modules
|
||||
;--------
|
||||
|
||||
(
|
||||
(comment)? @doc .
|
||||
(module_definition (module_binding (module_name) @name.definition.module) @definition.module)
|
||||
(#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
|
||||
)
|
||||
|
||||
(module_path (module_name) @name.reference.module) @reference.module
|
||||
|
||||
; Modules types
|
||||
;--------------
|
||||
|
||||
(
|
||||
(comment)? @doc .
|
||||
(module_type_definition (module_type_name) @name.definition.interface) @definition.interface
|
||||
(#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
|
||||
)
|
||||
|
||||
(module_type_path (module_type_name) @name.reference.implementation) @reference.implementation
|
||||
|
||||
; Functions
|
||||
;----------
|
||||
|
||||
(
|
||||
(comment)? @doc .
|
||||
(value_definition
|
||||
[
|
||||
(let_binding
|
||||
pattern: (value_name) @name.definition.function
|
||||
(parameter))
|
||||
(let_binding
|
||||
pattern: (value_name) @name.definition.function
|
||||
body: [(fun_expression) (function_expression)])
|
||||
] @definition.function
|
||||
)
|
||||
(#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
|
||||
)
|
||||
|
||||
(
|
||||
(comment)? @doc .
|
||||
(external (value_name) @name.definition.function) @definition.function
|
||||
(#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
|
||||
)
|
||||
|
||||
(application_expression
|
||||
function: (value_path (value_name) @name.reference.call)) @reference.call
|
||||
|
||||
(infix_expression
|
||||
left: (value_path (value_name) @name.reference.call)
|
||||
(infix_operator) @reference.call
|
||||
(#eq? @reference.call "@@"))
|
||||
|
||||
(infix_expression
|
||||
(infix_operator) @reference.call
|
||||
right: (value_path (value_name) @name.reference.call)
|
||||
(#eq? @reference.call "|>"))
|
||||
|
||||
; Operator
|
||||
;---------
|
||||
|
||||
(
|
||||
(comment)? @doc .
|
||||
(value_definition
|
||||
(let_binding
|
||||
pattern: (parenthesized_operator [
|
||||
(prefix_operator)
|
||||
(infix_operator)
|
||||
(hash_operator)
|
||||
(indexing_operator)
|
||||
(let_operator)
|
||||
(and_operator)
|
||||
(match_operator)
|
||||
] @name.definition.function)) @definition.function)
|
||||
(#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
|
||||
)
|
||||
|
||||
[
|
||||
(prefix_operator)
|
||||
(sign_operator)
|
||||
(infix_operator)
|
||||
(hash_operator)
|
||||
(indexing_operator)
|
||||
(let_operator)
|
||||
(and_operator)
|
||||
(match_operator)
|
||||
] @name.reference.call @reference.call
|
||||
|
||||
; Classes
|
||||
;--------
|
||||
|
||||
(
|
||||
(comment)? @doc .
|
||||
[
|
||||
(class_definition (class_binding (class_name) @name.definition.class) @definition.class)
|
||||
(class_type_definition (class_type_binding (class_type_name) @name.definition.class) @definition.class)
|
||||
]
|
||||
(#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
|
||||
)
|
||||
|
||||
[
|
||||
(class_path (class_name) @name.reference.class)
|
||||
(class_type_path (class_type_name) @name.reference.class)
|
||||
] @reference.class
|
||||
|
||||
; Methods
|
||||
;--------
|
||||
|
||||
(
|
||||
(comment)? @doc .
|
||||
(method_definition (method_name) @name.definition.method) @definition.method
|
||||
(#strip! @doc "^\\(\\*\\*?\\s*|\\s\\*\\)$")
|
||||
)
|
||||
|
||||
(method_invocation (method_name) @name.reference.call) @reference.call
|
26
aider/queries/tree-sitter-php-tags.scm
Normal file
26
aider/queries/tree-sitter-php-tags.scm
Normal file
|
@ -0,0 +1,26 @@
|
|||
(class_declaration
|
||||
name: (name) @name.definition.class) @definition.class
|
||||
|
||||
(function_definition
|
||||
name: (name) @name.definition.function) @definition.function
|
||||
|
||||
(method_declaration
|
||||
name: (name) @name.definition.function) @definition.function
|
||||
|
||||
(object_creation_expression
|
||||
[
|
||||
(qualified_name (name) @name.reference.class)
|
||||
(variable_name (name) @name.reference.class)
|
||||
]) @reference.class
|
||||
|
||||
(function_call_expression
|
||||
function: [
|
||||
(qualified_name (name) @name.reference.call)
|
||||
(variable_name (name)) @name.reference.call
|
||||
]) @reference.call
|
||||
|
||||
(scoped_call_expression
|
||||
name: (name) @name.reference.call) @reference.call
|
||||
|
||||
(member_call_expression
|
||||
name: (name) @name.reference.call) @reference.call
|
12
aider/queries/tree-sitter-python-tags.scm
Normal file
12
aider/queries/tree-sitter-python-tags.scm
Normal file
|
@ -0,0 +1,12 @@
|
|||
(class_definition
|
||||
name: (identifier) @name.definition.class) @definition.class
|
||||
|
||||
(function_definition
|
||||
name: (identifier) @name.definition.function) @definition.function
|
||||
|
||||
(call
|
||||
function: [
|
||||
(identifier) @name.reference.call
|
||||
(attribute
|
||||
attribute: (identifier) @name.reference.call)
|
||||
]) @reference.call
|
26
aider/queries/tree-sitter-ql-tags.scm
Normal file
26
aider/queries/tree-sitter-ql-tags.scm
Normal file
|
@ -0,0 +1,26 @@
|
|||
(classlessPredicate
|
||||
name: (predicateName) @name.definition.function) @definition.function
|
||||
|
||||
(memberPredicate
|
||||
name: (predicateName) @name.definition.method) @definition.method
|
||||
|
||||
(aritylessPredicateExpr
|
||||
name: (literalId) @name.reference.call) @reference.call
|
||||
|
||||
(module
|
||||
name: (moduleName) @name.definition.module) @definition.module
|
||||
|
||||
(dataclass
|
||||
name: (className) @name.definition.class) @definition.class
|
||||
|
||||
(datatype
|
||||
name: (className) @name.definition.class) @definition.class
|
||||
|
||||
(datatypeBranch
|
||||
name: (className) @name.definition.class) @definition.class
|
||||
|
||||
(qualifiedRhs
|
||||
name: (predicateName) @name.reference.call) @reference.call
|
||||
|
||||
(typeExpr
|
||||
name: (className) @name.reference.type) @reference.type
|
0
aider/queries/tree-sitter-r-tags.scm
Normal file
0
aider/queries/tree-sitter-r-tags.scm
Normal file
64
aider/queries/tree-sitter-ruby-tags.scm
Normal file
64
aider/queries/tree-sitter-ruby-tags.scm
Normal file
|
@ -0,0 +1,64 @@
|
|||
; Method definitions
|
||||
|
||||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
[
|
||||
(method
|
||||
name: (_) @name.definition.method) @definition.method
|
||||
(singleton_method
|
||||
name: (_) @name.definition.method) @definition.method
|
||||
]
|
||||
(#strip! @doc "^#\\s*")
|
||||
(#select-adjacent! @doc @definition.method)
|
||||
)
|
||||
|
||||
(alias
|
||||
name: (_) @name.definition.method) @definition.method
|
||||
|
||||
(setter
|
||||
(identifier) @ignore)
|
||||
|
||||
; Class definitions
|
||||
|
||||
(
|
||||
(comment)* @doc
|
||||
.
|
||||
[
|
||||
(class
|
||||
name: [
|
||||
(constant) @name.definition.class
|
||||
(scope_resolution
|
||||
name: (_) @name.definition.class)
|
||||
]) @definition.class
|
||||
(singleton_class
|
||||
value: [
|
||||
(constant) @name.definition.class
|
||||
(scope_resolution
|
||||
name: (_) @name.definition.class)
|
||||
]) @definition.class
|
||||
]
|
||||
(#strip! @doc "^#\\s*")
|
||||
(#select-adjacent! @doc @definition.class)
|
||||
)
|
||||
|
||||
; Module definitions
|
||||
|
||||
(
|
||||
(module
|
||||
name: [
|
||||
(constant) @name.definition.module
|
||||
(scope_resolution
|
||||
name: (_) @name.definition.module)
|
||||
]) @definition.module
|
||||
)
|
||||
|
||||
; Calls
|
||||
|
||||
(call method: (identifier) @name.reference.call) @reference.call
|
||||
|
||||
(
|
||||
[(identifier) (constant)] @name.reference.call @reference.call
|
||||
(#is-not? local)
|
||||
(#not-match? @name.reference.call "^(lambda|load|require|require_relative|__FILE__|__LINE__)$")
|
||||
)
|
60
aider/queries/tree-sitter-rust-tags.scm
Normal file
60
aider/queries/tree-sitter-rust-tags.scm
Normal file
|
@ -0,0 +1,60 @@
|
|||
; ADT definitions
|
||||
|
||||
(struct_item
|
||||
name: (type_identifier) @name.definition.class) @definition.class
|
||||
|
||||
(enum_item
|
||||
name: (type_identifier) @name.definition.class) @definition.class
|
||||
|
||||
(union_item
|
||||
name: (type_identifier) @name.definition.class) @definition.class
|
||||
|
||||
; type aliases
|
||||
|
||||
(type_item
|
||||
name: (type_identifier) @name.definition.class) @definition.class
|
||||
|
||||
; method definitions
|
||||
|
||||
(declaration_list
|
||||
(function_item
|
||||
name: (identifier) @name.definition.method)) @definition.method
|
||||
|
||||
; function definitions
|
||||
|
||||
(function_item
|
||||
name: (identifier) @name.definition.function) @definition.function
|
||||
|
||||
; trait definitions
|
||||
(trait_item
|
||||
name: (type_identifier) @name.definition.interface) @definition.interface
|
||||
|
||||
; module definitions
|
||||
(mod_item
|
||||
name: (identifier) @name.definition.module) @definition.module
|
||||
|
||||
; macro definitions
|
||||
|
||||
(macro_definition
|
||||
name: (identifier) @name.definition.macro) @definition.macro
|
||||
|
||||
; references
|
||||
|
||||
(call_expression
|
||||
function: (identifier) @name.reference.call) @reference.call
|
||||
|
||||
(call_expression
|
||||
function: (field_expression
|
||||
field: (field_identifier) @name.reference.call)) @reference.call
|
||||
|
||||
(macro_invocation
|
||||
macro: (identifier) @name.reference.call) @reference.call
|
||||
|
||||
; implementations
|
||||
|
||||
(impl_item
|
||||
trait: (type_identifier) @name.reference.implementation) @reference.implementation
|
||||
|
||||
(impl_item
|
||||
type: (type_identifier) @name.reference.implementation
|
||||
!trait) @reference.implementation
|
23
aider/queries/tree-sitter-typescript-tags.scm
Normal file
23
aider/queries/tree-sitter-typescript-tags.scm
Normal file
|
@ -0,0 +1,23 @@
|
|||
(function_signature
|
||||
name: (identifier) @name.definition.function) @definition.function
|
||||
|
||||
(method_signature
|
||||
name: (property_identifier) @name.definition.method) @definition.method
|
||||
|
||||
(abstract_method_signature
|
||||
name: (property_identifier) @name.definition.method) @definition.method
|
||||
|
||||
(abstract_class_declaration
|
||||
name: (type_identifier) @name.definition.class) @definition.class
|
||||
|
||||
(module
|
||||
name: (identifier) @name.definition.module) @definition.module
|
||||
|
||||
(interface_declaration
|
||||
name: (type_identifier) @name.definition.interface) @definition.interface
|
||||
|
||||
(type_annotation
|
||||
(type_identifier) @name.reference.type) @reference.type
|
||||
|
||||
(new_expression
|
||||
constructor: (identifier) @name.reference.class) @reference.class
|
410
aider/repomap.py
410
aider/repomap.py
|
@ -1,79 +1,31 @@
|
|||
import colorsys
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from collections import Counter, defaultdict
|
||||
from collections import Counter, defaultdict, namedtuple
|
||||
from pathlib import Path
|
||||
|
||||
import networkx as nx
|
||||
import pkg_resources
|
||||
from diskcache import Cache
|
||||
from grep_ast import TreeContext, filename_to_lang
|
||||
from pygments.lexers import guess_lexer_for_filename
|
||||
from pygments.token import Token
|
||||
from pygments.util import ClassNotFound
|
||||
from tqdm import tqdm
|
||||
from tree_sitter_languages import get_language, get_parser
|
||||
|
||||
from aider import models
|
||||
|
||||
from .dump import dump # noqa: F402
|
||||
|
||||
|
||||
def to_tree(tags):
    """Render sequences of strings as a tab-indented tree.

    The tags are sorted, then emitted one item per line.  For each tag, the
    leading items it shares with the previously emitted tag are omitted and
    the remaining items are indented with one tab per depth level.

    Args:
        tags: An iterable of sequences (lists or tuples) of strings.

    Returns:
        The rendered tree as a single string, or "" when ``tags`` is empty.
    """
    if not tags:
        return ""

    tab = "\t"
    lines = []
    previous = []  # items of the tag emitted on the prior iteration

    for tag in sorted(tags):
        tag = list(tag)

        # Length of the shared leading run; zip stops at the shorter
        # sequence, so tags of differing lengths never index out of range.
        num_common = 0
        for old_item, new_item in zip(previous, tag):
            if old_item != new_item:
                break
            num_common += 1

        for depth, item in enumerate(tag[num_common:], start=num_common):
            lines.append(tab * depth + item + "\n")

        previous = tag

    # Join once instead of repeatedly extending a string.
    return "".join(lines)
|
||||
|
||||
|
||||
def fname_to_components(fname, with_colon):
    """Split a path into per-directory components plus the final name.

    Every component except the last keeps a trailing ``os.sep``; the last
    component gets a ":" suffix when ``with_colon`` is true.
    """
    # str.split always yields at least one element, so the unpack is safe.
    *parents, leaf = fname.split(os.sep)
    components = [parent + os.sep for parent in parents]
    components.append(leaf + ":" if with_colon else leaf)
    return components
|
||||
Tag = namedtuple("Tag", "rel_fname fname line name kind".split())
|
||||
|
||||
|
||||
class RepoMap:
|
||||
CACHE_VERSION = 1
|
||||
ctags_cmd = [
|
||||
"ctags",
|
||||
"--fields=+S",
|
||||
"--extras=-F",
|
||||
"--output-format=json",
|
||||
"--output-encoding=utf-8",
|
||||
]
|
||||
IDENT_CACHE_DIR = f".aider.ident.cache.v{CACHE_VERSION}"
|
||||
CACHE_VERSION = 3
|
||||
TAGS_CACHE_DIR = f".aider.tags.cache.v{CACHE_VERSION}"
|
||||
|
||||
ctags_disabled_reason = "ctags not initialized"
|
||||
|
||||
cache_missing = False
|
||||
|
||||
warned_files = set()
|
||||
|
@ -94,26 +46,27 @@ class RepoMap:
|
|||
root = os.getcwd()
|
||||
self.root = root
|
||||
|
||||
self.load_ident_cache()
|
||||
self.load_tags_cache()
|
||||
|
||||
self.max_map_tokens = map_tokens
|
||||
self.has_ctags = self.check_for_ctags()
|
||||
|
||||
if map_tokens > 0 and self.has_ctags:
|
||||
self.use_ctags = True
|
||||
else:
|
||||
self.use_ctags = False
|
||||
|
||||
self.tokenizer = main_model.tokenizer
|
||||
self.repo_content_prefix = repo_content_prefix
|
||||
|
||||
def get_repo_map(self, chat_files, other_files):
|
||||
res = self.choose_files_listing(chat_files, other_files)
|
||||
if not res:
|
||||
if self.max_map_tokens <= 0:
|
||||
return
|
||||
|
||||
files_listing, ctags_msg = res
|
||||
if not other_files:
|
||||
return
|
||||
|
||||
files_listing = self.get_ranked_tags_map(chat_files, other_files)
|
||||
if not files_listing:
|
||||
return
|
||||
|
||||
num_tokens = self.token_count(files_listing)
|
||||
if self.verbose:
|
||||
self.io.tool_output(f"Repo-map: {num_tokens/1024:.1f} k-tokens")
|
||||
|
||||
if chat_files:
|
||||
other = "other "
|
||||
|
@ -121,10 +74,7 @@ class RepoMap:
|
|||
other = ""
|
||||
|
||||
if self.repo_content_prefix:
|
||||
repo_content = self.repo_content_prefix.format(
|
||||
other=other,
|
||||
ctags_msg=ctags_msg,
|
||||
)
|
||||
repo_content = self.repo_content_prefix.format(other=other)
|
||||
else:
|
||||
repo_content = ""
|
||||
|
||||
|
@ -132,39 +82,6 @@ class RepoMap:
|
|||
|
||||
return repo_content
|
||||
|
||||
def choose_files_listing(self, chat_files, other_files):
    """Pick the repo listing to send along with its ctags message.

    Returns a ``(files_listing, ctags_msg)`` tuple, or None when the map
    is disabled (``max_map_tokens <= 0``), when there are no other files,
    or when the simple listing does not fit under ``max_map_tokens``.
    """
    if self.max_map_tokens <= 0 or not other_files:
        return None

    if self.use_ctags:
        ranked_listing = self.get_ranked_tags_map(chat_files, other_files)
        if ranked_listing:
            ranked_tokens = self.token_count(ranked_listing)
            if self.verbose:
                self.io.tool_output(f"ctags map: {ranked_tokens/1024:.1f} k-tokens")
            return ranked_listing, " with selected ctags info"

    # Either ctags is off or it produced nothing: fall back to the plain tree.
    simple_listing = self.get_simple_files_map(other_files)
    simple_tokens = self.token_count(simple_listing)
    if self.verbose:
        self.io.tool_output(f"simple map: {simple_tokens/1024:.1f} k-tokens")

    if simple_tokens >= self.max_map_tokens:
        return None
    return simple_listing, ""
|
||||
|
||||
def get_simple_files_map(self, other_files):
|
||||
fnames = []
|
||||
for fname in other_files:
|
||||
fname = self.get_rel_fname(fname)
|
||||
fname = fname_to_components(fname, False)
|
||||
fnames.append(fname)
|
||||
|
||||
return to_tree(fnames)
|
||||
|
||||
def token_count(self, string):
|
||||
return len(self.tokenizer.encode(string))
|
||||
|
||||
|
@ -175,66 +92,6 @@ class RepoMap:
|
|||
path = os.path.relpath(path, self.root)
|
||||
return [path + ":"]
|
||||
|
||||
def run_ctags(self, filename):
|
||||
# Check if the file is in the cache and if the modification time has not changed
|
||||
file_mtime = self.get_mtime(filename)
|
||||
if file_mtime is None:
|
||||
return []
|
||||
|
||||
cache_key = filename
|
||||
if cache_key in self.TAGS_CACHE and self.TAGS_CACHE[cache_key]["mtime"] == file_mtime:
|
||||
return self.TAGS_CACHE[cache_key]["data"]
|
||||
|
||||
cmd = self.ctags_cmd + [
|
||||
f"--input-encoding={self.io.encoding}",
|
||||
filename,
|
||||
]
|
||||
output = subprocess.check_output(cmd, stderr=subprocess.PIPE).decode("utf-8")
|
||||
output_lines = output.splitlines()
|
||||
|
||||
data = []
|
||||
for line in output_lines:
|
||||
try:
|
||||
data.append(json.loads(line))
|
||||
except json.decoder.JSONDecodeError as err:
|
||||
self.io.tool_error(f"Error parsing ctags output: {err}")
|
||||
self.io.tool_error(repr(line))
|
||||
|
||||
# Update the cache
|
||||
self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
|
||||
self.save_tags_cache()
|
||||
return data
|
||||
|
||||
def check_for_ctags(self):
|
||||
try:
|
||||
executable = self.ctags_cmd[0]
|
||||
cmd = [executable, "--version"]
|
||||
output = subprocess.check_output(cmd, stderr=subprocess.PIPE).decode("utf-8")
|
||||
output = output.lower()
|
||||
|
||||
cmd = " ".join(cmd)
|
||||
|
||||
if "universal ctags" not in output:
|
||||
self.ctags_disabled_reason = f"{cmd} does not claim to be universal ctags"
|
||||
return
|
||||
if "+json" not in output:
|
||||
self.ctags_disabled_reason = f"{cmd} does not list +json support"
|
||||
return
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
hello_py = os.path.join(tempdir, "hello.py")
|
||||
with open(hello_py, "w", encoding="utf-8") as f:
|
||||
f.write("def hello():\n print('Hello, world!')\n")
|
||||
self.run_ctags(hello_py)
|
||||
except FileNotFoundError:
|
||||
self.ctags_disabled_reason = f"{executable} executable not found"
|
||||
return
|
||||
except Exception as err:
|
||||
self.ctags_disabled_reason = f"error running universal-ctags: {err}"
|
||||
return
|
||||
|
||||
return True
|
||||
|
||||
def load_tags_cache(self):
|
||||
path = Path(self.root) / self.TAGS_CACHE_DIR
|
||||
if not path.exists():
|
||||
|
@ -244,52 +101,103 @@ class RepoMap:
|
|||
def save_tags_cache(self):
|
||||
pass
|
||||
|
||||
def load_ident_cache(self):
|
||||
path = Path(self.root) / self.IDENT_CACHE_DIR
|
||||
if not path.exists():
|
||||
self.cache_missing = True
|
||||
self.IDENT_CACHE = Cache(path)
|
||||
|
||||
def save_ident_cache(self):
|
||||
pass
|
||||
|
||||
def get_mtime(self, fname):
|
||||
try:
|
||||
return os.path.getmtime(fname)
|
||||
except FileNotFoundError:
|
||||
self.io.tool_error(f"File not found error: {fname}")
|
||||
|
||||
def get_name_identifiers(self, fname, uniq=True):
|
||||
def get_tags(self, fname, rel_fname):
|
||||
# Check if the file is in the cache and if the modification time has not changed
|
||||
file_mtime = self.get_mtime(fname)
|
||||
if file_mtime is None:
|
||||
return set()
|
||||
return []
|
||||
|
||||
cache_key = fname
|
||||
if cache_key in self.IDENT_CACHE and self.IDENT_CACHE[cache_key]["mtime"] == file_mtime:
|
||||
idents = self.IDENT_CACHE[cache_key]["data"]
|
||||
else:
|
||||
idents = self.get_name_identifiers_uncached(fname)
|
||||
self.IDENT_CACHE[cache_key] = {"mtime": file_mtime, "data": idents}
|
||||
self.save_ident_cache()
|
||||
if cache_key in self.TAGS_CACHE and self.TAGS_CACHE[cache_key]["mtime"] == file_mtime:
|
||||
return self.TAGS_CACHE[cache_key]["data"]
|
||||
|
||||
if uniq:
|
||||
idents = set(idents)
|
||||
return idents
|
||||
# miss!
|
||||
|
||||
def get_name_identifiers_uncached(self, fname):
|
||||
content = self.io.read_text(fname)
|
||||
if content is None:
|
||||
return list()
|
||||
data = list(self.get_tags_raw(fname, rel_fname))
|
||||
|
||||
# Update the cache
|
||||
self.TAGS_CACHE[cache_key] = {"mtime": file_mtime, "data": data}
|
||||
self.save_tags_cache()
|
||||
return data
|
||||
|
||||
def get_tags_raw(self, fname, rel_fname):
|
||||
lang = filename_to_lang(fname)
|
||||
if not lang:
|
||||
return
|
||||
|
||||
language = get_language(lang)
|
||||
parser = get_parser(lang)
|
||||
|
||||
# Load the tags queries
|
||||
scm_fname = pkg_resources.resource_filename(
|
||||
__name__, os.path.join("queries", f"tree-sitter-{lang}-tags.scm")
|
||||
)
|
||||
query_scm = Path(scm_fname)
|
||||
if not query_scm.exists():
|
||||
return
|
||||
query_scm = query_scm.read_text()
|
||||
|
||||
code = Path(fname).read_text(encoding=self.io.encoding)
|
||||
tree = parser.parse(bytes(code, "utf-8"))
|
||||
|
||||
# Run the tags queries
|
||||
query = language.query(query_scm)
|
||||
captures = query.captures(tree.root_node)
|
||||
|
||||
captures = list(captures)
|
||||
|
||||
saw = set()
|
||||
for node, tag in captures:
|
||||
if tag.startswith("name.definition."):
|
||||
kind = "def"
|
||||
elif tag.startswith("name.reference."):
|
||||
kind = "ref"
|
||||
else:
|
||||
continue
|
||||
|
||||
saw.add(kind)
|
||||
|
||||
result = Tag(
|
||||
rel_fname=rel_fname,
|
||||
fname=fname,
|
||||
name=node.text.decode("utf-8"),
|
||||
kind=kind,
|
||||
line=node.start_point[0],
|
||||
)
|
||||
|
||||
yield result
|
||||
|
||||
if "ref" in saw:
|
||||
return
|
||||
if "def" not in saw:
|
||||
return
|
||||
|
||||
# We saw defs, without any refs
|
||||
# Some tags files only provide defs (cpp, for example)
|
||||
# Use pygments to backfill refs
|
||||
|
||||
try:
|
||||
lexer = guess_lexer_for_filename(fname, content)
|
||||
lexer = guess_lexer_for_filename(fname, code)
|
||||
except ClassNotFound:
|
||||
return list()
|
||||
return
|
||||
|
||||
# lexer.get_tokens_unprocessed() returns (char position in file, token type, token string)
|
||||
tokens = list(lexer.get_tokens_unprocessed(content))
|
||||
res = [token[2] for token in tokens if token[1] in Token.Name]
|
||||
return res
|
||||
tokens = list(lexer.get_tokens(code))
|
||||
tokens = [token[1] for token in tokens if token[0] in Token.Name]
|
||||
|
||||
for token in tokens:
|
||||
yield Tag(
|
||||
rel_fname=rel_fname,
|
||||
fname=fname,
|
||||
name=token,
|
||||
kind="ref",
|
||||
line=-1,
|
||||
)
|
||||
|
||||
def get_ranked_tags(self, chat_fnames, other_fnames):
|
||||
defines = defaultdict(set)
|
||||
|
@ -327,34 +235,25 @@ class RepoMap:
|
|||
personalization[rel_fname] = 1.0
|
||||
chat_rel_fnames.add(rel_fname)
|
||||
|
||||
data = self.run_ctags(fname)
|
||||
tags = list(self.get_tags(fname, rel_fname))
|
||||
if tags is None:
|
||||
continue
|
||||
|
||||
for tag in data:
|
||||
ident = tag["name"]
|
||||
defines[ident].add(rel_fname)
|
||||
for tag in tags:
|
||||
if tag.kind == "def":
|
||||
defines[tag.name].add(rel_fname)
|
||||
key = (rel_fname, tag.name)
|
||||
definitions[key].add(tag)
|
||||
|
||||
scope = tag.get("scope")
|
||||
kind = tag.get("kind")
|
||||
name = tag.get("name")
|
||||
signature = tag.get("signature")
|
||||
if tag.kind == "ref":
|
||||
references[tag.name].append(rel_fname)
|
||||
|
||||
last = name
|
||||
if signature:
|
||||
last += " " + signature
|
||||
##
|
||||
# dump(defines)
|
||||
# dump(references)
|
||||
|
||||
res = [rel_fname]
|
||||
if scope:
|
||||
res.append(scope)
|
||||
res += [kind, last]
|
||||
|
||||
key = (rel_fname, ident)
|
||||
definitions[key].add(tuple(res))
|
||||
# definitions[key].add((rel_fname,))
|
||||
|
||||
idents = self.get_name_identifiers(fname, uniq=False)
|
||||
for ident in idents:
|
||||
# dump("ref", fname, ident)
|
||||
references[ident].append(rel_fname)
|
||||
if not references:
|
||||
references = dict((k, list(v)) for k, v in defines.items())
|
||||
|
||||
idents = set(defines.keys()).intersection(set(references.keys()))
|
||||
|
||||
|
@ -364,10 +263,13 @@ class RepoMap:
|
|||
definers = defines[ident]
|
||||
for referencer, num_refs in Counter(references[ident]).items():
|
||||
for definer in definers:
|
||||
if referencer == definer:
|
||||
continue
|
||||
# if referencer == definer:
|
||||
# continue
|
||||
G.add_edge(referencer, definer, weight=num_refs, ident=ident)
|
||||
|
||||
if not references:
|
||||
pass
|
||||
|
||||
if personalization:
|
||||
pers_args = dict(personalization=personalization, dangling=personalization)
|
||||
else:
|
||||
|
@ -391,6 +293,9 @@ class RepoMap:
|
|||
|
||||
ranked_tags = []
|
||||
ranked_definitions = sorted(ranked_definitions.items(), reverse=True, key=lambda x: x[1])
|
||||
|
||||
# dump(ranked_definitions)
|
||||
|
||||
for (fname, ident), rank in ranked_definitions:
|
||||
# print(f"{rank:.03f} {fname} {ident}")
|
||||
if fname in chat_rel_fnames:
|
||||
|
@ -428,9 +333,8 @@ class RepoMap:
|
|||
|
||||
while lower_bound <= upper_bound:
|
||||
middle = (lower_bound + upper_bound) // 2
|
||||
tree = to_tree(ranked_tags[:middle])
|
||||
tree = self.to_tree(ranked_tags[:middle])
|
||||
num_tokens = self.token_count(tree)
|
||||
# dump(middle, num_tokens)
|
||||
|
||||
if num_tokens < self.max_map_tokens:
|
||||
best_tree = tree
|
||||
|
@ -440,17 +344,63 @@ class RepoMap:
|
|||
|
||||
return best_tree
|
||||
|
||||
def to_tree(self, tags):
|
||||
if not tags:
|
||||
return ""
|
||||
|
||||
def find_py_files(directory):
|
||||
tags = sorted(tags)
|
||||
|
||||
cur_fname = None
|
||||
context = None
|
||||
output = ""
|
||||
|
||||
# add a bogus tag at the end so we trip the this_fname != cur_fname...
|
||||
dummy_tag = (None,)
|
||||
for tag in tags + [dummy_tag]:
|
||||
this_fname = tag[0]
|
||||
|
||||
# ... here ... to output the final real entry in the list
|
||||
if this_fname != cur_fname:
|
||||
if context:
|
||||
context.add_context()
|
||||
output += "\n"
|
||||
output += cur_fname + ":\n"
|
||||
output += context.format()
|
||||
context = None
|
||||
elif cur_fname:
|
||||
output += "\n" + cur_fname + "\n"
|
||||
|
||||
if type(tag) is Tag:
|
||||
context = TreeContext(
|
||||
tag.rel_fname,
|
||||
Path(tag.fname).read_text(self.io.encoding),
|
||||
color=False,
|
||||
line_number=False,
|
||||
child_context=False,
|
||||
last_line=False,
|
||||
margin=0,
|
||||
mark_lois=False,
|
||||
loi_pad=0,
|
||||
header_max=3,
|
||||
show_top_of_file_parent_scope=False,
|
||||
)
|
||||
cur_fname = this_fname
|
||||
|
||||
if context:
|
||||
context.add_lines_of_interest([tag.line])
|
||||
|
||||
return output
|
||||
|
||||
|
||||
def find_src_files(directory):
|
||||
if not os.path.isdir(directory):
|
||||
return [directory]
|
||||
|
||||
py_files = []
|
||||
src_files = []
|
||||
for root, dirs, files in os.walk(directory):
|
||||
for file in files:
|
||||
if file.endswith(".py"):
|
||||
py_files.append(os.path.join(root, file))
|
||||
return py_files
|
||||
src_files.append(os.path.join(root, file))
|
||||
return src_files
|
||||
|
||||
|
||||
def get_random_color():
|
||||
|
@ -465,15 +415,13 @@ if __name__ == "__main__":
|
|||
|
||||
chat_fnames = []
|
||||
other_fnames = []
|
||||
for dname in sys.argv[1:]:
|
||||
if ".venv" in dname:
|
||||
other_fnames += find_py_files(dname)
|
||||
for fname in sys.argv[1:]:
|
||||
if Path(fname).is_dir():
|
||||
chat_fnames += find_src_files(fname)
|
||||
else:
|
||||
chat_fnames += find_py_files(dname)
|
||||
chat_fnames.append(fname)
|
||||
|
||||
root = os.path.commonpath(chat_fnames)
|
||||
|
||||
rm = RepoMap(root=root)
|
||||
rm = RepoMap(root=".")
|
||||
repo_map = rm.get_ranked_tags_map(chat_fnames, other_fnames)
|
||||
|
||||
dump(len(repo_map))
|
||||
|
|
BIN
assets/robot-ast.png
Normal file
BIN
assets/robot-ast.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 671 KiB |
|
@ -3,6 +3,14 @@
|
|||
|
||||

|
||||
|
||||
## Updated
|
||||
|
||||
Aider no longer uses ctags to build a repo map.
|
||||
Please see the newer article about
|
||||
[using tree-sitter to build a better repo map](https://aider.chat/docs/repomap.html).
|
||||
|
||||
-------
|
||||
|
||||
GPT-4 is extremely useful for "self-contained" coding tasks,
|
||||
like generating brand new code or modifying a pure function
|
||||
that has no dependencies.
|
||||
|
|
|
@ -20,11 +20,11 @@ Aider is tightly integrated with git, which makes it easy to:
|
|||
- Manage a series of GPT's changes on a git branch
|
||||
|
||||
Aider specifically uses git in these ways:
|
||||
|
||||
|
||||
- It asks to create a git repo if you launch it in a directory without one.
|
||||
- Whenever GPT edits a file, aider commits those changes with a descriptive commit message. This makes it easy to undo or review GPT's changes.
|
||||
- Aider takes special care if GPT tries to edit files that already have uncommitted changes (dirty files). Aider will first commit any preexisting changes with a descriptive commit message. This keeps your edits separate from GPT's edits, and makes sure you never lose your work if GPT makes an inappropriate change.
|
||||
|
||||
|
||||
Aider also allows you to use in-chat commands to `/diff` or `/undo` the last change made by GPT.
|
||||
To do more complex management of your git history, you can use raw `git` commands,
|
||||
either by using `/git` within the chat, or with standard git tools outside of aider.
|
||||
|
@ -58,7 +58,7 @@ They have large context windows, better coding skills and
|
|||
they generally obey the instructions in the system prompt.
|
||||
GPT-4 is able to structure code edits as simple "diffs"
|
||||
and use a
|
||||
[repository map](https://aider.chat/docs/ctags.html)
|
||||
[repository map](https://aider.chat/docs/repomap.html)
|
||||
to improve its ability to make changes in larger codebases.
|
||||
|
||||
GPT-3.5 is supported more experimentally
|
||||
|
|
242
docs/repomap.md
Normal file
242
docs/repomap.md
Normal file
|
@ -0,0 +1,242 @@
|
|||
|
||||
# Building a better repository map with tree sitter
|
||||
|
||||

|
||||
|
||||
GPT-4 is extremely useful for "self-contained" coding tasks,
|
||||
like generating or modifying a simple function
|
||||
that has no dependencies. Tools like GitHub Copilot serve
|
||||
these simple coding tasks well.
|
||||
|
||||
But it's much more difficult for humans or AIs to make
|
||||
complex changes in a larger, pre-existing codebase.
|
||||
To do this successfully, you need to:
|
||||
|
||||
1. Find the code that needs to be changed.
|
||||
2. Understand how that code relates to the rest of the codebase.
|
||||
3. Make the correct code change to accomplish the task.
|
||||
|
||||
GPT-4 is actually great at making the code changes (3),
|
||||
once you tell it which files need to be changed (1)
|
||||
and show it how they fit into the rest of the codebase (2).
|
||||
|
||||
This article is going to focus on the problem of "code context" (2), where we need to:
|
||||
|
||||
- Help GPT understand the overall codebase, so that it
|
||||
can decipher the meaning of code with complex dependencies and generate
|
||||
new code that respects and utilizes existing abstractions.
|
||||
- Convey all of this "code context" to GPT in an
|
||||
efficient manner that fits within GPT's context window.
|
||||
|
||||
To address these issues, aider
|
||||
sends GPT a **concise map of your whole git repository**
|
||||
that includes
|
||||
the most important classes and functions along with their types and call signatures.
|
||||
|
||||
This **repository map** is now built automatically by using
|
||||
[tree-sitter](https://tree-sitter.github.io/tree-sitter/)
|
||||
to extract symbol definitions from source files.
|
||||
Tree-sitter is used by many IDEs, editors and LSP servers to
|
||||
help humans search and navigate large codebases.
|
||||
Aider now uses it to help GPT better comprehend, navigate
|
||||
and edit code in larger repos.
|
||||
|
||||
*To code with GPT-4 using the techniques discussed here, just install [aider](https://aider.chat/docs/install.html).*
|
||||
|
||||
## The problem: code context
|
||||
|
||||
GPT-4 is great at "self contained" coding tasks, like writing or
|
||||
modifying a pure function with no external dependencies.
|
||||
GPT can easily handle requests like "write a
|
||||
Fibonacci function" or "rewrite this loop using list
|
||||
comprehensions", because they require no context beyond the code
|
||||
being discussed.
|
||||
|
||||
Most real code is not pure and self-contained, it is intertwined with
|
||||
and depends on code from many different files in a repo.
|
||||
If you ask GPT to "switch all the print statements in class Foo to
|
||||
use the BarLog logging system", it needs to see and
|
||||
modify the code in the Foo class, but it also needs to understand
|
||||
how to use
|
||||
the project's BarLog
|
||||
subsystem.
|
||||
|
||||
A simple solution is to **send the entire codebase** to GPT along with
|
||||
each change request. Now GPT has all the context! But this won't work
|
||||
for even moderately
|
||||
sized repos, because they won't fit into the context window.
|
||||
|
||||
A better approach is to be selective,
|
||||
and **hand pick which files to send**.
|
||||
For the example above, you could send the file that
|
||||
contains the Foo class
|
||||
and the file that contains the BarLog logging subsystem.
|
||||
This works pretty well, and is supported by aider -- you
|
||||
can manually specify which files to "add to the chat" you are having with GPT.
|
||||
|
||||
But sending whole files is a bulky way to send code context,
|
||||
wasting the precious context window.
|
||||
GPT doesn't need to see the entire implementation of BarLog,
|
||||
it just needs to understand it well enough to use it.
|
||||
You may quickly run out of context window by sending
|
||||
full files of code
|
||||
just to convey context.
|
||||
|
||||
Aider also strives to reduce the manual work involved in
|
||||
coding with AI, so it would be better if we could automatically
|
||||
provide the needed code context.
|
||||
|
||||
## Using a repo map to provide context
|
||||
|
||||
Aider sends a **repo map** to GPT along with
|
||||
each request from the user to make a code change.
|
||||
The map contains a list of the files in the
|
||||
repo, along with the key symbols which are defined in each file.
|
||||
It shows how each of these symbols is defined in the
|
||||
source code, by including the critical lines of code for each definition.
|
||||
|
||||
Here's a
|
||||
sample of the map of the aider repo, just showing the maps of
|
||||
[io.py](https://github.com/paul-gauthier/aider/blob/main/aider/io.py)
|
||||
and
|
||||
[main.py](https://github.com/paul-gauthier/aider/blob/main/aider/main.py)
|
||||
:
|
||||
|
||||
```
|
||||
aider/io.py:
|
||||
⋮...
|
||||
│class InputOutput:
|
||||
⋮...
|
||||
│ def read_text(self, filename):
|
||||
⋮...
|
||||
│ def write_text(self, filename, content):
|
||||
⋮...
|
||||
│ def confirm_ask(self, question, default="y"):
|
||||
⋮...
|
||||
│ def tool_error(self, message):
|
||||
⋮...
|
||||
│ def tool_output(self, *messages, log_only=False):
|
||||
⋮...
|
||||
|
||||
aider/main.py:
|
||||
⋮...
|
||||
│def main(argv=None, input=None, output=None, force_git_root=None):
|
||||
⋮...
|
||||
```
|
||||
|
||||
Mapping out the repo like this provides some key benefits:
|
||||
|
||||
- GPT can see classes, methods and function signatures from everywhere in the repo. This alone may give it enough context to solve many tasks. For example, it can probably figure out how to use the API exported from a module just based on the details shown in the map.
|
||||
- If it needs to see more code, GPT can use the map to figure out by itself which files it needs to look at in more detail. GPT will then ask to see these specific files, and aider will automatically add them to the chat context.
|
||||
|
||||
Of course, for large repositories even just the repo map might be too large
|
||||
for GPT's context window.
|
||||
Aider solves this problem by sending just the **most relevant**
|
||||
portions of the repo map.
|
||||
It does this by analyzing the full repo map using
|
||||
a graph ranking algorithm, computed on a graph
|
||||
where each source file is a node and edges connect
|
||||
files which have dependencies.
|
||||
Aider optimizes the repo map by
|
||||
selecting the most important parts of the codebase
|
||||
which will
|
||||
fit into the token budget assigned by the user
|
||||
(via the `--map-tokens` switch, which defaults to 1k tokens).
|
||||
|
||||
The sample map shown above doesn't contain *every* class, method and function from those
|
||||
files.
|
||||
It only includes the most important identifiers,
|
||||
the ones which are most often referenced by other portions of the code.
|
||||
These are the key pieces of context that GPT needs to know to understand
|
||||
the overall codebase.
|
||||
|
||||
|
||||
## Using tree-sitter to make the map
|
||||
|
||||
Under the hood, aider uses
|
||||
[tree sitter](https://tree-sitter.github.io/tree-sitter/)
|
||||
to build the
|
||||
map.
|
||||
It specifically uses the
|
||||
[py-tree-sitter-languages](https://github.com/grantjenks/py-tree-sitter-languages)
|
||||
python module,
|
||||
which provides simple, pip-installable binary wheels for
|
||||
[most popular programming languages](https://github.com/paul-gauthier/grep-ast/blob/main/grep_ast/parsers.py).
|
||||
|
||||
Tree-sitter parses source code into an Abstract Syntax Tree (AST) based
|
||||
on the syntax of the programming language.
|
||||
Using the AST, we can identify where functions, classes, variables, types and
|
||||
other definitions occur in the source code.
|
||||
We can also identify where else in the code these things are used or referenced.
|
||||
|
||||
Aider uses all of these definitions and references to
|
||||
determine which are the most important identifiers in the repository,
|
||||
and to produce the repo map that shows just those key
|
||||
lines from the codebase.
|
||||
|
||||
## What about ctags?
|
||||
|
||||
The tree-sitter repository map replaces the
|
||||
[ctags based map](https://aider.chat/docs/ctags.html)
|
||||
that aider originally used.
|
||||
Switching from ctags to tree-sitter provides a bunch of benefits:
|
||||
|
||||
- The map is richer, showing full function call signatures and other details straight from the source files.
|
||||
- Thanks to `py-tree-sitter-languages`, we get full support for many programming languages via a python package that's automatically installed as part of the normal `pip install aider-chat`.
|
||||
- We remove the requirement for users to manually install `universal-ctags` via some external tool or package manager (brew, apt, choco, etc).
|
||||
- Tree-sitter integration is a key enabler for future work and capabilities for aider.
|
||||
|
||||
## Future work
|
||||
|
||||
You'll recall that we identified the 3 key steps
|
||||
required to use GPT
|
||||
to code within a large, pre-existing codebase:
|
||||
|
||||
1. Find the code that needs to be changed.
|
||||
2. Understand how that code relates to the rest of the codebase.
|
||||
3. Make the correct code change to accomplish the task.
|
||||
|
||||
We're now using tree-sitter to help solve the code context problem (2),
|
||||
but it's also an important foundation
|
||||
for future work on automatically finding all the code which
|
||||
will need to be changed (1).
|
||||
|
||||
Right now, aider relies on the user to specify which source files
|
||||
will need to be modified to complete their request.
|
||||
Users manually "add files to the chat" using aider's `/add` command,
|
||||
which makes those files available for GPT to modify.
|
||||
|
||||
This works well, but a key piece of future work is to harness the
|
||||
power of GPT and tree-sitter to automatically identify
|
||||
which parts of the code will need changes.
|
||||
|
||||
## Try it out
|
||||
|
||||
To code with GPT-4 using the techniques discussed here,
|
||||
just install [aider](https://aider.chat/docs/install.html).
|
||||
|
||||
|
||||
## Credits
|
||||
|
||||
Aider uses
|
||||
[modified versions of the tags.scm files](https://github.com/paul-gauthier/aider/tree/main/aider/queries)
|
||||
from these
|
||||
open source tree-sitter language implementations:
|
||||
|
||||
* https://github.com/tree-sitter/tree-sitter-c — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-c-sharp — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-cpp — licensed under the MIT License.
|
||||
* https://github.com/Wilfred/tree-sitter-elisp — licensed under the MIT License.
|
||||
* https://github.com/elixir-lang/tree-sitter-elixir — licensed under the Apache License, Version 2.0.
|
||||
* https://github.com/elm-tooling/tree-sitter-elm — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-go — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-java — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-javascript — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-ocaml — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-php — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-python — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-ql — licensed under the MIT License.
|
||||
* https://github.com/r-lib/tree-sitter-r — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-ruby — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-rust — licensed under the MIT License.
|
||||
* https://github.com/tree-sitter/tree-sitter-typescript — licensed under the MIT License.
|
|
@ -34,3 +34,4 @@ jsonschema==4.17.3
|
|||
sounddevice==0.4.6
|
||||
soundfile==0.12.1
|
||||
pathspec==0.11.2
|
||||
grep-ast==0.1.1
|
||||
|
|
3
setup.py
3
setup.py
|
@ -17,6 +17,9 @@ setup(
|
|||
version=__version__,
|
||||
packages=find_packages(),
|
||||
include_package_data=True,
|
||||
package_data={
|
||||
"aider": ["queries/*"],
|
||||
},
|
||||
install_requires=requirements,
|
||||
python_requires=">=3.9",
|
||||
entry_points={
|
||||
|
|
|
@ -10,7 +10,7 @@ from aider import models
|
|||
from aider.coders import Coder
|
||||
from aider.dump import dump # noqa: F401
|
||||
from aider.io import InputOutput
|
||||
from tests.utils import GitTemporaryDirectory
|
||||
from tests.utils import ChdirTemporaryDirectory, GitTemporaryDirectory
|
||||
|
||||
|
||||
class TestCoder(unittest.TestCase):
|
||||
|
@ -354,20 +354,21 @@ class TestCoder(unittest.TestCase):
|
|||
|
||||
@patch("aider.coders.base_coder.openai.ChatCompletion.create")
|
||||
def test_run_with_invalid_request_error(self, mock_chat_completion_create):
|
||||
# Mock the IO object
|
||||
mock_io = MagicMock()
|
||||
with ChdirTemporaryDirectory():
|
||||
# Mock the IO object
|
||||
mock_io = MagicMock()
|
||||
|
||||
# Initialize the Coder object with the mocked IO and mocked repo
|
||||
coder = Coder.create(models.GPT4, None, mock_io)
|
||||
# Initialize the Coder object with the mocked IO and mocked repo
|
||||
coder = Coder.create(models.GPT4, None, mock_io)
|
||||
|
||||
# Set up the mock to raise InvalidRequestError
|
||||
mock_chat_completion_create.side_effect = openai.error.InvalidRequestError(
|
||||
"Invalid request", "param"
|
||||
)
|
||||
# Set up the mock to raise InvalidRequestError
|
||||
mock_chat_completion_create.side_effect = openai.error.InvalidRequestError(
|
||||
"Invalid request", "param"
|
||||
)
|
||||
|
||||
# Call the run method and assert that InvalidRequestError is raised
|
||||
with self.assertRaises(openai.error.InvalidRequestError):
|
||||
coder.run(with_message="hi")
|
||||
# Call the run method and assert that InvalidRequestError is raised
|
||||
with self.assertRaises(openai.error.InvalidRequestError):
|
||||
coder.run(with_message="hi")
|
||||
|
||||
def test_new_file_edit_one_commit(self):
|
||||
"""A new file shouldn't get pre-committed before the GPT edit commit"""
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
import os
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
from aider.dump import dump # noqa: F401
|
||||
from aider.io import InputOutput
|
||||
from aider.repomap import RepoMap
|
||||
from tests.utils import IgnorantTemporaryDirectory
|
||||
|
@ -89,33 +89,9 @@ print(my_function(3, 4))
|
|||
# close the open cache files, so Windows won't error
|
||||
del repo_map
|
||||
|
||||
def test_check_for_ctags_failure(self):
|
||||
with patch("subprocess.run") as mock_run:
|
||||
mock_run.side_effect = Exception("ctags not found")
|
||||
repo_map = RepoMap(io=InputOutput())
|
||||
self.assertFalse(repo_map.has_ctags)
|
||||
|
||||
def test_check_for_ctags_success(self):
|
||||
with patch("subprocess.check_output") as mock_run:
|
||||
mock_run.side_effect = [
|
||||
(
|
||||
b"Universal Ctags 0.0.0(f25b4bb7)\n Optional compiled features: +wildcards,"
|
||||
b" +regex, +gnulib_fnmatch, +gnulib_regex, +iconv, +option-directory, +xpath,"
|
||||
b" +json, +interactive, +yaml, +case-insensitive-filenames, +packcc,"
|
||||
b" +optscript, +pcre2"
|
||||
),
|
||||
(
|
||||
b'{"_type": "tag", "name": "status", "path": "aider/main.py", "pattern": "/^ '
|
||||
b' status = main()$/", "kind": "variable"}'
|
||||
),
|
||||
]
|
||||
repo_map = RepoMap(io=InputOutput())
|
||||
self.assertTrue(repo_map.has_ctags)
|
||||
|
||||
def test_get_repo_map_without_ctags(self):
|
||||
# Create a temporary directory with a sample Python file containing identifiers
|
||||
def test_get_repo_map_all_files(self):
|
||||
test_files = [
|
||||
"test_file_without_ctags.py",
|
||||
"test_file0.py",
|
||||
"test_file1.txt",
|
||||
"test_file2.md",
|
||||
"test_file3.json",
|
||||
|
@ -130,10 +106,11 @@ print(my_function(3, 4))
|
|||
f.write("")
|
||||
|
||||
repo_map = RepoMap(root=temp_dir, io=InputOutput())
|
||||
repo_map.has_ctags = False # force it off
|
||||
|
||||
other_files = [os.path.join(temp_dir, file) for file in test_files]
|
||||
result = repo_map.get_repo_map([], other_files)
|
||||
dump(other_files)
|
||||
dump(repr(result))
|
||||
|
||||
# Check if the result contains each specific file in the expected tags map without ctags
|
||||
for file in test_files:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue