From 92f4100c92a3e7d95e9f7ecb68cd6747bfb91771 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Fri, 8 Dec 2023 12:11:57 -0800 Subject: [PATCH 01/39] be careful about too much convo history, avoid swamping the model with too much context --- aider/models/openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aider/models/openai.py b/aider/models/openai.py index 1c6286d63..cb396fabf 100644 --- a/aider/models/openai.py +++ b/aider/models/openai.py @@ -44,11 +44,11 @@ class OpenAIModel(Model): elif tokens == 32: self.prompt_price = 0.06 self.completion_price = 0.12 - self.max_chat_history_tokens = 3 * 1024 + self.max_chat_history_tokens = 2 * 1024 elif tokens == 128: self.prompt_price = 0.01 self.completion_price = 0.03 - self.max_chat_history_tokens = 4 * 1024 + self.max_chat_history_tokens = 2 * 1024 return @@ -60,7 +60,7 @@ class OpenAIModel(Model): if self.name == "gpt-3.5-turbo-1106": self.prompt_price = 0.001 self.completion_price = 0.002 - self.max_chat_history_tokens = 3 * 1024 + self.max_chat_history_tokens = 2 * 1024 elif tokens == 4: self.prompt_price = 0.0015 self.completion_price = 0.002 From ab24bce3b46c6c23bc30465bc07a07d0b7b98a54 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 14 Dec 2023 12:20:48 -0800 Subject: [PATCH 02/39] Updated HISTORY --- HISTORY.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 8bd53973c..618abee1a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # Release history +### v0.18.1 + +- Upgraded to new openai python client v1.3.7. + ### v0.18.0 - Improved prompting for both GPT-4 and GPT-4 Turbo. From 52fdb17e96881b796a5fb50a3310d77a67644069 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 14 Dec 2023 12:21:20 -0800 Subject: [PATCH 03/39] version bump to 0.18.1 --- aider/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/__init__.py b/aider/__init__.py index c3bb1bc38..e9fa21e95 100644 --- a/aider/__init__.py +++ b/aider/__init__.py @@ -1 +1 @@ -__version__ = "0.18.1-dev" +__version__ = "0.18.1" From 3aa17c46ddf1a865b16dba1c913e57fef9c13c12 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Thu, 14 Dec 2023 12:21:50 -0800 Subject: [PATCH 04/39] set version to 0.18.2-dev --- aider/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aider/__init__.py b/aider/__init__.py index e9fa21e95..eda726119 100644 --- a/aider/__init__.py +++ b/aider/__init__.py @@ -1 +1 @@ -__version__ = "0.18.1" +__version__ = "0.18.2-dev" From 7113a30271266f991551606948076c14f5235b09 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 12:54:34 -0800 Subject: [PATCH 05/39] unified diffs --- aider/coders/__init__.py | 2 + aider/coders/base_coder.py | 28 +++++-- aider/main.py | 15 +++- aider/models/__init__.py | 2 + aider/models/openai.py | 6 +- aider/utils.py | 64 +++++++++++++++- benchmark/benchmark.py | 146 ++++++++++++++++++++++++++++++++----- benchmark/prompts.py | 4 +- dev-requirements.txt | 1 - requirements.in | 1 + requirements.txt | 2 + tests/test_coder.py | 2 +- tests/test_commands.py | 2 +- tests/test_io.py | 2 +- tests/test_main.py | 2 +- tests/test_repo.py | 2 +- tests/test_repomap.py | 2 +- tests/utils.py | 56 -------------- 18 files changed, 243 insertions(+), 96 deletions(-) delete mode 100644 tests/utils.py diff --git a/aider/coders/__init__.py b/aider/coders/__init__.py index 58f5c3b6c..bf9163328 100644 --- a/aider/coders/__init__.py +++ b/aider/coders/__init__.py @@ -2,6 +2,7 @@ from .base_coder import Coder 
from .editblock_coder import EditBlockCoder from .editblock_func_coder import EditBlockFunctionCoder from .single_wholefile_func_coder import SingleWholeFileFunctionCoder +from .udiff_coder import UnifiedDiffCoder from .wholefile_coder import WholeFileCoder from .wholefile_func_coder import WholeFileFunctionCoder @@ -12,4 +13,5 @@ __all__ = [ WholeFileFunctionCoder, EditBlockFunctionCoder, SingleWholeFileFunctionCoder, + UnifiedDiffCoder, ] diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 7b86e00ab..1236a1a73 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -49,7 +49,9 @@ class Coder: functions = None total_cost = 0.0 num_exhausted_context_windows = 0 + num_malformed_responses = 0 last_keyboard_interrupt = None + max_apply_update_errors = 3 @classmethod def create( @@ -61,7 +63,7 @@ class Coder: skip_model_availabily_check=False, **kwargs, ): - from . import EditBlockCoder, WholeFileCoder + from . import EditBlockCoder, UnifiedDiffCoder, WholeFileCoder if not main_model: main_model = models.GPT4 @@ -83,6 +85,8 @@ class Coder: return EditBlockCoder(client, main_model, io, **kwargs) elif edit_format == "whole": return WholeFileCoder(client, main_model, io, **kwargs) + elif edit_format == "udiff": + return UnifiedDiffCoder(client, main_model, io, **kwargs) else: raise ValueError(f"Unknown edit format {edit_format}") @@ -296,7 +300,13 @@ class Coder: prompt += "\n" prompt += relative_fname prompt += f"\n{self.fence[0]}\n" + prompt += content + + # lines = content.splitlines(keepends=True) + # lines = [f"{i+1:03}:{line}" for i, line in enumerate(lines)] + # prompt += "".join(lines) + prompt += f"{self.fence[1]}\n" return prompt @@ -346,7 +356,7 @@ class Coder: new_user_message = self.send_new_user_message(new_user_message) if with_message: - return + return self.partial_response_content except KeyboardInterrupt: self.keyboard_interrupt() @@ -456,12 +466,12 @@ class Coder: # add the reminder anyway total_tokens = 0 + messages += self.cur_messages + # Add the reminder prompt if we still have room to include it. 
if total_tokens < self.main_model.max_context_tokens: messages += reminder_message - messages += self.cur_messages - return messages def send_new_user_message(self, inp): @@ -850,19 +860,19 @@ class Coder: return set(edit[0] for edit in edits) def apply_updates(self): - max_apply_update_errors = 3 - try: edited = self.update_files() except ValueError as err: + self.num_malformed_responses += 1 err = err.args[0] self.apply_update_errors += 1 - if self.apply_update_errors < max_apply_update_errors: + if self.apply_update_errors < self.max_apply_update_errors: self.io.tool_error(f"Malformed response #{self.apply_update_errors}, retrying...") self.io.tool_error(str(err)) return None, err else: self.io.tool_error(f"Malformed response #{self.apply_update_errors}, aborting.") + self.io.tool_error(str(err)) return False, None except Exception as err: @@ -870,11 +880,13 @@ class Coder: print() traceback.print_exc() self.apply_update_errors += 1 - if self.apply_update_errors < max_apply_update_errors: + if self.apply_update_errors < self.max_apply_update_errors: self.io.tool_error(f"Update exception #{self.apply_update_errors}, retrying...") + self.io.tool_error(str(err)) return None, str(err) else: self.io.tool_error(f"Update exception #{self.apply_update_errors}, aborting") + self.io.tool_error(str(err)) return False, None self.apply_update_errors = 0 diff --git a/aider/main.py b/aider/main.py index ba311f28b..db0d986e2 100644 --- a/aider/main.py +++ b/aider/main.py @@ -148,7 +148,7 @@ def main(argv=None, input=None, output=None, force_git_root=None): core_group.add_argument( "--model", metavar="MODEL", - default=models.GPT4.name, + default=models.GPT4_0613.name, help=f"Specify the model to use for the main chat (default: {models.GPT4.name})", ) core_group.add_argument( @@ -157,6 +157,14 @@ def main(argv=None, input=None, output=None, force_git_root=None): default=False, help="Override to skip model availability check (default: False)", ) + default_4_turbo_model = models.GPT4_1106_PREVIEW + core_group.add_argument( + "--4-turbo", + action="store_const", + dest="model", + const=default_4_turbo_model.name, + help=f"Use {default_4_turbo_model.name} model for the main chat (gpt-4 is better)", + ) default_3_model = models.GPT35_1106 core_group.add_argument( "-3", @@ -380,7 +388,10 @@ def main(argv=None, input=None, output=None, force_git_root=None): "--message-file", "-f", metavar="MESSAGE_FILE", - help="Specify a file containing the message to send GPT, process reply, then exit (disables chat mode)", + help=( + "Specify a file containing the message to send GPT, process reply, then exit (disables" + " chat mode)" + ), ) other_group.add_argument( "--encoding", diff --git a/aider/models/__init__.py b/aider/models/__init__.py index d16015830..76f1c3e35 100644 --- a/aider/models/__init__.py +++ b/aider/models/__init__.py @@ -3,6 +3,8 @@ from .openai import OpenAIModel from .openrouter import OpenRouterModel GPT4 = Model.create("gpt-4") +GPT4_0613 = Model.create("gpt-4-0613") +GPT4_1106_PREVIEW = Model.create("gpt-4-1106-preview") GPT35 = Model.create("gpt-3.5-turbo") GPT35_1106 = Model.create("gpt-3.5-turbo-1106") GPT35_16k = Model.create("gpt-3.5-turbo-16k") diff --git a/aider/models/openai.py b/aider/models/openai.py index cb396fabf..d0722785f 100644 --- a/aider/models/openai.py +++ b/aider/models/openai.py @@ -33,7 +33,11 @@ class OpenAIModel(Model): self.tokenizer = tiktoken.encoding_for_model(name) if self.is_gpt4(): - self.edit_format = "diff" + if name == "gpt-4-1106-preview": + self.edit_format = 
"udiff" + else: + self.edit_format = "diff" + self.use_repo_map = True self.send_undo_reply = True diff --git a/aider/utils.py b/aider/utils.py index 5147314cc..a0c6cc2db 100644 --- a/aider/utils.py +++ b/aider/utils.py @@ -1,6 +1,68 @@ +import os +import tempfile from pathlib import Path -from .dump import dump # noqa: F401 +import git + +from aider.dump import dump # noqa: F401 + + +class IgnorantTemporaryDirectory: + def __init__(self): + self.temp_dir = tempfile.TemporaryDirectory() + + def __enter__(self): + return self.temp_dir.__enter__() + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + self.temp_dir.__exit__(exc_type, exc_val, exc_tb) + except (OSError, PermissionError): + pass # Ignore errors (Windows) + + +class ChdirTemporaryDirectory(IgnorantTemporaryDirectory): + def __init__(self): + try: + self.cwd = os.getcwd() + except FileNotFoundError: + self.cwd = None + + super().__init__() + + def __enter__(self): + res = super().__enter__() + os.chdir(self.temp_dir.name) + return res + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.cwd: + try: + os.chdir(self.cwd) + except FileNotFoundError: + pass + super().__exit__(exc_type, exc_val, exc_tb) + + +class GitTemporaryDirectory(ChdirTemporaryDirectory): + def __enter__(self): + dname = super().__enter__() + self.repo = make_repo(dname) + return dname + + def __exit__(self, exc_type, exc_val, exc_tb): + del self.repo + super().__exit__(exc_type, exc_val, exc_tb) + + +def make_repo(path=None): + if not path: + path = "." + repo = git.Repo.init(path) + repo.config_writer().set_value("user", "name", "Test User").release() + repo.config_writer().set_value("user", "email", "testuser@example.com").release() + + return repo def safe_abs_path(res): diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 6e8e8c664..9f934dc93 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -32,7 +32,7 @@ from aider.io import InputOutput BENCHMARK_DNAME = Path(os.environ.get("AIDER_BENCHMARK_DIR", "tmp.benchmarks")) -ORIGINAL_DNAME = BENCHMARK_DNAME / "exercism-python" +EXERCISES_DIR_DEFAULT = "exercism-python" app = typer.Typer(add_completion=False, pretty_exceptions_enable=False) @@ -83,9 +83,9 @@ def show_stats(dirnames, graphs): if row.completed_tests < 133: print(f"Warning: {row.dir_name} is incomplete: {row.completed_tests}") - if "repeat" in row.dir_name: - repeats.append(vars(row)) - continue + # if "repeat" in row.dir_name: + # repeats.append(vars(row)) + # continue kind = (row.model, row.edit_format) if kind in seen: @@ -97,6 +97,7 @@ def show_stats(dirnames, graphs): rows.append(vars(row)) if repeats: + dump(repeats) extra = rows[repeat_row] dump(extra) repeats.append(extra) @@ -313,6 +314,16 @@ def main( graphs: bool = typer.Option(False, "--graphs", help="Generate graphs"), model: str = typer.Option("gpt-3.5-turbo", "--model", "-m", help="Model name"), edit_format: str = typer.Option(None, "--edit-format", "-e", help="Edit format"), + replay: str = typer.Option( + None, + "--replay", + help="Replay previous .aider.chat.history.md responses from previous benchmark run", + ), + max_apply_update_errors: int = typer.Option( + 3, + "--max-apply-update-errors", + help="Maximum number of apply update errors before stopping the test", + ), keywords: str = typer.Option( None, "--keywords", "-k", help="Only run tests that contain keywords (comma sep)" ), @@ -331,6 +342,9 @@ def main( tries: int = typer.Option(2, "--tries", "-r", help="Number of tries for running tests"), threads: int = typer.Option(1, 
"--threads", "-t", help="Number of threads to run in parallel"), num_tests: int = typer.Option(-1, "--num-tests", "-n", help="Number of tests to run"), + exercises_dir: str = typer.Option( + EXERCISES_DIR_DEFAULT, "--exercises-dir", help="Directory with exercise files" + ), ): repo = git.Repo(search_parent_directories=True) commit_hash = repo.head.object.hexsha[:7] @@ -363,12 +377,13 @@ def main( return assert BENCHMARK_DNAME.exists() and BENCHMARK_DNAME.is_dir(), BENCHMARK_DNAME - assert ORIGINAL_DNAME.exists() and ORIGINAL_DNAME.is_dir(), ORIGINAL_DNAME + original_dname = BENCHMARK_DNAME / exercises_dir + assert original_dname.exists() and original_dname.is_dir(), original_dname if clean and dirname.exists(): print("Cleaning up and replacing", dirname) dir_files = set(fn.name for fn in dirname.glob("*")) - original_files = set(fn.name for fn in ORIGINAL_DNAME.glob("*")) + original_files = set(fn.name for fn in original_dname.glob("*")) if dir_files != original_files: print("ERROR: will not delete dir that does not look like original tests", dirname) return @@ -381,8 +396,8 @@ def main( dirname.rename(dest) if not dirname.exists(): - print(f"Copying {ORIGINAL_DNAME} -> {dirname} ...") - shutil.copytree(ORIGINAL_DNAME, dirname) + print(f"Copying {original_dname} -> {dirname} ...") + shutil.copytree(original_dname, dirname) print("...done") test_dnames = sorted(os.listdir(dirname)) @@ -399,6 +414,7 @@ def main( all_results = [] for testname in test_dnames: results = run_test( + original_dname, dirname / testname, model, edit_format, @@ -407,6 +423,8 @@ def main( no_aider, verbose, commit_hash, + replay, + max_apply_update_errors, ) all_results.append(results) @@ -415,6 +433,7 @@ def main( run_test_threaded = lox.thread(threads)(run_test) for testname in test_dnames: run_test_threaded.scatter( + original_dname, dirname / testname, model, edit_format, @@ -423,6 +442,8 @@ def main( no_aider, verbose, commit_hash, + replay, + max_apply_update_errors, ) all_results = run_test_threaded.gather(tqdm=True) @@ -467,6 +488,7 @@ def show_diffs(dirnames): changed = set(testcases) - unchanged print() print("changed:", len(changed), ",".join(sorted(changed))) + print() print("unchanged:", len(unchanged), ",".join(sorted(unchanged))) @@ -498,6 +520,10 @@ def summarize_results(dirname): res.user_asks = 0 res.test_timeouts = 0 res.exhausted_context_windows = 0 + res.num_malformed_responses = 0 + res.syntax_errors = 0 + res.indentation_errors = 0 + res.lazy_comments = 0 variants = defaultdict(set) @@ -518,6 +544,11 @@ def summarize_results(dirname): res.error_outputs += results.get("num_error_outputs", 0) res.user_asks += results.get("num_user_asks", 0) res.exhausted_context_windows += results.get("num_exhausted_context_windows", 0) + res.num_malformed_responses += results.get("num_malformed_responses", 0) + res.lazy_comments += results.get("lazy_comments", 0) + + res.syntax_errors += results.get("syntax_errors", 0) + res.indentation_errors += results.get("indentation_errors", 0) for key in "model edit_format commit_hash".split(): val = results.get(key) @@ -526,6 +557,9 @@ def summarize_results(dirname): if not res.completed_tests: return + # if res.completed_tests < 133: + # return + console = Console(highlight=False) console.rule(title=str(dirname)) @@ -538,14 +572,22 @@ def summarize_results(dirname): val = ", ".join(map(str, val)) setattr(res, key, val) console.print(f"{key}: {val}", style=style) - print("num_error_outputs:", res.error_outputs) - print("num_user_asks:", res.user_asks) - style = "red" if 
res.exhausted_context_windows else None - console.print("num_exhausted_context_windows", res.exhausted_context_windows, style=style) + def show(stat): + val = getattr(res, stat) + style = "red" if val else None + console.print(f"{stat}: {val}", style=style) - style = "red" if res.test_timeouts else None - console.print("test_timeouts:", res.test_timeouts, style=style) + console.print() + show("error_outputs") + show("user_asks") + show("lazy_comments") + show("num_malformed_responses") + show("syntax_errors") + show("indentation_errors") + console.print() + show("exhausted_context_windows") + show("test_timeouts") console.print() for i in range(tries): @@ -573,8 +615,35 @@ def summarize_results(dirname): return res +def get_replayed_content(replay_dname, test_dname): + replay_dname = Path(replay_dname) + test_dname = Path(test_dname) + dump(replay_dname, test_dname) + + test_name = test_dname.name + replay_fname = replay_dname / test_name / ".aider.chat.history.md" + dump(replay_fname) + + res = replay_fname.read_text() + return res + + res = res.splitlines(keepends=True) + res = [line for line in res if not line.startswith("> ") and not line.startswith("#### ")] + return "".join(res) + + def run_test( - testdir, model_name, edit_format, tries, no_unit_tests, no_aider, verbose, commit_hash + original_dname, + testdir, + model_name, + edit_format, + tries, + no_unit_tests, + no_aider, + verbose, + commit_hash, + replay, + max_apply_update_errors, ): if not os.path.isdir(testdir): print("Not a dir:", testdir) @@ -595,12 +664,17 @@ def run_test( fnames = [] for fname in testdir.glob("*"): - if "test" not in fname.name and fname.is_file() and fname.name[0] != ".": + if ( + "test" not in fname.name + and fname.is_file() + and fname.name[0] != "." + and fname.suffix == ".py" + ): fnames.append(fname) # restore the original file, in case we interrupted a prev run # after it had saved changes - original_fname = ORIGINAL_DNAME / testdir.name / fname.name + original_fname = original_dname / testdir.name / fname.name shutil.copy(original_fname, fname) file_list = " ".join(fname.name for fname in fnames) @@ -644,17 +718,40 @@ def run_test( pretty=False, verbose=verbose, ) + coder.max_apply_update_errors = max_apply_update_errors timeouts = 0 + syntax_errors = 0 + indentation_errors = 0 + lazy_comments = 0 + dur = 0 test_outcomes = [] for i in range(tries): start = time.time() - if not no_aider: - coder.run(with_message=instructions) + if no_aider: + pass + elif replay: + response = get_replayed_content(replay, testdir) + coder.partial_response_content = response + + show = response.splitlines(keepends=True) + show = [">> " + line for line in show] + io.append_chat_history("".join(show)) + + coder.apply_updates() + else: + response = coder.run(with_message=instructions) dur += time.time() - start + if not no_aider: + pat = r"^[+]? *[#].* [.][.][.] 
" + # Count the number of lines that match pat in response + dump(response) + lazy_comments += len(re.findall(pat, response, re.MULTILINE)) + dump(lazy_comments) + if coder.last_keyboard_interrupt: raise KeyboardInterrupt @@ -673,7 +770,14 @@ def run_test( test_outcomes.append(True) break + if replay: + io.append_chat_history(errors) + errors = errors.splitlines() + + syntax_errors += sum(1 for line in errors if line.startswith("SyntaxError")) + indentation_errors += sum(1 for line in errors if line.startswith("IndentationError")) + print(errors[-1]) errors = errors[:50] errors = "\n".join(errors) @@ -693,6 +797,10 @@ def run_test( num_error_outputs=io.num_error_outputs, num_user_asks=io.num_user_asks, num_exhausted_context_windows=coder.num_exhausted_context_windows, + num_malformed_responses=coder.num_malformed_responses, + syntax_errors=syntax_errors, + indentation_errors=indentation_errors, + lazy_comments=lazy_comments, # Add the count of pattern matches to the results chat_hashes=list( zip( coder.chat_completion_call_hashes, diff --git a/benchmark/prompts.py b/benchmark/prompts.py index 996941085..13511d023 100644 --- a/benchmark/prompts.py +++ b/benchmark/prompts.py @@ -2,9 +2,9 @@ instructions_addendum = """ #### Use the above instructions to modify the supplied files: {file_list} -Keep and implement the existing function or class stubs, they will be called from unit tests. +Don't change the names of existing functions or classes, as they may be referenced from other code like unit tests, etc. Only use standard python libraries, don't suggest installing any packages. -""" +""" # noqa: E501 test_failures = """ diff --git a/dev-requirements.txt b/dev-requirements.txt index 1727a87f5..3d51fdce1 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -149,7 +149,6 @@ urllib3==2.1.0 virtualenv==20.25.0 # via pre-commit wheel==0.42.0 - # via pip-tools # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements.in b/requirements.in index 93f2005d5..e38767dd7 100644 --- a/requirements.in +++ b/requirements.in @@ -19,3 +19,4 @@ packaging sounddevice soundfile PyYAML +diff-match-patch diff --git a/requirements.txt b/requirements.txt index c74a2b2cf..4c867fab2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,8 @@ charset-normalizer==3.3.2 # via requests configargparse==1.7 # via -r requirements.in +diff-match-patch==20230430 + # via -r requirements.in diskcache==5.6.3 # via -r requirements.in distro==1.8.0 diff --git a/tests/test_coder.py b/tests/test_coder.py index bcc4c7446..ed8a9f102 100644 --- a/tests/test_coder.py +++ b/tests/test_coder.py @@ -10,7 +10,7 @@ from aider import models from aider.coders import Coder from aider.dump import dump # noqa: F401 from aider.io import InputOutput -from tests.utils import ChdirTemporaryDirectory, GitTemporaryDirectory +from aider.utils import ChdirTemporaryDirectory, GitTemporaryDirectory class TestCoder(unittest.TestCase): diff --git a/tests/test_commands.py b/tests/test_commands.py index 8e6aa040b..ee1d2c01b 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -14,7 +14,7 @@ from aider.coders import Coder from aider.commands import Commands from aider.dump import dump # noqa: F401 from aider.io import InputOutput -from tests.utils import ChdirTemporaryDirectory, GitTemporaryDirectory, make_repo +from aider.utils import ChdirTemporaryDirectory, GitTemporaryDirectory, make_repo class TestCommands(TestCase): diff --git a/tests/test_io.py b/tests/test_io.py index 
77e15e08c..91c79308a 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -4,7 +4,7 @@ from pathlib import Path from unittest.mock import patch from aider.io import AutoCompleter, InputOutput -from tests.utils import ChdirTemporaryDirectory +from aider.utils import ChdirTemporaryDirectory class TestInputOutput(unittest.TestCase): diff --git a/tests/test_main.py b/tests/test_main.py index 960ff9d6f..4a9bc408d 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -13,7 +13,7 @@ from prompt_toolkit.output import DummyOutput from aider.dump import dump # noqa: F401 from aider.io import InputOutput from aider.main import check_gitignore, main, setup_git -from tests.utils import GitTemporaryDirectory, make_repo +from aider.utils import GitTemporaryDirectory, make_repo class TestMain(TestCase): diff --git a/tests/test_repo.py b/tests/test_repo.py index 3e5b42f55..92bb10f13 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -9,7 +9,7 @@ import git from aider.dump import dump # noqa: F401 from aider.io import InputOutput from aider.repo import GitRepo -from tests.utils import GitTemporaryDirectory +from aider.utils import GitTemporaryDirectory class TestRepo(unittest.TestCase): diff --git a/tests/test_repomap.py b/tests/test_repomap.py index e081cc66e..a5ce91f1e 100644 --- a/tests/test_repomap.py +++ b/tests/test_repomap.py @@ -4,7 +4,7 @@ import unittest from aider.dump import dump # noqa: F401 from aider.io import InputOutput from aider.repomap import RepoMap -from tests.utils import IgnorantTemporaryDirectory +from aider.utils import IgnorantTemporaryDirectory class TestRepoMap(unittest.TestCase): diff --git a/tests/utils.py b/tests/utils.py deleted file mode 100644 index d6146af47..000000000 --- a/tests/utils.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -import tempfile - -import git - -from aider.dump import dump # noqa: F401 - - -class IgnorantTemporaryDirectory: - def __init__(self): - self.temp_dir = tempfile.TemporaryDirectory() - - def __enter__(self): - return self.temp_dir.__enter__() - - def __exit__(self, exc_type, exc_val, exc_tb): - try: - self.temp_dir.__exit__(exc_type, exc_val, exc_tb) - except (OSError, PermissionError): - pass # Ignore errors (Windows) - - -class ChdirTemporaryDirectory(IgnorantTemporaryDirectory): - def __init__(self): - self.cwd = os.getcwd() - super().__init__() - - def __enter__(self): - res = super().__enter__() - os.chdir(self.temp_dir.name) - return res - - def __exit__(self, exc_type, exc_val, exc_tb): - os.chdir(self.cwd) - super().__exit__(exc_type, exc_val, exc_tb) - - -class GitTemporaryDirectory(ChdirTemporaryDirectory): - def __enter__(self): - res = super().__enter__() - self.repo = make_repo() - return res - - def __exit__(self, exc_type, exc_val, exc_tb): - del self.repo - super().__exit__(exc_type, exc_val, exc_tb) - - -def make_repo(path=None): - if not path: - path = "." 
- repo = git.Repo.init(path) - repo.config_writer().set_value("user", "name", "Test User").release() - repo.config_writer().set_value("user", "email", "testuser@example.com").release() - - return repo From 360846320ffbe1376cf4dae1dc5c44a747a15b63 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 12:58:48 -0800 Subject: [PATCH 06/39] announce the edit format --- aider/coders/base_coder.py | 3 ++- aider/coders/editblock_coder.py | 2 ++ aider/coders/wholefile_coder.py | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py index 1236a1a73..2212ea9b6 100755 --- a/aider/coders/base_coder.py +++ b/aider/coders/base_coder.py @@ -52,6 +52,7 @@ class Coder: num_malformed_responses = 0 last_keyboard_interrupt = None max_apply_update_errors = 3 + edit_format = None @classmethod def create( @@ -149,7 +150,7 @@ class Coder: self.main_model = main_model - self.io.tool_output(f"Model: {main_model.name}") + self.io.tool_output(f"Model: {main_model.name} using {self.edit_format} edit format") self.show_diffs = show_diffs diff --git a/aider/coders/editblock_coder.py b/aider/coders/editblock_coder.py index 406bf5833..fe367a173 100644 --- a/aider/coders/editblock_coder.py +++ b/aider/coders/editblock_coder.py @@ -9,6 +9,8 @@ from .editblock_prompts import EditBlockPrompts class EditBlockCoder(Coder): + edit_format = "diff" + def __init__(self, *args, **kwargs): self.gpt_prompts = EditBlockPrompts() super().__init__(*args, **kwargs) diff --git a/aider/coders/wholefile_coder.py b/aider/coders/wholefile_coder.py index 6bd7d4634..7c0975cb7 100644 --- a/aider/coders/wholefile_coder.py +++ b/aider/coders/wholefile_coder.py @@ -8,6 +8,8 @@ from .wholefile_prompts import WholeFilePrompts class WholeFileCoder(Coder): + edit_format = "whole" + def __init__(self, *args, **kwargs): self.gpt_prompts = WholeFilePrompts() super().__init__(*args, **kwargs) From 0d4b6b6885efde93635644a37613643050e9fcfc Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 13:00:32 -0800 Subject: [PATCH 07/39] added missing files --- aider/coders/search_replace.py | 770 +++++++++++++++++++++++++++++++++ aider/coders/udiff_coder.py | 395 +++++++++++++++++ aider/coders/udiff_prompts.py | 105 +++++ benchmark/refactor_tools.py | 208 +++++++++ docs/unified-diffs.md | 440 +++++++++++++++++++ 5 files changed, 1918 insertions(+) create mode 100755 aider/coders/search_replace.py create mode 100644 aider/coders/udiff_coder.py create mode 100644 aider/coders/udiff_prompts.py create mode 100755 benchmark/refactor_tools.py create mode 100644 docs/unified-diffs.md diff --git a/aider/coders/search_replace.py b/aider/coders/search_replace.py new file mode 100755 index 000000000..47ae35d25 --- /dev/null +++ b/aider/coders/search_replace.py @@ -0,0 +1,770 @@ +#!/usr/bin/env python + +import sys +from pathlib import Path + +import git +from diff_match_patch import diff_match_patch +from tqdm import tqdm + +from aider.dump import dump +from aider.utils import GitTemporaryDirectory + + +class RelativeIndenter: + """Rewrites text files to have relative indentation, which involves + reformatting the leading white space on lines. This format makes + it easier to search and apply edits to pairs of code blocks which + may differ significantly in their overall level of indentation. + + It removes leading white space which is shared with the preceding + line. 
+ + Original: + ``` + Foo # indented 8 + Bar # indented 4 more than the previous line + Baz # same indent as the previous line + Fob # same indent as the previous line + ``` + + Becomes: + ``` + Foo # indented 8 + Bar # indented 4 more than the previous line + Baz # same indent as the previous line + Fob # same indent as the previous line + ``` + + If the current line is *less* indented then the previous line, + uses a unicode character to indicate outdenting. + + Original + ``` + Foo + Bar + Baz + Fob # indented 4 less than the previous line + ``` + + Becomes: + ``` + Foo + Bar + Baz + ←←←←Fob # indented 4 less than the previous line + ``` + + This is a similar original to the last one, but every line has + been uniformly outdented: + ``` + Foo + Bar + Baz + Fob # indented 4 less than the previous line + ``` + + It becomes this result, which is very similar to the previous + result. Only the white space on the first line differs. From the + word Foo onwards, it is identical to the previous result. + ``` + Foo + Bar + Baz + ←←←←Fob # indented 4 less than the previous line + ``` + + """ + + def __init__(self, texts): + """ + Based on the texts, choose a unicode character that isn't in any of them. + """ + + chars = set() + for text in texts: + chars.update(text) + + ARROW = "←" + if ARROW not in chars: + self.marker = ARROW + else: + self.marker = self.select_unique_marker(chars) + + def select_unique_marker(self, chars): + for codepoint in range(0x10FFFF, 0x10000, -1): + marker = chr(codepoint) + if marker not in chars: + return marker + + raise ValueError("Could not find a unique marker") + + def make_relative(self, text): + """ + Transform text to use relative indents. + """ + + if self.marker in text: + raise ValueError("Text already contains the outdent marker: {self.marker}") + + lines = text.splitlines(keepends=True) + + output = [] + prev_indent = "" + for line in lines: + line_without_end = line.rstrip("\n\r") + + len_indent = len(line_without_end) - len(line_without_end.lstrip()) + indent = line[:len_indent] + change = len_indent - len(prev_indent) + if change > 0: + cur_indent = indent[-change:] + elif change < 0: + cur_indent = self.marker * -change + else: + cur_indent = "" + + out_line = cur_indent + "\n" + line[len_indent:] + # dump(len_indent, change, out_line) + # print(out_line) + output.append(out_line) + prev_indent = indent + + res = "".join(output) + return res + + def make_absolute(self, text): + """ + Transform text from relative back to absolute indents. + """ + lines = text.splitlines(keepends=True) + + output = [] + prev_indent = "" + for i in range(0, len(lines), 2): + dent = lines[i].rstrip("\r\n") + non_indent = lines[i + 1] + + if dent.startswith(self.marker): + len_outdent = len(dent) + cur_indent = prev_indent[:-len_outdent] + else: + cur_indent = prev_indent + dent + + if not non_indent.rstrip("\r\n"): + out_line = non_indent # don't indent a blank line + else: + out_line = cur_indent + non_indent + + output.append(out_line) + prev_indent = cur_indent + + res = "".join(output) + if self.marker in res: + # dump(res) + raise ValueError("Error transforming text back to absolute indents") + + return res + + +# The patches are created to change S->R. +# So all the patch offsets are relative to S. +# But O has a lot more content. So all the offsets are very wrong. +# +# But patch_apply() seems to imply that once patch N is located, +# then it adjusts the offset of the next patch. 
+# +# This is great, because once we sync up after a big gap the nearby +# patches are close to being located right. +# Except when indentation has been changed by GPT. +# +# It would help to use the diff trick to build map_S_offset_to_O_offset(). +# Then update all the S offsets in the S->R patches to be O offsets. +# Do we also need to update the R offsets? +# +# What if this gets funky/wrong? +# + + +def map_patches(texts, patches, debug): + search_text, replace_text, original_text = texts + + dmp = diff_match_patch() + dmp.Diff_Timeout = 5 + + diff_s_o = dmp.diff_main(search_text, original_text) + # diff_r_s = dmp.diff_main(replace_text, search_text) + + # dmp.diff_cleanupSemantic(diff_s_o) + # dmp.diff_cleanupEfficiency(diff_s_o) + + if debug: + html = dmp.diff_prettyHtml(diff_s_o) + Path("tmp.html").write_text(html) + + dump(len(search_text)) + dump(len(original_text)) + + for patch in patches: + start1 = patch.start1 + start2 = patch.start2 + + patch.start1 = dmp.diff_xIndex(diff_s_o, start1) + patch.start2 = dmp.diff_xIndex(diff_s_o, start2) + + if debug: + print() + print(start1, repr(search_text[start1 : start1 + 50])) + print(patch.start1, repr(original_text[patch.start1 : patch.start1 + 50])) + print(patch.diffs) + print() + + return patches + + +example = """Left +Left + 4 in + 4 in + 8 in + 4 in +Left +""" + +""" +ri = RelativeIndenter([example]) +dump(example) + +rel_example = ri.make_relative(example) +dump(repr(rel_example)) + +abs_example = ri.make_absolute(rel_example) +dump(abs_example) + + +sys.exit() +""" + + +def relative_indent(texts): + ri = RelativeIndenter(texts) + texts = list(map(ri.make_relative, texts)) + + return ri, texts + + +line_padding = 100 + + +def line_pad(text): + padding = "\n" * line_padding + return padding + text + padding + + +def line_unpad(text): + if set(text[:line_padding] + text[-line_padding:]) != set("\n"): + return + return text[line_padding:-line_padding] + + +def dmp_apply(texts, remap=True): + debug = False + # debug = True + + search_text, replace_text, original_text = texts + + dmp = diff_match_patch() + dmp.Diff_Timeout = 5 + # dmp.Diff_EditCost = 16 + + if remap: + dmp.Match_Threshold = 0.95 + dmp.Match_Distance = 500 + dmp.Match_MaxBits = 128 + dmp.Patch_Margin = 32 + else: + dmp.Match_Threshold = 0.5 + dmp.Match_Distance = 100_000 + dmp.Match_MaxBits = 32 + dmp.Patch_Margin = 8 + + diff = dmp.diff_main(search_text, replace_text, None) + dmp.diff_cleanupSemantic(diff) + dmp.diff_cleanupEfficiency(diff) + + patches = dmp.patch_make(search_text, diff) + + if debug: + html = dmp.diff_prettyHtml(diff) + Path("tmp.search_replace_diff.html").write_text(html) + + for d in diff: + print(d[0], repr(d[1])) + + for patch in patches: + start1 = patch.start1 + print() + print(start1, repr(search_text[start1 : start1 + 10])) + print(start1, repr(replace_text[start1 : start1 + 10])) + print(patch.diffs) + + # dump(original_text) + # dump(search_text) + + if remap: + patches = map_patches(texts, patches, debug) + + patches_text = dmp.patch_toText(patches) + + new_text, success = dmp.patch_apply(patches, original_text) + + all_success = False not in success + + if debug: + # dump(new_text) + print(patches_text) + + # print(new_text) + dump(success) + dump(all_success) + + # print(new_text) + + if not all_success: + return + + return new_text + + +def lines_to_chars(lines, mapping): + new_text = [] + for char in lines: + new_text.append(mapping[ord(char)]) + + new_text = "".join(new_text) + return new_text + + +def dmp_lines_apply(texts, 
remap=True): + debug = False + # debug = True + + for t in texts: + assert t.endswith("\n"), t + + search_text, replace_text, original_text = texts + + dmp = diff_match_patch() + dmp.Diff_Timeout = 5 + # dmp.Diff_EditCost = 16 + + dmp.Match_Threshold = 0.1 + dmp.Match_Distance = 100_000 + dmp.Match_MaxBits = 32 + dmp.Patch_Margin = 1 + + all_text = search_text + replace_text + original_text + all_lines, _, mapping = dmp.diff_linesToChars(all_text, "") + assert len(all_lines) == len(all_text.splitlines()) + + search_num = len(search_text.splitlines()) + replace_num = len(replace_text.splitlines()) + original_num = len(original_text.splitlines()) + + search_lines = all_lines[:search_num] + replace_lines = all_lines[search_num : search_num + replace_num] + original_lines = all_lines[search_num + replace_num :] + + assert len(search_lines) == search_num + assert len(replace_lines) == replace_num + assert len(original_lines) == original_num + + diff_lines = dmp.diff_main(search_lines, replace_lines, None) + dmp.diff_cleanupSemantic(diff_lines) + dmp.diff_cleanupEfficiency(diff_lines) + + patches = dmp.patch_make(search_lines, diff_lines) + + if debug: + diff = list(diff_lines) + dmp.diff_charsToLines(diff, mapping) + dump(diff) + html = dmp.diff_prettyHtml(diff) + Path("tmp.search_replace_diff.html").write_text(html) + + for d in diff: + print(d[0], repr(d[1])) + + new_lines, success = dmp.patch_apply(patches, original_lines) + new_text = lines_to_chars(new_lines, mapping) + + all_success = False not in success + + if debug: + # print(new_text) + dump(success) + dump(all_success) + + # print(new_text) + + if not all_success: + return + + return new_text + + +def diff_lines(search_text, replace_text): + dmp = diff_match_patch() + dmp.Diff_Timeout = 5 + # dmp.Diff_EditCost = 16 + search_lines, replace_lines, mapping = dmp.diff_linesToChars(search_text, replace_text) + + diff_lines = dmp.diff_main(search_lines, replace_lines, None) + dmp.diff_cleanupSemantic(diff_lines) + dmp.diff_cleanupEfficiency(diff_lines) + + diff = list(diff_lines) + dmp.diff_charsToLines(diff, mapping) + dump(diff) + + udiff = [] + for d, lines in diff: + if d < 0: + d = "-" + elif d > 0: + d = "+" + else: + d = " " + for line in lines.splitlines(keepends=True): + udiff.append(d + line) + + return udiff + + +def search_and_replace(texts): + search_text, replace_text, original_text = texts + + num = original_text.count(search_text) + # if num > 1: + # raise SearchTextNotUnique() + if num == 0: + return + + new_text = original_text.replace(search_text, replace_text) + + return new_text + + +def git_cherry_pick_osr_onto_o(texts): + search_text, replace_text, original_text = texts + + with GitTemporaryDirectory() as dname: + repo = git.Repo(dname) + + fname = Path(dname) / "file.txt" + + # Make O->S->R + fname.write_text(original_text) + repo.git.add(str(fname)) + repo.git.commit("-m", "original") + original_hash = repo.head.commit.hexsha + + fname.write_text(search_text) + repo.git.add(str(fname)) + repo.git.commit("-m", "search") + + fname.write_text(replace_text) + repo.git.add(str(fname)) + repo.git.commit("-m", "replace") + replace_hash = repo.head.commit.hexsha + + # go back to O + repo.git.checkout(original_hash) + + # cherry pick R onto original + try: + repo.git.cherry_pick(replace_hash, "--minimal") + except git.exc.GitCommandError: + # merge conflicts! 
+ return + + new_text = fname.read_text() + return new_text + + +def git_cherry_pick_sr_onto_so(texts): + search_text, replace_text, original_text = texts + + with GitTemporaryDirectory() as dname: + repo = git.Repo(dname) + + fname = Path(dname) / "file.txt" + + fname.write_text(search_text) + repo.git.add(str(fname)) + repo.git.commit("-m", "search") + search_hash = repo.head.commit.hexsha + + # make search->replace + fname.write_text(replace_text) + repo.git.add(str(fname)) + repo.git.commit("-m", "replace") + replace_hash = repo.head.commit.hexsha + + # go back to search, + repo.git.checkout(search_hash) + + # make search->original + fname.write_text(original_text) + repo.git.add(str(fname)) + repo.git.commit("-m", "original") + + # cherry pick replace onto original + try: + repo.git.cherry_pick(replace_hash, "--minimal") + except git.exc.GitCommandError: + # merge conflicts! + return + + new_text = fname.read_text() + + return new_text + + +class SearchTextNotUnique(ValueError): + pass + + +all_preprocs = [ + # (strip_blank_lines, relative_indent, reverse_lines) + (False, False, False), + (True, False, False), + (False, True, False), + (True, True, False), + # (False, False, True), + # (True, False, True), + # (False, True, True), + # (True, True, True), +] + +always_relative_indent = [ + (False, True, False), + (True, True, False), + # (False, True, True), + # (True, True, True), +] + +editblock_strategies = [ + (search_and_replace, all_preprocs), + (git_cherry_pick_osr_onto_o, all_preprocs), + (dmp_lines_apply, all_preprocs), +] + +never_relative = [ + (False, False), + (True, False), +] + +udiff_strategies = [ + (search_and_replace, all_preprocs), + (git_cherry_pick_osr_onto_o, all_preprocs), + (dmp_lines_apply, all_preprocs), +] + + +def flexible_search_and_replace(texts, strategies): + """Try a series of search/replace methods, starting from the most + literal interpretation of search_text. If needed, progress to more + flexible methods, which can accommodate divergence between + search_text and original_text and yet still achieve the desired + edits. 
+ """ + + for strategy, preprocs in strategies: + for preproc in preprocs: + res = try_strategy(texts, strategy, preproc) + if res: + dump(strategy, preproc) + return res + + +def reverse_lines(text): + lines = text.splitlines(keepends=True) + lines.reverse() + return "".join(lines) + + +def try_strategy(texts, strategy, preproc): + preproc_strip_blank_lines, preproc_relative_indent, preproc_reverse = preproc + ri = None + + if preproc_strip_blank_lines: + texts = strip_blank_lines(texts) + if preproc_relative_indent: + ri, texts = relative_indent(texts) + if preproc_reverse: + texts = list(map(reverse_lines, texts)) + + res = strategy(texts) + + if res and preproc_reverse: + res = reverse_lines(res) + + if res and preproc_relative_indent: + try: + res = ri.make_absolute(res) + except ValueError: + return + + return res + + +def strip_blank_lines(texts): + # strip leading and trailing blank lines + texts = [text.strip("\n") + "\n" for text in texts] + return texts + + +def read_text(fname): + text = Path(fname).read_text() + return text + + +def proc(dname): + dname = Path(dname) + + try: + search_text = read_text(dname / "search") + replace_text = read_text(dname / "replace") + original_text = read_text(dname / "original") + except FileNotFoundError: + return + + #### + + texts = search_text, replace_text, original_text + + strategies = [ + # (search_and_replace, all_preprocs), + # (git_cherry_pick_osr_onto_o, all_preprocs), + # (git_cherry_pick_sr_onto_so, all_preprocs), + # (dmp_apply, all_preprocs), + (dmp_lines_apply, all_preprocs), + ] + + _strategies = editblock_strategies # noqa: F841 + + short_names = dict( + search_and_replace="sr", + git_cherry_pick_osr_onto_o="cp_o", + git_cherry_pick_sr_onto_so="cp_so", + dmp_apply="dmp", + dmp_lines_apply="dmpl", + ) + + patched = dict() + for strategy, preprocs in strategies: + for preproc in preprocs: + method = strategy.__name__ + method = short_names[method] + + strip_blank, rel_indent, rev_lines = preproc + if strip_blank or rel_indent: + method += "_" + if strip_blank: + method += "s" + if rel_indent: + method += "i" + if rev_lines: + method += "r" + + res = try_strategy(texts, strategy, preproc) + patched[method] = res + + results = [] + for method, res in patched.items(): + out_fname = dname / f"original.{method}" + if out_fname.exists(): + out_fname.unlink() + + if res: + out_fname.write_text(res) + + correct = (dname / "correct").read_text() + if res == correct: + res = "pass" + else: + res = "WRONG" + else: + res = "fail" + + results.append((method, res)) + + return results + + +def colorize_result(result): + colors = { + "pass": "\033[102;30mpass\033[0m", # Green background, black text + "WRONG": "\033[101;30mWRONG\033[0m", # Red background, black text + "fail": "\033[103;30mfail\033[0m", # Yellow background, black text + } + return colors.get(result, result) # Default to original result if not found + + +def main(dnames): + all_results = [] + for dname in tqdm(dnames): + dname = Path(dname) + results = proc(dname) + for method, res in results: + all_results.append((dname, method, res)) + # print(dname, method, colorize_result(res)) + + # Create a 2D table with directories along the right and methods along the top + # Collect all unique methods and directories + methods = [] + for _, method, _ in all_results: + if method not in methods: + methods.append(method) + + directories = dnames + + # Sort directories by decreasing number of 'pass' results + pass_counts = { + dname: sum( + res == "pass" for dname_result, _, res in 
all_results if str(dname) == str(dname_result) + ) + for dname in directories + } + directories.sort(key=lambda dname: pass_counts[dname], reverse=True) + + # Create a results matrix + results_matrix = {dname: {method: "" for method in methods} for dname in directories} + + # Populate the results matrix + for dname, method, res in all_results: + results_matrix[str(dname)][method] = res + + # Print the 2D table + # Print the header + print("{:<20}".format("Directory"), end="") + for method in methods: + print("{:<9}".format(method), end="") + print() + + # Print the rows with colorized results + for dname in directories: + print("{:<20}".format(Path(dname).name), end="") + for method in methods: + res = results_matrix[dname][method] + colorized_res = colorize_result(res) + res_l = 9 + len(colorized_res) - len(res) + fmt = "{:<" + str(res_l) + "}" + print(fmt.format(colorized_res), end="") + print() + + +if __name__ == "__main__": + status = main(sys.argv[1:]) + sys.exit(status) diff --git a/aider/coders/udiff_coder.py b/aider/coders/udiff_coder.py new file mode 100644 index 000000000..14787b2fd --- /dev/null +++ b/aider/coders/udiff_coder.py @@ -0,0 +1,395 @@ +import difflib +from itertools import groupby +from pathlib import Path + +from ..dump import dump # noqa: F401 +from .base_coder import Coder +from .search_replace import ( + SearchTextNotUnique, + all_preprocs, + diff_lines, + flexible_search_and_replace, + search_and_replace, +) +from .udiff_prompts import UnifiedDiffPrompts + +no_match_error = """UnifiedDiffNoMatch: hunk failed to apply! + +{path} does not contain lines that match the diff you provided! +Try again. +DO NOT skip blank lines, comments, docstrings, etc! +The diff needs to apply cleanly to the lines in {path}! + +{path} does not contain these {num_lines} exact lines in a row: +``` +{original}``` +""" + + +not_unique_error = """UnifiedDiffNotUnique: hunk failed to apply! + +{path} contains multiple sets of lines that match the diff you provided! +Try again. +Use additional ` ` lines to provide context that uniquely indicates which code needs to be changed. +The diff needs to apply to a unique set of lines in {path}! 
+ +{path} contains multiple copies of these {num_lines} lines: +``` +{original}``` +""" + + +class UnifiedDiffCoder(Coder): + edit_format = "udiff" + + def __init__(self, *args, **kwargs): + self.gpt_prompts = UnifiedDiffPrompts() + super().__init__(*args, **kwargs) + + def get_edits(self): + content = self.partial_response_content + + # might raise ValueError for malformed ORIG/UPD blocks + raw_edits = list(find_diffs(content)) + + last_path = None + edits = [] + for path, hunk in raw_edits: + if path: + last_path = path + else: + path = last_path + edits.append((path, hunk)) + + return edits + + def apply_edits(self, edits): + seen = set() + uniq = [] + for path, hunk in edits: + hunk = normalize_hunk(hunk) + if not hunk: + continue + + this = [path + "\n"] + hunk + this = "".join(this) + + if this in seen: + continue + seen.add(this) + + uniq.append((path, hunk)) + + errors = [] + for path, hunk in uniq: + full_path = self.abs_root_path(path) + content = self.io.read_text(full_path) + + original, _ = hunk_to_before_after(hunk) + + try: + content = do_replace(full_path, content, hunk) + except SearchTextNotUnique: + errors.append( + not_unique_error.format( + path=path, original=original, num_lines=len(original.splitlines()) + ) + ) + continue + + if not content: + errors.append( + no_match_error.format( + path=path, original=original, num_lines=len(original.splitlines()) + ) + ) + continue + + # SUCCESS! + self.io.write_text(full_path, content) + + if errors: + errors = "\n\n".join(errors) + raise ValueError(errors) + + +def do_replace(fname, content, hunk): + fname = Path(fname) + + before_text, after_text = hunk_to_before_after(hunk) + + # does it want to make a new file? + if not fname.exists() and not before_text.strip(): + fname.touch() + content = "" + + if content is None: + return + + # TODO: handle inserting into new file + if not before_text.strip(): + # append to existing file, or start a new file + new_content = content + after_text + return new_content + + new_content = None + + new_content = apply_hunk(content, hunk) + if new_content: + return new_content + + +def collapse_repeats(s): + return "".join(k for k, g in groupby(s)) + + +def apply_hunk(content, hunk): + before_text, after_text = hunk_to_before_after(hunk) + + res = directly_apply_hunk(content, hunk) + if res: + return res + + hunk = make_new_lines_explicit(content, hunk) + + # just consider space vs not-space + ops = "".join([line[0] for line in hunk]) + ops = ops.replace("-", "x") + ops = ops.replace("+", "x") + ops = ops.replace("\n", " ") + + cur_op = " " + section = [] + sections = [] + + for i in range(len(ops)): + op = ops[i] + if op != cur_op: + sections.append(section) + section = [] + cur_op = op + section.append(hunk[i]) + + sections.append(section) + if cur_op != " ": + sections.append([]) + + all_done = True + for i in range(2, len(sections), 2): + preceding_context = sections[i - 2] + changes = sections[i - 1] + following_context = sections[i] + + res = apply_partial_hunk(content, preceding_context, changes, following_context) + if res: + content = res + else: + all_done = False + # FAILED! 
+ # this_hunk = preceding_context + changes + following_context + break + + if all_done: + return content + + +def flexi_just_search_and_replace(texts): + strategies = [ + (search_and_replace, all_preprocs), + ] + + return flexible_search_and_replace(texts, strategies) + + +def make_new_lines_explicit(content, hunk): + before, after = hunk_to_before_after(hunk) + + diff = diff_lines(before, content) + + back_diff = [] + for line in diff: + if line[0] == "+": + continue + # if line[0] == "-": + # line = "+" + line[1:] + + back_diff.append(line) + + new_before = directly_apply_hunk(before, back_diff) + if not new_before: + return hunk + + if len(new_before.strip()) < 10: + return hunk + + before = before.splitlines(keepends=True) + new_before = new_before.splitlines(keepends=True) + after = after.splitlines(keepends=True) + + if len(new_before) < len(before) * 0.66: + return hunk + + new_hunk = difflib.unified_diff(new_before, after, n=max(len(new_before), len(after))) + new_hunk = list(new_hunk)[3:] + + return new_hunk + + +def cleanup_pure_whitespace_lines(lines): + res = [ + line if line.strip() else line[-(len(line) - len(line.rstrip("\r\n")))] for line in lines + ] + return res + + +def normalize_hunk(hunk): + before, after = hunk_to_before_after(hunk, lines=True) + + before = cleanup_pure_whitespace_lines(before) + after = cleanup_pure_whitespace_lines(after) + + diff = difflib.unified_diff(before, after, n=max(len(before), len(after))) + diff = list(diff)[3:] + return diff + + +def directly_apply_hunk(content, hunk): + before, after = hunk_to_before_after(hunk) + + before_lines, _ = hunk_to_before_after(hunk, lines=True) + before_lines = "".join([line.strip() for line in before_lines]) + + # Refuse to do a repeated search and replace on a tiny bit of non-whitespace context + if len(before_lines) < 10 and content.count(before) > 1: + return + + try: + new_content = flexi_just_search_and_replace([before, after, content]) + except SearchTextNotUnique: + new_content = None + + return new_content + + +def apply_partial_hunk(content, preceding_context, changes, following_context): + len_prec = len(preceding_context) + len_foll = len(following_context) + + use_all = len_prec + len_foll + + for drop in range(use_all): + use = use_all - drop + + for use_prec in range(len_prec, -1, -1): + if use_prec > use: + continue + + use_foll = use - use_prec + if use_foll > len_foll: + continue + + if use_prec: + this_prec = preceding_context[-use_prec:] + else: + this_prec = [] + + this_foll = following_context[:use_foll] + + res = directly_apply_hunk(content, this_prec + changes + this_foll) + if res: + return res + + +def find_diffs(content): + # We can always use triple-quotes, because all the udiff content + # is prefixed with +/-/space. + + if not content.endswith("\n"): + content = content + "\n" + + lines = content.splitlines(keepends=True) + line_num = 0 + edits = [] + while line_num < len(lines): + while line_num < len(lines): + line = lines[line_num] + if line.startswith("```diff"): + line_num, these_edits = process_fenced_block(lines, line_num + 1) + edits += these_edits + break + line_num += 1 + + # For now, just take 1! 
+ # edits = edits[:1] + + return edits + + +def process_fenced_block(lines, start_line_num): + for line_num in range(start_line_num, len(lines)): + line = lines[line_num] + if line.startswith("```"): + break + + block = lines[start_line_num:line_num] + block.append("@@ @@") + + if block[0].startswith("--- "): + fname = block[0].split()[1] + block = block[2:] + else: + fname = None + + edits = [] + + keeper = False + hunk = [] + op = " " + for line in block: + hunk.append(line) + if len(line) < 2: + continue + op = line[0] + if op in "-+": + keeper = True + continue + if op != "@": + continue + if not keeper: + hunk = [] + continue + + hunk = hunk[:-1] + edits.append((fname, hunk)) + hunk = [] + + return line_num + 1, edits + + +def hunk_to_before_after(hunk, lines=False): + before = [] + after = [] + op = " " + for line in hunk: + if len(line) < 2: + op = " " + line = line + else: + op = line[0] + line = line[1:] + + if op == " ": + before.append(line) + after.append(line) + elif op == "-": + before.append(line) + elif op == "+": + after.append(line) + + if lines: + return before, after + + before = "".join(before) + after = "".join(after) + + return before, after diff --git a/aider/coders/udiff_prompts.py b/aider/coders/udiff_prompts.py new file mode 100644 index 000000000..068949e15 --- /dev/null +++ b/aider/coders/udiff_prompts.py @@ -0,0 +1,105 @@ +# flake8: noqa: E501 + +from .base_prompts import CoderPrompts + + +class UnifiedDiffPrompts(CoderPrompts): + main_system = """Act as an expert software developer. +You are diligent and tireless, and you always COMPLETELY IMPLEMENT the needed code. +Always use best practices when coding. +Respect and use existing conventions, libraries, etc that are already present in the code base. + +Take requests for changes to the supplied code. +If the request is ambiguous, ask questions. + +For each file that needs to be changed, write out the changes similar to a unified diff like `diff -U0` would produce. For example: + +# Example conversation 1 + +## USER: Replace is_prime with a call to sympy. + +## ASSISTANT: Ok, I will: + +1. Add an imports of sympy. +2. Remove the is_prime() function. +3. Replace the existing call to is_prime() with a call to sympy.isprime(). + +Here are the diffs for those changes: + +```diff +--- mathweb/flask/app.py ++++ mathweb/flask/app.py +@@ ... @@ +-class MathWeb: ++import sympy ++ ++class MathWeb: +@@ ... @@ +-def is_prime(x): +- if x < 2: +- return False +- for i in range(2, int(math.sqrt(x)) + 1): +- if x % i == 0: +- return False +- return True +@@ ... @@ +-@app.route('/prime/') +-def nth_prime(n): +- count = 0 +- num = 1 +- while count < n: +- num += 1 +- if is_prime(num): +- count += 1 +- return str(num) ++@app.route('/prime/') ++def nth_prime(n): ++ count = 0 ++ num = 1 ++ while count < n: ++ num += 1 ++ if sympy.isprime(num): ++ count += 1 ++ return str(num) +``` +""" + + system_reminder = """# File editing rules: + +Return edits similar to unified diffs that `diff -U0` would produce. + +Make sure you include the first 2 lines with the file paths. +Don't include timestamps with the file paths. + +Start each hunk of changes with a `@@ ... @@` line. +Don't include line numbers like `diff -U0` does. +The user's patch tool doesn't need them. + +The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file! +Think carefully and make sure you include and mark all lines that need to be removed or changed as `-` lines. +Make sure you mark all new or modified lines with `+`. 
+Don't leave out any lines or the diff patch won't apply correctly. + +Indentation matters in the diffs! + +Start a new hunk for each section of the file that needs changes. + +Only output hunks that specify changes with `+` or `-` lines. +Skip any hunks that are entirely unchanging ` ` lines. + +Output hunks in whatever order makes the most sense. +Hunks don't need to be in any particular order. + +When editing a function, method, loop, etc use a hunk to replace the *entire* code block. +Delete the entire existing version with `-` lines and then add a new, updated version with `+` lines. +This will help you generate correct code and correct diffs. +""" + + files_content_prefix = "These are the *read-write* files:\n" + + files_no_full_files = "I am not sharing any *read-write* files yet." + + repo_content_prefix = """Below here are summaries of other files present in this git repository. +Do not propose changes to these files, they are *read-only*. +To make a file *read-write*, ask the user to *add it to the chat*. +""" diff --git a/benchmark/refactor_tools.py b/benchmark/refactor_tools.py new file mode 100755 index 000000000..a54663377 --- /dev/null +++ b/benchmark/refactor_tools.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python + +import ast +import os +import shutil +import sys +from pathlib import Path + +from aider.dump import dump # noqa: F401 + + +class ParentNodeTransformer(ast.NodeTransformer): + """ + This transformer sets the 'parent' attribute on each node. + """ + + def generic_visit(self, node): + for child in ast.iter_child_nodes(node): + child.parent = node + return super(ParentNodeTransformer, self).generic_visit(node) + + +def verify_full_func_at_top_level(tree, func, func_children): + func_node = next( + ( + item + for item in ast.walk(tree) + if isinstance(item, ast.FunctionDef) and item.name == func + ), + None, + ) + assert func_node is not None, f"Function {func} not found" + + assert isinstance( + func_node.parent, ast.Module + ), f"{func} is not a top level function, it has parent {func_node.parent}" + + num_children = sum(1 for _ in ast.walk(func_node)) + pct_diff_children = abs(num_children - func_children) * 100 / func_children + assert ( + pct_diff_children < 10 + ), f"Old method had {func_children} children, new method has {num_children}" + + +def verify_old_class_children(tree, old_class, old_class_children): + node = next( + ( + item + for item in ast.walk(tree) + if isinstance(item, ast.ClassDef) and item.name == old_class + ), + None, + ) + assert node is not None, f"Old class {old_class} not found" + + num_children = sum(1 for _ in ast.walk(node)) + + pct_diff_children = abs(num_children - old_class_children) * 100 / old_class_children + assert ( + pct_diff_children < 10 + ), f"Old class had {old_class_children} children, new class has {num_children}" + + +def verify_refactor(fname, func, func_children, old_class, old_class_children): + with open(fname, "r") as file: + file_contents = file.read() + tree = ast.parse(file_contents) + ParentNodeTransformer().visit(tree) # Set parent attribute for all nodes + + verify_full_func_at_top_level(tree, func, func_children) + + verify_old_class_children(tree, old_class, old_class_children - func_children) + + +############################ + + +class SelfUsageChecker(ast.NodeVisitor): + def __init__(self): + self.non_self_methods = [] + self.parent_class_name = None + self.num_class_children = 0 + + def visit_FunctionDef(self, node): + # Check if the first argument is 'self' and if it's not used + if node.args.args and 
node.args.args[0].arg == "self": + self_used = any( + isinstance(expr, ast.Name) and expr.id == "self" + for stmt in node.body + for expr in ast.walk(stmt) + ) + super_used = any( + isinstance(expr, ast.Name) and expr.id == "super" + for stmt in node.body + for expr in ast.walk(stmt) + ) + if not self_used and not super_used: + # Calculate the number of child nodes in the function + num_child_nodes = sum(1 for _ in ast.walk(node)) + res = ( + self.parent_class_name, + node.name, + self.num_class_children, + num_child_nodes, + ) + self.non_self_methods.append(res) + self.generic_visit(node) + + def visit_ClassDef(self, node): + self.parent_class_name = node.name + self.num_class_children = sum(1 for _ in ast.walk(node)) + self.generic_visit(node) + + +def find_python_files(path): + if os.path.isfile(path) and path.endswith(".py"): + return [path] + elif os.path.isdir(path): + py_files = [] + for root, dirs, files in os.walk(path): + for file in files: + if file.endswith(".py"): + full_path = os.path.join(root, file) + py_files.append(full_path) + return py_files + else: + return [] + + +def find_non_self_methods(path): + python_files = find_python_files(path) + non_self_methods = [] + for filename in python_files: + with open(filename, "r") as file: + node = ast.parse(file.read(), filename=filename) + checker = SelfUsageChecker() + checker.visit(node) + for method in checker.non_self_methods: + non_self_methods.append([filename] + list(method)) + + return non_self_methods + + +def process(entry): + fname, class_name, method_name, class_children, method_children = entry + if method_children > class_children / 2: + return + if method_children < 100: + return + + fname = Path(fname) + if "test" in fname.stem: + return + + print(f"{fname} {class_name} {method_name} {class_children} {method_children}") + + dname = Path("tmp.benchmarks/refactor-benchmark") + dname.mkdir(exist_ok=True) + + dname = dname / f"{fname.stem}_{class_name}_{method_name}" + dname.mkdir(exist_ok=True) + + shutil.copy(fname, dname / fname.name) + + docs_dname = dname / ".docs" + docs_dname.mkdir(exist_ok=True) + + ins_fname = docs_dname / "instructions.md" + ins_fname.write_text(f"""# Refactor {class_name}.{method_name} + +Refactor the `{method_name}` method in the `{class_name}` class to be a stand alone, top level function. +Name the new function `{method_name}`, exactly the same name as the existing method. +Update any existing `self.{method_name}` calls to work with the new `{method_name}` function. 
+""") # noqa: E501 + + test_fname = dname / f"{fname.stem}_test.py" + test_fname.write_text(f""" +import unittest +from benchmark.refactor_tools import verify_refactor +from pathlib import Path + +class TheTest(unittest.TestCase): + def test_{method_name}(self): + fname = Path(__file__).parent / "{fname.name}" + method = "{method_name}" + method_children = {method_children} + + class_name = "{class_name}" + class_children = {class_children} + + verify_refactor(fname, method, method_children, class_name, class_children) + +if __name__ == "__main__": + unittest.main() +""") + + +def main(paths): + for path in paths: + methods = find_non_self_methods(path) + # methods = sorted(methods, key=lambda x: x[4]) + + for method in methods: + process(method) + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md new file mode 100644 index 000000000..cc1703c8e --- /dev/null +++ b/docs/unified-diffs.md @@ -0,0 +1,440 @@ + +# Fixing GPT-4 Turbo laziness with unified diffs + +![robot flowchart](../assets/robot-ast.png) + + +Aider now asks GPT-4 Turbo to use [unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html) to edit your code when you request new features, improvements, bug fixes, test cases, etc. +This new support for unified diffs massively reduces GPT-4 Turbo's habit of being a "lazy" coder. + +There are abundant anecdotes +about GPT-4 Turbo writing half completed code filled with comments that give +homework assignments to the user +like "...omitted for brevity..." or "...add logic here...". +Aider's new unified diff edit format significantly reduces this sort of lazy coding, +producing much better quantitative scores on a new "laziness benchmark". + +Before trying to reduce laziness, I needed a way to quantify and measure +the problem. +I developed a new +benchmarking suite designed to both provoke and quantify lazy coding. +It consists of 39 python refactoring tasks, +which ask GPT to remove a non-trivial method from a class and make it +a stand alone function. + +GPT-4 Turbo is prone to being lazy on this sort of task, because it's mostly a +"cut & paste" of code from one place in a file to another. +GPT often creates the new function with a body that is empty except for +a comment like +"...include the body of the original method..." + +This new laziness benchmark produced the following results with `gpt-4-1106-preview`: + +- **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format. This confirms the anecdotes that GPT-4 Turbo is quite lazy when coding, and serves as a baseline for comparison. +- **Aider's new unified diff edit format raised the score to 65%**. +- **A system prompt based on widely circulated folklore only scored 15%, same as the baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma". + +The older `gpt-4-0613` also did better on the laziness benchmark by using unified diffs. +The benchmark was designed to work with large source code files, many of +which exceeded GPT-4's 8k context window. +This meant that 28% of tasks exhausted the context window and were marked as a fail, +significantly dragging down GPT-4's performance on the benchmark. + +- **GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format. 
+- **Aider's new unified diff edit format raised GPT-4's score to 59%**. + +Before settling on unified diffs, +I explored many other approaches to stop GPT-4 Turbo from eliding code +and replacing it with comments. +These efforts included prompts about being tireless and diligent, +use of OpenAI's function/tool calling capabilities and numerous variations on +aider's existing editing formats and other diff-like formats. +All in all, the results shared here reflect +an extensive investigation of possible solutions and +a large number of benchmarking runs of numerous varied approaches against +GPT-4 Turbo. + +The result is aider's new support for a unified diff like +editing format which outperforms other potential solutions by a wide margin. +The rest of this article will describe aider's new refactoring benchmark +and the new unified diff editing format. +We will discuss some key design decisions involved in this new format, +and evaluate their significance using ablation experiments. + + +## Refactoring benchmark + +Aider has long used a +[benchmark suite based on 133 Exercism python exercises](). +But these are mostly small coding problems, +usually requiring only a few dozen lines of code to solve. +GPT-4 Turbo was typically only lazy on 2-3 of these exercises: +the ones with the largest amount of code and which involved refactoring. +Rather than fully completing the refactor, GPT would often +just add a comment +referencing old code like +"...copy $USD formatting code here...". + +Based on this observation, I set out to build a benchmark based on refactoring +a non-trivial amount of code from within fairly large source files. +To do this, I used python's `ast` module to analyze the +[Django repository](). + +The goal was to search the Django repository to: + +- Find source files that contain class methods which are non-trivial, having more than 100 AST nodes in their implementation. +- Focus on methods that are a smaller piece of a larger class, so they don't represent the bulk of the code in their class or the file. We want to find methods which are less than half the AST nodes present in their containing class. +- Find methods that do not make any use of their `self` parameter. This means they can be trivially refactored out of the class and turned into a stand-alone top-level function. + +We can then turn each of these source files into a task for the benchmark, +using instructions like: + +> Refactor the `_set_csrf_cookie` method in the `CsrfViewMiddleware` class to be a stand alone, top level function. +> Name the new function `_set_csrf_cookie`, exactly the same name as the existing method. +> Update any existing `self._set_csrf_cookie` calls to work with the new `_set_csrf_cookie` function. + +A [simple python AST scanning script]() found 39 of these source files in the Django repository +and packaged them up as benchmark tasks using +the same format as Exercism exercises. + +The tool also created a unit test for each task +which again uses the `ast` module to check that the refactor +was performed roughly correctly: + +- The updated source file must parse as correct python, without `SyntaxError` or `IndentationError` exceptions. This is a powerful check that will surface any mechanical errors made when attempting to edit the source code. +- The target method must now exist as a top-level function in the file. +- This new top-level function must contain approximately the same number of AST nodes as the original class method. 
This ensures that GPT didn't elide code and replace it with comments.
+- The original class must still be present in the file, and it must be smaller by about the number of AST nodes of the method which was removed. This helps confirm that the method was removed from the class, without other significant modifications.
+
+To be clear, this is not a rigorous test that the refactor was performed correctly.
+But it does serve as a basic sanity check that the refactor was essentially done as a cut & paste, without eliding any code as comments.
+And it correlates well with other laziness metrics
+gathered during benchmarking like the
+introduction of new comments that contain "...".
+
+The result is a pragmatic benchmark suite that provokes, detects and quantifies laziness.
+
+
+## Unified diff editing format
+
+The design and implementation of aider's new unified diff editing format
+helped clarify some general principles, which I think are applicable to any effective
+GPT-4 code editing format:
+
+- FAMILIAR - Choose an edit format that GPT is already familiar with.
+- SIMPLE - Choose a simple format that avoids escaping, syntactic overhead and brittle specifiers like line numbers or line counts.
+- HIGH LEVEL - Encourage GPT to structure edits as new versions of substantive code blocks (functions, methods, etc), not as a series of surgical/minimal changes to individual lines of code.
+- FLEXIBLE - Strive to be maximally flexible when interpreting GPT's edit instructions.
+
+A helpful shortcut here is to have empathy for GPT, and imagine you are on
+the other end of the conversation being tasked with specifying code edits.
+Would you want to hand type a properly escaped json data structure
+to specify surgical insert, delete, replace operations on specific code line numbers?
+Would you want a typo, off-by-one line number or flubbed escape character to trigger an error
+and force you to start over?
+
+GPT is quantitatively better at code editing when you reduce the
+burden of formatting edits by using a familiar, simple, high level
+and flexible editing format.
+
+### Choose a familiar editing format
+
+Unified diffs are perhaps the most commonly used format for showing
+how source code files have been changed.
+This is because it is the default output format of `git diff`:
+
+```diff
+$ git diff hello.py
+...
+--- a/hello.py
++++ b/hello.py
+@@ -1,5 +1,5 @@
+ def main(args):
+     # show a greeting
+ 
+-    print("Hello!")
++    print("Goodbye!")
+     return
+```
+
+Choosing such a familiar, popular output format means that GPT has
+seen *many* examples in its training data.
+GPT has therefore been extensively trained to generate
+text that conforms to the unified diff syntax.
+We won't need to provide many details and examples
+in the system prompt, as it knows this format by name.
+
+Unified diffs are
+usually intended to be consumed by the
+[patch](https://www.gnu.org/software/diffutils/manual/html_node/Merging-with-patch.html)
+program.
+They need to *accurately* reflect the original and updated file contents,
+otherwise the patch command will fail to apply the changes.
+Having GPT specify changes in a well-known format that is usually consumed by a
+fairly rigid program like patch
+seems to discourage it from
+leaving informal editing instructions in comments
+and being lazy
+about writing all the needed code.
+
+With unified diffs, GPT acts more like it's writing textual data intended to be read by a program,
+not talking to a person.
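For reference, this familiar format is also trivial to produce programmatically. The sketch below is illustrative only and is not part of aider's code; it just uses Python's standard `difflib` module to regenerate the `hello.py` diff shown above.

```python
import difflib

# The before and after versions of hello.py from the example above.
old = 'def main(args):\n    # show a greeting\n\n    print("Hello!")\n    return\n'
new = 'def main(args):\n    # show a greeting\n\n    print("Goodbye!")\n    return\n'

# difflib emits the same ---/+++ headers and @@ hunk markers that git diff does.
diff = difflib.unified_diff(
    old.splitlines(keepends=True),
    new.splitlines(keepends=True),
    fromfile="a/hello.py",
    tofile="b/hello.py",
)
print("".join(diff), end="")
```

Running it prints the same `---`/`+++` headers and hunk as the example above, minus the `$ git diff` command line.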
+
+
+### Use a simple editing format
+
+Aider's [previous benchmark results](https://aider.chat/docs/benchmarks.html) made
+it clear that simple editing formats
+work much better than complex ones.
+Even though OpenAI provides extensive support for
+structured formats like json and function calls,
+GPT is worse at editing code if you use them.
+I repeated these and many other similar benchmarks against GPT-4 Turbo,
+and again reached these same conclusions.
+
+Informally, this is probably because stuffing *source code* into JSON is complicated
+and error prone.
+It likely takes a lot of the model's attention to escape and wrap code
+in JSON containers.
+Wrapping the python code
+`print("On Windows use \"C:\\\"")`
+as valid json is pretty painful and error prone:
+`"print(\\"On Windows use \\"C:\\\\\\"\\")"`
+Due to escaping issues GPT's code is often syntactically incorrect when it's
+unpacked from the JSON container or the JSON decode just fails entirely.
+
+On the other hand, the core of the unified diff format is extremely simple.
+You include a hunk of the file that needs to be changed,
+with every line prefixed by either a *space* ` `, a *plus* `+` or a *minus* `-`.
+These markers indicate an unchanged line, a new line to add or an existing line to remove.
+There is no escaping, and very little other structure needed
+to create a unified diff.
+
+A unified diff looks pretty much like the code it is modifying.
+
+The one complicated piece is the line numbers found at the start
+of each hunk that look something like this: `@@ -2,4 +3,5 @@`.
+This example is from a
+hunk that will change lines 2-4 in the original file
+into what will become lines 3-5 in the updated file.
+
+You've probably read a lot of unified diffs without ever
+caring about these line numbers,
+because the diffs are usually perfectly sensible without them.
+This is good news, because we're going to discard these numbers.
+
+GPT is terrible at working accurately with source code line numbers.
+This is a general observation about any use of line
+numbers in editing formats,
+backed up by many quantitative benchmark
+experiments.
+Specifically regarding line numbers in unified diffs,
+GPT is frequently off-by-one, or labels a hunk as
+being line numbers 2-4 of the file but the hunk actually contains 6 lines, etc.
+GPT-4 isn't even close to being able to consistently
+produce valid
+line number headers.
+Doing so requires far too much attention to numerical details to ensure
+correctness and self-consistency.
+
+So aider tells GPT not to include line numbers.
+Instead, aider just interprets each hunk from the unified diffs
+as a search and replace operation:
+
+This diff:
+
+```diff
+@@ ... @@
+ def main(args):
+     # show a greeting
+ 
+-    print("Hello!")
++    print("Goodbye!")
+     return
+```
+
+Means we want to search the original source file for all the
+*space* ` ` and *minus* `-` lines from the hunk:
+
+```python
+def main(args):
+    # show a greeting
+
+    print("Hello!")
+    return
+```
+
+And then replace them with all the *space* ` ` and *plus* `+` lines:
+
+```python
+def main(args):
+    # show a greeting
+
+    print("Goodbye!")
+    return
+```
+
+Simple, right?
+
+## Encourage high level edits
+
+The example unified diffs we've seen so far have all been single line changes,
+which makes them pretty easy to read and understand.
+Consider this slightly more complex change, which renames the variable `n` to
+`number`:
+
+``` diff
+@@ ...
@@ +-def factorial(n): ++def factorial(number): + "compute factorial" + +- if n == 0: ++ if number == 0: + return 1 + else: +- return n * factorial(n-1) ++ return number * factorial(number-1) +``` + +The following "high level diff" of the same +change is not as succinct as the minimal diff above, +but it is much easier to see two different coherent versions of the +`factorial()` function. + +``` diff +@@ ... @@ +-def factorial(n): +- "compute factorial" +- +- if n == 0: +- return 1 +- else: +- return n * factorial(n-1) ++def factorial(number): ++ "compute factorial" ++ ++ if number == 0: ++ return 1 ++ else: ++ return number * factorial(number-1) +``` + +Aider's system prompt strongly encourages +GPT to produce this kind of high level diff, and provides a few examples. +GPT is much more successful at code editing +with the addition of this "high level diff" prompting. +It is better at producing correct diffs, which can be successfully +applied to the original file. + +**Experiments without "high level diff" prompting +measure a 30-50% increase in editing errors,** +where diffs fail to apply or apply incorrectly and +produce invalid code. +Each such editing error causes a round trip back to GPT, +asking for better diffs. +These extra round trips slow down the pair programming experience +and increase token costs. + +There are probably a couple of reasons why high level diffs +improve code editing performance: + +- It is easier to produce diffs that both correctly match the original code and correctly produce the intended new code. There is less risk of getting confused while generating a rapid fire series of minimal, surgical edits mixed into existing code. +- The high level hunks often contain more lines than a surgical version, so they are less likely to accidentally match unrelated parts of the original file. This is important because GPT can't reliably give us line numbers to specify exactly where in the file to make the change. + +### Be flexible when applying edits + +GPT frequently makes errors when generating diffs, which +can prevent them from being correctly +applied as edits to the source files. +These imperfect diffs exhibit a variety of problems: + +- GPT forgets to include some semantically irrelevant lines or details. Often GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change. +- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file, and incorrectly includes them with a leading *space* ` `. +- GPT jumps ahead to a new part of the file without starting a new hunk with a `@@ ... @@` divider. + +As an example of the first issue, consider this source code: + +```python +import sys + +def main(args): + # show a greeting + + print("Hello!") + return + +main(sys.argv[1:]) +``` + +GPT might produce a unified diff like the one below, +which is missing the "show a greeting" comment line. +When we search for the *minus* `-` lines, we won't find them +in the original file +because of the missing comment. + + +```diff +@@ ... @@ +-def main(args): +- +- print("Hello!") +- return ++def main(args): ++ ++ print("Goodbye!") ++ return +``` + + +Aider tries to be very flexible when applying unified diffs, +in order to handle all these sorts of defects. 
+If a hunk doesn't apply cleanly, aider uses a number of strategies
+to try and apply the edit intended by GPT:
+
+- Normalize the hunk, by taking the *minus* `-` and *space* ` ` lines as one version of the hunk and the *space* ` ` and *plus* `+` lines as a second version and doing an actual unified diff on them.
+- Try and discover new lines that GPT is trying to add but which it forgot to mark with *plus* `+` markers. This is done by diffing the *minus* `-` and *space* ` ` lines back against the original file.
+- Break a large hunk apart into an overlapping sequence of smaller hunks, which each contain only one contiguous run of *plus* `+` and *minus* `-` lines. Try and apply each of these sub-hunks independently.
+- Vary the size and offset of the "context window" of *space* ` ` lines from the hunk that are used to localize the edit to a specific part of the file.
+- Combine the above mechanisms to progressively become more permissive about how to apply the hunk.
+
+These flexible patching strategies are critical to successfully apply the
+unified diffs that GPT produces.
+Removing support for flexible patching
+radically increases the number of hunks which fail to apply.
+Each such editing error causes a round trip back to GPT,
+asking for better diffs.
+These extra round trips slow down the pair programming experience
+and increase token costs.
+
+**Experiments where flexible patching is disabled** quantify the importance of this
+feature:
+
+- **GPT-4 Turbo's performance drops from 65% down to 56%** on the refactoring benchmark.
+- **We see a 9X increase in editing errors** on aider's original Exercism benchmark.
+
+## Conclusions and future work
+
+Aider's new unified diff format seems very effective at stopping
+GPT-4 Turbo from being a lazy coder.
+
+I suspect that anyone who has tried to have GPT edit code
+started out asking for diffs of some kind.
+I know I did.
+Any naive attempt to use actual unified diffs
+or any other strict diff format
+is certainly doomed,
+but the techniques described here and
+now incorporated into aider provide
+a highly effective solution.
+
+There could be significant benefits to
+fine tuning models on
+the simpler, high level style of diffs that are described here.
+Dropping the line numbers and focusing on diffs of
+semantically coherent chunks of code
+seems to be an important part of successful GPT code editing.
+Most LLMs will have already seen plenty of unified diffs
+in their normal training data, and so should be
+very amenable to fine tuning towards this
+particular style of diff.
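As a closing illustration of the hunk-as-search-and-replace interpretation described above, here is a minimal Python sketch. It is a toy, not aider's implementation: `apply_hunk` is a hypothetical helper and it deliberately leaves out the flexible fallback strategies listed earlier, although the split step mirrors the spirit of the `hunk_to_before_after` helper added in this patch series.

```python
def hunk_to_before_after(hunk):
    """Split a +/-/space prefixed hunk into the text to search for and its replacement."""
    before, after = [], []
    for line in hunk:
        op, rest = (line[0], line[1:]) if line else (" ", line)
        if op == "-":
            before.append(rest)      # removed lines exist only in the original file
        elif op == "+":
            after.append(rest)       # added lines exist only in the updated file
        else:
            before.append(rest)      # context lines appear in both versions
            after.append(rest)
    return "".join(before), "".join(after)


def apply_hunk(content, hunk):
    """Apply one hunk as a plain search and replace, or give up if it doesn't match."""
    before, after = hunk_to_before_after(hunk)
    if before not in content:
        return None                  # this is where the flexible strategies would kick in
    return content.replace(before, after, 1)


hunk = [
    " def main(args):\n",
    "     # show a greeting\n",
    " \n",
    '-    print("Hello!")\n',
    '+    print("Goodbye!")\n',
    "     return\n",
]
original = 'def main(args):\n    # show a greeting\n\n    print("Hello!")\n    return\n'
print(apply_hunk(original, hunk), end="")
```

Run against the `hello.py` example from earlier in the article, it prints the updated file with the `Goodbye!` line in place.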
From 1e38577a3c9697056a66e1f433b98fc6a618497c Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Sun, 17 Dec 2023 13:01:47 -0800
Subject: [PATCH 08/39] Announce repo size

---
 aider/coders/base_coder.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/aider/coders/base_coder.py b/aider/coders/base_coder.py
index 2212ea9b6..2057d7e8a 100755
--- a/aider/coders/base_coder.py
+++ b/aider/coders/base_coder.py
@@ -179,7 +179,8 @@ class Coder:
 
         if self.repo:
             rel_repo_dir = self.repo.get_rel_repo_dir()
-            self.io.tool_output(f"Git repo: {rel_repo_dir}")
+            num_files = len(self.repo.get_tracked_files())
+            self.io.tool_output(f"Git repo: {rel_repo_dir} with {num_files} files")
         else:
             self.io.tool_output("Git repo: none")
         self.find_common_root()

From c46024e76af400423039aa7beb51df3f4509ca62 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Sun, 17 Dec 2023 13:05:22 -0800
Subject: [PATCH 09/39] art

---
 assets/udiffs.jpg     | Bin 0 -> 85462 bytes
 docs/unified-diffs.md |   2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 assets/udiffs.jpg

diff --git a/assets/udiffs.jpg b/assets/udiffs.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5e754e9666e6fc71ed3148493f3a46c9ddb566aa
GIT binary patch
literal 85462
zA2=@U%fysiyA^yvko`IaRj-aCZS}IvpqU+GzvMm>aSFAzS+x0Ug46sUD`T31@?;bJ z-u^Ryg(yHi7;)buIr-W+cL=T!#3}5ngLkrt->v6;+Vn9dd!)`V&|W2KaSzq1I1bmr zqS7&|jtk;I_!-?cqTrp?u!6Qcl;s+C3`KcYQ`>+b5WGR!X;){o&z|{&1+~Ao2(0({ z;;>43uhHsi@Ufz`LRO%cKr@|ntm0wpqPjph?bT%;SzO8YDI6v)Q>EMJVr^Yl&ySXF zwszx1L8e;xp@UkUm2f+h^&<+FU4oK~x(DvdZQRTo{dGb&*5JX-nX22sHF0Q|%bS+B z1o=wgJUK2zh9u5TpY2FSqa@tAE!&bzY9p@9#A5t5D{J)@TE2L7Pb4R>Soeq21m<~? zojLB!oMaFG16P;0Etnn-1E+aB@|d$D5V!D*9qZ3%)Fg8RXu+>HpU9oP?T5>!A+BTj z7ENdBlAN=LXSEfG&;`ZHf#R7#@K)_a zYQSLGd3XK(%Fdle#H_`9y07aEvDZNt^^eNUmslf=`wm!iph$e>*dNlbvvNJs$ioD` z-d7FS=C9j=9HjOx&{^kk6dfb-Cub^L%_NLZvvzSEKU}-h)4|;7d&{_P4cGRXRLT}z z165J3ElpVvD8kV$dl7y2?jAI%%y|eVR%aZN!4_*UK{X68hli2nU-OmK_;av zMPtS^>zFZIPmX*WbZe{rpxuR6JAx~wEms9Mh%$^->$ppAWv+SuHDu!wMB>%J>V)Qc z)Pp!A*}GGZocVaB=StxtNflSZdCUyxaytO;2Y{A=0-mxPKs*HmY40fh^&$P6qLXI{ zOyyhuz1j-6lfZw?NtxoIwzpS-u==udd}eU~dy6X_Uk&s8-&5A~Zanp}@MrtEz1+1tfFGyQfeWwdCdtHNF-p?QWiqwS^-8cQV zpk#9<%ayu5VqJ5CUSAqtmYewV&X#bxKJSSf^o;qj9S&L(ojjKZCxy*MQioKr`UMeo zt-!C=4C<6P)??=*c)?>%&4%teFGV;~N{<%%4Y!Xa$%;0{uC2F-PMJ&%( z1Q9M3xeIlspzZg1HYN`}x3aHW16~zRetxN5>;X2nwx{{rfgmXM?!y2=8@B`&5X>Hp zR7WeN-u|{OU-^o3^d*pVVNc@@@0PSxU(L*4@z6$jt- z(L_MsAH1XH41e07-F}SZ)^>*)FH_psk%r++DH@!2T!pIX_*y(kkRLx<(x=$yK08*_ zi&5-MFAk9;4~SZZQtgj>G#;~y9i&X03ZVD!3o8g$-T%XR4hNOIy5^x-{rQHl_?-Vt z`e&&()GJ8j9T3Q(pIS^`_GiK9_X=40&QX$9q~6|B=*fb%=GTUtKe!tnRBgFY(ZnUpcAAboXy%qQ3hjPLBnH1T*!Jvo5kN z_$AlX#*f%pPp=asI)(AR2Vp{$-!lJ(me=4O%bH;^FbsAgI38WRUGX=-ks!AdM|Uq* ziWMVa8t~Y%aIOge#jnvAzB~4}vyZe>6rR!qCIe?5>Kne>AVPhL<;EA zjvtgQSkOxV?e6XuMxafLXQC-%KOE11)W(f8Z3RK?g_ROJ()*Irl_KmtFJtJdV%uY~k9t zyt|1QA5}n?0A9F9o$`0uw@4;k0iNyO$CIa5$xvOkYpl`mmcw;A7FI0%9UO|4QtN_}Z0s-OII(8+Nub=%_;yh38Ww4)?#(aNJ z3q7ChS8tx}SJaL@qpF>oK;)uUC1k`R#Z{`E1`e4f{TFllp*r#t&DYX4$LJHS{y&4r z&3aI)jP6o+4w7yRzg+Ol{B+KH!DH#SmUP{EoUQi{GLw-A>df=WfV?uqfV_|3<8k7V z;EV0VW389Rn0AcJJuYvZuk+;=(9IUrT^JHHp9a%IRVP=;9;fp7m@bP9%Sp&CW$hyV zKo(xZxGYv54>Dwa<9c6`-mCSUYuS(BMa>zrv997KJ7-6)L8O3ep%TENU35o9;k9?4 zopCF{rQk*ovjMx~T<*{bHI{}3Vcl)F_Cur=)Z8|ci}N>ngED{%y{Z%Vi|O^n8Qo7( zfgRzv)BFG`KB(C#EEYJY2JPf(pr!bDY(7UWacWS`WUO5kRaCI*liz#B-v1xE-ZCo6 zzI_)SML|KB8|fS*1*AJiQM#m&R8qR785%(v3F+>T1_5aXq`NzY?i`qLU);~L_x|s9 zt@i^T=3>^cX1L~ep678Mq0#o^JEe(HAsv|I7J3HUH)A!1I1GoOM88!ArN7LK9SN=P zp8{L9c>Pru(e-hXUiU2pM3C;hjnH511oK` zKg+eXRJIi?X;R+Y;uDv$g{jlx+#`ZqQ^)qC9>TL!b1XE*2k8$&v`aCS zsyMjDGQS?z+;*ua4h9=C{J=&yuKL22oHA@#mztSM_{&yXgtw$Q7|$pwgwwqG>d&o2E{eh*o>?$-mD8#O*}y&i5wY)bI4>)@Q_3hYq-7{| zAGV74<)j=#_k+K`P1Ibl2Y}Y?wAf_a*EK=G*8fO3$(Cg5rh`v}cjp|a%2N;6;#)dv z>R-81q@>yyr>~N1u7@9U)ipOp4IA{I_%9l#5fQQ!3BKoi?ADJu(>rnHH(Lspp36Z_ zk1TA)LS2r;V5g@UQye#fFW!Q*p!KA$_0*o)RCR|3GQ{F%5|^NHQO zDYt=6DdR*Wi7!}aCMTLjM@&V+)ttNf!DkCx*gDqkv!9$Iz0$`P0@%U|v zwFY2-IK#(QBCLK8cd(~tTfw%WCU5G=TOn%w{)euJI~L9ibB|lwWvOPI)8VH$e;y68 z(-InoEmyOp`Zb&~B?ne6{j$LK9(avtOqoPfWo3is^E9Ek zj<7&yWcuU*6xRpRZt8@WS7ejEfA$FHPvOPznXRuqPn?=L(-*u^P3g>Qwb$P|)1Ow2 z%kS+(r*B$tUlphP=stBOIL+T0f?6#ucYt;RD1=U~S=`KUZnguDWFwjvik~f(g=q(E#VMK~4 zLsY_$;4bt1--3~vy^&TLH%$*qrLVQHigt7udU8a29>=Uj|B|Qpj^{?tKPCXKXyK#i z=Ec)AH3VB~xyI&_TLPH~>LP)HhwgVi!7V)P$c^kTqa2^onPXf#5nRTRyRrx|hxExo zx97{9iA#)((+a_4YzH&67dU4aZ3p^T;Wf(Dz|QL_QJkog)LXrO;1G8H-(r>iSb9-% zkWUN~2iz1XWIr_Cn2+H#wt4WqP)UE?~quQ{&2_y*)%s4A@$X?epMc8gy#oqf6)C*zRo(ziX-m)ad?E%=Jl%F6XYIv{c3=|} z|H_AEEZ>cDp+6c29?`&5?%^odV_|#XJlQnwfIl#BrJBJRbVKmTj10a=V|=p9T=#`s ze{!;Hv$_7cGNDM!k~uy6#qeNL1?zH*zhGZqX2=ICa`nAWmmk&jYy4;28Jxls#00S_ zep?;{tu4>y{gK)()wc06`4pBOcts>7hTSuzFZ!xH%7#vq<%5rz5L=Jf(-otN`L?MT zdPlv)oB78vYF}h&K7s`AhCMKaJqv9nY9cYo)cyJ%e^r4`gvEb-FaHl{SGKKKCn8`; 
z_tpD_mZqoK-|GZypR9(hAlJ#LYw?P5ac-r9g}8ocZcz)2n;W>u%=RQ)z9#s#xJeeVc>~amnr!m>aSu|vBua2g zqw6{xak!%~rD<%StFI#jl3&j8x#R0d&G{uggp;*h2tFF6tn6X4J7y{(rwXS=8qXF6kl79fNlpEMNK41=rizlj+0gwbqA=f#;cCupUzn6#p zbJ@SO9kA!I0Y?GcK@;N-9zge{0CZpbuRt`u*F z!yDc)w-O0+!UB|tI`EoPK$EAL%Ms~~t2KP5;G*ma|MvnDZ8Jb5>q;yUDe}S@Nn3b? z7%Qq1w-pXcQ2S2wVTflhtUZO+oQ`Y*`)AatOX-d&YRPGt$d}BYAf6E%S3{Zaff5) zj{1f*6eD~x)?RR}gv$`l$H4aIG8|Ta2u2AP-1l52MO&3@crN`>o+iNCORke%+|%JZ zf+lW!H3To;R54RLO-^`w`mvCXGT^<)CvdII!FTEloHdG>P(orRF_yuchlpuDO}aCJ zu-Hko7g(nO+mmdG>n|ut3+2OfMwAPkUiXz1v*PH%J+pX=PX9jj<0e<{RJH(+#Loc6 z)X`O@hMRl!lxhXmfmGLpX}t}fhH;cF#6Keud{62tT|%FQc^nkOoGGsc6}nP_n0}F$ zXI5PxlBw4syDVisO4ojtd!um&i#KO<4T$rt!*=>q3Cx{2tI#pMBYAB2(iVxt()Pc!P(481B8FtGp#@6}2|p zU>=(BGS5QSs$FLkabBwwDm{WBYPFt2L?Q>3zjR2yH+iajJIP_MOKu^8NqJe!75nL) zE!YRmQwY5ImcEwd8&$QGf%J^4Sk+7qS9TUOq22o7=#*<{F4_h(`cI3TP75J_vRY^? zv~vENkoPjj2)~(~rvq`G2j!f@qCTIOLd^;xKJK08_c8T;TW-vLmpxbyQXS}I_l}VK zten7u+E?M(XdM~fry5+OK}c5#0alm5+B`GtH+eNLp#BME@B_XzZomQ`U*|!|l<4}v zZ#Ii*aXpXPlAqWarl}k!bY7j%uaGi#ond-`(Q14;TUrG?UEc@vXNHni$*NX!N_qJr zk9m3?3Xx%7i^x*lRABxZy`YfqPto1rQnp=9YQ(~YXZ2z2q+4OR{aM6~PZo<>EiVxJ z3kqOU?8I_=8YpC4{c>CP?o{ZqWUfBA^vFv--^yP$tHobZ!WjDMs{mE0ctM#%#xs`` z8J#_avrr34ZXx|bZ4h?&)k^YtXauJLxCldF!Np5dy|rUF4DUJ%*Ca^;|ji_LgqXzfEuGaWCh75}(^MdOhvAwz+Tx z>_-%#ndP)IKDQ@DR!hucc;lU#a)(VR7<55I2R;<|^KHa@?RL!`3CQ$|^>iw0nc6*J z%A9@!E6p07-v!E^UuaEwnO?di*KvD$v-XY4#3JOag2%-@t)gRv zb;%0Z7j>k$SWm?Dn2;#hD2! zV9-}2mk>liQ2t79j52>DK2+XN&)PjFRtGQVR`o3~KA6>s50 z`^eo$%M?qH{{7|SMy<|EfjO7ly2M!eBFdzCmt8fCimV|kgV=-^*?YD8;}?0EBba8D zSHQOAfA=psAt;Z3r?!Jya5K`PNBDmJ zC{0OwIEK&

*qsxOCs|MYD7fp=Uf z|M_zN%}sP%v^0Pe=v1v9*Uj3PBse=gW5(gcZS-9-Ohs(50|Q{wkOeZ8WbE4iDSo*Q zUtO9eVbs-IX5WFC+YMjPFeuq4Ht|@;f~$3o_FdhJ6*nyge|Dl76Xv0Po0Vhc{HYEL z?zDp73lR(J4U$Tq_4fkYCe>udFZ=81?R`Evp@-?mZM?*a=XZ7pPdmMNT~(B#Lzyz^w1XTOVGYuD>E;>^FJR_l*A->LG;4Dl$Hy6}>E+ zimY5G!>?rlb7IB~2aM7@V$5s--l>TK^f=9UKZ{4Z`^Z1VAOgc`C zg#%F!%{;ZnG3VP^9o-T^voGOY-P1x6PguOAp56_DdNEr}yjm}o1z$N(OZeX6$*dni z_7v@&TgXz#T!|1FTD&TVEPyJ3HRec{e#He`OPA!L6exOet!Jx}hJSS;yf zS7(3pp9ZIPG0*djlQBO_nrYr@pg9DJ*JoW+)0gGy#Ll?z6}&qqnO{{DZd<2GtOgVc z=2>`ZP=%wri_&Hf*J)I_A_;Nc4~`dKbt@TSU7tiYhsX3NXZ0{YzkJHn5GeFqccnKM z-&g9*6(zNIggJW*LwQuePTl4Qo6W$4+bk6dSBz~=tg%wK702b`So?t3<0sfPk(n0oFmG}DC#!M%ULmxWG0)xYR+rLnoDd( zO!udrZ*dkx{Yl>wn0LtwkMU&qg-SjD6DSBD?e~GnDb9aXSjeH9YIJ9>ed4?*u%Ev# zMcYZ1W5<=@5v2KuV5DBpdB{p;^-hJn|1ap(!H3mHR~)s&EBsOLrNBenT`QFj29!2) zudIiLnPUxkt4)rU+FK1KwR(ih4r5kRZc`{MHsWneKZz;?>~FkyF?7~T#oYml_|~d zuyGd{5@`r9AhLU^Y68$HrT^te=qj7;A$dq`QMhKj9iJIfl5=Id5U6{L^$IvzUe&}X zbBGTYU_%(*vmxBX+$*?S0cZSSiNubUr^+-YNR1B;ku8>8aO=;~s?w*$uY5i#=Y9N4 zpX2z^BWz^;o3(jn>A}s>T#Xt^TJ)J&GG7Nw-W+1$Bdki4Ay`GlQO9U&RWwr{hLaQX&S{ty6JYe1^pu-eOl~dV z?H`l%O8rPG*pVbr%V^&Wl$p#+6d6(97Nge{p-jv=0qxH}I z6oU=9V*Y|=&^4Ddde8AJv0qgcNfvSt;$o=Kq22!NVxlYOgx0ct*?Wl;d3-fNpdVU> zvdyJxw?{T%of5{%h)xYVDb#%YSl34rNO~6}DPo%l_!?&Pbc2H^*q2`!LlHzDHgIT; z;!rGJ7?jd=%BRJ#sf%4XkxocePyd;*i0W6cHyW9C=Qu~6eXulT2=W}VKlwR{Eochs zuBAWYcDERMk0!f%<)kl-l-MYWy~# zoavr;Fl-&(`>OAa3luPQVMIy!PqSB#|wew!M?4yQg>=(5{*wv}B&8X=Uu zfuAl-GrrG`c#{LUj@5=uH^Y!mN@hzJDc7YcfqVMjAx0}e>!*7=0W22)!5*MB2Hcw3 z-&@ftzExJiD%uznU)A`~r$X{rFEp)2q zd@qD{$Vj-D7@{(KUfeq=?)pM))Q^yhZqSEm5vS;`Ta>f+&Z^hscB_S;o2TZOMW|nj zpIS_?{NcNsVWOZ~ejEdO%odSOr|)BjUk{!_69H6mjHt`M3H1oTAnNwb%bI~rLz!`9 zYQ5O+B?o%wdsw{61jAsUH2Md4^Swo)$l7a9KXn(Jb6uH_8?CeTagm^%sp78Xw}W19 zASo8S=~XTE(d>*+o%0B#XWLu?u9~ocI0K)`wH1^Vv_@xU*O_FVcDisoZk15$y9Hb2 zR11Y<%FE`d#b>|LUZ%JV_6?r|-NjgX)3_&J6z+w5f%+%+)~7fr%-)Vew;fQdcAfY) z$<^|abfK;MP?WK}iJGv;Aj=T?bnI}A*1X31xWVX&`1P*fj^6JdnJEx*tC(7UzWgEr zq3`(`erV!#st*l&`gvr9jhcdnPd`M+x6dVnvcM^FYTV_`Lp6Sx*xt^VLOIKnAL%a* z=k!|@w!LVsNz0CY*^%y1IE*v#4C!H1Z{vJxC3I-!M`o~E-jVm9~FFr ze3mhkLR9mQJa5cfN=JqHP1E-{smAnIf)4@ZGWMUTT8cE$52$o{C%yp(euPRia9Bsm zY`bTlX~c@DQs8_Z-Tc@c9{CW}^HH#GNGqMCJFs<*TqAyPK!uDxl#vGaPw8t>GhdVW zJePVWiXQnWE^Z18EPrV#2hG>Hd4n`u`xo+|>tngGf^EO07YHeHiW7z#+IEyf)Z()u zW$`gN$j9^>dt8Y$A!?CgsNx9^?`G9^Cs~@oJd`l54rU=iDxS8jY(*B*O#df~T@J&D zx}&7t8@5>GTWL`bHR8Or*JOg-Z*jY{ID~r~KgIp=LasnII`Ubbw1Wa>CUwR5hdOkN zY(8qI?<(^fwRX)*AgCHJ@-TRgYRjGraj@lky4$`uo%&45zTV)QlV01{!^stys_B!J ztFy}`SCy&towmmBjWw$+MzoX=^hb%q8=_`UnowGWxLU#oar-a7Job#~NvIp^Rf#uP zGt%WYcRJX9AZp)&l_xCt%&DI#jg@sBzw)u#&W<+EwWZVq@eX_bqh*R%)zxXss;Z3d zn~vOAAbZwq$(gXcA3wPoqnH*(Mq;QIs}|?_FnC@%MD$HiZs1>s4b+aXz4%NoyA@9h zaS4n6a?_)Pb!*Kvep*JcouB?k=3v<*_Zd~UXLv+sOiWDrY`IUT{8Z^+om@gFHuyZ^ z4~8v05q!YaM&#GY1z|u@kxd?7W@V(%`))hl*q^7Dhl@?57=96x47sW^rNnzroMRT6 zUaax-4S|h3Zj&)WZ)XGeAfGF&etUy0!f zYoGN}ySB|%1}GE6vMRi#qp5}fw6 z-F)|6;fV>727l18AJUxuK>?~6R3 ziFbspm3~7vLQhL201Z$$YpXS+VVfBM{va}QWTt0&ujX8e)-IqsX`&68cg-SwVoFpfkdhl1}E z07H5d42WF3Lj)KY(5)wyh+!LI(>K6=wfNZWs8eqCVnm^TmJH#%9IiMoepcmPvaNhx zDoh@xWeK(%eS4d6jjNLM(TB(ekL@Itsx@BSl~MN}h1r_nTgH-w-!Cv4ivSoSN>z-w zXtCiR=InLSQaR_9t!ymfgJEOU*REOZ+blHF(Va6B3)8!y%>6J-{4*B_4j{ zeo>6XFz%c(809!EP~yr%Su?3u(9s zyjA_v7IeXwBz**?qp)DWa)da1Ag<$e+6)}FcPpKlKQb>e)=CQIDp#i2$_&CB8^Cm> z1X~s~By>o%$$(ZKggIARCx!X*u`(OEu_f#NHW$1sBFl`$`gSs?)x6&MtLW&`9e}sh zBz#}<>?kx#h>8`)jR2@~@AxjQr%^VA|X3 zOYA~PQpLFAy)zbjS~v+EdmA&BFmvjgTuGZ<&qT({2fWgE!DnFmS|x7Wh+eA04WQ{R zHhlGzCw)@;Ih(CS`s4Y(AmKv5ltLmr5c*aYXv$AmfUQnB_`XoaDm>v?2&=G%-?AV5 zH^VVKkK+CMej9aC?hlPVEV}fX89bY~8EosZ;(+$^YzfIGv&ZTfODWDz7Urw)EGYyY 
z;pR||ACj*A1(ACPO)QO3py@B6|4PQR0yGUGmroe)+r|4?K++J`pnWSMsCDe{H}J*G zoVYCp;M2TCZ(v4xQ$m)H5n7xW0=5G1<>?zfZN?%)fk6wP@Z;Pi`|{!!JA33usIjV& zZTfHr>?89gwAbA~or?l9-l}O7Vy#7Bjjf2}y&hg{k9vj!z>nucjCEmRBkUBYdn)xK z$fRh>Tqr>qT>dd|QpP54%y;}(qs;Uj^g(^3H@-CM^I_}nOl_Yl;k*Pn^gjHxNUQOW z+;hQ^Urj<9?T&9Y(zj~f@r?8Kg>{0V9j$)~EvFfHFN8#2Gds`19PpqGt-YE)W$f2B z>%+@41>##ZCeEz8CxK(e;jl-aU@r`uf#FKduFQk6^!DHha{3AIP{yvzsGfJ*;AgLc z{uVHhIi;^P=&FQ1{z9&ouHOW`QqSns45XNHG@GQjodudelvV9 z@@i?%%!{Z|LLdJY*0;X%+f%(hXY6Mewx1I^b(48jB;VZcm>-VH%D?^>m1Q7Q#Sr*# zpEigBpYa~Jq9qsQ(Jl9+Zd~2;B@sW;7^ltNDh|O;yzhycJMQLUrH!Xs5ji#mSdTug z`9A-z)?5BANKOWWe#Il1?hWPVZ>cOnWS68_@gL-VK9*^JWP<;iHZQ%v&fCG^8GRhy zLcI~G$;WX!zHbyb4Q#edIHKF9Qb~QLs+k#s6Fz*nAt5y_{7Bt?T|v6!*C5kS^}-&& za7hpn&z_atS-ALgqoG0Kh1(Z8+*|>#f842xw%Oj*C3Ngw_dTvUO5omFdHLwTDA2s3 z>~pRa`J>kokpV*88nxr9qbpo865ZhVdQ0CE?ekuv;6uSB4XXn41pQgZF5`fG5XK00lO*xO676?7 z;})z`6h{dO8Ttp@{2Shi5>7TrcGKO<}{ISP=C-hbU}yo8}A8q&aEzkH)f z3|mW6`3q83zAtX`*UqrD;wW?@5S}9bKen?5z;+hE-sSIQ%4dH;7I~j*%`BKu99qCP zco>x*-+XD%n!_1ls60uYJ1(1<{E1H{F5)d9^T5^m*L(1d4$MbML|p}tf&ijuB;fBd zcKA5N$Ev>z2I!h|wv$O5uX>4B$rb-s0z^LVu8ZN%Ps#|7nAB0lFv z`Q0R^kAD_kFu{XrsU5z#B903PzJe|bFC@T0F8Bah@)X8m<>35E=fpbI-0!Z4Tu-iH zTamx7)t)EyBaFr56!ho8R%oktm-|M7kGoTVW1#yWVRZ#jT7`&AHok0L(yAIERF{PL)yE$v%4oTCwyAZ+|}Y{F1-G z&zsV|XAAfkB@K8c+pI!oT>;CbeyqD=OM;2I8h)GQe=0^CRE8541H#uaV;3vynKB7u z|0tvyQ*9@#g_`xhz;qWX&=cU8=S}$0hnZqOYr!o-?)^{1AkBY(jDIVWKJEvkEU=%I zO#F`ydct$oF#d{U_2 zR?5h0!;Og%l9*ys%ff$4RnYo32l3$8bw+0C4Jp}afb zPRw0N&!H59U}-{}DJu4df#@3#>_NBXSMw~3 zDzhnCC?W~1A$2Gza2ZpHvC-Q*ETY7ME1+3R{FNv-`fD3%fxWY!khIu!x`(34Drvng z5&s3Xm&9wH1&k3S^`vg><*TK>Uk}Af61mVf5zZuRlb)JN!%GC(WrNi1*sDR#(UcZr zQAg~*H&A3)p2$8 z)(BLIyB3n%E%-$F@PbO{cNOciABTS?IeJ+L^9_!H@{70S9o=)>i8 zY1%_^^O-9T)=m<5I^(w^`9knj8Gw-ysB&~IdO%x5Gmb?Z(1eceAtL*p-b8E0YfJeV zKAv{hgm3RGO|QG%gj0zBf_!68pD*nioNlNGCEY1uq_*Itx@xQ$x%@=>fHdk5NqdVET`9X3t1qn* zA;`(^*J+soDqmVJ2OxXW^i5bbp|fMtj2ZY|f*WRQrpOLh-iIi-4<+p8D$zCE8TgHObuSUTLkNyZ)S)|sQ{!t|>4#Xn#178Vy3?VNh|@0lUx zTG!P|k6RZL_>qFVr5)SegeKIfV=G8>m`#R?i`zd-Rq@iMFEsSzT90fk(wqHTSHy>q-UN8*^?r8Lb z1*mI9rMCo&-}M89@t`{xhFFEg^Fq$!5v_UDOx8GH;LzPvQ2Q}r-dd(LV^Eq=d&)b4 zzPfcShJRo*NDK*7YrDVK$6{P@F&!y;%nxCQ4yi?(vM% zT5NT}s)YV26aqdc$Z=K+Aj~{hlFt;i=TfLAt4~GbO%z=4zFTflxYP;b{|kDTL3Nmm zcAoC&Lesyr*Kn8^GpN6lXCFp!!8YG#-WYe2xCv{_9&JKT`B)v(dlpzkyA6B`(HaB+ zo3d~vOSh3GcIdCxKRNU^-#v;7Uc9R`XH@c*B%$|0%#2$DjsgXwHy&z?LRygh%Hq1O z>G_ulGr+DA#ihK#GQ*dVFK{P!0No3Nz0eHrZFQgUR$`qI9RZ5rVe8VpWk}j&Y;r`Z zmVo5pib|t9H~28j^>QoOjpS$Lxzyra(M(FRX>t6S*Hr7jr4oFQVtn;{SNV!9*KS8e zPhYIv7jEn&Ax9Qk_o)Y5G4<{7prmEIPIdQ;Vaww&lugDRi{$Ts>mSpo?HL%Cj9F~7 zn;00g*-6uPewU|1ieH3XJh(;OQq#C>;_dECh1f~T>tEj!`D0o-pzM8Z+%9v0$8a)w zJn2^Jf*gf0;rvLq_;;^1SR!*LBZP%6C<={&=cH?&2k;!?v>`C)T?B_~jq+xjTO%3# z`UFp7a`kwqrKgWez7_92)IKORi6IN7q6JmPiI%Gqv<-vI^VDK>LnRk3o+O_T;LrBR zl6Ee9^a-Okm}HtZ?x&-CjFV$5e)_*E{Gzv ztSBR`3Qnxav3D7#)qnqz#eY!yZP4X;-oB3#SvV+N?W2Z5OvZe##}>^>1D+)l2Gvz`{XRDJU>`dRoNEg-8NphLZLl77YYOOYb@{ToeEn#jok zuXl>*r%mh7=8(T2DeRJ(&??a~+jTK|!vXaY;f!@i+DAg6Or~1)FO&r`Rpu=+l*r^P zGw5gzQX>~@Vp30&>6yh!+ywYfQ|rT>FC@SCx)KxKE3K^*S{OfjJttTm(ae)2h>)v= z9OgsgMQWZ>%iuu+eKcs|LnNd}rVCpG&H+!TMBLUxnNH;7mt*l@_-5p$Ph7^HG^h4i zmWwcIb`v7|1YNUQxEPtJ2EP4l)dG89!FFZdqgG*XF`(OcTQRv4{0i;C>hcZPU zc<&#q%$aU4dtrGJidj(p_#H*|}75WW8uRz@OG z&Hawt0Jc8lf;1398>L<9^PZFRq4#%T$&)?DQw|2l8z^9g0bBpk+19mOcwl+*I=o`n~M zW_oXb0fgSRvQ(Iw@-6TB;cfyPwdsP6R~}Nn*>3y`a#IcUesZ_nn)J;_@>tYhMqtVa znmiHqD;w2kql@Z~M>iB3eClQgvMsf|yG>`t64EOU;XR5>E=6iIX=-oaW3U>c=F4KCaJ;^C5V z^;fJ{wq_7niaQHSV*N-zA4aPaaQZ1QGV9Y~+=Ru8vsv!7H~FGetkTKLwkh=$uN$F5 
zX{a(cX&wM>c2@@$>vEk{!bxa$OE1$h$R6aHycW{ke{yvIY$>C%?j*jZqiFLh9<1Jbp7B7JsC-fJ;?#M3TKN6E1>%IEgBFhE?XC-#F4rlU0){nFw`SmrqVYA35Z$tYl8dj?Ur}*N zPtk@G;H2zD*th+V>Bw=axcrvq1aq{p>gMh#`fGm2!`Q`cC-{xA&mZ zXzRhxiqzkIFCpv|*07~WNpx_Mjh){O$N3iZ)nnz|nnvR-l9=<3<=NK7rh2>F5;Px6 zKY{nG%I|PouO7Awy*Vl$d7g93ccyCS zR*WBCJMj;!nh_SKUb1FK3}7vIrQ)G0pY zy-r&b3>owmjzU_HqO&6<7lbm5x2;DF#VdNK40viU1aUmL^Yp&06Ic8NiIq`$c0sdp z;s$^xEfx;C<-$lWl-Gg>TMvFfrplOleMvJwD1#|!~=0Rr6{ zpV?FG9CwQXm@#0qFW@W{Rk1xlH$8qK^@Dd|M3FPuB=w_E>jWGn4n_Od6t^}b)gC+q z2!l*$803bHcIeR$xRTp{B$*#}RIf?0_=Y4Rg(t}#ypjQQH>?mrSd8hmRHa3qGUHJ(H= zikuBOj34JN`1GP0czYok9dXd@DsNo;>u7m1>C99&kA8F`22yoFh{tJ1VMNJG1ORvS zf`nH-oh^hx-s@&LeT||-j+0gXbDm?*!u^^G*rNoi-TuVnN+p*u_4&ceCCKae#&}P$ zw>|g$JSEIlG3GCbedlhj#*EHfuD{xA&k4X^?pIP-e1_&Cy5TFDvH!b)&e5)jeLzQ~ zp;r=}lirhSr0^DC`bfOz>Mi=5D+j34EeEi+pgP%4Z@^_A>W!NL1}(RlU1+2Z9C~&P zjh6}g{rScz?!8-RfLQd=r7uh_-t?XQ2LPG<;`R00oa9BD)9K|}=(Qx3d$GA{O_T^C z+*>C*PP@`3Q!h&hO>IfYFjFsh#kV0idtThODSK6XRMNKT)xe(H2Jg3BUR4OQ>La|- z(b(I#QfwV2Tx2%<#1bR#xFIUY_X-hhnf#`-`e8EQS~&dSJB-%S?B}%Er=(I^cxiWQ zbk0mKm3A^V%vr>_7#VSt4TOlSXiv>CH*;w{$bN~D zx4r;ms-m+%r-O^AlnY=4|3iohw56V$LA;oL#a&uelS7Ic00!}1Ro&(0=-!x}xqvtk z;QvQGkn;$cs$~#^A8rD+W!B_aJXKaxFOsCym*t6hTg3T3nd%TrVvzhVh`-3Ra?18q zexSv1551j+rJRzXg%l>LYw1 zOhc0gZDQFq(1BVPmCQN8fO_Nun}*B^e!%Nx&=y%Np@Z7 z1U5URWCNx3w}bv8hp0Dd)}*2uFSB*I%bGa~=wCM3Qc3teO62Cm7?0Cb>l79g61J&^ zXeurb)fy?sSlav7^Y9#;r;vu89GZ8tQg4T-3cWS=5!2Cj2bEe(Q&BjFBQ8 zmI~kvnSO9sW+vtnRFMQkE(v_8DEQ(Wl1BUx!V2w-Gk})wT_qa4IKhiz_zOzQinpl@ z=XD5rkSy6Yv}x~-c5rO3THhI~$fe@?x`NO!h17Bi^ML1XI}AP5KB6s`sFUyg(-JFZ35h3jSZzT^u#C}-@Il}o zqLY1b1(bWmn{kAd6=tkMZP}-U4}^sVoGR)b8+B0f(=xovqj%{Pw<5k(pL*W;7;$fu zuwrzw8b^CKtwd=a<09QMw29|}^3$VVXPeg5)t|0&G)nM1lr`d>ceKZop)?D8c20#M zC+r%`Tk_Oj=5i|L-C%tYHl`H?ye@_#kb}%f!)El91k%IW-1rq^tPw)Zai=ofstsst zxHLur+HylMb52Vn-fc`6==1w;rf8r#yjeCJwsLBtu9$T=JClt>{(?YrLrAJlNqS51 z5jSV1Yw0Ya2@3iB6NW})4qzz!7X;4g@e_cY9s{~Iee+oFm)6fY(SxuZYZx(PEUtF~ z>#oul#Yp!TWLmb&I+xggnddNr7|>cr>k|U!gCM$QtUjPDumCmv7G;$^d!WdCpq!*B zG&cRVENBGD0@wfsuSwocB^sk=GIsRI!adZj{UbkC4pB^EZAGs7{1ilgA1|E8t0Dqg zNEx)zFR|teXd2_P)6Mu64(0Yl>F{04+v^6xi)O#!eo^!7wR#0P4_}{5b!AfkHurd< zy9b;pL6jYpNes9n0q492`(l=(W7ec+rgncpKEC;XLB`r&YTuxD=x)fpdNobG5fkIG zScO=-EbXIZBCkr&`-jjn-ff^$dv+y%o|J)*+x}vz8N7(T`=@@26PBmt%_<*vv@;cv zp{;et`g;2>XtCuNBrIc#IYc()+HMo*@ZN>oy}Wv>#@sBBCGM0v=Yzj~qt3R<_PBM6 z@*8^ZC%X2hb5r}+4Rqx_EI<`zZb^qWU`yE}0Pjg=iz)CQYx<+uHVv=L?M?~}+jMxm zr<;^>8-Wjkz`Uq%eLvza=%EWbIAvqkxU8%R*5rU@m5kO;?Y|QnA71#~WLWqX1q@s+ z;A6{JV6e!-x*^{KU;1B+&j9}$ZpfCTz)+K6Oa5Y1e&=2CH}{|+q2E-BcjTpp$1lOx z6N<-|cixKCfsQhD4kG!2`?QebT} zFuAYS6na$8XMe{5{PvRH=&|}EX2YwPkTb&7qB+>`fZ;czw50MVVLQydexTZ0E+76W zKu##G+PGs0o4~h$F+t~i^LIV)R`8L6MUQ%yT_#YtLT6i<84 zK{Ij!;e@#>ugmBzwPfv=E+5V5)=vQ^E_MZDXs-O#c|Wkyvz8<=9yuB7Niq?y-HTfF ze*9~PKL<4l2UK$HOAy#i2r#95(Sl4xnz4ZyEuiEUrs!R?#%|P#{(~suvx>3Rzo4E> zSO}8rw$;io_XcegX(mE9;JTiPu_MQBYS{EDV0%MdXNBCh-qZ968zBLqDk2uE%$x#& zUb)+;H#d7~Xl!Dr&mmvp_ZMW7a%ZtuY)~N&lu@#$XqS-7vqh+mmutGQ&73rKO=I+j z6xh4~>Tm=4zThV8ZOI~}`K0xBqz87X(|^XJlWY)Bw8z=U7S2j5HLuJ4m}XsU$EnNm zm9gc6Q~H`p*j3qkKuxW3bJ>WH9f#sr!{~ItD@XWDcb)pJi!tDfLP#e$())mR?|2Rt zlau!Nhwg9{9d6z{_=*f5DoDWR#{+&77PK->k-94j*gkb$=upoc_*~?TAOTXy>iZfe4sBD2 zHgF#aWfL3mr6(_+o)kr0B@o;d=XD4DhB~+k0sM`a2X_$)2lYoL-fo4>-Nr$tOe55m zx?jg)9;wuDxl7iAx6btxvks&En+>?{1($ z9_2B4^V@TdI&X%AT*dTTtdb3zEm;w#{80_5KQa}~c9>9f8vk7YyVaUoV4;{a|NC%U zEU{vH$nD}BrBbNKoBTANAV6KV=ehV|A+QXc{|jO$9BuVrhNmpbW1$U}Z`k<$aS!dS zV5z?+zRP8_wcy0X{M59nXP5V8l~@7&;7hu-@2N8Tn$(Cb)U6}UAfoBSwVvEW@Vth` z#IcUH`;Gnj{RH#l1owR z-r{{C9n-Gm<_UB!D|r^CFL%msj8?z?{`k>ZjBE}5xW9*bm*avn(UUq&MPlS@(M5UC 
zmq$eu}XjYK)yuYv^)!z*?0j%J{tj@wsNsz3j$ zrLT@_>i_;9tw;zCDW#=B>1NdE9wi_RGLVuKgb|X`&FF3rr4d1p?v0Q}x|PoD{kxy< zpai%JkNRdi*9VYjVG$?dtQjR|B~UPxaecfC9M&sHuI6JL6tdvXpjS4u@?mm+31^== z5I_CUkJre!~plDHbPy2uu>BWA#4%;4}^c6cY`bM0-o~x z>eU{_uOs0AG~XDBv?uTP`lv|gB}0fWCJ3Bx={c_|t%9sh;<^(oB(YQ;af2=_1HFic z=6-I!q}<-N1bnV7P;u43*GpD=Pq=Z;7+tS$lG~l2na|M-(ZP1w`PpJv`z)NvbCeEo z4vRquAwyZF)uYBdnI)heE$M0W75wISf~wU`@*;$Tpsnw9I9n`Ku!D zL?S?5n4Q+eKXPd|Q14CC%fWD?TC;^4=>vI;zMF{1i0L2?Xn$RfbPiNl$Mtmb^dMc* zeti96VzOwu*h8nR;b-(}A{N(U8sj`JoT+3(i(B3Q!qCnm$8L>ECW_6t`dR?g`rYj~ zxcHnsq7nZ>lD^Q~H-QL%Ud1aA42khuPgEQEv)F5eJ1husa-tuSM|Ut(_HMgI^1dHQ zjr^)Hn-sLgW3)iB5mRgX)GA@mEZyBMmRM1SygTXLZd8 z0ggzUDE(8Lndgs`fdND=^l)=K0I$@h*+14Xu&V@@0ac^X=2 za{o6q{y$_=Z9?eqa&QB6gmDjN3C zpFv%@s!vmi2~`?z^T$($NvH8HKr9ZR01D7exRPfaMtVu{PXYs;I(QNe%w+XSPvfYqZAo^GR(s}n&U3o} z92O;eG_&v@Xi*N4S&ODUFA>OB|2WDUAK3Vo(FeP8VgNxEOKMHtwF7Z8L=I9|7d3FC z_tih<PC&kbei%Mqh@d0qf+*ocTeufr&llhv4aMjRq?WqPTCNuI#MuWjSx&kw-;&SfY503~Jy=xRq0 z%qB0Ks$PEh#1O8P#`Y*$=tw_FTmH;Q%h8m9XX?lEB}K9Iyh})Ey7s7x}u`t0beCgJfC9lH{^S7t<1S=dMlY5HP$#paDwE^;HEPZ?M+-t*?Tp7MHX zdyhgyVZG2ClW4>jCPts_o1hGQU@67!zPy*ry}h+7#5!xp&nLXlF|@qfgsCP-P?QBK z#8^-98z1k_0bsBgG6^F~sU$g^c??JvoXAhD)u$N0YmNdGRpg!2Ygpe=3i)O265?=% z-qcwGeJ6vhIlz$05L8+r3rkMCtL~Not-)t)$EKga%}ysh_*?|=)=zbvpQ0>;_WpI{ zie*I|r*3TFwKAlZzID@$kGrMotNb)0-7m;oDy|G3_N zps#aTrj*`nbDlqPGFavc>peoN1YVB@6He@DWf(W{oT-isoUoE_bANW0^kWk_C-#^e zB(V9z-yfrQ40oY1_oeax6F}65B8|Qz2Di+P29`2z824Q?nAe;zNQoCJya^d1&BUW- zh^QJ5l9L&rV#}cT{!z6&Y{`J~ixrvQM12##@%t1bCQbZtu8|P@ynT3-O1erZ@$XP= z$fxxjSo=c}2llZ4Kuq+I+f7KCq#=ugwcc*s6XhmY?2X2K6+DM}V^Tj*NgzZT!`)!c zx)UETlWrYL)?^`{=pXM$ONl`bi9C5pw5M-XF;S~*DbXj8WKt(mi7C+8cGkL{Kz-wt zm@S{U|EGg_Fq;|~>Qwr_n9XC4@n&W83Usn+8{$9}Nx#&-c*Vp|4nl^vY3}}VC{P5YGNC_JlChokM-2Qo5kYx)8SX%d z9pZa8?1aN5>`WL`=ma&5<+<{_#?g7S4~V6@D{$?>YlD(svEDRF$6c9Ys>JbOjNu@0 z$xAj2jtEN&aA9SUmU{X;FDvf%6SKI#A-u>d%j0(16MBXFDxr?CzYEp&kFG&nEBbjI z2{Unzbjp~27$BA`hx5Q>S&ZtO1-8?HPLBO1o_c&H!S=fpJNeX018z21-HzEebK!&E zyS6MaCoz*hvdhMa@cbDk=OVPo9&A)6?3ho^>R*Uv^nCXBTYO)P0+K0Ga_-Z0wi{c& z**C>y>#7_l-F3v1E^VggFUjlNB0F0FVdI!rpQFu{)C5jtMju_tK+>}ZVT-(4o>*DF z%f*V@KV#mO|4{fr)WSlnI$UiqXIuUY+hz~CS7K&Ce@L}Qp17YjGsP(Vj;(qT^H1A{lUWpYs zumC5xrjB9u)UXDnUpCrG-^;?xed8kU^^JiUga13r$CuG-j(!99^?NufPV&Fu-E5cK;XgfqE8b6qBGNlYHcgKrR-xJ;q6R$JV$_z|@rQ@_OJe1+Lsn5kw`e*l- zzkbMabnQDMhfvQ_<-jrc>`PDRmrSrtdW_-|+_=U@T|8pqP{ZzO_QLE^VGW^@weyPT z+v#u57*z3As%Z@{OD&Uzi3922<&D{5KIUDjpmNA?Vx`;n!@x*zcUlRQQ3&TkcvT4E z{krocDnz18dZJt^SMdGEM<=r~xQ5Teh$8Dj-N6HmDTHgE0${UTV;^pc%C z=REAF*o7xH(z@7Qt2wSQIMRiEINybR!56npUQg1)n0WeK-n2Q%9%X@bm~okU)p{-E z;Dd_6%BU*zlFFmQUg;7$6`B#of44{*Y4U!?Q!!xd_$eZ>hLR_suQa>QztVo8e{IV% z$6accqL%xkQm=Jusot5MdYAv#^!_LxDdWofSBxj@K;vXR5F`s?PA7S0WT_LbQ!GyY z1HHrp7(S{oNIXMVu~_8KmpU`gf5_}TD93`)6Si+uB=xCO5@h*rHR^AbvYDTb79N9U7-lh= z=%OOw(>bP`(dn5_N6Ni(x%ZHG#qcw);xsmDiPV3kRm%9D7EE}chi3_+20DK<8+a-$ z>zi#MW6o6%Lr_U6zo$#0G|2i+kKo5L14m` zUK;p4^O;br8|gliN5x`}{~{3A=c}In+b$ErW;v|>BeFcH`yCS(Ec{;Oi|4n<&k8d~ ze;MqYsd+;l2!2_kM0EUxiNrS&iO#y5w+&!N-uUT#N5W`i!a1~MLbTtE3)G6ZOQ6#_ z?lXuHd7S+f#kmY?7&d_2_-1Tn0|V;+#jnbOcJC_#I4>C+Gu~g@KR1lLC(L;+qk2yY z0T&oGGetEbRxQW#N_CWpcHt%!cfZIh%hErc>#jn_o>B&-5At?`pz!_wac?v`3!6C%=Z z87Mji5g-DoVa4gEK#G*2Nliii{SRV=2J6i-LeH441W^Vb;?2nI0)0h3TyhiIaQJT! 
zv3mh5xCA%G9UPj+1zO`;gF&TXG)1|M{=|;uVOkFpD9W#((o7fD=eAlfec~zFhMziD zi)ZDez1KkDT9uUF@o^jCQfE*7QNkahlaeuUI$MSs&num6?xTjA2*n^nE&ti@FPplo zkJhykzyI@8tSR8X(Z_w~YL-G4pR?Iuz;)8gI=}o?Ii@=BA zSSl-zy)g5sE2kmfxpPr}v`#|`HT5Hl$Up0{bBvu`*fEQbgo_~;b$7jyVtR-y$ zz9X2VRcr6!*{sOjC4bUc(`Z1@B$0kB)Y6C1VWOhKtvi0Va(CV$a4%GARsg6EJ-`|^ zzL_G~JIBdf18A47JFa@GfBP;|Opui!jPrmOA!eE(&l+;QS8p=aL{l`yN9`>1;p4`E zx>}PRg-AJmn1$_$`swU)!>m4s=^HLOM^BAC zcsY%Jzs0oCMsD@F!EZ|rm?(}c9Vf}Loerj->HQ379=9HZB;C%&nzlw_3>-tNetOP) zr{xm+tc;$UWzle-Q~?4p6;G(UsVI&L}R_l zB;$mBv(LP<0yHPSk7Yx?BGe1TVrp0h71<*9LT=|W;*^z-3B=96dHb7Jd>lO&JNaOH z*J?m+BvD(-fAQCaWL(+T7G#_z|B6>?uGG(JN7Vuni%M3bdOBZcsyIa`V=}ED!E_9` z^1xDbHT^^$yc@}PYLFre^q;Ziyt=P4E9<2zK?TfK(x%X6v<=LL262wPcQ#c(Ob?RP~L zb(^hfv{!wo$4=~M?1)_noeP^lYo|M^DssD8S4&^!@6%&%in>jg)k%%c=Gs{INhRJY z-aFq+S@G2g8Q0FmjKxhSoso|>xkQd2(}hjt`yjYU@EO9WSO4oMp|Oc2<}AR&O~D0> z!Zl&EPU`PsE5b8_iS!+xKqZXBUi)S|UI@1T`1G!d@O8qMXUHz;6iorpDH`kFy7P~5 zde(QosKv*dpwz9CHK*YoQR)~wuiG>;!Iy2))&4+`v2wR7LQ4%$#CF)|7MupEU2t@& zYrSgHO#qz$AuXoke*|4RR^bb&>oq_r_PzQ4$r=8c>CAO>M7NfttmVIoq4I7|=367d z-&0RQENcIC+bVWK&QAfaFAF0gk$nWo879&P$T?oh<{!pv`86r>1 zjx9|=3Pp(tp4z-!D~!49^2ZQKkTs*JtIWQ3W9DUnogJekDzp2}pg(h&6So1DH;p3m zH|R$~6&008$P3FT4xQd}ebRTdqq&5n39py2WWr5||0b9Ypb{2diqMZg5R3w7kFUk~ zb0XyxOOG@j9zPsN5ARYhFIs47{`@YPd`0jk0ekiykt+cQcXa(k{QSaaci&~Xm#$it zR;ivKuf-`tz{L8fPb9tUw*U$WJGep7;+WdpBRskywoo!L_efWOyX#479OPEHi)ou zIo04PsdfSNQ343yB(D&n1D?{zbHq84n7DDgq%PYELFIm3^U5XvAL$%Y zNg$a>5%yk6VKv(Fq&(W;xyu>$F!(=^G^|ftSAgIr-gc<Z1x9cw}`JOg- zEpT_5uy&Nh4+&HlzH|3HR>5a1RZwVI0b!KkT@JxHHv&7DB|~QFrzKRX@-%m)bC@}6 zjFTRHdgde1ZdgBNagbR_aZTiM$4HsRoAj_=J2+K&LfyJnx9 zFYZ~yIJMM^EM7YT-Z%1qI)ff?Y@r%>!#)(;&r|>J=YJrzFl=krnJExXUV66wTaEY9 zAy@RYMR;tDqp7#Fml%QebDb#37ZZ>7o1a|@dfv_j`{ldO9l!d8xE-@i+zea`DvBB= z4H(Rztq;!&S+B6wQg&$ZN~Y{fO5A+>VZA{t1w>o6J8n0bM@ju^*qx}@^Zrarnq6k! 
zs*e*ur&}PBPdW!NKHqH*j1@gR8{t)@nqd*g_ofx63NNU?1zQ>cN}4PB zy#4p6XSDPo1D?hKf;wuQ*SSBR`um5>m#07qfd9j`H#xnNX)w==QK#~27CFeTIiY5V zCPo|naaw*Jq5g5=OYL$7>3Qo{y7QR%cdea z$9=Se_iCl1ohlA;84q&1D+xB4Lc((-kRSI!hMn+Y_+^f~3o!;% z^tXsqD#x&)vzL^9+6LNdDx$<3cIwdAfnvIaTmFnICxlF!vmL{ukrYPkwVk!L;&DOGLpgZ{l zhOC7s>PL)f%=Xfa!pUpjbU_4aqG>{%4IEVMWgGK6iEGT!B$2bZ-EBKa=`(}xy(-zN zyl?KHoUK)|E(|Up%R9N}dmm2@USl0EO3#RDUkT%H%;s@0H07{?KjfX`l(1iwKMZ$> zlx_S2=*n@*UR+h;XZ?Mxpm?BPQ!b2C|BxUO@3gks-kdxQ!M)@1EQ4ywj4{<<(6N;& zkjvv3Aqf@UADwzQQCYe(8^c8+xH4_uNwfpBExO8vofUgv_sXQJBHefgisH+PPxCA< z2DY&4d5|Y7A?CgdV#zG}zua}KuhAcy*raPa2Q=e}CJojIXh0xI994KsAAD1*%P+_~ zMJjZO;%2t?=Z%)l5&=gV(LRrAV*isdW|2p1h-Jn|@N$6{FJw)*LM8P2XV-ciU{DGW zN6hATVj@i&VvjaR&EJ*+Ww=SBw%O#m>Z|C`$;{SDzRVAidj_#T&UecfcJgLo^CF_= zNG;0$jRGwO$!FLfh)X>S@fNlmk{whh?2rAqTe;=p!^h*_c*7sr5*y=P5PtD+^=Ls% zF7R=x?-Xq(XJzTAMgvVFPd#R75xjF+S>sZ|SrwyIl97xC!f0vBRlI{fycc6HlsRGn z9#9ELDJm7iCc~~L@uExElM^R#w3rxrPfN#yCvmAPk7HaxKphqxT{el602Iy15Z~dy z&{`m6UxXi)B~t^4V0qKM`u)P`fF>~WHSmjT3!~s9a0l$)1HEs+E}n>{3w|Z7ylui) zBPUh2V{(EWf4^EF^5xe{&|@OJm_B@q>oMahlWt2a;SxC-lIx0uVL1R>h#zjoxA?M; zt`O<@+Y0KpXy2DtkvsY+S`c;6TZW><7!S}TW12vs4;X~v`UHzfK#U`eF=h9=H2tR5 zrXNT#8(Pd=owvHw^vx;YRyVyHy|Vyao>-PlsH!i*E6A${F)!SmfrJoyzrZ8bg}&fp z0^6j*;Ep)cjZ0H<=HkA+bBp~m( z=$GP(FUliw&MC^L(;=vkSae8Pw9O}rMV zkzO9FOVCRApw`)XfK?<5v}x4Tx~CS~oTRKGP`_n@eTh!L>zSM4-b#}0V>lRdsaN-8 z8KP$;sV1Q~bf#D{q@96LTD(TT@n`4t59{yVSIu)gR|5*eX}O{_ zaGZW7{5>YI19@Ojq&}yPm*_WI!p&3CAq{PbPi|(y9-4XJ#wtlNtpyp#Tkyi%vP{nS zD1Oo99_OK0hc=I`>0KGQp5OXbQ!*gv4qb&mi~pKagA*@(u=`LIEGi>puJkqG)y8a8 zOP~r{ZQ&jRO_Si>WV0mTEibZV{Iq>}ywIC8J&VT$2_@@&)Ju_*W88QxfHUc`in1bK ze^6O8#qZFOX4T?Lypt$?aG=%l-mvCFV0m!UqbR+45%%aPf@C&YhIxtF0Lw^I{m1B= zE56my#Eny`-HqNWRS)Jk`n!%6#zDJj7@T^l{&-yvJ6-O0AE9$vG`AeSYK#2(Z#KgQ z)bb@1;vU-gJ+nY-RA1!zPRnf$7h*YFAz^eAz45IjF6=>SD^~0#`%FZgi*nK4 zFwG33Bsg~b84wWsh$k?Kr*9KT0kckiJi-v2ReQ6LBpJFl_-g+<^i^m0P?|EIodC4R z6EsKKy2X9rvAyfbDv4DfaEck$T0(I;_0o}Wb7Kg(76T`Cw2KN*z3u@5_fQ&OvA`+{ zjA(pq3-0$Dx0*4%*EY3zX%@CW%c9>(1a&`2?Ah%?U}CIw zisDQpS0G1a0`<)!=SR;nVX_@irP-vn!M&I{*4s&HRvxWIeR{X!5l#SL;obLfBysME zB39%voR4A`)tYl`bG!r?yAw`ISUhH5K-3_=bOlZLsxqs41g8|uT5yMq42F$l@v14>s#Px zfBp*VHbj0QCz15$rY&pyr*lU6g|Y%g)@W85y}3&16rG|NLhx)k%ruzW9D_E=;ENDx z@|c{Zn!kyIKVdg|Td`n{r)8_Fd1tUuYyKun@b}HCNf+|Kpd-(SQaVNA#WdxIZj8X| zGLIEDUWan{x znfXYdyMtH0O#{fIk)h|io;~lXl*ca5><{{Oj>xx6IPII*7#_(qhb^k0u44XI3B0-S zlWP|sG_kUOf7jJu8LdjK*eF9&Q|vKhB;jkw`Fe`u;kax(JkRQ)wi@mYV47jGDSj6Pik~%rS~VC05M}jhKiqB|5V%}FfOA&=%8{ShKiAWl>bjjq z3;9N2@#*0HaDRRQ&6S2h2nm{G7YL7r!r|ebtaFG4dv`(j|R*ytI8SRY?`aQX3YP-+$lOClYpjx z>~urOVfo1Y&)T!uufiIPhZwT))qrD@pSG>1I_!11y;#$(ZBlFPk5~C;qF789o|;Pf z)KMW&dJXdm!V}KTI)hSdM3sm6WWM7vo){wm&~m)D+&(6h6eCMr)T8 z@PqY`LUPCajwNH@2+0^ZINXGY*u9KcIbA%$0;+fHq1~;?1vR8jYPoG-%Rgow*g<8C zwYs$tGhG1R?SF7t7o z_XFp}M}lHY&53ty#E^mWLWP2!;NnRVSg$Ug%0hm1AI&8}!a2tr>Pmi;A zo)_HQJH3wSffk&DK=GUsAz+gKq$D5v=|_cvG@7}vSa^BB-e|A8v;_RNnzujjJg^~YJ4 zS^o6qkvn3FZs8=B-xnWTzMXUQZ8;&hC7BsA9aUZbX#eNH4D!G!L-+40m>}4E^UlLk z%A#pD%VTThC-1vn_aURf35$pImC`Efk6dv3;XxB3m?e*!o^f0{{j8sXUCYl08AIyR zzeB5`)yrR@R*Ao^UP_pp(tD)-T5h z_JoA&_ootTUTerp2*|Hls_Tg=d%UxTR0Soy$(6$k`@IgCK_*=B;iQ#j z`PCMKf^y7LUt0j0t&o)a{c+Ad93R6wsrZzxbsTK!R)j8Mmg~0snyw+{JsA||{c?=v zgLR?nq&K8(iJX|p`)5Yb45zs}CDAXmZy|g%CebfC~eN)fl zjPtQg2JvP<#qNUpvUvb&+KSou57b_M3_xtdg8-A-#2Ss*(@|{=M*V=ZV)jY zAmrz95pS%D%rB_hp@?p@#DE@Mo&dl2mPM!y9^sz+r{ zIp%x)S>Z(|w>M_C7;r;VgNkUs2_KE;=u33t^)LrdrP*2}tkc%!($q{f`@2{d;&;W=C3e`}D6S z8l1ms{t$3=G|u0NndNuJor?P#$L}CLU=NDZxzifLDpY4xy-z-J%brZ5ALrS*a`LwC zUh{ee)0hdwo@ejd7B%P?zwbZM;?vD2S=C-=U)JK&b1Uei#@VWTWDrpK2-Zrz{8tT` 
z5XgRuFJ8q^1>x4=gS2yXbnY(85$lqLM^42F*WbC_OtmK>OEjQpHLVX+vS&CK71&Ag zWh1wOvR_(ix*54Y4y_R@ZvRRIMtirj8IOsx3+GdIo&)mh7vg6)vSBHDpB-v@ zx`7^J;?Gbqym)8oq_1F=pnvfBiZcx)$WM3;9*l~yk_Ps78i3bG$H!9tl>}RU&!f&+yx{}Xo*O+CX}#^WgO4GHXF5xV z;^v4>O$AUl4MS*^vsm*boyg4Cbf`F`(6$sM$O0o%Eg{8|LFH@PAH+5CCtz9O*7<`{ z``5=AWbz;v&>wQe)BK5(dJeSy$|;tn-j$%KqmAK1!sz_Ku=-9?&fB~!ktx5%J#_Tj zh+UNmWs{~V9s;EaIUSN$$^xh_q~Bf2e}DPq)Tflpeay;{5;D|%bQClE@IoZ5S*7Zl z*yV}h$(tj44@G^6jnwsDU z6%d+!Ivj2!_ezn>58r41VSSmf)Sh<&u?lm_X!|LNezQOp=%@JA!S$SG?cq&P^Q*bM ziOP*2rs;PVj1*2uy@6ENO6C_y{bO~#quqO7rw+;lwA=#yYuNvUw^FK{NfQ5RQVAIXU$QOvQB z0Zs*dzXebo6hsTQ^Pd9V_A6()3D=tz;gR8|;9Dvbd&Uxy?zvkf!SUPBEZ~iXxz(J7 zL#`EKD^!#t717vxOm02VNjCH*j_1&&uZp_WSxc0$Y566Y{4Wp0kJU*}v-Y5ruh90c z>?2>>#9!7xMDPd*^q;1)ukB20H(TgQ>~B#F&u`qj`Vunq zw#B_GPvCYIrdlUhL*pNYj2eQ3r>@WLCa0}Fo!ia#?WnnJvKAY^ki+ z#_HEDZ5My}{{5%NR(U}!&l>`H5WkkU=z^K5%1Fq0ZHE@Oc=rvI-{^|W`JJba1%t(- zTDQf$A>x0b&6M*7?p!N6O(OW8Hb*a-%rR5fF`XoO;oBlWW)FRtF{VyPK+4<~y`_Tb zmQDSj@%ziIKskyoIpXGR<5%+Qr*Iz#Jo^MRg)~RXvA0cqJSx2?)JSp)+p6FlEo~;#26pA!9OuP+AE``i%@+1;b^*W5Cb8E*wbGb0+|Xb zBskI7y6u{r;!`!PJWBOyMfAy+uzR2kAO=9@Fg5y=-?e1fP;4BslLZh3x#9pUcM3`X zW)l`?5oEnCJ6SUlP+3~lwa9X;dRqy8RH370d{*vZ zk}vF9@u_0@xy~dif}ycHXkQE_q=Cej@@95&g+HuTc=-vsTa=14N5fl76jG>#_4QPJ zN%YrKgmtApE5n7LOs)IAPWIw-P_oi7G2uqg8XVqMT0lhB_(!uGPi`PaPYc{uf*)4; zf00QTbfQi5WaODfyNZF{ay)?%UG29VvuIZ{Y-XYL%{c}>AN@@U7XJ33H`kA?@1h3> zJjZTBi2jnNf&eqjDE3YKxB+{F&_w#keFTLL7TG!r9?85>%vL1lY)Q3geC9@9n1z=^vqP4l(aD!P9AgJOvt(d(O`L~H#|4?%1c%W`yo{HJ zoiioas;ygonhgAQ)NWDLGuQ$HW4n(DI=>#J(hzvDg1BjLj8L2DN zAGv}!Gcg+%doY!CdWnp{ripY+n8%p^VfxUkc@~}oW!f4o#h(k$FHK!U9|KbkpbH3f zln8gqF|(~0#LTd7Hv$+T15E)7G;c?0qE@%icv z9{(mZZ0J_jkC(!dnyx?yy0Pp(ArnJ2z!LLk#&CCjum4B%&FDHoZR#WRX{TG~RXHNx zapS}eqr#bQjQkh_zwQzh4dQMA28oNh{|dFBk4)C{E@oU1=uQ$bI@C?o>T zrS$>?ZcNq1L2`M#_Rs5pmkl>x2W0ON+BCM%mmqq-T#18G6lEsd={6udwv=EVoTkNG z3*+X0hdko?hJUw+24nAh6Yt2x4-_1IZ|BcGDzcO|zi+qi$j!f;X>YsSyQqs7-w1sX zLo+KrX{|>HV(2N)zmWxoG+6Fjr;WHpji7}e1w0D-ApO;1pXAjX?6h}Y1ttHlSmh0T0&S*WR61P0CUV+?@Zl65nTa;p>5tqGa)v`ONAG zy)3kB?AIM^lhXFcb4G0rh>*Fj#TTHxZc>cF4(K-2zVF=Q*AG60L`(|gHU0S^glfvh zysj%kUnx;Lo_gFQ4T$W+mZe*l0t{7t9vKf7Gnk}|F13~Tl{RNoM;j_<`*ujsXW&6# zZ?tm@>HY49YA4MlmbO$B-<&{YD&r{M9>>HhV>#aEWGHz)v}b)r&|N)y>xlo9s-b~M zj&r^0*HWlBm6TzsshfWT@ecT%fRRv)e&&%;D2cuxxjLxKD22*C5k@@yMaiUurF34^ zsI*vVutDwGpcLK#H{n-?3QvE6Jt(z~74Fri&|(Ru)e=7Cib(87!prHbD{;z@TH~PKkf-&Zz9yenZkl^pDt5>YtrLgOe zb(D`?@fShCb zkJCA3K;NM)3oV9Ql=6#Jj7R}#1=SllpB2X|kBw^l!DPW-E9=zbYibW&&lmfE2{OAR45)-l1@t#*9ItfA`A98#Y6 zKEK8e&fgNjsvFnaFBZ>4yu1dStR%{bZESy*hv~&6!q}3WpcUcjjla~J1Y^Rvy~^-U zYR#ja8ogb)K^?f~wGUD~46Kig*Z0&y?B#0mDHm$T(+#JEt+uA)i<(lWO=B&5@}&dk zAJjN2|8qoTHzuvk1yP`z%%ahUyX77W!>^#$>UqrnVm}GLx7gb#CVQ=NQLp$dYSB{l zYwYv>CSu;9rc}`Y3e$>l(OizV*N?ZT{icmokbNgFtQ)!B$uF&nd@l~xf$c+xcbZmd zs#iHKKble7WW4>j)p7FsZK=O@$66q(9*bJdl=(@UEbP{p%lF@^igHiWFotd7tjKVG z=x*vRRJig*@!hX~1F`JM+v0!!{%LNpJ(Ha}y*qU-pOH2$=)BB#v99jKbZ0)N_an>V7hT8CF8@5Z>yW@o%-OTvG3TNf!~Xz$cFG%F3DfjjC7Hh7QA=^^o|_R ztQom%d@M0id-|So@w1$scHlmfHuu)a2(s9eQautm`R%UfTSi4Ky=)`S-F+-*Bf!gVzyBn3e2SI;ly10L%&I3ppM9suK*dhf-4^7p`s|7Y^*5No~ z8sC4syvA#{d~i6W1{HX;Qls#ps;34z8-jF4cF-GnvDfwcQSsXMxpEsi)s&UO^rzHP z!ZeWo<2@oE!VcL0e@`|>Teh7dY(n(p^+IXaf)&^A8I%(jIe0eoEwxAC7u!$|12z=E zpuPD%{Geuj($k<$ZBWgg`?6{R|MQG{l5(3AaJL+n zhdKfOf|V%JNmJjZF?4#|P0=Vh9qFw2YK+~nM7Z00CnmW&WSQ>g zWh&ZXDIs<@(%o+ujObE8)e)o=h9HDTW`E$Xov?lO3-~YZazQB)UiK`UyMPb0{?=D% zZX^ECPBTd~&AAdvKGc0x!I5u?CS#is-XpSRH%yrHtv%8?E}w1q$OY`}qyro3b?mhw z#a*@e+i{$*@YkHJ^&_WWOb#sATIE)MzaQ*cq0#&gw5p`9RllD&)O?9@egUX0_;$|5 zc{*In8ZImv=D^?728{71jA&m-CZuT<*Mi9jJJN+mVVdZC4NVEzmWk%5yo3^RWH$irQ1JW 
zZRKvD;Y1m0ZkEQS?hH=-s3BFAG<0k?IIsv%DWrZcuMM8krXmarmqc~a8ji77kN6H!QFZ{0PEEy3*cQcC0cY9`ve}wY3SL!9}ru9+` zAYZqTCd;Ye{wXlPoFT0)3Q^U38)GyXsL4|+z`G&{p)0S@*=~;?<_R#p8#%ADL3GJI zEu#JX<6;%g(>z1e`>S^5NH$&GI za8(t3V8A>yZIo87_qq~zs@IdFf5kxXMzK$~WtV&ZoQl`3`$Qv;tGM)eC5b*f7QS2= zS^CP()#-bhlmR7-!c@JIsrtR)Ye}>M6Dv|c-G53`5&p28E)KhQtDDw6JR@?aC2819 zf9#eMbd17jSH71b_Y5)UkF0dIQ>)jx41f)yYt7eyYM{#gs(i>rO+GGS5M7U$Q~#@@ zuB|n$Yigz=hK0T_eK54Y^5fIf=JZR^JvV0(_wKMq&zysaP$e?({%i#`$7q)pA8=7J zfSUg|=svzl+8cBL8<3;65pfkVSU)gkJyI>|L1t;mbyivHYHIX73dZ^c8yS6^hf=tIk)0(d~r47%{^)ZKfP} z+kmx9^P>C@RP9!3b*+Mxzd~GGah#RH)GcaX&YgpUAV8B5XsM4GyrJrE5NlmVmxv}e zC47al$$@h|&MXF!iEd4pYB`{CdIelY+zSwQj1Sk-JnkgEI6$S4uV(g>W+?x^`Tqy1 CY4kGy literal 0 HcmV?d00001 diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index cc1703c8e..a4d2f2bee 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -1,7 +1,7 @@ # Fixing GPT-4 Turbo laziness with unified diffs -![robot flowchart](../assets/robot-ast.png) +![robot flowchart](../assets/udiff.jpg) Aider now asks GPT-4 Turbo to use [unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html) to edit your code when you request new features, improvements, bug fixes, test cases, etc. From 99abb25f794f018e2d6a5f4258b48d90ced188d0 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 13:06:14 -0800 Subject: [PATCH 10/39] copy --- docs/unified-diffs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index a4d2f2bee..ec812be80 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -1,7 +1,7 @@ # Fixing GPT-4 Turbo laziness with unified diffs -![robot flowchart](../assets/udiff.jpg) +![robot flowchart](../assets/udiffs.jpg) Aider now asks GPT-4 Turbo to use [unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html) to edit your code when you request new features, improvements, bug fixes, test cases, etc. From 7ab3b99914f2f4a72fdb9885ca7e28c775a1eed5 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 13:36:19 -0800 Subject: [PATCH 11/39] copy --- docs/unified-diffs.md | 71 ++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 32 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index ec812be80..3221e7102 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -4,15 +4,18 @@ ![robot flowchart](../assets/udiffs.jpg) -Aider now asks GPT-4 Turbo to use [unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Unified-Format.html) to edit your code when you request new features, improvements, bug fixes, test cases, etc. -This new support for unified diffs massively reduces GPT-4 Turbo's habit of being a "lazy" coder. +Aider now asks GPT-4 Turbo to use +[unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Example-Unified.html) +to edit your code when you request new features, improvements, bug fixes, test cases, etc. +This new support for unified diffs massively reduces GPT-4 Turbo's bad habit of "lazy" coding. There are abundant anecdotes about GPT-4 Turbo writing half completed code filled with comments that give homework assignments to the user like "...omitted for brevity..." or "...add logic here...". Aider's new unified diff edit format significantly reduces this sort of lazy coding, -producing much better quantitative scores on a new "laziness benchmark". +as quantified by dramatically improved scores +on a new "laziness benchmark". 
Before trying to reduce laziness, I needed a way to quantify and measure the problem. @@ -35,21 +38,22 @@ This new laziness benchmark produced the following results with `gpt-4-1106-prev - **A system prompt based on widely circulated folklore only scored 15%, same as the baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma". The older `gpt-4-0613` also did better on the laziness benchmark by using unified diffs. -The benchmark was designed to work with large source code files, many of -which exceeded GPT-4's 8k context window. -This meant that 28% of tasks exhausted the context window and were marked as a fail, -significantly dragging down GPT-4's performance on the benchmark. +The benchmark was designed to work with large source code files, and +many of them are too large to use with June GPT-4. +**About 28% of the tasks exhausted the 8k context window** and were automatically +marked as failures, +significantly dragging down June GPT-4's performance on the benchmark. -- **GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format. -- **Aider's new unified diff edit format raised GPT-4's score to 59%**. +- **The June GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format. +- **Aider's new unified diff edit format raised June GPT-4's score to 59%**. Before settling on unified diffs, I explored many other approaches to stop GPT-4 Turbo from eliding code and replacing it with comments. These efforts included prompts about being tireless and diligent, use of OpenAI's function/tool calling capabilities and numerous variations on -aider's existing editing formats and other diff-like formats. -All in all, the results shared here reflect +aider's existing editing formats, line number formats and other diff-like formats. +The results shared here reflect an extensive investigation of possible solutions and a large number of benchmarking runs of numerous varied approaches against GPT-4 Turbo. @@ -76,14 +80,14 @@ referencing old code like "...copy $USD formatting code here...". Based on this observation, I set out to build a benchmark based on refactoring -a non-trivial amount of code from within fairly large source files. +a non-trivial amount of code found in fairly large source files. To do this, I used python's `ast` module to analyze the -[Django repository](). +[Django repository](https://github.com/django/django). The goal was to search the Django repository to: - Find source files that contain class methods which are non-trivial, having more than 100 AST nodes in their implementation. -- Focus on methods that are a smaller piece of a larger class, so they don't represent the bulk of the code in their class or the file. We want to find methods which are less than half the AST nodes present in their containing class. +- Focus on methods that are part of a larger class. We want to find methods which are less than half the code present in their containing class. - Find methods that do not make any use of their `self` parameter. This means they can be trivially refactored out of the class and turned into a stand-alone top-level function. We can then turn each of these source files into a task for the benchmark, @@ -112,7 +116,8 @@ And it correlates well with other laziness metrics gathered during benchmarking like the introduction of new comments that contain "...". 
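The scanning criteria above boil down to a few checks against each method's AST. A rough sketch of that kind of scan, using only the standard `ast` module, is shown below; the helper names and exact thresholds are illustrative assumptions, not the code of the actual benchmark script.

```python
# Illustrative sketch of the ast-based scan described above.
# Helper names and thresholds are assumptions, not the benchmark's actual script.
import ast


def node_count(node: ast.AST) -> int:
    "Count the AST nodes contained in a function or class."
    return sum(1 for _ in ast.walk(node))


def uses_self(method: ast.FunctionDef) -> bool:
    "Return True if the method body references its `self` argument."
    return any(
        isinstance(n, ast.Name) and n.id == "self" for n in ast.walk(method)
    )


def refactor_candidates(source: str):
    "Yield (class, method) name pairs matching the criteria described above."
    tree = ast.parse(source)
    for cls in ast.walk(tree):
        if not isinstance(cls, ast.ClassDef):
            continue
        for method in cls.body:
            if not isinstance(method, ast.FunctionDef):
                continue
            if (
                node_count(method) > 100
                and node_count(method) < node_count(cls) / 2
                and not uses_self(method)
            ):
                yield cls.name, method.name
```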
-The result is a pragmatic benchmark suite that provokes, detects and quantifies laziness. +The result is a pragmatic +[benchmark suite that provokes, detects and quantifies GPT coding laziness](https://github.com/paul-gauthier/refactor-benchmark). ## Unified diff editing format @@ -126,12 +131,13 @@ GPT-4 code editing format: - HIGH LEVEL - Encourage GPT to structure edits as new versions of substantive code blocks (functions, methods, etc), not as a series of surgical/minimal changes to individual lines of code. - FLEXIBLE - Strive to be maximally flexible when interpreting GPT's edit instructions. -A helpful shortcut here is to have empathy for GPT, and imagine you are on -the other end of the conversation being tasked with specifying code edits. +A helpful shortcut here is to have empathy for GPT, and imagine you +are the one being tasked with specifying code edits. Would you want to hand type a properly escaped json data structure to specify surgical insert, delete, replace operations on specific code line numbers? -Would you want a typo, off-by-one line number or flubbed escape character to trigger an error -and force you to start over? +Would you want +to trigger an error and be forced to start over +after any typo, off-by-one line number or flubbed escape character? GPT is quantitatively better at code editing when you reduce the burden of formatting edits by using a familiar, simple, high level @@ -172,10 +178,11 @@ They need to *accurately* reflect the original and updated file contents, otherwise the patch command will fail to apply the changes. Having GPT specify changes in a well-known format that is usually consumed by a fairly rigid program like patch -seems to discourage it from -leaving informal editing instructions in comments -and being lazy -about writing all the needed code. +seems to encourage rigor. +GPT is less likely to +leave informal editing instructions in comments +or be lazy about writing all the needed code. + With unified diffs, GPT acts more like it's writing textual data intended to be read by a program, not talking to a person. @@ -215,8 +222,8 @@ A unified diff looks pretty much like the code it is modifying. The one complicated piece is the line numbers found at the start of each hunk that look something like this: `@@ -2,4 +3,5 @@`. This example is from a -hunk that will change lines 2-4 in the original file -into what will become lines 3-5 in the updated file. +hunk that would change lines 2-4 in the original file +into what would become lines 3-5 in the updated file. You've probably read a lot of unified diffs without ever caring about these line numbers, @@ -276,14 +283,14 @@ def main(args): Simple, right? -## Encourage high level edits +### Encourage high level edits The example unified diffs we've seen so far have all been single line changes, which makes them pretty easy to read and understand. Consider this slightly more complex change, which renames the variable `n` to `number`: -``` diff +```diff @@ ... @@ -def factorial(n): +def factorial(number): @@ -302,7 +309,7 @@ change is not as succinct as the minimal diff above, but it is much easier to see two different coherent versions of the `factorial()` function. -``` diff +```diff @@ ... @@ -def factorial(n): - "compute factorial" @@ -350,8 +357,8 @@ applied as edits to the source files. These imperfect diffs exhibit a variety of problems: - GPT forgets to include some semantically irrelevant lines or details. Often GPT forgets things like comments, docstrings, blank lines, etc. 
Or it skips over some code that it doesn't intend to change. -- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file, and incorrectly includes them with a leading *space* ` `. -- GPT jumps ahead to a new part of the file without starting a new hunk with a `@@ ... @@` divider. +- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* ` ` as if they were already in the file. +- GPT jumps ahead to show edits to a different part of the file without starting a new hunk with a `@@ ... @@` divider. As an example of the first issue, consider this source code: @@ -425,13 +432,13 @@ Any naive attempt to use actual unified diffs or any other strict diff format is certainly doomed, but the techniques described here and -now incorporated into aider provide +incorporated into aider provide a highly effective solution. There could be significant benefits to fine tuning models on the simpler, high level style of diffs that are described here. -Dropping the line numbers and focusing on diffs of +Dropping line numbers from the hunk headers and focusing on diffs of semantically coherent chunks of code seems to be an important part of successful GPT code editing. Most LLMs will have already seen plenty of unified diffs From 042a2b8a08c4fc9367b759c023c67d9f0582e218 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 15:19:34 -0800 Subject: [PATCH 12/39] pull the filename from the +++ line --- aider/coders/search_replace.py | 1 - aider/coders/udiff_coder.py | 4 ++-- docs/unified-diffs.md | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/aider/coders/search_replace.py b/aider/coders/search_replace.py index 47ae35d25..ac074e1b2 100755 --- a/aider/coders/search_replace.py +++ b/aider/coders/search_replace.py @@ -584,7 +584,6 @@ def flexible_search_and_replace(texts, strategies): for preproc in preprocs: res = try_strategy(texts, strategy, preproc) if res: - dump(strategy, preproc) return res diff --git a/aider/coders/udiff_coder.py b/aider/coders/udiff_coder.py index 14787b2fd..c5fe7bb6b 100644 --- a/aider/coders/udiff_coder.py +++ b/aider/coders/udiff_coder.py @@ -334,8 +334,8 @@ def process_fenced_block(lines, start_line_num): block = lines[start_line_num:line_num] block.append("@@ @@") - if block[0].startswith("--- "): - fname = block[0].split()[1] + if block[1].startswith("+++ "): + fname = block[1].split()[1] block = block[2:] else: fname = None diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 3221e7102..85f2d707f 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -444,4 +444,4 @@ seems to be an important part of successful GPT code editing. Most LLMs will have already seen plenty of unified diffs in their normal training data, and so should be very amenable to fining tuning towards this -particular style of diff. +particular diff style. From f0b60a0052a487fc07b898e9131b001e7924fe36 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 15:25:15 -0800 Subject: [PATCH 13/39] Add clear instruction on how to make a new file --- aider/coders/udiff_prompts.py | 2 ++ docs/unified-diffs.md | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/aider/coders/udiff_prompts.py b/aider/coders/udiff_prompts.py index 068949e15..4ab30bfc4 100644 --- a/aider/coders/udiff_prompts.py +++ b/aider/coders/udiff_prompts.py @@ -93,6 +93,8 @@ Hunks don't need to be in any particular order. 
When editing a function, method, loop, etc use a hunk to replace the *entire* code block. Delete the entire existing version with `-` lines and then add a new, updated version with `+` lines. This will help you generate correct code and correct diffs. + +To make a new file, show a diff from `--- /dev/null` to `+++ path/to/new/file.ext`. """ files_content_prefix = "These are the *read-write* files:\n" diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 85f2d707f..f679cbc68 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -127,7 +127,7 @@ helped clarify some general principles, which I think are applicable to any effe GPT-4 code editing format: - FAMILIAR - Choose an edit format that GPT is already familiar with. -- SIMPLE - Choose a simple format that avoid escaping, syntactic overhead and brittle specifiers like line numbers or line counts. +- SIMPLE - Choose a simple format that avoids escaping, syntactic overhead and brittle specifiers like line numbers or line counts. - HIGH LEVEL - Encourage GPT to structure edits as new versions of substantive code blocks (functions, methods, etc), not as a series of surgical/minimal changes to individual lines of code. - FLEXIBLE - Strive to be maximally flexible when interpreting GPT's edit instructions. From e27d5d26b7faad3efcdedb71aa14d0b3ed263eb8 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 15:27:31 -0800 Subject: [PATCH 14/39] copy --- docs/unified-diffs.md | 113 +++++++++++++++++++++--------------------- 1 file changed, 57 insertions(+), 56 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index f679cbc68..753e38054 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -66,60 +66,6 @@ We will discuss some key design decisions involved in this new format, and evaluate their significance using ablation experiments. -## Refactoring benchmark - -Aider has long used a -[benchmark suite based on 133 Exercism python exercises](). -But these are mostly small coding problems, -usually requiring only a few dozen lines of code to solve. -GPT-4 Turbo was typically only lazy on 2-3 of these exercises: -the ones with the largest amount of code and which involved refactoring. -Rather than fully completing the refactor, GPT would often -just add a comment -referencing old code like -"...copy $USD formatting code here...". - -Based on this observation, I set out to build a benchmark based on refactoring -a non-trivial amount of code found in fairly large source files. -To do this, I used python's `ast` module to analyze the -[Django repository](https://github.com/django/django). - -The goal was to search the Django repository to: - -- Find source files that contain class methods which are non-trivial, having more than 100 AST nodes in their implementation. -- Focus on methods that are part of a larger class. We want to find methods which are less than half the code present in their containing class. -- Find methods that do not make any use of their `self` parameter. This means they can be trivially refactored out of the class and turned into a stand-alone top-level function. - -We can then turn each of these source files into a task for the benchmark, -using instructions like: - -> Refactor the `_set_csrf_cookie` method in the `CsrfViewMiddleware` class to be a stand alone, top level function. -> Name the new function `_set_csrf_cookie`, exactly the same name as the existing method. 
-> Update any existing `self._set_csrf_cookie` calls to work with the new `_set_csrf_cookie` function. - -A [simple python AST scanning script]() found 39 of these source files in the Django repository -and packaged them up as benchmark tasks using -the same format as Exercism exercises. - -The tool also created a unit test for each task -which again uses the `ast` module to check that the refactor -was performed roughly correctly: - -- The updated source file must parse as correct python, without `SyntaxError` or `IndentationError` exceptions. This is a powerful check that will surface any mechanical errors made when attempting to edit the source code. -- The target method must now exist as a top-level function in the file. -- This new top-level function must contain approximately the same number of AST nodes as the original class method. This ensures that GPT didn't elide code and replace it with comments. -- The original class must still be present in the file, and it must be smaller by about the number of AST nodes of the method which was removed. This helps confirm that the method was removed from the class, without other significant modifications. - -To be clear, this is not a rigorous test that the refactor was performed correctly. -But it does serve as a basic sanity check that the refactor was essentially done as a cut & paste, without eliding any code as comments. -And it correlates well with other laziness metrics -gathered during benchmarking like the -introduction of new comments that contain "...". - -The result is a pragmatic -[benchmark suite that provokes, detects and quantifies GPT coding laziness](https://github.com/paul-gauthier/refactor-benchmark). - - ## Unified diff editing format The design and implementation of aider's new unified diff editing format @@ -374,8 +320,8 @@ def main(args): main(sys.argv[1:]) ``` -GPT might produce a unified diff like the one below, -which is missing the "show a greeting" comment line. +**The diff below is missing the "show a greeting" comment line**, +and represents a common type of mistake GPT might make. When we search for the *minus* `-` lines, we won't find them in the original file because of the missing comment. @@ -420,6 +366,61 @@ feature: - **GPT-4 Turbo's performance drops from 65% down to 56%** on the refactoring benchmark. - **We see a 9X increase in editing errors** on aider's original Exercism benchmark. +## Refactoring benchmark + +Aider has long used a +[benchmark suite based on 133 Exercism python exercises](). +But these are mostly small coding problems, +usually requiring only a few dozen lines of code to solve. +GPT-4 Turbo was typically only lazy on 2-3 of these exercises: +the ones with the largest amount of code and which involved refactoring. +Rather than fully completing the refactor, GPT would often +just add a comment +referencing old code like +"...copy $USD formatting code here...". + +Based on this observation, I set out to build a benchmark based on refactoring +a non-trivial amount of code found in fairly large source files. +To do this, I used python's `ast` module to analyze the +[Django repository](https://github.com/django/django). + +The goal was to search the Django repository to: + +- Find source files that contain class methods which are non-trivial, having more than 100 AST nodes in their implementation. +- Focus on methods that are part of a larger class. We want to find methods which are less than half the code present in their containing class. 
+- Find methods that do not make any use of their `self` parameter. This means they can be trivially refactored out of the class and turned into a stand-alone top-level function. + +We can then turn each of these source files into a task for the benchmark, +using instructions like: + +> Refactor the `_set_csrf_cookie` method in the `CsrfViewMiddleware` class to be a stand alone, top level function. +> Name the new function `_set_csrf_cookie`, exactly the same name as the existing method. +> Update any existing `self._set_csrf_cookie` calls to work with the new `_set_csrf_cookie` function. + +A [simple python AST scanning script]() found 39 of these source files in the Django repository +and packaged them up as benchmark tasks using +the same format as Exercism exercises. + +The tool also created a unit test for each task +which again uses the `ast` module to check that the refactor +was performed roughly correctly: + +- The updated source file must parse as correct python, without `SyntaxError` or `IndentationError` exceptions. This is a powerful check that will surface any mechanical errors made when attempting to edit the source code. +- The target method must now exist as a top-level function in the file. +- This new top-level function must contain approximately the same number of AST nodes as the original class method. This ensures that GPT didn't elide code and replace it with comments. +- The original class must still be present in the file, and it must be smaller by about the number of AST nodes of the method which was removed. This helps confirm that the method was removed from the class, without other significant modifications. + +To be clear, this is not a rigorous test that the refactor was performed correctly. +But it does serve as a basic sanity check that the refactor was essentially done as a cut & paste, without eliding any code as comments. +And it correlates well with other laziness metrics +gathered during benchmarking like the +introduction of new comments that contain "...". + +The result is a pragmatic +[benchmark suite that provokes, detects and quantifies GPT coding laziness](https://github.com/paul-gauthier/refactor-benchmark). + + + ## Conclusions and future work Aider's new unified diff format seems very effective at stopping From 616aca8656aa7ee958e23617fec995fc4c1b5154 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 16:45:11 -0800 Subject: [PATCH 15/39] copy --- docs/unified-diffs.md | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 753e38054..44eb27fdc 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -12,22 +12,22 @@ This new support for unified diffs massively reduces GPT-4 Turbo's bad habit of There are abundant anecdotes about GPT-4 Turbo writing half completed code filled with comments that give homework assignments to the user -like "...omitted for brevity..." or "...add logic here...". +like +"...add logic here..." +or +"...omitted for brevity...". Aider's new unified diff edit format significantly reduces this sort of lazy coding, as quantified by dramatically improved scores -on a new "laziness benchmark". +on a new "laziness" benchmark suite. -Before trying to reduce laziness, I needed a way to quantify and measure -the problem. -I developed a new -benchmarking suite designed to both provoke and quantify lazy coding. +Aider's new benchmarking suite is +designed to both provoke and quantify lazy coding. 
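Each task's unit test can be thought of as an `ast`-based sanity check along the lines sketched below; the function name, arguments and size tolerance here are assumptions for illustration, not the benchmark's actual test code.

```python
# Illustrative sketch of the ast-based sanity check described above.
# The function name, arguments and tolerance are assumptions, not the real test.
import ast


def refactor_looks_done(updated_source: str, method_name: str,
                        original_method_nodes: int, tolerance: float = 0.2) -> bool:
    "Parse the edited file and check the method became a full top-level function."
    try:
        tree = ast.parse(updated_source)  # surfaces SyntaxError / IndentationError
    except SyntaxError:
        return False

    new_func = next(
        (n for n in tree.body
         if isinstance(n, ast.FunctionDef) and n.name == method_name),
        None,
    )
    if new_func is None:
        return False  # the method was not moved to the top level

    new_nodes = sum(1 for _ in ast.walk(new_func))
    # Roughly the same amount of code, so the body wasn't elided into a comment.
    return abs(new_nodes - original_method_nodes) <= tolerance * original_method_nodes
```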
It consists of 39 python refactoring tasks, which ask GPT to remove a non-trivial method from a class and make it a stand alone function. - GPT-4 Turbo is prone to being lazy on this sort of task, because it's mostly a "cut & paste" of code from one place in a file to another. -GPT often creates the new function with a body that is empty except for +Rather than writing out the code, GPT often just leaves a comment like "...include the body of the original method..." @@ -35,7 +35,7 @@ This new laziness benchmark produced the following results with `gpt-4-1106-prev - **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format. This confirms the anecdotes that GPT-4 Turbo is quite lazy when coding, and serves as a baseline for comparison. - **Aider's new unified diff edit format raised the score to 65%**. -- **A system prompt based on widely circulated folklore only scored 15%, same as the baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma". +- **A system prompt based on widely circulated folklore performed same as the baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma". This prompt scored only 15% on the refactor benchmark. The older `gpt-4-0613` also did better on the laziness benchmark by using unified diffs. The benchmark was designed to work with large source code files, and @@ -423,22 +423,23 @@ The result is a pragmatic ## Conclusions and future work -Aider's new unified diff format seems very effective at stopping +Based on the refactor benchmark results, +aider's new unified diff format seems very effective at stopping GPT-4 Turbo from being a lazy coder. -I suspect that anyone who has tried to have GPT edit code -started out asking for diffs of some kind. -I know I did. -Any naive attempt to use actual unified diffs -or any other strict diff format -is certainly doomed, -but the techniques described here and +Unified diffs were one of the very first edit formats I tried +when first building aider. +I think a lot of other AI coding assistant projects have also +tried going down this path. +It seems that any naive or direct use of structure diff formats +is pretty much doomed to failure. +But the techniques described here and incorporated into aider provide -a highly effective solution. +a highly effective way to harness GPT's knowledge of unified diffs. There could be significant benefits to fine tuning models on -the simpler, high level style of diffs that are described here. +aider's simple, high level style of unified diffs. Dropping line numbers from the hunk headers and focusing on diffs of semantically coherent chunks of code seems to be an important part of successful GPT code editing. From 200cb125045401413a16c4605c05ad8b87b87d3f Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 16:46:02 -0800 Subject: [PATCH 16/39] copy --- docs/unified-diffs.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 44eb27fdc..e5cedf8d3 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -25,10 +25,8 @@ designed to both provoke and quantify lazy coding. 
It consists of 39 python refactoring tasks, which ask GPT to remove a non-trivial method from a class and make it a stand alone function. -GPT-4 Turbo is prone to being lazy on this sort of task, because it's mostly a -"cut & paste" of code from one place in a file to another. -Rather than writing out the code, GPT often just leaves -a comment like +GPT-4 Turbo is prone to being lazy on this sort of copy/paste task, +by leaving a comment like "...include the body of the original method..." This new laziness benchmark produced the following results with `gpt-4-1106-preview`: From 33c337f407cad0fca0b04ff0485b90bb0c2e4fb7 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 17:10:33 -0800 Subject: [PATCH 17/39] copy --- docs/unified-diffs.md | 108 +++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 71 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index e5cedf8d3..1a0f283d6 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -31,35 +31,31 @@ by leaving a comment like This new laziness benchmark produced the following results with `gpt-4-1106-preview`: -- **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format. This confirms the anecdotes that GPT-4 Turbo is quite lazy when coding, and serves as a baseline for comparison. +- **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format. - **Aider's new unified diff edit format raised the score to 65%**. -- **A system prompt based on widely circulated folklore performed same as the baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma". This prompt scored only 15% on the refactor benchmark. +- **A system prompt based on widely circulated folklore performed no better than baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma". This prompt scored only 15% on the refactor benchmark. The older `gpt-4-0613` also did better on the laziness benchmark by using unified diffs. The benchmark was designed to work with large source code files, and -many of them are too large to use with June GPT-4. -**About 28% of the tasks exhausted the 8k context window** and were automatically -marked as failures, -significantly dragging down June GPT-4's performance on the benchmark. +28% of them are too large to use with June GPT-4's 8k context window. +This significantly harmed June GPT-4's performance on the benchmark. - **The June GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format. - **Aider's new unified diff edit format raised June GPT-4's score to 59%**. Before settling on unified diffs, -I explored many other approaches to stop GPT-4 Turbo from eliding code -and replacing it with comments. +I explored many other approaches. These efforts included prompts about being tireless and diligent, use of OpenAI's function/tool calling capabilities and numerous variations on aider's existing editing formats, line number formats and other diff-like formats. The results shared here reflect -an extensive investigation of possible solutions and -a large number of benchmarking runs of numerous varied approaches against -GPT-4 Turbo. 
+an extensive investigation and a large number of benchmarking runs of many approaches. -The result is aider's new support for a unified diff like -editing format which outperforms other potential solutions by a wide margin. -The rest of this article will describe aider's new refactoring benchmark -and the new unified diff editing format. +The result is aider's new support for a unified diff editing format +which outperforms other solutions by a wide margin. +The rest of this article will describe +the new unified diff editing format and +aider's new refactoring benchmark. We will discuss some key design decisions involved in this new format, and evaluate their significance using ablation experiments. @@ -78,7 +74,7 @@ GPT-4 code editing format: A helpful shortcut here is to have empathy for GPT, and imagine you are the one being tasked with specifying code edits. Would you want to hand type a properly escaped json data structure -to specify surgical insert, delete, replace operations on specific code line numbers? +to invoke surgical insert, delete, replace operations on specific code line numbers? Would you want to trigger an error and be forced to start over after any typo, off-by-one line number or flubbed escape character? @@ -90,8 +86,8 @@ and flexible editing format. ### Choose a familiar editing format Unified diffs are perhaps the most commonly used format for showing -how source code files have been changed. -This is because it is the default output format of `git diff`: +changes to code, because it's the +default output format of `git diff`: ```diff $ git diff hello.py @@ -119,7 +115,7 @@ usually intended to be consumed by the [patch](https://www.gnu.org/software/diffutils/manual/html_node/Merging-with-patch.html) program. They need to *accurately* reflect the original and updated file contents, -otherwise the patch command will fail to apply the changes. +otherwise the patch command will fail. Having GPT specify changes in a well-known format that is usually consumed by a fairly rigid program like patch seems to encourage rigor. @@ -127,7 +123,6 @@ GPT is less likely to leave informal editing instructions in comments or be lazy about writing all the needed code. - With unified diffs, GPT acts more like it's writing textual data intended to be read by a program, not talking to a person. @@ -156,40 +151,22 @@ unpacked from the JSON container or the JSON decode just fails entirely. On the other hand, the core of the unified diff format is extremely simple. You include a hunk of the file that needs to be changed, -with every line prefixed by ether a *space* ` `, a *plus* `+` or a *minus* `-`. -These markers indicate an unchanged line, a new line to add or an existing line to remove. -There is no escaping, and very little other structure needed -to create a unified diff. +with every line prefixed to indicate unchanged, new or deleted lines. A unified diff looks pretty much like the code it is modifying. The one complicated piece is the line numbers found at the start -of each hunk that look something like this: `@@ -2,4 +3,5 @@`. -This example is from a -hunk that would change lines 2-4 in the original file -into what would become lines 3-5 in the updated file. - -You've probably read a lot of unified diffs without ever -caring about these line numbers, -because the diffs are usually perfectly sensible without them. -This is good news, because we're going to discard these numbers. - +of each hunk. 
They look something like this: `@@ -2,4 +3,5 @@`, +which indicates that the hunk +will change lines 2-4 in original file +into lines 3-5 in the updated file. GPT is terrible at working accurately with source code line numbers. This is a general observation about any use of line numbers in editing formats, backed up by many quantitative benchmark -experiments. -Specifically regarding line numbers in unified diffs, -GPT is frequently off-by-one, or labels a hunk as -being line numbers 2-4 of the file but the hunk actually contains 6 lines, etc. -GPT-4 isn't even close to being able to consistently -produce valid -line number headers. -Doing so requires far too much attention to numerical details to ensure -correctness and self-consistency. -So aider tells GPT not to include line numbers. -Instead, aider just interprets each hunk from the unified diffs +So aider tells GPT not to include line numbers, +and just interprets each hunk from the unified diffs as a search and replace operation: This diff: @@ -272,35 +249,32 @@ but it is much easier to see two different coherent versions of the ``` Aider's system prompt strongly encourages -GPT to produce this kind of high level diff, and provides a few examples. -GPT is much more successful at code editing -with the addition of this "high level diff" prompting. -It is better at producing correct diffs, which can be successfully +GPT to produce these high level diffs. +This prompt makes GPT better at producing correct diffs, which can be successfully applied to the original file. **Experiments without "high level diff" prompting -measure a 30-50% increase in editing errors,** +produce a 30-50% increase in editing errors,** where diffs fail to apply or apply incorrectly and produce invalid code. -Each such editing error causes a round trip back to GPT, -asking for better diffs. -These extra round trips slow down the pair programming experience -and increase token costs. +When a patch fails, aider needs to ask GPT for an updated version of the diff. +This takes time, costs tokens and sometimes fails to result in a successful edit +even after multiple retries. There are probably a couple of reasons why high level diffs improve code editing performance: -- It is easier to produce diffs that both correctly match the original code and correctly produce the intended new code. There is less risk of getting confused while generating a rapid fire series of minimal, surgical edits mixed into existing code. +- It's easier to produce diffs that both correctly match the original code and correctly produces the intended new code. There is less risk of getting confused while generating a series of surgical edits mixed into existing code. - The high level hunks often contain more lines than a surgical version, so they are less likely to accidentally match unrelated parts of the original file. This is important because GPT can't reliably give us line numbers to specify exactly where in the file to make the change. ### Be flexible when applying edits GPT frequently makes errors when generating diffs, which can prevent them from being correctly -applied as edits to the source files. +applied to edit the code. These imperfect diffs exhibit a variety of problems: -- GPT forgets to include some semantically irrelevant lines or details. Often GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change. +- GPT forgets to include semantically irrelevant details. 
Often GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change. - GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* ` ` as if they were already in the file. - GPT jumps ahead to show edits to a different part of the file without starting a new hunk with a `@@ ... @@` divider. @@ -338,10 +312,9 @@ because of the missing comment. ``` -Aider tries to be very flexible when applying unified diffs, -in order to handle all these sorts of defects. -If a hunk doesn't apply cleanly, aider uses a number of strategies -to try and apply the edit intended by GPT: +Aider tries to be very flexible when applying diffs, +in order to handle defects. +If a hunk doesn't apply cleanly, aider uses a number of strategies: - Normalize the hunk, by taking the *minus* `-` and *space* ` ` lines as one version of the hunk and the *space* ` ` and *plus* `+` lines as a second version and doing an actual unified diff on them. - Try and discover new lines that GPT is trying to add but which it forgot to mark with *plus* `+` markers. This is done by diffing the *minus* `-` and *space* ` ` lines back against the original file. @@ -349,17 +322,10 @@ to try and apply the edit intended by GPT: - Vary the size and offset of the "context window" of *space* ` ` lines from the hunk that are used to localize the edit to a specific part of the file. - Combine the above mechanisms to progressively become more permissive about how to apply the hunk. -These flexible patching strategies are critical to successfully apply the -unified diffs that GPT produces. -Removing support for flexible patching +These flexible patching strategies are critical, and +removing them radically increases the number of hunks which fail to apply. -Each such editing error causes a round trip back to GPT, -asking for better diffs. -These extra round trips slow down the pair programming experience -and increase token costs. - -**Experiments where flexible patching is disabled** quantify the importance of this -feature: +**Experiments where flexible patching is disabled show**: - **GPT-4 Turbo's performance drops from 65% down to 56%** on the refactoring benchmark. - **We see a 9X increase in editing errors** on aider's original Exercism benchmark. From 330b4140c7e05c37fed2bf02214ebbc177c86e62 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 17:17:34 -0800 Subject: [PATCH 18/39] copy --- docs/unified-diffs.md | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 1a0f283d6..0a5de46b0 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -346,30 +346,27 @@ referencing old code like Based on this observation, I set out to build a benchmark based on refactoring a non-trivial amount of code found in fairly large source files. To do this, I used python's `ast` module to analyze the -[Django repository](https://github.com/django/django). - -The goal was to search the Django repository to: +[Django repository](https://github.com/django/django) to: - Find source files that contain class methods which are non-trivial, having more than 100 AST nodes in their implementation. -- Focus on methods that are part of a larger class. We want to find methods which are less than half the code present in their containing class. -- Find methods that do not make any use of their `self` parameter. 
This means they can be trivially refactored out of the class and turned into a stand-alone top-level function. +- Focus on methods that are only part of a larger class, which has at least twice as much code as the method. +- Find methods that don't use their `self` parameter, so they can be trivially refactored out of the class. We can then turn each of these source files into a task for the benchmark, -using instructions like: +where we ask GPT to: > Refactor the `_set_csrf_cookie` method in the `CsrfViewMiddleware` class to be a stand alone, top level function. > Name the new function `_set_csrf_cookie`, exactly the same name as the existing method. > Update any existing `self._set_csrf_cookie` calls to work with the new `_set_csrf_cookie` function. -A [simple python AST scanning script]() found 39 of these source files in the Django repository -and packaged them up as benchmark tasks using -the same format as Exercism exercises. - -The tool also created a unit test for each task -which again uses the `ast` module to check that the refactor +A [simple python AST scanning script](https://github.com/paul-gauthier/aider/blob/main/benchmark/refactor_tools.py) +found 39 of these source files +and packaged them up as benchmark tasks. +Each task has a test +which uses the `ast` module to check that the refactor was performed roughly correctly: -- The updated source file must parse as correct python, without `SyntaxError` or `IndentationError` exceptions. This is a powerful check that will surface any mechanical errors made when attempting to edit the source code. +- The updated source file must parse as valid python, to surface misapplied edits which corrupt the file. - The target method must now exist as a top-level function in the file. - This new top-level function must contain approximately the same number of AST nodes as the original class method. This ensures that GPT didn't elide code and replace it with comments. - The original class must still be present in the file, and it must be smaller by about the number of AST nodes of the method which was removed. This helps confirm that the method was removed from the class, without other significant modifications. @@ -391,11 +388,11 @@ Based on the refactor benchmark results, aider's new unified diff format seems very effective at stopping GPT-4 Turbo from being a lazy coder. -Unified diffs were one of the very first edit formats I tried -when first building aider. +Unified diffs was one of the very first edit formats I tried +when originally building aider. I think a lot of other AI coding assistant projects have also tried going down this path. -It seems that any naive or direct use of structure diff formats +It seems like any naive or direct use of structured diff formats is pretty much doomed to failure. But the techniques described here and incorporated into aider provide From 20c1f10c30593752dfb98be1c8b3f8c16227307f Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 18:18:40 -0800 Subject: [PATCH 19/39] copy --- docs/unified-diffs.md | 117 +++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 69 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 0a5de46b0..d6addddc9 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -7,38 +7,30 @@ Aider now asks GPT-4 Turbo to use [unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Example-Unified.html) to edit your code when you request new features, improvements, bug fixes, test cases, etc. 
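The sanity checks described above can be approximated with the same `ast` machinery. The following is a minimal sketch, not the code in the linked `refactor_tools.py`; the 10% tolerance and the function signature are assumptions made for illustration.

```python
import ast


def count_nodes(node):
    # Total number of AST nodes in a subtree.
    return sum(1 for _ in ast.walk(node))


def check_refactor(new_source, class_name, method_name,
                   original_method_nodes, original_class_nodes,
                   tolerance=0.1):
    # 1. The edited file must still parse, which surfaces misapplied edits.
    try:
        tree = ast.parse(new_source)
    except SyntaxError:
        return False

    # 2. The method must now exist as a top-level function...
    funcs = [n for n in tree.body
             if isinstance(n, ast.FunctionDef) and n.name == method_name]
    if not funcs:
        return False
    # ...and be roughly as large as the original, so no code was elided.
    if count_nodes(funcs[0]) < original_method_nodes * (1 - tolerance):
        return False

    # 3. The class must still exist and must have shrunk by roughly
    #    the size of the method that was moved out of it.
    classes = [n for n in tree.body
               if isinstance(n, ast.ClassDef) and n.name == class_name]
    if not classes:
        return False
    expected = original_class_nodes - original_method_nodes
    if count_nodes(classes[0]) > expected * (1 + tolerance):
        return False

    return True
```

A node-count check cannot prove the refactor is semantically correct, but it is cheap and hard to satisfy with placeholder comments.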
-This new support for unified diffs massively reduces GPT-4 Turbo's bad habit of "lazy" coding. +Using unified diffs massively reduces GPT-4 Turbo's bad habit of "lazy" coding, +where it writes half completed code filled with comments +like "...add logic here...". -There are abundant anecdotes -about GPT-4 Turbo writing half completed code filled with comments that give -homework assignments to the user -like -"...add logic here..." -or -"...omitted for brevity...". -Aider's new unified diff edit format significantly reduces this sort of lazy coding, -as quantified by dramatically improved scores -on a new "laziness" benchmark suite. - -Aider's new benchmarking suite is +Aider's also has a new benchmarking suite designed to both provoke and quantify lazy coding. -It consists of 39 python refactoring tasks, +It consists of +39 python refactoring tasks, which ask GPT to remove a non-trivial method from a class and make it a stand alone function. -GPT-4 Turbo is prone to being lazy on this sort of copy/paste task, -by leaving a comment like -"...include the body of the original method..." +GPT-4 Turbo is prone to being lazy on this sort of task, +often leaving comments like +"...include the original method body...". This new laziness benchmark produced the following results with `gpt-4-1106-preview`: - **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format. - **Aider's new unified diff edit format raised the score to 65%**. -- **A system prompt based on widely circulated folklore performed no better than baseline.** This experiment used the existing "SEARCH/REPLACE block" format with an additional prompt that claims the user is blind, has no hands, will tip $2000 and has suffered from "truncated code trauma". This prompt scored only 15% on the refactor benchmark. +- **No benefit from the user being blind, without hands, tipping $2000 or fearing truncated code trauma.** These widely circulated folk remedies performed no better than baseline when added to the system prompt with aider's SEARCH/REPLACE edit format. Including *all* of them only scored at 15% The older `gpt-4-0613` also did better on the laziness benchmark by using unified diffs. The benchmark was designed to work with large source code files, and 28% of them are too large to use with June GPT-4's 8k context window. -This significantly harmed June GPT-4's performance on the benchmark. +This significantly harmed the benchmark results. - **The June GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format. - **Aider's new unified diff edit format raised June GPT-4's score to 59%**. @@ -49,14 +41,13 @@ These efforts included prompts about being tireless and diligent, use of OpenAI's function/tool calling capabilities and numerous variations on aider's existing editing formats, line number formats and other diff-like formats. The results shared here reflect -an extensive investigation and a large number of benchmarking runs of many approaches. +an extensive investigation and a large number of benchmark evaluations of many approaches. -The result is aider's new support for a unified diff editing format +The result is aider's new support for a unified diff editing format, which outperforms other solutions by a wide margin. The rest of this article will describe -the new unified diff editing format and -aider's new refactoring benchmark. -We will discuss some key design decisions involved in this new format, +aider's new editing format and refactoring benchmark. 
+We will discuss some key design decisions, and evaluate their significance using ablation experiments. @@ -75,9 +66,9 @@ A helpful shortcut here is to have empathy for GPT, and imagine you are the one being tasked with specifying code edits. Would you want to hand type a properly escaped json data structure to invoke surgical insert, delete, replace operations on specific code line numbers? -Would you want -to trigger an error and be forced to start over -after any typo, off-by-one line number or flubbed escape character? +Would it be ok to +trigger an error and be forced to start over +after any typo, off-by-one line number or flubbed escape sequence? GPT is quantitatively better at code editing when you reduce the burden of formatting edits by using a familiar, simple, high level @@ -86,12 +77,10 @@ and flexible editing format. ### Choose a familiar editing format Unified diffs are perhaps the most commonly used format for showing -changes to code, because it's the +code edits, because it's the default output format of `git diff`: ```diff -$ git diff hello.py -... --- a/hello.py +++ b/hello.py @@ -1,5 +1,5 @@ @@ -104,11 +93,9 @@ $ git diff hello.py ``` Choosing such a familiar, popular output format means that GPT has -seen *many* examples in its training data. -GPT has therefore been extensively trained to generate +seen *many* examples in its training dat. +It's been trained to generate text that conforms to the unified diff syntax. -We won't need to provide many details and examples -in the system prompt, as it knows this format by name. Unified diffs are usually intended to be consumed by the @@ -140,31 +127,28 @@ and again reached these same conclusions. Informally, this is probably because stuffing *source code* into JSON is complicated and error prone. -It likely takes a lot of the model's attention to escape and wrap code -in JSON containers. Wrapping the python code `print("On Windows use \"C:\\\"")` -as valid json is pretty painful and error prone: -`"print(\\"On Windows use \\"C:\\\\\\"\\")"` +as valid json is pretty painful and error prone. Due to escaping issues GPT's code is often syntactically incorrect when it's -unpacked from the JSON container or the JSON decode just fails entirely. +unpacked from JSON, +or the JSON decode just fails entirely. -On the other hand, the core of the unified diff format is extremely simple. +On the other hand, the core of the unified diff format is very simple. You include a hunk of the file that needs to be changed, -with every line prefixed to indicate unchanged, new or deleted lines. +with every line prefixed by a character +to indicate unchanged, new or deleted lines. A unified diff looks pretty much like the code it is modifying. The one complicated piece is the line numbers found at the start -of each hunk. They look something like this: `@@ -2,4 +3,5 @@`, -which indicates that the hunk -will change lines 2-4 in original file -into lines 3-5 in the updated file. -GPT is terrible at working accurately with source code line numbers. -This is a general observation about any use of line +of each hunk. They look something like this: `@@ -2,4 +3,5 @@`. +GPT is terrible at working with source code line numbers. +This is a general observation about *any* use of line numbers in editing formats, backed up by many quantitative benchmark +You've probably ignored the line numbers in every diff you've seen? 
So aider tells GPT not to include line numbers, and just interprets each hunk from the unified diffs as a search and replace operation: @@ -257,25 +241,23 @@ applied to the original file. produce a 30-50% increase in editing errors,** where diffs fail to apply or apply incorrectly and produce invalid code. -When a patch fails, aider needs to ask GPT for an updated version of the diff. +When a patch fails, aider needs to ask GPT for a corrected version of the diff. This takes time, costs tokens and sometimes fails to result in a successful edit even after multiple retries. There are probably a couple of reasons why high level diffs -improve code editing performance: +help: -- It's easier to produce diffs that both correctly match the original code and correctly produces the intended new code. There is less risk of getting confused while generating a series of surgical edits mixed into existing code. -- The high level hunks often contain more lines than a surgical version, so they are less likely to accidentally match unrelated parts of the original file. This is important because GPT can't reliably give us line numbers to specify exactly where in the file to make the change. +- It's easier to produce diffs that both correctly match the original code and correctly produces the intended new code. There is less risk of getting confused compared to generating a series of surgical edits mixed into existing code. +- High level hunks often contain more lines than a surgical hunk, so they are less likely to accidentally match unrelated parts of code. This is helpful because GPT can't reliably give us line numbers to specify exactly where in the file to make changes. ### Be flexible when applying edits -GPT frequently makes errors when generating diffs, which -can prevent them from being correctly -applied to edit the code. -These imperfect diffs exhibit a variety of problems: +GPT frequently makes imperfect diffs that won't apply cleanly. +They exhibit a variety of problems: - GPT forgets to include semantically irrelevant details. Often GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change. -- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* ` ` as if they were already in the file. +- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* ` ` as if they were already there. - GPT jumps ahead to show edits to a different part of the file without starting a new hunk with a `@@ ... @@` divider. As an example of the first issue, consider this source code: @@ -325,6 +307,7 @@ If a hunk doesn't apply cleanly, aider uses a number of strategies: These flexible patching strategies are critical, and removing them radically increases the number of hunks which fail to apply. + **Experiments where flexible patching is disabled show**: - **GPT-4 Turbo's performance drops from 65% down to 56%** on the refactoring benchmark. @@ -335,41 +318,37 @@ radically increases the number of hunks which fail to apply. Aider has long used a [benchmark suite based on 133 Exercism python exercises](). But these are mostly small coding problems, -usually requiring only a few dozen lines of code to solve. -GPT-4 Turbo was typically only lazy on 2-3 of these exercises: -the ones with the largest amount of code and which involved refactoring. 
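The search-and-replace interpretation of a hunk sketched above boils down to a few lines of Python. This is a rough sketch rather than aider's implementation (the real editing code layers the flexible fallback strategies discussed in this post on top of it), and the function names are invented.

```python
def hunk_to_before_after(hunk_lines):
    # hunk_lines are the raw body lines of one hunk, with their leading
    # marker character and trailing newline (no "@@ ... @@" header).
    before, after = [], []
    for line in hunk_lines:
        op, text = line[:1], line[1:]
        if op == "-":
            before.append(text)          # only in the original file
        elif op == "+":
            after.append(text)           # only in the updated file
        else:
            before.append(text)          # context lines appear in both
            after.append(text)
    return "".join(before), "".join(after)


def apply_hunk(content, hunk_lines):
    # Treat the hunk as a search and replace, ignoring any line numbers.
    before, after = hunk_to_before_after(hunk_lines)
    if before not in content:
        # A real implementation would now fall back to more flexible
        # strategies: re-diffing the two versions, splitting the hunk
        # apart, shrinking the context window, and so on.
        raise ValueError("hunk does not apply cleanly")
    return content.replace(before, after, 1)
```

For the `main()` example above, `before` ends up holding the original version of the function with the `Hello` call and `after` the version with `Goodbye`; the `@@` header is never consulted.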
-Rather than fully completing the refactor, GPT would often -just add a comment -referencing old code like -"...copy $USD formatting code here...". +usually requiring only a few dozen lines of code. +GPT-4 Turbo is typically only lazy on 2-3 of these exercises: +the ones with the most code and which involve refactoring. Based on this observation, I set out to build a benchmark based on refactoring -a non-trivial amount of code found in fairly large source files. +a non-trivial amount of code found in fairly large files. To do this, I used python's `ast` module to analyze the [Django repository](https://github.com/django/django) to: - Find source files that contain class methods which are non-trivial, having more than 100 AST nodes in their implementation. -- Focus on methods that are only part of a larger class, which has at least twice as much code as the method. +- Focus on methods that are part of a larger class, which has at least twice as much code as the method itself. - Find methods that don't use their `self` parameter, so they can be trivially refactored out of the class. We can then turn each of these source files into a task for the benchmark, -where we ask GPT to: +where we ask GPT to do something like: > Refactor the `_set_csrf_cookie` method in the `CsrfViewMiddleware` class to be a stand alone, top level function. > Name the new function `_set_csrf_cookie`, exactly the same name as the existing method. > Update any existing `self._set_csrf_cookie` calls to work with the new `_set_csrf_cookie` function. A [simple python AST scanning script](https://github.com/paul-gauthier/aider/blob/main/benchmark/refactor_tools.py) -found 39 of these source files +found 39 suitable files and packaged them up as benchmark tasks. Each task has a test -which uses the `ast` module to check that the refactor +that checks if refactor was performed roughly correctly: - The updated source file must parse as valid python, to surface misapplied edits which corrupt the file. - The target method must now exist as a top-level function in the file. - This new top-level function must contain approximately the same number of AST nodes as the original class method. This ensures that GPT didn't elide code and replace it with comments. -- The original class must still be present in the file, and it must be smaller by about the number of AST nodes of the method which was removed. This helps confirm that the method was removed from the class, without other significant modifications. +- The original class must still be present in the file, and it must be smaller by about the number of AST nodes in the method which was removed. This helps confirm that the method was removed from the class, without other significant modifications. To be clear, this is not a rigorous test that the refactor was performed correctly. But it does serve as a basic sanity check that the refactor was essentially done as a cut & paste, without eliding any code as comments. From 9807dbdeb1b2921112e8df7e8db7e4ba8ecb2a93 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 18:19:18 -0800 Subject: [PATCH 20/39] copy --- docs/unified-diffs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index d6addddc9..2f0a25604 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -146,7 +146,7 @@ of each hunk. They look something like this: `@@ -2,4 +3,5 @@`. GPT is terrible at working with source code line numbers. 
This is a general observation about *any* use of line numbers in editing formats, -backed up by many quantitative benchmark +backed up by many quantitative benchmark experiments. You've probably ignored the line numbers in every diff you've seen? So aider tells GPT not to include line numbers, From 8c9536bcdd95217050aedbb7d7bf566d907b1f4c Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 18:23:45 -0800 Subject: [PATCH 21/39] copy --- docs/unified-diffs.md | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 2f0a25604..c4ea92c83 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -27,9 +27,9 @@ This new laziness benchmark produced the following results with `gpt-4-1106-prev - **Aider's new unified diff edit format raised the score to 65%**. - **No benefit from the user being blind, without hands, tipping $2000 or fearing truncated code trauma.** These widely circulated folk remedies performed no better than baseline when added to the system prompt with aider's SEARCH/REPLACE edit format. Including *all* of them only scored at 15% -The older `gpt-4-0613` also did better on the laziness benchmark by using unified diffs. +The older `gpt-4-0613` also did better on the laziness benchmark using unified diffs. The benchmark was designed to work with large source code files, and -28% of them are too large to use with June GPT-4's 8k context window. +28% of them are too large to fit in June GPT-4's 8k context window. This significantly harmed the benchmark results. - **The June GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format. @@ -86,7 +86,6 @@ default output format of `git diff`: @@ -1,5 +1,5 @@ def main(args): # show a greeting - - print("Hello!") + print("Goodbye!") return @@ -149,7 +148,7 @@ numbers in editing formats, backed up by many quantitative benchmark experiments. You've probably ignored the line numbers in every diff you've seen? -So aider tells GPT not to include line numbers, +So aider tells GPT not to include them, and just interprets each hunk from the unified diffs as a search and replace operation: @@ -159,19 +158,17 @@ This diff: @@ ... @@ def main(args): # show a greeting - - print("Hello!") + print("Goodbye!") return ``` -Means we want to search the original source file for all the +Means we want to search the file for all the *space* ` ` and *minus* `-` lines from the hunk: ```python def main(args): # show a greeting - print("Hello!") return ``` @@ -181,7 +178,6 @@ And then replace them with all the *space* ` ` and *plus* `+` lines: ```python def main(args): # show a greeting - print("Goodbye!") return ``` @@ -267,7 +263,6 @@ import sys def main(args): # show a greeting - print("Hello!") return @@ -284,11 +279,9 @@ because of the missing comment. ```diff @@ ... @@ -def main(args): -- - print("Hello!") - return +def main(args): -+ + print("Goodbye!") + return ``` From ed6d30c84952ea107b6f29fa97ee93acbb8ac3f9 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 18:25:29 -0800 Subject: [PATCH 22/39] copy --- docs/unified-diffs.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index c4ea92c83..50418bc59 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -92,7 +92,7 @@ default output format of `git diff`: ``` Choosing such a familiar, popular output format means that GPT has -seen *many* examples in its training dat. 
+seen *many* examples in its training data. It's been trained to generate text that conforms to the unified diff syntax. @@ -196,7 +196,6 @@ Consider this slightly more complex change, which renames the variable `n` to -def factorial(n): +def factorial(number): "compute factorial" - - if n == 0: + if number == 0: return 1 @@ -214,14 +213,12 @@ but it is much easier to see two different coherent versions of the @@ ... @@ -def factorial(n): - "compute factorial" -- - if n == 0: - return 1 - else: - return n * factorial(n-1) +def factorial(number): + "compute factorial" -+ + if number == 0: + return 1 + else: From 5c5025e6cfb56d0a94164d817fef7eb61afa4270 Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 18:38:52 -0800 Subject: [PATCH 23/39] copy --- docs/unified-diffs.md | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 50418bc59..a083b5de7 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -6,8 +6,8 @@ Aider now asks GPT-4 Turbo to use [unified diffs](https://www.gnu.org/software/diffutils/manual/html_node/Example-Unified.html) -to edit your code when you request new features, improvements, bug fixes, test cases, etc. -Using unified diffs massively reduces GPT-4 Turbo's bad habit of "lazy" coding, +to edit your code. +This massively reduces GPT-4 Turbo's bad habit of "lazy" coding, where it writes half completed code filled with comments like "...add logic here...". @@ -25,29 +25,31 @@ This new laziness benchmark produced the following results with `gpt-4-1106-prev - **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format. - **Aider's new unified diff edit format raised the score to 65%**. -- **No benefit from the user being blind, without hands, tipping $2000 or fearing truncated code trauma.** These widely circulated folk remedies performed no better than baseline when added to the system prompt with aider's SEARCH/REPLACE edit format. Including *all* of them only scored at 15% +- **No benefit from the user being blind, without hands, tipping $2000 or fearing truncated code trauma.** These widely circulated folk remedies performed no better than baseline when added to the system prompt with aider's SEARCH/REPLACE edit format. Including *all* of them still only scored at 15% -The older `gpt-4-0613` also did better on the laziness benchmark using unified diffs. -The benchmark was designed to work with large source code files, and -28% of them are too large to fit in June GPT-4's 8k context window. -This significantly harmed the benchmark results. +The older `gpt-4-0613` also did better on the laziness benchmark using unified diffs: - **The June GPT-4's baseline was 26%** using aider's existing "SEARCH/REPLACE block" edit format. - **Aider's new unified diff edit format raised June GPT-4's score to 59%**. +- The benchmark was designed to use large files, and +28% of them are too large to fit in June GPT-4's 8k context window. +This significantly harmed the benchmark results. Before settling on unified diffs, -I explored many other approaches. -These efforts included prompts about being tireless and diligent, -use of OpenAI's function/tool calling capabilities and numerous variations on -aider's existing editing formats, line number formats and other diff-like formats. 
+I explored many other approaches including: +prompts about being tireless and diligent, +OpenAI's function/tool calling capabilities, +numerous variations on aider's existing editing formats, +line number based formats +and other diff-like formats. The results shared here reflect -an extensive investigation and a large number of benchmark evaluations of many approaches. +an extensive investigation and benchmark evaluations of many approaches. -The result is aider's new support for a unified diff editing format, -which outperforms other solutions by a wide margin. +Aider's new unified diff editing format +outperforms other solutions by a wide margin. The rest of this article will describe aider's new editing format and refactoring benchmark. -We will discuss some key design decisions, +It will highlight some key design decisions, and evaluate their significance using ablation experiments. @@ -148,7 +150,7 @@ numbers in editing formats, backed up by many quantitative benchmark experiments. You've probably ignored the line numbers in every diff you've seen? -So aider tells GPT not to include them, +So aider tells GPT not to even include them, and just interprets each hunk from the unified diffs as a search and replace operation: @@ -163,8 +165,8 @@ This diff: return ``` -Means we want to search the file for all the -*space* ` ` and *minus* `-` lines from the hunk: +Means we need to search the file for the +*space* ` ` and *minus* `-` lines: ```python def main(args): @@ -173,7 +175,7 @@ def main(args): return ``` -And then replace them with all the *space* ` ` and *plus* `+` lines: +And replace them with the *space* ` ` and *plus* `+` lines: ```python def main(args): @@ -195,7 +197,6 @@ Consider this slightly more complex change, which renames the variable `n` to @@ ... @@ -def factorial(n): +def factorial(number): - "compute factorial" - if n == 0: + if number == 0: return 1 @@ -212,13 +213,11 @@ but it is much easier to see two different coherent versions of the ```diff @@ ... @@ -def factorial(n): -- "compute factorial" - if n == 0: - return 1 - else: - return n * factorial(n-1) +def factorial(number): -+ "compute factorial" + if number == 0: + return 1 + else: From f3b03d68fda90142792410ea0bf9bf00604b31cc Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Sun, 17 Dec 2023 19:00:45 -0800 Subject: [PATCH 24/39] copy --- docs/unified-diffs.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index a083b5de7..640791eeb 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -65,11 +65,11 @@ GPT-4 code editing format: - FLEXIBLE - Strive to be maximally flexible when interpreting GPT's edit instructions. A helpful shortcut here is to have empathy for GPT, and imagine you -are the one being tasked with specifying code edits. +are the one being asked to specify code edits. Would you want to hand type a properly escaped json data structure to invoke surgical insert, delete, replace operations on specific code line numbers? -Would it be ok to -trigger an error and be forced to start over +How would you feel about +errors firing after any typo, off-by-one line number or flubbed escape sequence? GPT is quantitatively better at code editing when you reduce the @@ -139,7 +139,6 @@ On the other hand, the core of the unified diff format is very simple. You include a hunk of the file that needs to be changed, with every line prefixed by a character to indicate unchanged, new or deleted lines. 
- A unified diff looks pretty much like the code it is modifying. The one complicated piece is the line numbers found at the start @@ -150,7 +149,7 @@ numbers in editing formats, backed up by many quantitative benchmark experiments. You've probably ignored the line numbers in every diff you've seen? -So aider tells GPT not to even include them, +Aider tells GPT not to include them, and just interprets each hunk from the unified diffs as a search and replace operation: From c0c3ef870121146dd20d123f3e6b719db42de64b Mon Sep 17 00:00:00 2001 From: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com> Date: Sun, 17 Dec 2023 19:12:21 -0800 Subject: [PATCH 25/39] Update unified-diffs.md --- docs/unified-diffs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 640791eeb..d4f993756 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -11,7 +11,7 @@ This massively reduces GPT-4 Turbo's bad habit of "lazy" coding, where it writes half completed code filled with comments like "...add logic here...". -Aider's also has a new benchmarking suite +Aider also has a new benchmarking suite designed to both provoke and quantify lazy coding. It consists of 39 python refactoring tasks, From 4ece24e8ff29b64128f107864c9f0d8f7cf15f9a Mon Sep 17 00:00:00 2001 From: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com> Date: Sun, 17 Dec 2023 19:17:01 -0800 Subject: [PATCH 26/39] Update unified-diffs.md --- docs/unified-diffs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index d4f993756..8400a3a35 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -78,7 +78,7 @@ and flexible editing format. ### Choose a familiar editing format -Unified diffs are perhaps the most commonly used format for showing +Unified diffs are perhaps the most common way to show code edits, because it's the default output format of `git diff`: From f0cd7a6c4c32afce2c86d051cca3142bf55de7dd Mon Sep 17 00:00:00 2001 From: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com> Date: Sun, 17 Dec 2023 19:20:40 -0800 Subject: [PATCH 27/39] Update unified-diffs.md --- docs/unified-diffs.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 8400a3a35..1d172a7f7 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -104,8 +104,8 @@ usually intended to be consumed by the program. They need to *accurately* reflect the original and updated file contents, otherwise the patch command will fail. -Having GPT specify changes in a well-known format that is usually consumed by a -fairly rigid program like patch +Having GPT specify changes in a format that is usually consumed by a +rigid program like patch seems to encourage rigor. GPT is less likely to leave informal editing instructions in comments @@ -119,11 +119,11 @@ not talking to a person. Aider's [previous benchmark results](https://aider.chat/docs/benchmarks.html) made it clear that simple editing formats -work much better than complex ones. +work best. Even though OpenAI provides extensive support for structured formats like json and function calls, GPT is worse at editing code if you use them. -I repeated these and many other similar benchmarks against GPT-4 Turbo, +I repeated these and other similar benchmarks against GPT-4 Turbo, and again reached these same conclusions. 
Informally, this is probably because stuffing *source code* into JSON is complicated From 1fa67f0aefeb222f3fea5dc3f75524cd2d352398 Mon Sep 17 00:00:00 2001 From: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com> Date: Mon, 18 Dec 2023 06:11:31 -0800 Subject: [PATCH 28/39] Update unified-diffs.md --- docs/unified-diffs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 1d172a7f7..53b153ee7 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -247,7 +247,7 @@ help: GPT frequently makes imperfect diffs that won't apply cleanly. They exhibit a variety of problems: -- GPT forgets to include semantically irrelevant details. Often GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change. +- GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change. - GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* ` ` as if they were already there. - GPT jumps ahead to show edits to a different part of the file without starting a new hunk with a `@@ ... @@` divider. From 0de715461a66549ec1bd60e9006d34490a7e2226 Mon Sep 17 00:00:00 2001 From: paul-gauthier <69695708+paul-gauthier@users.noreply.github.com> Date: Mon, 18 Dec 2023 06:16:13 -0800 Subject: [PATCH 29/39] Update unified-diffs.md --- docs/unified-diffs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index 53b153ee7..a67c7e114 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -299,7 +299,7 @@ radically increases the number of hunks which fail to apply. **Experiments where flexible patching is disabled show**: - **GPT-4 Turbo's performance drops from 65% down to 56%** on the refactoring benchmark. -- **We see a 9X increase in editing errors** on aider's original Exercism benchmark. +- **A 9X increase in editing errors** on aider's original Exercism benchmark. 
## Refactoring benchmark From 6ab2db192cc5c98309e6556fb686d9407041859d Mon Sep 17 00:00:00 2001 From: Paul Gauthier Date: Mon, 18 Dec 2023 09:53:28 -0800 Subject: [PATCH 30/39] Added udiff graph --- assets/benchmarks-udiff.svg | 1751 +++++++++++++++++++++++++++++++++++ benchmark/benchmark.py | 92 +- docs/unified-diffs.md | 18 +- 3 files changed, 1847 insertions(+), 14 deletions(-) create mode 100644 assets/benchmarks-udiff.svg diff --git a/assets/benchmarks-udiff.svg b/assets/benchmarks-udiff.svg new file mode 100644 index 000000000..014f04508 --- /dev/null +++ b/assets/benchmarks-udiff.svg @@ -0,0 +1,1751 @@ + + + + + + + + 2023-12-18T09:51:00.014416 + image/svg+xml + + + Matplotlib v3.8.2, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 9f934dc93..2eca49100 100755 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -77,6 +77,9 @@ def show_stats(dirnames, graphs): elif row.model.startswith(gpt4): row.model = gpt4 + "\n" + row.model[len(gpt4) :] + if "folk" in row.dir_name: + row.edit_format = "folk" + # if row.model == "gpt-4\n-1106-preview": # row.model += "\n(preliminary)" @@ -116,15 +119,16 @@ def show_stats(dirnames, graphs): # use the average in the main bar rows[repeat_row]["pass_rate_2"] = repeat_avg else: - repeat_hi = repeat_lo = repeat_avg = None + repeat_hi = repeat_lo = repeat_avg = None # noqa: F841 df = pd.DataFrame.from_records(rows) df.sort_values(by=["model", "edit_format"], inplace=True) # dump(df) if graphs: - plot_timing(df) - plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg) + # plot_timing(df) + # plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg) + plot_refactoring(df) def plot_timing(df): @@ -283,6 +287,88 @@ def plot_outcomes(df, repeats, repeat_hi, repeat_lo, repeat_avg): # df.to_csv("tmp.benchmarks.csv") +def plot_refactoring(df): + tries = [df.groupby(["model", "edit_format"])["pass_rate_1"].mean()] + + plt.rcParams["hatch.linewidth"] = 0.5 + plt.rcParams["hatch.color"] = "#444444" + + from matplotlib import rc + + rc("font", **{"family": "sans-serif", "sans-serif": ["Helvetica"], "size": 10}) + + fig, ax = plt.subplots(figsize=(6, 4)) + ax.grid(axis="y", zorder=0, lw=0.2) + + zorder = 1 + for grouped in tries: + zorder += 1 + df = grouped.unstack() + num_models, num_formats = df.shape + + pos = 
np.array(range(num_models)) + width = 0.8 / num_formats + + formats = df.columns + models = df.index + + for i, fmt in enumerate(formats): + hatch = "" + + if fmt == "diff": + color = "#b3e6a8" + label = "Baseline (search/replace blocks)" + elif fmt == "udiff": + color = "#b3d1e6" + label = "Unified diffs" + elif fmt == "folk": + label = "Folk remedy prompt (blind, no hands, ...)" + color = "#b3e6a8" + hatch = "////" + + if zorder > 1: + edge = dict( + edgecolor="#ffffff", + linewidth=1.5, + ) + else: + edge = dict() + if zorder == 2: + edge["label"] = label + + rects = ax.bar( + pos + i * width, + df[fmt], + width * 0.95, + color=color, + hatch=hatch, + zorder=zorder, + **edge, + ) + + if zorder == 2: + ax.bar_label(rects, padding=4, labels=[f"{v:.0f}%" for v in df[fmt]], size=6) + + ax.set_xticks([p + 0.5 * width for p in pos]) + ax.set_xticklabels(models) + + ax.set_ylabel("Percent of exercises completed successfully") + # ax.set_xlabel("Model") + ax.set_title('Refactoring "Laziness" Benchmark\n(percent coding tasks correct)') + ax.legend( + title="Edit Format", + loc="upper left", + # bbox_to_anchor=(0.95, 0.95), + ) + ax.set_ylim(top=100) + + plt.tight_layout() + plt.savefig("tmp.svg") + imgcat(fig) + + # df.to_csv("tmp.benchmarks.csv") + + def resolve_dirname(dirname, use_single_prior, make_new): if len(dirname.parts) > 1: return dirname diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md index a67c7e114..64e961e9c 100644 --- a/docs/unified-diffs.md +++ b/docs/unified-diffs.md @@ -1,7 +1,7 @@ # Fixing GPT-4 Turbo laziness with unified diffs -![robot flowchart](../assets/udiffs.jpg) +![robot flowchart](../assets/benchmarks-udiff.svg) Aider now asks GPT-4 Turbo to use @@ -15,10 +15,8 @@ Aider also has a new benchmarking suite designed to both provoke and quantify lazy coding. It consists of 39 python refactoring tasks, -which ask GPT to remove a non-trivial method from a class and make it -a stand alone function. -GPT-4 Turbo is prone to being lazy on this sort of task, -often leaving comments like +which tend to make GPT-4 Turbo very lazy, +often resulting in comments like "...include the original method body...". This new laziness benchmark produced the following results with `gpt-4-1106-preview`: @@ -56,8 +54,8 @@ and evaluate their significance using ablation experiments. ## Unified diff editing format The design and implementation of aider's new unified diff editing format -helped clarify some general principles, which I think are applicable to any effective -GPT-4 code editing format: +helped clarify some general principles +for GPT-4 code editing: - FAMILIAR - Choose an edit format that GPT is already familiar with. - SIMPLE - Choose a simple format that avoids escaping, syntactic overhead and brittle specifiers like line numbers or line counts. @@ -68,9 +66,7 @@ A helpful shortcut here is to have empathy for GPT, and imagine you are the one being asked to specify code edits. Would you want to hand type a properly escaped json data structure to invoke surgical insert, delete, replace operations on specific code line numbers? -How would you feel about -errors firing -after any typo, off-by-one line number or flubbed escape sequence? +How would you feel about any mistake causing all your work to be discarded? 
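As a small illustration of that burden, here is what one line of code (the `print` example used elsewhere in this post) looks like once it has to travel inside a JSON edit payload. The payload schema here is invented; only the escaping behavior is real.

```python
import json

# A hypothetical "surgical edit" payload of the kind described above.
edit = {
    "action": "replace",
    "start_line": 3,
    "end_line": 3,
    "new_text": 'print("On Windows use \\"C:\\\\\\"")',
}

print(json.dumps(edit))
# the "new_text" value serializes to: "print(\"On Windows use \\\"C:\\\\\\\"\")"
```

One mistyped backslash makes the whole payload invalid, which is exactly the kind of brittleness a plain diff avoids.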
 GPT is quantitatively better at code editing
 when you reduce the burden of formatting edits
 by using a familiar, simple, high level
 and flexible editing format.
@@ -93,7 +89,7 @@ default output format of `git diff`:
     return
 ```

-Choosing such a familiar, popular output format means that GPT has
+Choosing such a popular output format means that GPT has
 seen *many* examples in its training data.
 It's been trained to generate text
 that conforms to the unified diff syntax.

From 7824faed64dfd2300555a94c5cb2c35106f20533 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Mon, 18 Dec 2023 09:54:51 -0800
Subject: [PATCH 31/39] space lines

---
 docs/unified-diffs.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md
index 64e961e9c..25ed0d595 100644
--- a/docs/unified-diffs.md
+++ b/docs/unified-diffs.md
@@ -161,7 +161,7 @@ This diff:
 ```

 Means we need to search the file for the
-*space* ` ` and *minus* `-` lines:
+*space* and *minus* `-` lines:

 ```python
 def main(args):
@@ -170,7 +170,7 @@ def main(args):
     return
 ```

-And replace them with the *space* ` ` and *plus* `+` lines:
+And replace them with the *space* and *plus* `+` lines:

 ```python
 def main(args):
@@ -244,7 +244,7 @@ GPT frequently makes imperfect diffs that won't apply cleanly.
 They exhibit a variety of problems:

 - GPT forgets things like comments, docstrings, blank lines, etc. Or it skips over some code that it doesn't intend to change.
-- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* ` ` as if they were already there.
+- GPT forgets the leading *plus* `+` character to mark novel lines that it wants to add to the file. It incorrectly includes them with a leading *space* as if they were already there.
 - GPT jumps ahead to show edits to a different part of the file without starting a new hunk with a `@@ ... @@` divider.

 As an example of the first issue, consider this source code:
@@ -282,10 +282,10 @@ Aider tries to be very flexible when applying diffs,
 in order to handle defects.
 If a hunk doesn't apply cleanly, aider uses a number of strategies:

-- Normalize the hunk, by taking the *minus* `-` and *space* ` ` lines as one version of the hunk and the *space* ` ` and *plus* `+` lines as a second version and doing an actual unified diff on them.
-- Try and discover new lines that GPT is trying to add but which it forgot to mark with *plus* `+` markers. This is done by diffing the *minus* `-` and *space* ` ` lines back against the original file.
+- Normalize the hunk, by taking the *minus* `-` and *space* lines as one version of the hunk and the *space* and *plus* `+` lines as a second version and doing an actual unified diff on them.
+- Try and discover new lines that GPT is trying to add but which it forgot to mark with *plus* `+` markers. This is done by diffing the *minus* `-` and *space* lines back against the original file.
 - Break a large hunk apart into an overlapping sequence of smaller hunks, which each contain only one contiguous run of *plus* `+` and *minus* `-` lines. Try and apply each of these sub-hunks independently.
-- Vary the size and offset of the "context window" of *space* ` ` lines from the hunk that are used to localize the edit to a specific part of the file.
+- Vary the size and offset of the "context window" of *space* lines from the hunk that are used to localize the edit to a specific part of the file.
 - Combine the above mechanisms to progressively become more permissive about how to apply the hunk.

 These flexible patching strategies are critical, and

From 16534e914b256d4aae7777c656aa0c023386f273 Mon Sep 17 00:00:00 2001
From: Paul Gauthier
Date: Mon, 18 Dec 2023 10:02:44 -0800
Subject: [PATCH 32/39] better graph

---
 assets/benchmarks-udiff.svg | 1147 +++++++++++++++++------------------
 benchmark/benchmark.py      |   23 +-
 2 files changed, 580 insertions(+), 590 deletions(-)

diff --git a/assets/benchmarks-udiff.svg b/assets/benchmarks-udiff.svg
index 014f04508..92843d4a7 100644
--- a/assets/benchmarks-udiff.svg
+++ b/assets/benchmarks-udiff.svg
 [Matplotlib-generated SVG chart data for the regenerated benchmark graph]

Date: Mon, 18 Dec 2023 10:15:07 -0800
Subject: [PATCH 33/39] copy

---
 assets/benchmarks-udiff.svg | 257 +++++++++++++++++++-----------------
 1 file changed, 133 insertions(+), 124 deletions(-)

diff --git a/assets/benchmarks-udiff.svg b/assets/benchmarks-udiff.svg
index 92843d4a7..fbe71dcf4 100644
--- a/assets/benchmarks-udiff.svg
+++ b/assets/benchmarks-udiff.svg
 [Matplotlib-generated SVG chart data for the regenerated benchmark graph]

Date: Mon, 18 Dec 2023 10:20:40 -0800
Subject: [PATCH 35/39] copy

---
 assets/benchmarks-udiff.svg | 186 ++++++++++++++++--------------------
 benchmark/benchmark.py      |   2 +-
 2 files changed, 83 insertions(+), 105 deletions(-)

diff --git a/assets/benchmarks-udiff.svg b/assets/benchmarks-udiff.svg
index be042806b..04fd2bf60 100644
--- a/assets/benchmarks-udiff.svg
+++ b/assets/benchmarks-udiff.svg
 [Matplotlib-generated SVG chart data for the regenerated benchmark graph]

Date: Mon, 18 Dec 2023 10:29:34 -0800
Subject: [PATCH 36/39] copy

---
 assets/benchmarks-udiff.svg | 1316 +++++++++++++++++--------------------
 1 file changed, 568 insertions(+), 748 deletions(-)

diff --git a/assets/benchmarks-udiff.svg b/assets/benchmarks-udiff.svg
index 04fd2bf60..c2b3dda8a 100644
--- a/assets/benchmarks-udiff.svg
+++ b/assets/benchmarks-udiff.svg
 [Matplotlib-generated SVG chart data for the regenerated benchmark graph]

Date: Mon, 18 Dec 2023 12:24:43 -0800
Subject: [PATCH 39/39] Update unified-diffs.md

---
 docs/unified-diffs.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/unified-diffs.md b/docs/unified-diffs.md
index fe28fd8f5..015d87e4e 100644
--- a/docs/unified-diffs.md
+++ b/docs/unified-diffs.md
@@ -22,7 +22,7 @@ often resulting in comments like
 This new laziness benchmark produced the following results with
 `gpt-4-1106-preview`:

 - **GPT-4 Turbo only scored 15% as a baseline** using aider's existing "SEARCH/REPLACE block" edit format.
-- **Aider's new unified diff edit format raised the score to 65%**.
+- **Aider's new unified diff edit format raised the score to 62%**.
 - **No benefit from the user being blind, without hands, tipping $2000 or fearing truncated code trauma.** These widely circulated folk remedies performed no better than baseline when added to the system prompt with aider's SEARCH/REPLACE edit format. Including *all* of them still only scored at 15%

 The older `gpt-4-0613` also did better on the laziness benchmark using unified diffs: