- dirname: 2024-05-01-20-05-59--direct-opus-filenames-outside-fence test_cases: 133 model: claude-3-opus-20240229 released: 2024-02-29 edit_format: diff commit_hash: f4b1797-dirty, f4b1797 pass_rate_1: 53.4 pass_rate_2: 68.4 percent_cases_well_formed: 100.0 error_outputs: 2 num_malformed_responses: 0 user_asks: 0 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 1 command: aider --opus date: 2024-05-01 versions: 0.30.2-dev seconds_per_case: 32.4 total_cost: 13.8395 - dirname: 2024-03-06-16-42-00--claude3-sonnet-whole test_cases: 133 model: claude-3-sonnet-20240229 released: 2024-02-29 edit_format: whole commit_hash: a5f8076-dirty pass_rate_1: 43.6 pass_rate_2: 54.9 percent_cases_well_formed: 100.0 error_outputs: 1 num_malformed_responses: 0 user_asks: 1 lazy_comments: 1 syntax_errors: 2 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 7 command: aider --sonnet date: 2024-03-06 versions: 0.25.1-dev seconds_per_case: 23.1 total_cost: 0.0000 - dirname: 2024-05-03-20-47-24--gemini-1.5-pro-diff-fenced test_cases: 133 model: gemini-1.5-pro-latest edit_format: diff-fenced commit_hash: 3a48dfb, 5d32dd7 pass_rate_1: 45.9 pass_rate_2: 57.1 percent_cases_well_formed: 87.2 error_outputs: 60 num_malformed_responses: 17 user_asks: 3 lazy_comments: 0 syntax_errors: 8 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 3 command: aider --model gemini/gemini-1.5-pro-latest date: 2024-05-03 versions: 0.31.2-dev seconds_per_case: 21.3 total_cost: 0.0000 - dirname: 2024-05-08-20-59-15--may-gpt-3.5-turbo-whole test_cases: 133 model: gpt-3.5-turbo-0125 released: 2024-01-25 edit_format: whole commit_hash: 1d55f74 pass_rate_1: 41.4 pass_rate_2: 50.4 percent_cases_well_formed: 100.0 error_outputs: 0 num_malformed_responses: 0 user_asks: 0 lazy_comments: 0 syntax_errors: 3 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 4 command: aider -3 date: 2024-05-08 versions: 0.33.1-dev seconds_per_case: 6.5 total_cost: 0.5032 - dirname: 2023-11-06-21-23-59--gpt-3.5-turbo-0301 test_cases: 133 model: gpt-3.5-turbo-0301 released: 2023-03-01 edit_format: whole commit_hash: 44388db-dirty pass_rate_1: 50.4 pass_rate_2: 57.9 percent_cases_well_formed: 100.0 error_outputs: 1 num_malformed_responses: 0 user_asks: 1 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 8 command: aider --model gpt-3.5-turbo-0301 date: 2023-11-06 versions: 0.16.4-dev seconds_per_case: 6.5 total_cost: 0.4822 - dirname: 2023-11-07-02-41-07--gpt-3.5-turbo-0613 test_cases: 133 model: gpt-3.5-turbo-0613 released: 2023-06-13 edit_format: whole commit_hash: 93aa497-dirty pass_rate_1: 38.3 pass_rate_2: 50.4 percent_cases_well_formed: 100.0 error_outputs: 1 num_malformed_responses: 0 user_asks: 1 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 5 command: aider --model gpt-3.5-turbo-0613 date: 2023-11-07 versions: 0.16.4-dev seconds_per_case: 18.0 total_cost: 0.5366 - dirname: 2024-04-30-21-40-51--litellm-gpt-3.5-turbo-1106-again test_cases: 132 model: gpt-3.5-turbo-1106 edit_format: whole commit_hash: 7b14d77 pass_rate_1: 45.5 pass_rate_2: 56.1 percent_cases_well_formed: 100.0 error_outputs: 1 num_malformed_responses: 0 user_asks: 1 lazy_comments: 0 syntax_errors: 19 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 0 command: aider --model gpt-3.5-turbo-1106 date: 2024-04-30 versions: 0.30.2-dev seconds_per_case: 5.3 total_cost: 0.3261 - dirname: 2024-01-25-23-37-15--jan-exercism-gpt-4-0125-preview-udiff test_cases: 133 model: gpt-4-0125-preview released: 2024-01-25 edit_format: udiff commit_hash: edcf9b1 pass_rate_1: 55.6 pass_rate_2: 66.2 percent_cases_well_formed: 97.7 error_outputs: 6 num_malformed_responses: 3 user_asks: 0 lazy_comments: 0 syntax_errors: 3 indentation_errors: 7 exhausted_context_windows: 0 test_timeouts: 4 command: aider --model gpt-4-0125-preview date: 2024-01-25 versions: 0.22.1-dev seconds_per_case: 44.8 total_cost: 14.6428 - dirname: 2024-05-04-15-07-30--redo-gpt-4-0314-diff-reminder-rules test_cases: 133 model: gpt-4-0314 released: 2023-03-14 edit_format: diff commit_hash: 0d43468 pass_rate_1: 50.4 pass_rate_2: 66.2 percent_cases_well_formed: 93.2 error_outputs: 28 num_malformed_responses: 9 user_asks: 1 lazy_comments: 3 syntax_errors: 9 indentation_errors: 7 exhausted_context_windows: 0 test_timeouts: 3 command: aider --model gpt-4-0314 date: 2024-05-04 versions: 0.31.2-dev seconds_per_case: 19.8 total_cost: 16.2689 - dirname: 2023-12-16-21-24-28--editblock-gpt-4-0613-actual-main test_cases: 133 model: gpt-4-0613 released: 2023-06-13 edit_format: diff commit_hash: 3aa17c4 pass_rate_1: 46.6 pass_rate_2: 67.7 percent_cases_well_formed: 100.0 error_outputs: 14 num_malformed_responses: 0 user_asks: 0 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 2 command: aider -4 date: 2023-12-16 versions: 0.18.2-dev seconds_per_case: 33.6 total_cost: 17.4657 - dirname: 2024-05-08-21-16-03--may-gpt-4-1106-preview-udiff test_cases: 133 model: gpt-4-1106-preview released: 2023-11-06 edit_format: udiff commit_hash: 87664dc pass_rate_1: 51.9 pass_rate_2: 65.4 percent_cases_well_formed: 92.5 error_outputs: 30 num_malformed_responses: 10 user_asks: 0 lazy_comments: 3 syntax_errors: 11 indentation_errors: 2 exhausted_context_windows: 0 test_timeouts: 1 command: aider --model gpt-4-1106-preview date: 2024-05-08 versions: 0.33.1-dev seconds_per_case: 20.4 total_cost: 6.6061 - dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples test_cases: 133 model: gpt-4-turbo-2024-04-09 (udiff) released: 2024-04-09 edit_format: udiff commit_hash: e610e5b-dirty pass_rate_1: 48.1 pass_rate_2: 63.9 percent_cases_well_formed: 97.0 error_outputs: 12 num_malformed_responses: 4 user_asks: 0 lazy_comments: 0 syntax_errors: 4 indentation_errors: 2 exhausted_context_windows: 0 test_timeouts: 3 command: aider --gpt-4-turbo date: 2024-05-01 versions: 0.30.2-dev seconds_per_case: 22.8 total_cost: 6.3337 - dirname: 2024-05-03-22-24-48--openrouter--llama3-diff-examples-sys-msg test_cases: 132 model: llama3-70b-8192 released: 2024-04-18 edit_format: diff commit_hash: b5bb453 pass_rate_1: 38.6 pass_rate_2: 49.2 percent_cases_well_formed: 73.5 error_outputs: 105 num_malformed_responses: 35 user_asks: 0 lazy_comments: 0 syntax_errors: 1 indentation_errors: 2 exhausted_context_windows: 0 test_timeouts: 3 command: aider --model groq/llama3-70b-8192 date: 2024-05-03 versions: 0.31.2-dev seconds_per_case: 14.5 total_cost: 0.4311 - dirname: 2024-05-06-18-31-08--command-r-plus-whole-final test_cases: 133 model: command-r-plus released: 2024-04-04 edit_format: whole commit_hash: fc3a43e-dirty pass_rate_1: 21.8 pass_rate_2: 31.6 percent_cases_well_formed: 100.0 error_outputs: 0 num_malformed_responses: 0 user_asks: 0 lazy_comments: 1 syntax_errors: 5 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 7 command: aider --model command-r-plus date: 2024-05-06 versions: 0.31.2-dev seconds_per_case: 22.9 total_cost: 2.7494 - dirname: 2024-05-07-12-55-06--deepseek-chat-v2-whole test_cases: 133 model: deepseek-chat v2 (whole) edit_format: whole commit_hash: b1cae73, db994fb pass_rate_1: 50.4 pass_rate_2: 60.2 percent_cases_well_formed: 100.0 error_outputs: 3 num_malformed_responses: 0 user_asks: 3 lazy_comments: 13 syntax_errors: 0 indentation_errors: 2 exhausted_context_windows: 0 test_timeouts: 1 command: aider --model deepseek/deepseek-chat --edit-format whole date: 2024-05-07 versions: 0.31.2-dev seconds_per_case: 42.4 total_cost: 0.0000 - dirname: 2024-05-09-18-57-52--deepseek-chat-v2-diff-reverted-and-helpful-assistant2 test_cases: 133 model: deepseek-chat v2 (diff) released: 2024-05-06 edit_format: diff commit_hash: 80a3f6d pass_rate_1: 44.4 pass_rate_2: 60.9 percent_cases_well_formed: 97.0 error_outputs: 14 num_malformed_responses: 4 user_asks: 2 lazy_comments: 0 syntax_errors: 13 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 3 command: aider --model deepseek/deepseek-chat date: 2024-05-09 versions: 0.33.1-dev seconds_per_case: 86.8 total_cost: 0.0941 - dirname: 2024-05-07-20-32-37--qwen1.5-110b-chat-whole test_cases: 133 model: qwen1.5-110b-chat released: 2024-02-04 edit_format: whole commit_hash: 70b1c0c pass_rate_1: 30.8 pass_rate_2: 37.6 percent_cases_well_formed: 100.0 error_outputs: 3 num_malformed_responses: 0 user_asks: 3 lazy_comments: 20 syntax_errors: 0 indentation_errors: 6 exhausted_context_windows: 0 test_timeouts: 3 command: aider --model together_ai/qwen/qwen1.5-110b-chat date: 2024-05-07 versions: 0.31.2-dev seconds_per_case: 46.9 total_cost: 0.0000 - dirname: 2024-05-07-20-57-04--wizardlm-2-8x22b-whole test_cases: 133 model: WizardLM-2 8x22B edit_format: whole commit_hash: 8e272bf, bbe8639 pass_rate_1: 27.8 pass_rate_2: 44.4 percent_cases_well_formed: 100.0 error_outputs: 0 num_malformed_responses: 0 user_asks: 0 lazy_comments: 1 syntax_errors: 2 indentation_errors: 2 exhausted_context_windows: 0 test_timeouts: 0 command: aider --model openrouter/microsoft/wizardlm-2-8x22b date: 2024-05-07 versions: 0.31.2-dev seconds_per_case: 36.6 total_cost: 0.0000 - dirname: 2024-05-13-17-39-05--gpt-4o-diff test_cases: 133 model: gpt-4o released: 2024-05-13 edit_format: diff commit_hash: b6cd852 pass_rate_1: 60.2 pass_rate_2: 72.9 percent_cases_well_formed: 96.2 error_outputs: 103 num_malformed_responses: 5 user_asks: 0 lazy_comments: 0 syntax_errors: 0 indentation_errors: 2 exhausted_context_windows: 0 test_timeouts: 1 command: aider date: 2024-05-13 versions: 0.34.1-dev seconds_per_case: 6.0 total_cost: 0.0000 - dirname: 2024-04-12-22-18-20--gpt-4-turbo-2024-04-09-plain-diff test_cases: 33 model: gpt-4-turbo-2024-04-09 (diff) edit_format: diff commit_hash: 9b2e697-dirty pass_rate_1: 48.5 pass_rate_2: 57.6 percent_cases_well_formed: 100.0 error_outputs: 15 num_malformed_responses: 0 user_asks: 15 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 0 command: aider --model gpt-4-turbo-2024-04-09 date: 2024-04-12 versions: 0.28.1-dev seconds_per_case: 17.6 total_cost: 1.6205 - dirname: 2024-06-08-22-37-55--qwen2-72b-instruct-whole test_cases: 133 model: Qwen2 72B Instruct edit_format: whole commit_hash: 02c7335-dirty, 1a97498-dirty pass_rate_1: 44.4 pass_rate_2: 55.6 percent_cases_well_formed: 100.0 error_outputs: 3 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 3 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 1 command: aider --model together_ai/qwen/Qwen2-72B-Instruct date: 2024-06-08 versions: 0.37.1-dev seconds_per_case: 14.3 total_cost: 0.0000 - dirname: 2024-06-08-23-45-41--gemini-1.5-flash-latest-whole test_cases: 133 model: gemini-1.5-flash-latest edit_format: whole commit_hash: 86ea47f-dirty pass_rate_1: 33.8 pass_rate_2: 44.4 percent_cases_well_formed: 100.0 error_outputs: 16 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 12 lazy_comments: 0 syntax_errors: 9 indentation_errors: 1 exhausted_context_windows: 0 test_timeouts: 3 command: aider --model gemini/gemini-1.5-flash-latest date: 2024-06-08 versions: 0.37.1-dev seconds_per_case: 7.2 total_cost: 0.0000 - dirname: 2024-06-09-03-28-21--codestral-whole test_cases: 133 model: codestral-2405 edit_format: whole commit_hash: effc88a pass_rate_1: 35.3 pass_rate_2: 51.1 percent_cases_well_formed: 100.0 error_outputs: 4 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 4 lazy_comments: 1 syntax_errors: 0 indentation_errors: 1 exhausted_context_windows: 0 test_timeouts: 4 command: aider --model mistral/codestral-2405 date: 2024-06-09 versions: 0.37.1-dev seconds_per_case: 7.5 total_cost: 0.6805 - dirname: 2024-06-08-19-25-26--codeqwen:7b-chat-v1.5-q8_0-whole test_cases: 133 model: codeqwen:7b-chat-v1.5-q8_0 edit_format: whole commit_hash: be0520f-dirty pass_rate_1: 32.3 pass_rate_2: 34.6 percent_cases_well_formed: 100.0 error_outputs: 8 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 8 lazy_comments: 0 syntax_errors: 1 indentation_errors: 2 exhausted_context_windows: 0 test_timeouts: 1 command: aider --model ollama/codeqwen:7b-chat-v1.5-q8_0 date: 2024-06-08 versions: 0.37.1-dev seconds_per_case: 15.6 total_cost: 0.0000 - dirname: 2024-06-08-16-12-31--codestral:22b-v0.1-q8_0-whole test_cases: 133 model: codestral:22b-v0.1-q8_0 edit_format: whole commit_hash: be0520f-dirty pass_rate_1: 35.3 pass_rate_2: 48.1 percent_cases_well_formed: 100.0 error_outputs: 8 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 8 lazy_comments: 2 syntax_errors: 0 indentation_errors: 1 exhausted_context_windows: 0 test_timeouts: 3 command: aider --model ollama/codestral:22b-v0.1-q8_0 date: 2024-06-08 versions: 0.37.1-dev seconds_per_case: 46.4 total_cost: 0.0000 - dirname: 2024-06-08-17-54-04--qwen2:72b-instruct-q8_0-whole test_cases: 133 model: qwen2:72b-instruct-q8_0 edit_format: whole commit_hash: 74e51d5-dirty pass_rate_1: 43.6 pass_rate_2: 49.6 percent_cases_well_formed: 100.0 error_outputs: 27 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 27 lazy_comments: 0 syntax_errors: 5 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 0 command: aider --model ollama/qwen2:72b-instruct-q8_0 date: 2024-06-08 versions: 0.37.1-dev seconds_per_case: 280.6 total_cost: 0.0000 - dirname: 2024-06-20-15-09-26--claude-3.5-sonnet-whole test_cases: 133 model: claude-3.5-sonnet (whole) edit_format: whole commit_hash: 068609e pass_rate_1: 61.7 pass_rate_2: 78.2 percent_cases_well_formed: 100.0 error_outputs: 4 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 2 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 0 command: aider --model openrouter/anthropic/claude-3.5-sonnet --edit-format whole date: 2024-06-20 versions: 0.38.1-dev seconds_per_case: 15.4 total_cost: 0.0000 - dirname: 2024-06-20-15-16-41--claude-3.5-sonnet-diff test_cases: 133 model: claude-3.5-sonnet (diff) edit_format: diff commit_hash: 068609e-dirty pass_rate_1: 57.9 pass_rate_2: 74.4 percent_cases_well_formed: 97.0 error_outputs: 48 num_malformed_responses: 11 num_with_malformed_responses: 4 user_asks: 0 lazy_comments: 0 syntax_errors: 0 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 1 command: aider --model openrouter/anthropic/claude-3.5-sonnet date: 2024-06-20 versions: 0.38.1-dev seconds_per_case: 21.6 total_cost: 0.0000 - dirname: 2024-06-17-14-45-54--deepseek-coder2-whole test_cases: 133 model: DeepSeek Coder V2 (whole) edit_format: whole commit_hash: ca8672b pass_rate_1: 63.9 pass_rate_2: 75.2 percent_cases_well_formed: 100.0 error_outputs: 1 num_malformed_responses: 0 num_with_malformed_responses: 0 user_asks: 1 lazy_comments: 0 syntax_errors: 1 indentation_errors: 0 exhausted_context_windows: 0 test_timeouts: 7 command: aider --model deepseek/deepseek-coder date: 2024-06-17 versions: 0.38.1-dev seconds_per_case: 21.1 total_cost: 0.0537 - dirname: 2024-06-21-15-29-08--deepseek-coder2-diff-again3 test_cases: 133 model: DeepSeek Coder V2 (diff) edit_format: diff commit_hash: 515ab3e pass_rate_1: 58.6 pass_rate_2: 66.2 percent_cases_well_formed: 98.5 error_outputs: 23 num_malformed_responses: 5 num_with_malformed_responses: 2 user_asks: 2 lazy_comments: 0 syntax_errors: 0 indentation_errors: 1 exhausted_context_windows: 0 test_timeouts: 2 command: aider --model deepseek/deepseek-coder date: 2024-06-21 versions: 0.39.1-dev seconds_per_case: 30.2 total_cost: 0.0857