# Benchmark run records: a YAML sequence with one mapping per run.
# Each record is keyed by `dirname` and carries the run's model, edit format,
# pass rates, error counters, the command used, and timing/cost figures.
# NOTE(review): `released` is absent on some records; consumers should treat
# it as optional.

- dirname: 2024-05-01-20-05-59--direct-opus-filenames-outside-fence
  test_cases: 133
  model: claude-3-opus-20240229
  released: 2024-02-29
  edit_format: diff
  commit_hash: f4b1797-dirty, f4b1797
  pass_rate_1: 53.4
  pass_rate_2: 68.4
  percent_cases_well_formed: 100.0
  error_outputs: 2
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 1
  command: aider --opus
  date: 2024-05-01
  versions: 0.30.2-dev
  seconds_per_case: 32.4
  total_cost: 13.8395

- dirname: 2024-03-06-16-42-00--claude3-sonnet-whole
  test_cases: 133
  model: claude-3-sonnet-20240229
  released: 2024-02-29
  edit_format: whole
  commit_hash: a5f8076-dirty
  pass_rate_1: 43.6
  pass_rate_2: 54.9
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 1
  syntax_errors: 2
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 7
  command: aider --sonnet
  date: 2024-03-06
  versions: 0.25.1-dev
  seconds_per_case: 23.1
  total_cost: 0.0000

- dirname: 2024-04-29-19-17-28--deepseek-coder-whole
  test_cases: 132
  model: deepseek-coder
  released: 2024-01-25
  edit_format: whole
  commit_hash: c07f793-dirty
  pass_rate_1: 47.0
  pass_rate_2: 54.5
  percent_cases_well_formed: 100.0
  error_outputs: 0
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 2
  syntax_errors: 13
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 2
  command: aider --model deepseek/deepseek-coder
  date: 2024-04-29
  versions: 0.30.2-dev
  seconds_per_case: 26.7
  total_cost: 0.0000

- dirname: 2024-05-03-20-47-24--gemini-1.5-pro-diff-fenced
  test_cases: 133
  model: gemini-1.5-pro-latest
  edit_format: diff-fenced
  commit_hash: 3a48dfb, 5d32dd7
  pass_rate_1: 45.9
  pass_rate_2: 57.1
  percent_cases_well_formed: 87.2
  error_outputs: 60
  num_malformed_responses: 17
  user_asks: 3
  lazy_comments: 0
  syntax_errors: 8
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model gemini/gemini-1.5-pro-latest
  date: 2024-05-03
  versions: 0.31.2-dev
  seconds_per_case: 21.3
  total_cost: 0.0000

- dirname: 2024-05-08-20-59-15--may-gpt-3.5-turbo-whole
  test_cases: 133
  model: gpt-3.5-turbo-0125
  released: 2024-01-25
  edit_format: whole
  commit_hash: 1d55f74
  pass_rate_1: 41.4
  pass_rate_2: 50.4
  percent_cases_well_formed: 100.0
  error_outputs: 0
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 3
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 4
  command: aider -3
  date: 2024-05-08
  versions: 0.33.1-dev
  seconds_per_case: 6.5
  total_cost: 0.5032

- dirname: 2023-11-06-21-23-59--gpt-3.5-turbo-0301
  test_cases: 133
  model: gpt-3.5-turbo-0301
  released: 2023-03-01
  edit_format: whole
  commit_hash: 44388db-dirty
  pass_rate_1: 50.4
  pass_rate_2: 57.9
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 8
  command: aider --model gpt-3.5-turbo-0301
  date: 2023-11-06
  versions: 0.16.4-dev
  seconds_per_case: 6.5
  total_cost: 0.4822

- dirname: 2023-11-07-02-41-07--gpt-3.5-turbo-0613
  test_cases: 133
  model: gpt-3.5-turbo-0613
  released: 2023-06-13
  edit_format: whole
  commit_hash: 93aa497-dirty
  pass_rate_1: 38.3
  pass_rate_2: 50.4
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 5
  command: aider --model gpt-3.5-turbo-0613
  date: 2023-11-07
  versions: 0.16.4-dev
  seconds_per_case: 18.0
  total_cost: 0.5366

- dirname: 2024-04-30-21-40-51--litellm-gpt-3.5-turbo-1106-again
  test_cases: 132
  model: gpt-3.5-turbo-1106
  edit_format: whole
  commit_hash: 7b14d77
  pass_rate_1: 45.5
  pass_rate_2: 56.1
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 0
  syntax_errors: 19
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 0
  command: aider --model gpt-3.5-turbo-1106
  date: 2024-04-30
  versions: 0.30.2-dev
  seconds_per_case: 5.3
  total_cost: 0.3261

- dirname: 2024-01-25-23-37-15--jan-exercism-gpt-4-0125-preview-udiff
  test_cases: 133
  model: gpt-4-0125-preview
  released: 2024-01-25
  edit_format: udiff
  commit_hash: edcf9b1
  pass_rate_1: 55.6
  pass_rate_2: 66.2
  percent_cases_well_formed: 97.7
  error_outputs: 6
  num_malformed_responses: 3
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 3
  indentation_errors: 7
  exhausted_context_windows: 0
  test_timeouts: 4
  command: aider --model gpt-4-0125-preview
  date: 2024-01-25
  versions: 0.22.1-dev
  seconds_per_case: 44.8
  total_cost: 14.6428

- dirname: 2024-05-04-15-07-30--redo-gpt-4-0314-diff-reminder-rules
  test_cases: 133
  model: gpt-4-0314
  released: 2023-03-14
  edit_format: diff
  commit_hash: 0d43468
  pass_rate_1: 50.4
  pass_rate_2: 66.2
  percent_cases_well_formed: 93.2
  error_outputs: 28
  num_malformed_responses: 9
  user_asks: 1
  lazy_comments: 3
  syntax_errors: 9
  indentation_errors: 7
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model gpt-4-0314
  date: 2024-05-04
  versions: 0.31.2-dev
  seconds_per_case: 19.8
  total_cost: 16.2689

- dirname: 2023-12-16-21-24-28--editblock-gpt-4-0613-actual-main
  test_cases: 133
  model: gpt-4-0613
  released: 2023-06-13
  edit_format: diff
  commit_hash: 3aa17c4
  pass_rate_1: 46.6
  pass_rate_2: 67.7
  percent_cases_well_formed: 100.0
  error_outputs: 14
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 2
  command: aider -4
  date: 2023-12-16
  versions: 0.18.2-dev
  seconds_per_case: 33.6
  total_cost: 17.4657

- dirname: 2024-05-08-21-16-03--may-gpt-4-1106-preview-udiff
  test_cases: 133
  model: gpt-4-1106-preview
  released: 2023-11-06
  edit_format: udiff
  commit_hash: 87664dc
  pass_rate_1: 51.9
  pass_rate_2: 65.4
  percent_cases_well_formed: 92.5
  error_outputs: 30
  num_malformed_responses: 10
  user_asks: 0
  lazy_comments: 3
  syntax_errors: 11
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 1
  command: aider --model gpt-4-1106-preview
  date: 2024-05-08
  versions: 0.33.1-dev
  seconds_per_case: 20.4
  total_cost: 6.6061

- dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples
  test_cases: 133
  model: gpt-4-turbo-2024-04-09 (udiff)
  released: 2024-04-09
  edit_format: udiff
  commit_hash: e610e5b-dirty
  pass_rate_1: 48.1
  pass_rate_2: 63.9
  percent_cases_well_formed: 97.0
  error_outputs: 12
  num_malformed_responses: 4
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 4
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --gpt-4-turbo
  date: 2024-05-01
  versions: 0.30.2-dev
  seconds_per_case: 22.8
  total_cost: 6.3337

- dirname: 2024-05-03-22-24-48--openrouter--llama3-diff-examples-sys-msg
  test_cases: 132
  model: llama3-70b-8192
  released: 2024-04-18
  edit_format: diff
  commit_hash: b5bb453
  pass_rate_1: 38.6
  pass_rate_2: 49.2
  percent_cases_well_formed: 73.5
  error_outputs: 105
  num_malformed_responses: 35
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 1
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model groq/llama3-70b-8192
  date: 2024-05-03
  versions: 0.31.2-dev
  seconds_per_case: 14.5
  total_cost: 0.4311

- dirname: 2024-05-06-18-31-08--command-r-plus-whole-final
  test_cases: 133
  model: command-r-plus
  released: 2024-04-04
  edit_format: whole
  commit_hash: fc3a43e-dirty
  pass_rate_1: 21.8
  pass_rate_2: 31.6
  percent_cases_well_formed: 100.0
  error_outputs: 0
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 1
  syntax_errors: 5
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 7
  command: aider --model command-r-plus
  date: 2024-05-06
  versions: 0.31.2-dev
  seconds_per_case: 22.9
  total_cost: 2.7494

- dirname: 2024-05-07-12-55-06--deepseek-chat-v2-whole
  test_cases: 133
  model: deepseek-chat v2 (whole)
  edit_format: whole
  commit_hash: b1cae73, db994fb
  pass_rate_1: 50.4
  pass_rate_2: 60.2
  percent_cases_well_formed: 100.0
  error_outputs: 3
  num_malformed_responses: 0
  user_asks: 3
  lazy_comments: 13
  syntax_errors: 0
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 1
  command: aider --model deepseek/deepseek-chat --edit-format whole
  date: 2024-05-07
  versions: 0.31.2-dev
  seconds_per_case: 42.4
  total_cost: 0.0000

- dirname: 2024-05-09-18-57-52--deepseek-chat-v2-diff-reverted-and-helpful-assistant2
  test_cases: 133
  model: deepseek-chat v2 (diff)
  released: 2024-05-06
  edit_format: diff
  commit_hash: 80a3f6d
  pass_rate_1: 44.4
  pass_rate_2: 60.9
  percent_cases_well_formed: 97.0
  error_outputs: 14
  num_malformed_responses: 4
  user_asks: 2
  lazy_comments: 0
  syntax_errors: 13
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model deepseek/deepseek-chat
  date: 2024-05-09
  versions: 0.33.1-dev
  seconds_per_case: 86.8
  total_cost: 0.0941

- dirname: 2024-05-07-20-32-37--qwen1.5-110b-chat-whole
  test_cases: 133
  model: qwen1.5-110b-chat
  released: 2024-02-04
  edit_format: whole
  commit_hash: 70b1c0c
  pass_rate_1: 30.8
  pass_rate_2: 37.6
  percent_cases_well_formed: 100.0
  error_outputs: 3
  num_malformed_responses: 0
  user_asks: 3
  lazy_comments: 20
  syntax_errors: 0
  indentation_errors: 6
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model together_ai/qwen/qwen1.5-110b-chat
  date: 2024-05-07
  versions: 0.31.2-dev
  seconds_per_case: 46.9
  total_cost: 0.0000

- dirname: 2024-05-07-20-57-04--wizardlm-2-8x22b-whole
  test_cases: 133
  model: WizardLM-2 8x22B
  edit_format: whole
  commit_hash: 8e272bf, bbe8639
  pass_rate_1: 27.8
  pass_rate_2: 44.4
  percent_cases_well_formed: 100.0
  error_outputs: 0
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 1
  syntax_errors: 2
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 0
  command: aider --model openrouter/microsoft/wizardlm-2-8x22b
  date: 2024-05-07
  versions: 0.31.2-dev
  seconds_per_case: 36.6
  total_cost: 0.0000

- dirname: 2024-05-13-17-39-05--gpt-4o-diff
  test_cases: 133
  model: gpt-4o
  released: 2024-05-13
  edit_format: diff
  commit_hash: b6cd852
  pass_rate_1: 60.2
  pass_rate_2: 72.9
  percent_cases_well_formed: 96.2
  error_outputs: 103
  num_malformed_responses: 5
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 1
  command: aider
  date: 2024-05-13
  versions: 0.34.1-dev
  seconds_per_case: 6.0
  total_cost: 0.0000

- dirname: 2024-04-12-22-18-20--gpt-4-turbo-2024-04-09-plain-diff
  # NOTE(review): every other record reports 132-133 test cases. The pass
  # rates below are consistent with 33 cases (16/33, 19/33), so this looks
  # like a partial run rather than a typo - confirm before comparing.
  test_cases: 33
  model: gpt-4-turbo-2024-04-09 (diff)
  edit_format: diff
  commit_hash: 9b2e697-dirty
  pass_rate_1: 48.5
  pass_rate_2: 57.6
  percent_cases_well_formed: 100.0
  error_outputs: 15
  num_malformed_responses: 0
  user_asks: 15
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 0
  command: aider --model gpt-4-turbo-2024-04-09
  date: 2024-04-12
  versions: 0.28.1-dev
  seconds_per_case: 17.6
  total_cost: 1.6205