# One sequence entry per `dirname`; every entry carries the same 21 keys in
# the same order. Scalars are intentionally left exactly as recorded (no added
# quoting) so each value keeps the type it already parses to.

- dirname: 2024-05-01-20-05-59--direct-opus-filenames-outside-fence
  test_cases: 133
  model: claude-3-opus-20240229
  edit_format: diff
  commit_hash: f4b1797-dirty, f4b1797
  pass_rate_1: 53.4
  pass_rate_2: 68.4
  percent_cases_well_formed: 100.0
  error_outputs: 2
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 1
  command: aider --opus
  date: 2024-05-01
  versions: 0.30.2-dev
  seconds_per_case: 32.4
  total_cost: 13.8395

- dirname: 2024-03-06-16-42-00--claude3-sonnet-whole
  test_cases: 133
  model: claude-3-sonnet-20240229
  edit_format: whole
  commit_hash: a5f8076-dirty
  pass_rate_1: 43.6
  pass_rate_2: 54.9
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 1
  syntax_errors: 2
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 7
  command: aider --sonnet
  date: 2024-03-06
  versions: 0.25.1-dev
  seconds_per_case: 23.1
  total_cost: 0.0000

- dirname: 2024-04-29-19-17-28--deepseek-coder-whole
  test_cases: 132
  model: openai/deepseek-coder
  edit_format: whole
  commit_hash: c07f793-dirty
  pass_rate_1: 47.0
  pass_rate_2: 54.5
  percent_cases_well_formed: 100.0
  error_outputs: 0
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 2
  syntax_errors: 13
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 2
  command: aider --model openai/deepseek-coder
  date: 2024-04-29
  versions: 0.30.2-dev
  seconds_per_case: 26.7
  total_cost: 0.0000

- dirname: 2024-05-03-20-47-24--gemini-1.5-pro-diff-fenced
  test_cases: 133
  model: gemini/gemini-1.5-pro-latest
  edit_format: diff-fenced
  commit_hash: 3a48dfb, 5d32dd7
  pass_rate_1: 45.9
  pass_rate_2: 57.1
  percent_cases_well_formed: 87.2
  error_outputs: 60
  num_malformed_responses: 17
  user_asks: 3
  lazy_comments: 0
  syntax_errors: 8
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model gemini/gemini-1.5-pro-latest
  date: 2024-05-03
  versions: 0.31.2-dev
  seconds_per_case: 21.3
  total_cost: 0.0000

- dirname: 2024-02-02-02-07-28--exercism-gpt-3.5-turbo-0125-whole
  test_cases: 133
  model: gpt-3.5-turbo-0125
  edit_format: whole
  commit_hash: da14474
  pass_rate_1: 39.8
  pass_rate_2: 49.6
  percent_cases_well_formed: 100.0
  error_outputs: 0
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 3
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 4
  command: aider -3
  date: 2024-02-02
  versions: 0.22.1-dev
  seconds_per_case: 3.2
  total_cost: 0.4701

- dirname: 2023-11-06-21-23-59--gpt-3.5-turbo-0301
  test_cases: 133
  model: gpt-3.5-turbo-0301
  edit_format: whole
  commit_hash: 44388db-dirty
  pass_rate_1: 50.4
  pass_rate_2: 57.9
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 8
  command: aider --model gpt-3.5-turbo-0301
  date: 2023-11-06
  versions: 0.16.4-dev
  seconds_per_case: 6.5
  total_cost: 0.4822

- dirname: 2023-11-07-02-41-07--gpt-3.5-turbo-0613
  test_cases: 133
  model: gpt-3.5-turbo-0613
  edit_format: whole
  commit_hash: 93aa497-dirty
  pass_rate_1: 38.3
  pass_rate_2: 50.4
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 5
  command: aider --model gpt-3.5-turbo-0613
  date: 2023-11-07
  versions: 0.16.4-dev
  seconds_per_case: 18.0
  total_cost: 0.5366

- dirname: 2024-04-30-21-40-51--litellm-gpt-3.5-turbo-1106-again
  test_cases: 132
  model: gpt-3.5-turbo-1106
  edit_format: whole
  commit_hash: 7b14d77
  pass_rate_1: 45.5
  pass_rate_2: 56.1
  percent_cases_well_formed: 100.0
  error_outputs: 1
  num_malformed_responses: 0
  user_asks: 1
  lazy_comments: 0
  syntax_errors: 19
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 0
  command: aider --model gpt-3.5-turbo-1106
  date: 2024-04-30
  versions: 0.30.2-dev
  seconds_per_case: 5.3
  total_cost: 0.3261

- dirname: 2024-01-25-23-37-15--jan-exercism-gpt-4-0125-preview-udiff
  test_cases: 133
  model: gpt-4-0125-preview
  edit_format: udiff
  commit_hash: edcf9b1
  pass_rate_1: 55.6
  pass_rate_2: 66.2
  percent_cases_well_formed: 97.7
  error_outputs: 6
  num_malformed_responses: 3
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 3
  indentation_errors: 7
  exhausted_context_windows: 0
  test_timeouts: 4
  command: aider --model gpt-4-0125-preview
  date: 2024-01-25
  versions: 0.22.1-dev
  seconds_per_case: 44.8
  total_cost: 14.6428

- dirname: 2024-05-04-15-07-30--redo-gpt-4-0314-diff-reminder-rules
  test_cases: 133
  model: gpt-4-0314
  edit_format: diff
  commit_hash: 0d43468
  pass_rate_1: 50.4
  pass_rate_2: 66.2
  percent_cases_well_formed: 93.2
  error_outputs: 28
  num_malformed_responses: 9
  user_asks: 1
  lazy_comments: 3
  syntax_errors: 9
  indentation_errors: 7
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model gpt-4-0314
  date: 2024-05-04
  versions: 0.31.2-dev
  seconds_per_case: 19.8
  total_cost: 16.2689

- dirname: 2023-12-16-21-24-28--editblock-gpt-4-0613-actual-main
  test_cases: 133
  model: gpt-4-0613
  edit_format: diff
  commit_hash: 3aa17c4
  pass_rate_1: 46.6
  pass_rate_2: 67.7
  percent_cases_well_formed: 100.0
  error_outputs: 14
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 0
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 2
  command: aider -4
  date: 2023-12-16
  versions: 0.18.2-dev
  seconds_per_case: 33.6
  total_cost: 17.4657

- dirname: 2024-05-04-14-33-15--redo-gpt-4-1106-preview-udiff5
  test_cases: 133
  model: gpt-4-1106-preview
  edit_format: udiff
  commit_hash: 1981105-dirty
  pass_rate_1: 57.1
  pass_rate_2: 63.2
  percent_cases_well_formed: 94.0
  error_outputs: 24
  num_malformed_responses: 8
  user_asks: 0
  lazy_comments: 7
  syntax_errors: 3
  indentation_errors: 5
  exhausted_context_windows: 0
  test_timeouts: 2
  command: aider
  date: 2024-05-04
  versions: 0.31.2-dev
  seconds_per_case: 15.6
  total_cost: 5.9468

- dirname: 2024-05-01-02-09-20--gpt-4-turbo-examples
  test_cases: 133
  model: gpt-4-turbo-2024-04-09
  edit_format: udiff
  commit_hash: e610e5b-dirty
  pass_rate_1: 48.1
  pass_rate_2: 63.9
  percent_cases_well_formed: 97.0
  error_outputs: 12
  num_malformed_responses: 4
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 4
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --gpt-4-turbo
  date: 2024-05-01
  versions: 0.30.2-dev
  seconds_per_case: 22.8
  total_cost: 6.3337

- dirname: 2024-05-03-22-24-48--openrouter--llama3-diff-examples-sys-msg
  test_cases: 132
  model: groq/llama3-70b-8192
  edit_format: diff
  commit_hash: b5bb453
  pass_rate_1: 38.6
  pass_rate_2: 49.2
  percent_cases_well_formed: 73.5
  error_outputs: 105
  num_malformed_responses: 35
  user_asks: 0
  lazy_comments: 0
  syntax_errors: 1
  indentation_errors: 2
  exhausted_context_windows: 0
  test_timeouts: 3
  command: aider --model groq/llama3-70b-8192
  date: 2024-05-03
  versions: 0.31.2-dev
  seconds_per_case: 14.5
  total_cost: 0.4311

- dirname: 2024-05-06-18-31-08--command-r-plus-whole-final
  test_cases: 133
  model: command-r-plus
  edit_format: whole
  commit_hash: fc3a43e-dirty
  pass_rate_1: 21.8
  pass_rate_2: 31.6
  percent_cases_well_formed: 100.0
  error_outputs: 0
  num_malformed_responses: 0
  user_asks: 0
  lazy_comments: 1
  syntax_errors: 5
  indentation_errors: 0
  exhausted_context_windows: 0
  test_timeouts: 7
  command: aider --model command-r-plus
  date: 2024-05-06
  versions: 0.31.2-dev
  seconds_per_case: 22.9
  total_cost: 2.7494