From efcda12ddabc3bd32f4a8bd8c67efbdcba0aea5d Mon Sep 17 00:00:00 2001 From: "Paul Gauthier (aider)" Date: Tue, 11 Mar 2025 17:04:47 -0700 Subject: [PATCH] refactor: Maintain consistent timestamps during section skipping --- redact.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/redact.py b/redact.py index ce230664e..e10a39600 100755 --- a/redact.py +++ b/redact.py @@ -12,11 +12,15 @@ def process_file(input_path, output_path): If a text field contains "\u001b[ROW;COL]H" followed by "Atuin", skip it and all subsequent records until finding a text with "\u001b[ROW;(COL-1)H". + + Maintains consistent timestamps by not advancing time during skip sections. """ skip_mode = False target_pattern = None ansi_pattern = re.compile(r'\u001b\[(\d+);(\d+)H') is_first_line = True + last_timestamp = 0.0 + time_offset = 0.0 # Accumulator for time to subtract with open(input_path, 'r', encoding='utf-8') as infile, open(output_path, 'w', encoding='utf-8') as outfile: for line in infile: @@ -34,6 +38,7 @@ def process_file(input_path, output_path): outfile.write(line) continue + current_timestamp = float(record[0]) text = record[2] # The text content # If we're not in skip mode, check if we need to enter it @@ -46,16 +51,29 @@ def process_file(input_path, output_path): # Create pattern for the ending sequence target_pattern = f'\u001b[{row};{col-1}H' skip_mode = True + # Start tracking time to subtract + skip_start_time = current_timestamp continue # Skip this record - # If we're not skipping, write the record - outfile.write(line) + # If we're not skipping, write the record with adjusted timestamp + adjusted_timestamp = max(current_timestamp - time_offset, last_timestamp) + last_timestamp = adjusted_timestamp + record[0] = adjusted_timestamp + outfile.write(json.dumps(record) + '\n') # If we're in skip mode, check if we should exit it else: if target_pattern in text: skip_mode = False - outfile.write(line) # Include the matching record + # Calculate how much time to subtract from future timestamps + time_offset += (current_timestamp - skip_start_time) + + # Write this record with adjusted timestamp + adjusted_timestamp = max(current_timestamp - time_offset, last_timestamp) + last_timestamp = adjusted_timestamp + record[0] = adjusted_timestamp + outfile.write(json.dumps(record) + '\n') + # Otherwise we're still in skip mode, don't write anything except json.JSONDecodeError: # If we can't parse the line as JSON, include it anyway