refactor: Maintain consistent timestamps during section skipping

This commit is contained in:
Paul Gauthier (aider) 2025-03-11 17:04:47 -07:00
parent d7b4079ab5
commit efcda12dda

View file

@ -12,11 +12,15 @@ def process_file(input_path, output_path):
If a text field contains "\u001b[ROW;COLH" followed by "Atuin", skip it and all subsequent If a text field contains "\u001b[ROW;COLH" followed by "Atuin", skip it and all subsequent
records until finding a text with "\u001b[ROW;(COL-1)H". records until finding a text with "\u001b[ROW;(COL-1)H".
Maintains consistent timestamps by not advancing time during skip sections.
""" """
skip_mode = False skip_mode = False
target_pattern = None target_pattern = None
ansi_pattern = re.compile(r'\u001b\[(\d+);(\d+)H') ansi_pattern = re.compile(r'\u001b\[(\d+);(\d+)H')
is_first_line = True is_first_line = True
last_timestamp = 0.0
time_offset = 0.0 # Accumulator for time to subtract
with open(input_path, 'r', encoding='utf-8') as infile, open(output_path, 'w', encoding='utf-8') as outfile: with open(input_path, 'r', encoding='utf-8') as infile, open(output_path, 'w', encoding='utf-8') as outfile:
for line in infile: for line in infile:
@ -34,6 +38,7 @@ def process_file(input_path, output_path):
outfile.write(line) outfile.write(line)
continue continue
current_timestamp = float(record[0])
text = record[2] # The text content text = record[2] # The text content
# If we're not in skip mode, check if we need to enter it # If we're not in skip mode, check if we need to enter it
@ -46,16 +51,29 @@ def process_file(input_path, output_path):
# Create pattern for the ending sequence # Create pattern for the ending sequence
target_pattern = f'\u001b[{row};{col-1}H' target_pattern = f'\u001b[{row};{col-1}H'
skip_mode = True skip_mode = True
# Start tracking time to subtract
skip_start_time = current_timestamp
continue # Skip this record continue # Skip this record
# If we're not skipping, write the record # If we're not skipping, write the record with adjusted timestamp
outfile.write(line) adjusted_timestamp = max(current_timestamp - time_offset, last_timestamp)
last_timestamp = adjusted_timestamp
record[0] = adjusted_timestamp
outfile.write(json.dumps(record) + '\n')
# If we're in skip mode, check if we should exit it # If we're in skip mode, check if we should exit it
else: else:
if target_pattern in text: if target_pattern in text:
skip_mode = False skip_mode = False
outfile.write(line) # Include the matching record # Calculate how much time to subtract from future timestamps
time_offset += (current_timestamp - skip_start_time)
# Write this record with adjusted timestamp
adjusted_timestamp = max(current_timestamp - time_offset, last_timestamp)
last_timestamp = adjusted_timestamp
record[0] = adjusted_timestamp
outfile.write(json.dumps(record) + '\n')
# Otherwise we're still in skip mode, don't write anything
except json.JSONDecodeError: except json.JSONDecodeError:
# If we can't parse the line as JSON, include it anyway # If we can't parse the line as JSON, include it anyway