refactor: Maintain consistent timestamps during section skipping

2025-05-30 17:24:59 +00:00 · 2025-03-11 17:04:47 -07:00 · 2025-03-11 17:04:47 -07:00 · efcda12dda
commit efcda12dda
parent d7b4079ab5
1 changed files with 21 additions and 3 deletions
--- a/redact.py
+++ b/redact.py
@ -12,11 +12,15 @@ def process_file(input_path, output_path):
    If a text field contains "\u001b[ROW;COL]H" followed by "Atuin", skip it and all subsequent
    records until finding a text with "\u001b[ROW;(COL-1)H".
    Maintains consistent timestamps by not advancing time during skip sections.
    """
    skip_mode = False
    target_pattern = None
    ansi_pattern = re.compile(r'\u001b\[(\d+);(\d+)H')
    is_first_line = True
    last_timestamp = 0.0
    time_offset = 0.0  # Accumulator for time to subtract
    with open(input_path, 'r', encoding='utf-8') as infile, open(output_path, 'w', encoding='utf-8') as outfile:
        for line in infile:
@ -34,6 +38,7 @@ def process_file(input_path, output_path):
                    outfile.write(line)
                    continue
                current_timestamp = float(record[0])
                text = record[2]  # The text content
                # If we're not in skip mode, check if we need to enter it
@ -46,16 +51,29 @@ def process_file(input_path, output_path):
                            # Create pattern for the ending sequence
                            target_pattern = f'\u001b[{row};{col-1}H'
                            skip_mode = True
                            # Start tracking time to subtract
                            skip_start_time = current_timestamp
                            continue  # Skip this record
-                    # If we're not skipping, write the record
+                    # If we're not skipping, write the record with adjusted timestamp
-                    outfile.write(line)
+                    adjusted_timestamp = max(current_timestamp - time_offset, last_timestamp)
                    last_timestamp = adjusted_timestamp
                    record[0] = adjusted_timestamp
                    outfile.write(json.dumps(record) + '\n')
                # If we're in skip mode, check if we should exit it
                else:
                    if target_pattern in text:
                        skip_mode = False
-                        outfile.write(line)  # Include the matching record
+                        # Calculate how much time to subtract from future timestamps
                        time_offset += (current_timestamp - skip_start_time)
                        # Write this record with adjusted timestamp
                        adjusted_timestamp = max(current_timestamp - time_offset, last_timestamp)
                        last_timestamp = adjusted_timestamp
                        record[0] = adjusted_timestamp
                        outfile.write(json.dumps(record) + '\n')
                    # Otherwise we're still in skip mode, don't write anything
            except json.JSONDecodeError:
                # If we can't parse the line as JSON, include it anyway