refactor: Update redact.py to handle asciinema cast v2 JSON format

This commit is contained in:
Paul Gauthier (aider) 2025-03-11 17:03:19 -07:00
parent 2eb1513612
commit d7b4079ab5

View file

@ -2,38 +2,64 @@
import re import re
import sys import sys
import os import os
import json
def process_file(input_path, output_path): def process_file(input_path, output_path):
""" """
Process a text file to filter out certain sections based on ANSI cursor commands. Process an asciinema cast v2 file to filter out certain sections based on ANSI cursor commands.
If a line contains "\u001b[ROW;COL]H" followed by "Atuin", skip it and all subsequent Format: First line is a JSON header. Subsequent lines are JSON arrays: [timestamp, "o", "text"]
lines until finding a line with "\u001b[ROW;(COL-1)H".
If a text field contains "\u001b[ROW;COL]H" followed by "Atuin", skip it and all subsequent
records until finding a text with "\u001b[ROW;(COL-1)H".
""" """
skip_mode = False skip_mode = False
target_pattern = None target_pattern = None
ansi_pattern = re.compile(r'\\u001b\[(\d+);(\d+)H') ansi_pattern = re.compile(r'\u001b\[(\d+);(\d+)H')
is_first_line = True
with open(input_path, 'r', encoding='utf-8') as infile, open(output_path, 'w', encoding='utf-8') as outfile: with open(input_path, 'r', encoding='utf-8') as infile, open(output_path, 'w', encoding='utf-8') as outfile:
for line in infile: for line in infile:
# If we're not in skip mode, check if we need to enter it # Always include the header (first line)
if not skip_mode: if is_first_line:
if '\\u001b[' in line and 'Atuin' in line: outfile.write(line)
match = ansi_pattern.search(line) is_first_line = False
if match: continue
row = match.group(1)
col = int(match.group(2)) # Parse the JSON record
# Create pattern for the line that will end the skip section try:
target_pattern = f'\\u001b[{row};{col-1}H' record = json.loads(line)
skip_mode = True if not isinstance(record, list) or len(record) != 3 or record[1] != "o":
continue # Skip this line # If not a valid record, just write it out
# If we're not skipping, write the line outfile.write(line)
continue
text = record[2] # The text content
# If we're not in skip mode, check if we need to enter it
if not skip_mode:
if '\u001b[' in text and 'Atuin' in text:
match = ansi_pattern.search(text)
if match:
row = match.group(1)
col = int(match.group(2))
# Create pattern for the ending sequence
target_pattern = f'\u001b[{row};{col-1}H'
skip_mode = True
continue # Skip this record
# If we're not skipping, write the record
outfile.write(line)
# If we're in skip mode, check if we should exit it
else:
if target_pattern in text:
skip_mode = False
outfile.write(line) # Include the matching record
except json.JSONDecodeError:
# If we can't parse the line as JSON, include it anyway
outfile.write(line) outfile.write(line)
# If we're in skip mode, check if we should exit it
else:
if target_pattern in line:
skip_mode = False
outfile.write(line) # Include the matching line
if __name__ == "__main__": if __name__ == "__main__":
if len(sys.argv) != 3: if len(sys.argv) != 3: