Refactor replace_most_similar_chunk function and update test cases.

This commit is contained in:
Paul Gauthier 2023-05-09 07:39:37 -07:00
parent 17589d5241
commit 8a8bc3e244
2 changed files with 8 additions and 7 deletions

View file

@@ -4,8 +4,8 @@ from utils import replace_most_similar_chunk
class TestUtils(unittest.TestCase): class TestUtils(unittest.TestCase):
def test_replace_most_similar_chunk(self): def test_replace_most_similar_chunk(self):
whole = "This is a sample text.\nAnother line of text.\nYet another line." whole = "This is a sample text.\nAnother line of text.\nYet another line."
part = "sample text" part = "This is a sample text"
replace = "replaced text" replace = "This is a replaced text."
expected_output = "This is a replaced text.\nAnother line of text.\nYet another line." expected_output = "This is a replaced text.\nAnother line of text.\nYet another line."
result = replace_most_similar_chunk(whole, part, replace) result = replace_most_similar_chunk(whole, part, replace)
@@ -13,12 +13,12 @@ class TestUtils(unittest.TestCase):
def test_replace_most_similar_chunk_not_perfect_match(self): def test_replace_most_similar_chunk_not_perfect_match(self):
whole = "This is a sample text.\nAnother line of text.\nYet another line." whole = "This is a sample text.\nAnother line of text.\nYet another line."
part = "sample text.\nAnother line" part = "This was a sample text.\nAnother line of txt"
replace = "replaced text.\nModified line" replace = "This is a replaced text.\nModified line of text."
expected_output = "This is a replaced text.\nModified line of text.\nYet another line." expected_output = "This is a replaced text.\nModified line of text.\nYet another line."
result = replace_most_similar_chunk(whole, part, replace) result = replace_most_similar_chunk(whole, part, replace)
self.assertEqual(result, expected_output) self.assertEqual(result, expected_output)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View file

@@ -3,11 +3,12 @@ import math
from difflib import SequenceMatcher from difflib import SequenceMatcher
from pathlib import Path from pathlib import Path
# from dump import dump #from dump import dump
def replace_most_similar_chunk(whole, part, replace): def replace_most_similar_chunk(whole, part, replace):
similarity_thresh = 0.8 similarity_thresh = 0.8
max_similarity = 0 max_similarity = 0
most_similar_chunk_start = -1 most_similar_chunk_start = -1
most_similar_chunk_end = -1 most_similar_chunk_end = -1