diff --git a/test_utils.py b/test_utils.py index 2a6167d4c..db46d69b1 100644 --- a/test_utils.py +++ b/test_utils.py @@ -4,8 +4,8 @@ from utils import replace_most_similar_chunk class TestUtils(unittest.TestCase): def test_replace_most_similar_chunk(self): whole = "This is a sample text.\nAnother line of text.\nYet another line." - part = "sample text" - replace = "replaced text" + part = "This is a sample text" + replace = "This is a replaced text." expected_output = "This is a replaced text.\nAnother line of text.\nYet another line." result = replace_most_similar_chunk(whole, part, replace) @@ -13,12 +13,12 @@ class TestUtils(unittest.TestCase): def test_replace_most_similar_chunk_not_perfect_match(self): whole = "This is a sample text.\nAnother line of text.\nYet another line." - part = "sample text.\nAnother line" - replace = "replaced text.\nModified line" + part = "This was a sample text.\nAnother line of txt" + replace = "This is a replaced text.\nModified line of text." expected_output = "This is a replaced text.\nModified line of text.\nYet another line." result = replace_most_similar_chunk(whole, part, replace) self.assertEqual(result, expected_output) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/utils.py b/utils.py index d85a578c8..b78a4a03b 100644 --- a/utils.py +++ b/utils.py @@ -3,11 +3,12 @@ import math from difflib import SequenceMatcher from pathlib import Path -# from dump import dump - +#from dump import dump def replace_most_similar_chunk(whole, part, replace): + similarity_thresh = 0.8 + max_similarity = 0 most_similar_chunk_start = -1 most_similar_chunk_end = -1