Refactor replace_most_similar_chunk function and update test cases.

This commit is contained in:
Paul Gauthier 2023-05-09 07:39:37 -07:00
parent 17589d5241
commit 8a8bc3e244
2 changed files with 8 additions and 7 deletions

View file

@@ -4,8 +4,8 @@ from utils import replace_most_similar_chunk
class TestUtils(unittest.TestCase):
def test_replace_most_similar_chunk(self):
whole = "This is a sample text.\nAnother line of text.\nYet another line."
-part = "sample text"
-replace = "replaced text"
+part = "This is a sample text"
+replace = "This is a replaced text."
expected_output = "This is a replaced text.\nAnother line of text.\nYet another line."
result = replace_most_similar_chunk(whole, part, replace)
@@ -13,8 +13,8 @@ class TestUtils(unittest.TestCase):
def test_replace_most_similar_chunk_not_perfect_match(self):
whole = "This is a sample text.\nAnother line of text.\nYet another line."
-part = "sample text.\nAnother line"
-replace = "replaced text.\nModified line"
+part = "This was a sample text.\nAnother line of txt"
+replace = "This is a replaced text.\nModified line of text."
expected_output = "This is a replaced text.\nModified line of text.\nYet another line."
result = replace_most_similar_chunk(whole, part, replace)

View file

@@ -5,9 +5,10 @@ from pathlib import Path
#from dump import dump
def replace_most_similar_chunk(whole, part, replace):
similarity_thresh = 0.8
max_similarity = 0
most_similar_chunk_start = -1
most_similar_chunk_end = -1