fix: revert regressions

gkampitakis · gkampitakis · commit 2047c804912f · 2022-09-04T10:35:56.000+01:00
diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
@@ -34,8 +34,6 @@ const (
 	DiffInsert Operation = 1
 	// DiffEqual item represents an equal diff.
 	DiffEqual Operation = 0
-	// IndexSeparator is used to seperate the array indexes in an index string
-	IndexSeparator = ","
 )
 
 // Diff represents one diff operation
@@ -205,7 +203,7 @@ func (dmp *DiffMatchPatch) diffCompute(
 // then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
 func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
 	// Scan the text on a line-by-line basis first.
-	text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
+	text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)
 
 	diffs := dmp.diffMainRunes(text1, text2, false, deadline)
 
@@ -406,28 +404,73 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
 // a string of hashes where each Unicode character represents one line.
 // It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
 func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
-	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
-	return chars1, chars2, lineArray
+	chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
+	return string(chars1), string(chars2), lineArray
 }
 
-// DiffLinesToRunes splits two texts into a list of runes.
+// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
 func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
-	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
-	return []rune(chars1), []rune(chars2), lineArray
+	// '\x00' is a valid character, but various debuggers don't like it.
+	// So we'll insert a junk entry to avoid generating a null character.
+	lineArray := []string{""}    // e.g. lineArray[4] == 'Hello\n'
+	lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
+
+	chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
+	chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)
+
+	return chars1, chars2, lineArray
+}
+
+func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
+	return dmp.DiffLinesToRunes(string(text1), string(text2))
+}
+
+// diffLinesToRunesMunge splits a text into an array of strings, and reduces the texts to a []rune
+// where each Unicode character represents one line.
+// We use strings instead of []runes as input mainly because you can't use []rune as a map key.
+func (dmp *DiffMatchPatch) diffLinesToRunesMunge(
+	text string,
+	lineArray *[]string,
+	lineHash map[string]int,
+) []rune {
+	// Walk the text, pulling out a substring for each line. text.split('\n')
+	//  would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
+	lineStart := 0
+	lineEnd := -1
+	runes := []rune{}
+
+	for lineEnd < len(text)-1 {
+		lineEnd = indexOf(text, "\n", lineStart)
+
+		if lineEnd == -1 {
+			lineEnd = len(text) - 1
+		}
+
+		line := text[lineStart : lineEnd+1]
+		lineStart = lineEnd + 1
+		lineValue, ok := lineHash[line]
+
+		if ok {
+			runes = append(runes, rune(lineValue))
+		} else {
+			*lineArray = append(*lineArray, line)
+			lineHash[line] = len(*lineArray) - 1
+			runes = append(runes, rune(len(*lineArray)-1))
+		}
+	}
+
+	return runes
 }
 
 // DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
 func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
 	hydrated := make([]Diff, 0, len(diffs))
 	for _, aDiff := range diffs {
-		chars := strings.Split(aDiff.Text, IndexSeparator)
+		chars := aDiff.Text
 		text := make([]string, len(chars))
 
 		for i, r := range chars {
-			i1, err := strconv.Atoi(r)
-			if err == nil {
-				text[i] = lineArray[i1]
-			}
+			text[i] = lineArray[r]
 		}
 
 		aDiff.Text = strings.Join(text, "")
@@ -1331,49 +1374,3 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1, delta string) (diffs []Diff, err
 
 	return diffs, nil
 }
-
-// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
-func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
-	// '\x00' is a valid character, but various debuggers don't like it.
-	// So we'll insert a junk entry to avoid generating a null character.
-	lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
-
-	// Each string has the index of lineArray which it points to
-	strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
-	strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
-
-	return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
-}
-
-// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
-func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
-	// Walk the text, pulling out a substring for each line. text.split('\n')
-	// would would temporarily double our memory footprint.
-	// Modifying text would create many large strings to garbage collect.
-	lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
-	lineStart := 0
-	lineEnd := -1
-	strs := []uint32{}
-
-	for lineEnd < len(text)-1 {
-		lineEnd = indexOf(text, "\n", lineStart)
-
-		if lineEnd == -1 {
-			lineEnd = len(text) - 1
-		}
-
-		line := text[lineStart : lineEnd+1]
-		lineStart = lineEnd + 1
-		lineValue, ok := lineHash[line]
-
-		if ok {
-			strs = append(strs, uint32(lineValue))
-		} else {
-			*lineArray = append(*lineArray, line)
-			lineHash[line] = len(*lineArray) - 1
-			strs = append(strs, uint32(len(*lineArray)-1))
-		}
-	}
-
-	return strs
-}
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
@@ -10,8 +10,6 @@ package diffmatchpatch
 
 import (
 	"fmt"
-	"io"
-	"os"
 	"reflect"
 	"strconv"
 	"strings"
@@ -302,10 +300,10 @@ func TestDiffLinesToChars(t *testing.T) {
 	dmp := New()
 
 	for i, tc := range []TestCase{
-		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
-		{"a", "b", "1", "2", []string{"", "a", "b"}},
+		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\u0001\u0002\u0003\u0003", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
+		{"a", "b", "\u0001", "\u0002", []string{"", "a", "b"}},
 		// Omit final newline.
-		{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+		{"alpha\nbeta\nalpha", "", "\u0001\u0002\u0003", "", []string{"", "alpha\n", "beta\n", "alpha"}},
 	} {
 		actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
 		assertEqual(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -318,14 +316,14 @@ func TestDiffLinesToChars(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	var charList []string
+	var charList []rune
 	for x := 1; x < n+1; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
 	lines := strings.Join(lineList, "")
-	chars := strings.Join(charList[:], ",")
-	assertEqual(t, n, len(strings.Split(chars, ",")))
+	chars := string(charList)
+	assertEqual(t, n, utf8.RuneCountInString(chars))
 
 	actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
 	assertEqual(t, chars, actualChars1)
@@ -345,8 +343,8 @@ func TestDiffCharsToLines(t *testing.T) {
 	for i, tc := range []TestCase{
 		{
 			Diffs: []Diff{
-				{DiffEqual, "1,2,1"},
-				{DiffInsert, "2,1,2"},
+				{DiffEqual, "\u0001\u0002\u0001"},
+				{DiffInsert, "\u0002\u0001\u0002"},
 			},
 			Lines: []string{"", "alpha\n", "beta\n"},
 
@@ -365,15 +363,14 @@ func TestDiffCharsToLines(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	charList := []string{}
+	charList := []rune{}
 	for x := 1; x <= n; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
 	assertEqual(t, n, len(charList))
-	chars := strings.Join(charList[:], ",")
 
-	actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, chars}}, lineList)
+	actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, string(charList)}}, lineList)
 	assertEqual(t, []Diff{{DiffDelete, strings.Join(lineList, "")}}, actual)
 }
 
@@ -1507,19 +1504,6 @@ func TestDiffMainWithCheckLines(t *testing.T) {
 	}
 }
 
-func TestMassiveRuneDiffConversion(t *testing.T) {
-	sNew, err := os.ReadFile("../testdata/fixture.go")
-	if err != nil {
-		panic(err)
-	}
-
-	dmp := New()
-	t1, t2, tt := dmp.DiffLinesToChars("", string(sNew))
-	diffs := dmp.DiffMain(t1, t2, false)
-	diffs = dmp.DiffCharsToLines(diffs, tt)
-	assertEqual(t, true, len(diffs) > 0)
-}
-
 func BenchmarkDiffMain(bench *testing.B) {
 	var r []Diff
 
@@ -1579,22 +1563,3 @@ func BenchmarkDiffMainRunesLargeLines(b *testing.B) {
 
 	SinkSliceDiff = r
 }
-
-func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) {
-	var r []Diff
-
-	fp, _ := os.Open("../testdata/diff10klinestest.txt")
-	defer fp.Close()
-	data, _ := io.ReadAll(fp)
-	dmp := New()
-
-	b.ResetTimer()
-
-	for i := 0; i < b.N; i++ {
-		text1, text2, linearray := dmp.DiffLinesToRunes(string(data), "")
-		r = dmp.DiffMainRunes(text1, text2, false)
-		r = dmp.DiffCharsToLines(r, linearray)
-	}
-
-	SinkSliceDiff = r
-}
diff --git a/diffmatchpatch/patch_test.go b/diffmatchpatch/patch_test.go
@@ -567,3 +567,28 @@ func TestPatchApply(t *testing.T) {
 		)
 	}
 }
+
+func TestPatchOutOfRangePanic(t *testing.T) {
+	text1 := `
+  1111111111111 000000
+  ------------- ------
+  xxxxxxxxxxxxx ------
+  xxxxxxxxxxxxx ------
+  xxxxxxxxxxxxx xxxxxx
+  xxxxxxxxxxxxx ......
+  xxxxxxxxxxxxx 111111
+  xxxxxxxxxxxxx ??????
+  xxxxxxxxxxxxx 333333
+  xxxxxxxxxxxxx 555555
+  xxxxxxxxxx xxxxx
+  xxxxxxxxxx xxxxx
+  xxxxxxxxxx xxxxx
+  xxxxxxxxxx xxxxx
+`
+	text2 := `
+  2222222222222 000000
+  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
+
+	patches := New().PatchMake(text1, text2)
+	assertEqual(t, 6, len(patches), "Issue https://github.com/sergi/go-diff/issues/127")
+}
diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go
@@ -9,7 +9,6 @@
 package diffmatchpatch
 
 import (
-	"strconv"
 	"strings"
 	"unicode/utf8"
 )
@@ -92,20 +91,3 @@ func runesIndex(r1, r2 []rune) int {
 	}
 	return -1
 }
-
-func intArrayToString(ns []uint32) string {
-	if len(ns) == 0 {
-		return ""
-	}
-
-	indexSeparator := IndexSeparator[0]
-
-	// Appr. 3 chars per num plus the comma.
-	b := []byte{}
-	for _, n := range ns {
-		b = strconv.AppendInt(b, int64(n), 10)
-		b = append(b, indexSeparator)
-	}
-	b = b[:len(b)-1]
-	return string(b)
-}
diff --git a/testdata/diff10klinestest.txt b/testdata/diff10klinestest.txt
diff --git a/testdata/fixture.go b/testdata/fixture.go