Skip to content

Commit 2047c80

Browse files
committed
fix: revert regressions
1 parent 46eaa6f commit 2047c80

File tree

6 files changed

+93
-164647
lines changed

6 files changed

+93
-164647
lines changed

diffmatchpatch/diff.go

Lines changed: 56 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ const (
3434
DiffInsert Operation = 1
3535
// DiffEqual item represents an equal diff.
3636
DiffEqual Operation = 0
37-
// IndexSeparator is used to separate the array indexes in an index string
38-
IndexSeparator = ","
3937
)
4038

4139
// Diff represents one diff operation
@@ -205,7 +203,7 @@ func (dmp *DiffMatchPatch) diffCompute(
205203
// then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
206204
func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
207205
// Scan the text on a line-by-line basis first.
208-
text1, text2, linearray := dmp.DiffLinesToRunes(string(text1), string(text2))
206+
text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)
209207

210208
diffs := dmp.diffMainRunes(text1, text2, false, deadline)
211209

@@ -406,28 +404,73 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
406404
// a string of hashes where each Unicode character represents one line.
407405
// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
408406
func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
409-
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
410-
return chars1, chars2, lineArray
407+
chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
408+
return string(chars1), string(chars2), lineArray
411409
}
412410

413-
// DiffLinesToRunes splits two texts into a list of runes.
411+
// DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
414412
func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
415-
chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
416-
return []rune(chars1), []rune(chars2), lineArray
413+
// '\x00' is a valid character, but various debuggers don't like it.
414+
// So we'll insert a junk entry to avoid generating a null character.
415+
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
416+
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
417+
418+
chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
419+
chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)
420+
421+
return chars1, chars2, lineArray
422+
}
423+
424+
func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
425+
return dmp.DiffLinesToRunes(string(text1), string(text2))
426+
}
427+
428+
// diffLinesToRunesMunge splits a text into an array of strings, and reduces the texts to a []rune
429+
// where each Unicode character represents one line.
430+
// We use strings instead of []runes as input mainly because you can't use []rune as a map key.
431+
func (dmp *DiffMatchPatch) diffLinesToRunesMunge(
432+
text string,
433+
lineArray *[]string,
434+
lineHash map[string]int,
435+
) []rune {
436+
// Walk the text, pulling out a substring for each line. text.split('\n')
437+
// would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
438+
lineStart := 0
439+
lineEnd := -1
440+
runes := []rune{}
441+
442+
for lineEnd < len(text)-1 {
443+
lineEnd = indexOf(text, "\n", lineStart)
444+
445+
if lineEnd == -1 {
446+
lineEnd = len(text) - 1
447+
}
448+
449+
line := text[lineStart : lineEnd+1]
450+
lineStart = lineEnd + 1
451+
lineValue, ok := lineHash[line]
452+
453+
if ok {
454+
runes = append(runes, rune(lineValue))
455+
} else {
456+
*lineArray = append(*lineArray, line)
457+
lineHash[line] = len(*lineArray) - 1
458+
runes = append(runes, rune(len(*lineArray)-1))
459+
}
460+
}
461+
462+
return runes
417463
}
418464

419465
// DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
420466
func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
421467
hydrated := make([]Diff, 0, len(diffs))
422468
for _, aDiff := range diffs {
423-
chars := strings.Split(aDiff.Text, IndexSeparator)
469+
chars := aDiff.Text
424470
text := make([]string, len(chars))
425471

426472
for i, r := range chars {
427-
i1, err := strconv.Atoi(r)
428-
if err == nil {
429-
text[i] = lineArray[i1]
430-
}
473+
text[i] = lineArray[r]
431474
}
432475

433476
aDiff.Text = strings.Join(text, "")
@@ -1331,49 +1374,3 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1, delta string) (diffs []Diff, err
13311374

13321375
return diffs, nil
13331376
}
1334-
1335-
// diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
1336-
func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
1337-
// '\x00' is a valid character, but various debuggers don't like it.
1338-
// So we'll insert a junk entry to avoid generating a null character.
1339-
lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
1340-
1341-
// Each string has the index of lineArray which it points to
1342-
strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
1343-
strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
1344-
1345-
return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
1346-
}
1347-
1348-
// diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
1349-
func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
1350-
// Walk the text, pulling out a substring for each line. text.split('\n')
1351-
// would temporarily double our memory footprint.
1352-
// Modifying text would create many large strings to garbage collect.
1353-
lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
1354-
lineStart := 0
1355-
lineEnd := -1
1356-
strs := []uint32{}
1357-
1358-
for lineEnd < len(text)-1 {
1359-
lineEnd = indexOf(text, "\n", lineStart)
1360-
1361-
if lineEnd == -1 {
1362-
lineEnd = len(text) - 1
1363-
}
1364-
1365-
line := text[lineStart : lineEnd+1]
1366-
lineStart = lineEnd + 1
1367-
lineValue, ok := lineHash[line]
1368-
1369-
if ok {
1370-
strs = append(strs, uint32(lineValue))
1371-
} else {
1372-
*lineArray = append(*lineArray, line)
1373-
lineHash[line] = len(*lineArray) - 1
1374-
strs = append(strs, uint32(len(*lineArray)-1))
1375-
}
1376-
}
1377-
1378-
return strs
1379-
}

diffmatchpatch/diff_test.go

Lines changed: 12 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,6 @@ package diffmatchpatch
1010

1111
import (
1212
"fmt"
13-
"io"
14-
"os"
1513
"reflect"
1614
"strconv"
1715
"strings"
@@ -302,10 +300,10 @@ func TestDiffLinesToChars(t *testing.T) {
302300
dmp := New()
303301

304302
for i, tc := range []TestCase{
305-
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
306-
{"a", "b", "1", "2", []string{"", "a", "b"}},
303+
{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\u0001\u0002\u0003\u0003", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
304+
{"a", "b", "\u0001", "\u0002", []string{"", "a", "b"}},
307305
// Omit final newline.
308-
{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
306+
{"alpha\nbeta\nalpha", "", "\u0001\u0002\u0003", "", []string{"", "alpha\n", "beta\n", "alpha"}},
309307
} {
310308
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
311309
assertEqual(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -318,14 +316,14 @@ func TestDiffLinesToChars(t *testing.T) {
318316
lineList := []string{
319317
"", // Account for the initial empty element of the lines array.
320318
}
321-
var charList []string
319+
var charList []rune
322320
for x := 1; x < n+1; x++ {
323321
lineList = append(lineList, strconv.Itoa(x)+"\n")
324-
charList = append(charList, strconv.Itoa(x))
322+
charList = append(charList, rune(x))
325323
}
326324
lines := strings.Join(lineList, "")
327-
chars := strings.Join(charList[:], ",")
328-
assertEqual(t, n, len(strings.Split(chars, ",")))
325+
chars := string(charList)
326+
assertEqual(t, n, utf8.RuneCountInString(chars))
329327

330328
actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
331329
assertEqual(t, chars, actualChars1)
@@ -345,8 +343,8 @@ func TestDiffCharsToLines(t *testing.T) {
345343
for i, tc := range []TestCase{
346344
{
347345
Diffs: []Diff{
348-
{DiffEqual, "1,2,1"},
349-
{DiffInsert, "2,1,2"},
346+
{DiffEqual, "\u0001\u0002\u0001"},
347+
{DiffInsert, "\u0002\u0001\u0002"},
350348
},
351349
Lines: []string{"", "alpha\n", "beta\n"},
352350

@@ -365,15 +363,14 @@ func TestDiffCharsToLines(t *testing.T) {
365363
lineList := []string{
366364
"", // Account for the initial empty element of the lines array.
367365
}
368-
charList := []string{}
366+
charList := []rune{}
369367
for x := 1; x <= n; x++ {
370368
lineList = append(lineList, strconv.Itoa(x)+"\n")
371-
charList = append(charList, strconv.Itoa(x))
369+
charList = append(charList, rune(x))
372370
}
373371
assertEqual(t, n, len(charList))
374-
chars := strings.Join(charList[:], ",")
375372

376-
actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, chars}}, lineList)
373+
actual := dmp.DiffCharsToLines([]Diff{{DiffDelete, string(charList)}}, lineList)
377374
assertEqual(t, []Diff{{DiffDelete, strings.Join(lineList, "")}}, actual)
378375
}
379376

@@ -1507,19 +1504,6 @@ func TestDiffMainWithCheckLines(t *testing.T) {
15071504
}
15081505
}
15091506

1510-
func TestMassiveRuneDiffConversion(t *testing.T) {
1511-
sNew, err := os.ReadFile("../testdata/fixture.go")
1512-
if err != nil {
1513-
panic(err)
1514-
}
1515-
1516-
dmp := New()
1517-
t1, t2, tt := dmp.DiffLinesToChars("", string(sNew))
1518-
diffs := dmp.DiffMain(t1, t2, false)
1519-
diffs = dmp.DiffCharsToLines(diffs, tt)
1520-
assertEqual(t, true, len(diffs) > 0)
1521-
}
1522-
15231507
func BenchmarkDiffMain(bench *testing.B) {
15241508
var r []Diff
15251509

@@ -1579,22 +1563,3 @@ func BenchmarkDiffMainRunesLargeLines(b *testing.B) {
15791563

15801564
SinkSliceDiff = r
15811565
}
1582-
1583-
func BenchmarkDiffMainRunesLargeDiffLines(b *testing.B) {
1584-
var r []Diff
1585-
1586-
fp, _ := os.Open("../testdata/diff10klinestest.txt")
1587-
defer fp.Close()
1588-
data, _ := io.ReadAll(fp)
1589-
dmp := New()
1590-
1591-
b.ResetTimer()
1592-
1593-
for i := 0; i < b.N; i++ {
1594-
text1, text2, linearray := dmp.DiffLinesToRunes(string(data), "")
1595-
r = dmp.DiffMainRunes(text1, text2, false)
1596-
r = dmp.DiffCharsToLines(r, linearray)
1597-
}
1598-
1599-
SinkSliceDiff = r
1600-
}

diffmatchpatch/patch_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,3 +567,28 @@ func TestPatchApply(t *testing.T) {
567567
)
568568
}
569569
}
570+
571+
func TestPatchOutOfRangePanic(t *testing.T) {
572+
text1 := `
573+
1111111111111 000000
574+
------------- ------
575+
xxxxxxxxxxxxx ------
576+
xxxxxxxxxxxxx ------
577+
xxxxxxxxxxxxx xxxxxx
578+
xxxxxxxxxxxxx ......
579+
xxxxxxxxxxxxx 111111
580+
xxxxxxxxxxxxx ??????
581+
xxxxxxxxxxxxx 333333
582+
xxxxxxxxxxxxx 555555
583+
xxxxxxxxxx xxxxx
584+
xxxxxxxxxx xxxxx
585+
xxxxxxxxxx xxxxx
586+
xxxxxxxxxx xxxxx
587+
`
588+
text2 := `
589+
2222222222222 000000
590+
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
591+
592+
patches := New().PatchMake(text1, text2)
593+
assertEqual(t, 6, len(patches), "Issue https://github.com/sergi/go-diff/issues/127")
594+
}

diffmatchpatch/stringutil.go

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
package diffmatchpatch
1010

1111
import (
12-
"strconv"
1312
"strings"
1413
"unicode/utf8"
1514
)
@@ -92,20 +91,3 @@ func runesIndex(r1, r2 []rune) int {
9291
}
9392
return -1
9493
}
95-
96-
func intArrayToString(ns []uint32) string {
97-
if len(ns) == 0 {
98-
return ""
99-
}
100-
101-
indexSeparator := IndexSeparator[0]
102-
103-
// Appr. 3 chars per num plus the comma.
104-
b := []byte{}
105-
for _, n := range ns {
106-
b = strconv.AppendInt(b, int64(n), 10)
107-
b = append(b, indexSeparator)
108-
}
109-
b = b[:len(b)-1]
110-
return string(b)
111-
}

0 commit comments

Comments
 (0)