Skip to content

Commit 3cd9c86

Browse files
committed
SA1026: reimplement JSON checking using encoding/json rules
The previous version of SA1026 was very naive. - It didn't take addressability into consideration, leading to false positives. - It didn't cull fields with name conflicts, leading to false positives. - It didn't traverse into nested structures, leading to false negatives. The new check is based on a modified copy of encoding/json that applies all relevant rules. Updates gh-1088 (cherry picked from commit 467468a)
1 parent e762e06 commit 3cd9c86

File tree

4 files changed

+660
-11
lines changed

4 files changed

+660
-11
lines changed

staticcheck/fakejson/encode.go

Lines changed: 373 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,373 @@
1+
// Copyright 2010 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// This file contains a modified copy of the encoding/json encoder.
6+
// All dynamic behavior has been removed, and reflection has been replaced with go/types.
7+
// This allows us to statically find unmarshalable types
8+
// with the same rules for tags, shadowing and addressability as encoding/json.
9+
// This is used for SA1026.
10+
11+
package fakejson
12+
13+
import (
14+
"go/token"
15+
"go/types"
16+
"sort"
17+
"strings"
18+
"unicode"
19+
20+
"honnef.co/go/tools/staticcheck/fakereflect"
21+
)
22+
23+
// parseTag returns the name portion of a struct field's json tag, that is,
// everything before the first comma. Any comma-separated options following
// the name are discarded; an input without a comma is returned unchanged.
func parseTag(tag string) string {
	if idx := strings.IndexByte(tag, ','); idx != -1 {
		return tag[:idx]
	}
	return tag
}
31+
32+
func Marshal(v types.Type) *UnsupportedTypeError {
33+
enc := encoder{
34+
seen: map[fakereflect.TypeAndCanAddr]struct{}{},
35+
}
36+
return enc.newTypeEncoder(fakereflect.TypeAndCanAddr{Type: v}, "x")
37+
}
38+
39+
// An UnsupportedTypeError is returned by Marshal when attempting
// to encode an unsupported value type.
type UnsupportedTypeError struct {
	// Type is the type that cannot be encoded.
	Type types.Type
	// Path is an expression-like path from the root value to the
	// offending type, built from field selectors (".Name"), "[0]" for
	// slice and array elements and "[k]" for map elements.
	Path string
}
45+
46+
// marshalerType is a go/types interface with the single method
//
//	MarshalJSON() ([]byte, error)
//
// It is used to test whether a type provides its own JSON encoding.
var marshalerType = types.NewInterfaceType([]*types.Func{
	types.NewFunc(token.NoPos, nil, "MarshalJSON", types.NewSignature(nil,
		types.NewTuple(),
		types.NewTuple(
			types.NewVar(token.NoPos, nil, "", types.NewSlice(types.Typ[types.Byte])),
			types.NewVar(token.NoPos, nil, "", types.Universe.Lookup("error").Type())),
		false,
	)),
}, nil).Complete()
55+
56+
// textMarshalerType is a go/types interface with the single method
//
//	MarshalText() ([]byte, error)
//
// It is used to test whether a type can encode itself as text, which
// makes it usable both as a value and as a map key.
var textMarshalerType = types.NewInterfaceType([]*types.Func{
	types.NewFunc(token.NoPos, nil, "MarshalText", types.NewSignature(nil,
		types.NewTuple(),
		types.NewTuple(
			types.NewVar(token.NoPos, nil, "", types.NewSlice(types.Typ[types.Byte])),
			types.NewVar(token.NoPos, nil, "", types.Universe.Lookup("error").Type())),
		false,
	)),
}, nil).Complete()
65+
66+
type encoder struct {
67+
seen map[fakereflect.TypeAndCanAddr]struct{}
68+
}
69+
70+
func (enc *encoder) newTypeEncoder(t fakereflect.TypeAndCanAddr, stack string) *UnsupportedTypeError {
71+
if _, ok := enc.seen[t]; ok {
72+
return nil
73+
}
74+
enc.seen[t] = struct{}{}
75+
76+
if t.Implements(marshalerType) {
77+
return nil
78+
}
79+
if !t.IsPtr() && t.CanAddr() && fakereflect.PtrTo(t).Implements(marshalerType) {
80+
return nil
81+
}
82+
if t.Implements(textMarshalerType) {
83+
return nil
84+
}
85+
if !t.IsPtr() && t.CanAddr() && fakereflect.PtrTo(t).Implements(textMarshalerType) {
86+
return nil
87+
}
88+
89+
switch t.Type.Underlying().(type) {
90+
case *types.Basic, *types.Interface:
91+
return nil
92+
case *types.Struct:
93+
return enc.typeFields(t, stack)
94+
case *types.Map:
95+
return enc.newMapEncoder(t, stack)
96+
case *types.Slice:
97+
return enc.newSliceEncoder(t, stack)
98+
case *types.Array:
99+
return enc.newArrayEncoder(t, stack)
100+
case *types.Pointer:
101+
// we don't have to express the pointer dereference in the path; x.f is syntactic sugar for (*x).f
102+
return enc.newTypeEncoder(t.Elem(), stack)
103+
default:
104+
return &UnsupportedTypeError{t.Type, stack}
105+
}
106+
}
107+
108+
func (enc *encoder) newMapEncoder(t fakereflect.TypeAndCanAddr, stack string) *UnsupportedTypeError {
109+
switch t.Key().Type.Underlying().(type) {
110+
case *types.Basic:
111+
default:
112+
if !t.Key().Implements(textMarshalerType) {
113+
return &UnsupportedTypeError{
114+
Type: t.Type,
115+
Path: stack,
116+
}
117+
}
118+
}
119+
return enc.newTypeEncoder(t.Elem(), stack+"[k]")
120+
}
121+
122+
func (enc *encoder) newSliceEncoder(t fakereflect.TypeAndCanAddr, stack string) *UnsupportedTypeError {
123+
// Byte slices get special treatment; arrays don't.
124+
basic, ok := t.Elem().Type.Underlying().(*types.Basic)
125+
if ok && basic.Kind() == types.Uint8 {
126+
p := fakereflect.PtrTo(t.Elem())
127+
if !p.Implements(marshalerType) && !p.Implements(textMarshalerType) {
128+
return nil
129+
}
130+
}
131+
return enc.newArrayEncoder(t, stack)
132+
}
133+
134+
func (enc *encoder) newArrayEncoder(t fakereflect.TypeAndCanAddr, stack string) *UnsupportedTypeError {
135+
return enc.newTypeEncoder(t.Elem(), stack+"[0]")
136+
}
137+
138+
// isValidTag reports whether s is usable as a JSON field name taken from a
// struct tag. The empty string is invalid. Otherwise every rune must be a
// letter, a digit, or one of the explicitly allowed punctuation characters
// (which include the space character).
func isValidTag(s string) bool {
	if s == "" {
		return false
	}
	for _, c := range s {
		switch {
		case strings.ContainsRune("!#$%&()*+-./:;<=>?@[]^_{|}~ ", c):
			// Backslash and quote chars are reserved, but
			// otherwise any punctuation chars are allowed
			// in a tag name.
		case !unicode.IsLetter(c) && !unicode.IsDigit(c):
			return false
		}
	}
	return true
}
154+
155+
func typeByIndex(t fakereflect.TypeAndCanAddr, index []int) fakereflect.TypeAndCanAddr {
156+
for _, i := range index {
157+
if t.IsPtr() {
158+
t = t.Elem()
159+
}
160+
t = t.Field(i).Type
161+
}
162+
return t
163+
}
164+
165+
func pathByIndex(t fakereflect.TypeAndCanAddr, index []int) string {
166+
path := ""
167+
for _, i := range index {
168+
if t.IsPtr() {
169+
t = t.Elem()
170+
}
171+
path += "." + t.Field(i).Name
172+
t = t.Field(i).Type
173+
}
174+
return path
175+
}
176+
177+
// A field represents a single field found in a struct.
178+
type field struct {
179+
name string
180+
181+
tag bool
182+
index []int
183+
typ fakereflect.TypeAndCanAddr
184+
}
185+
186+
// byIndex sorts field by index sequence.
187+
type byIndex []field
188+
189+
func (x byIndex) Len() int { return len(x) }
190+
191+
func (x byIndex) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
192+
193+
func (x byIndex) Less(i, j int) bool {
194+
for k, xik := range x[i].index {
195+
if k >= len(x[j].index) {
196+
return false
197+
}
198+
if xik != x[j].index[k] {
199+
return xik < x[j].index[k]
200+
}
201+
}
202+
return len(x[i].index) < len(x[j].index)
203+
}
204+
205+
// typeFields returns a list of fields that JSON should recognize for the given type.
206+
// The algorithm is breadth-first search over the set of structs to include - the top struct
207+
// and then any reachable anonymous structs.
208+
func (enc *encoder) typeFields(t fakereflect.TypeAndCanAddr, stack string) *UnsupportedTypeError {
209+
// Anonymous fields to explore at the current level and the next.
210+
current := []field{}
211+
next := []field{{typ: t}}
212+
213+
// Count of queued names for current level and the next.
214+
var count, nextCount map[fakereflect.TypeAndCanAddr]int
215+
216+
// Types already visited at an earlier level.
217+
visited := map[fakereflect.TypeAndCanAddr]bool{}
218+
219+
// Fields found.
220+
var fields []field
221+
222+
for len(next) > 0 {
223+
current, next = next, current[:0]
224+
count, nextCount = nextCount, map[fakereflect.TypeAndCanAddr]int{}
225+
226+
for _, f := range current {
227+
if visited[f.typ] {
228+
continue
229+
}
230+
visited[f.typ] = true
231+
232+
// Scan f.typ for fields to include.
233+
for i := 0; i < f.typ.NumField(); i++ {
234+
sf := f.typ.Field(i)
235+
if sf.Anonymous {
236+
t := sf.Type
237+
if t.IsPtr() {
238+
t = t.Elem()
239+
}
240+
if !sf.IsExported() && !t.IsStruct() {
241+
// Ignore embedded fields of unexported non-struct types.
242+
continue
243+
}
244+
// Do not ignore embedded fields of unexported struct types
245+
// since they may have exported fields.
246+
} else if !sf.IsExported() {
247+
// Ignore unexported non-embedded fields.
248+
continue
249+
}
250+
tag := sf.Tag.Get("json")
251+
if tag == "-" {
252+
continue
253+
}
254+
name := parseTag(tag)
255+
if !isValidTag(name) {
256+
name = ""
257+
}
258+
index := make([]int, len(f.index)+1)
259+
copy(index, f.index)
260+
index[len(f.index)] = i
261+
262+
ft := sf.Type
263+
if ft.Name() == "" && ft.IsPtr() {
264+
// Follow pointer.
265+
ft = ft.Elem()
266+
}
267+
268+
// Record found field and index sequence.
269+
if name != "" || !sf.Anonymous || !ft.IsStruct() {
270+
tagged := name != ""
271+
if name == "" {
272+
name = sf.Name
273+
}
274+
field := field{
275+
name: name,
276+
tag: tagged,
277+
index: index,
278+
typ: ft,
279+
}
280+
281+
fields = append(fields, field)
282+
if count[f.typ] > 1 {
283+
// If there were multiple instances, add a second,
284+
// so that the annihilation code will see a duplicate.
285+
// It only cares about the distinction between 1 or 2,
286+
// so don't bother generating any more copies.
287+
fields = append(fields, fields[len(fields)-1])
288+
}
289+
continue
290+
}
291+
292+
// Record new anonymous struct to explore in next round.
293+
nextCount[ft]++
294+
if nextCount[ft] == 1 {
295+
next = append(next, field{name: ft.Name(), index: index, typ: ft})
296+
}
297+
}
298+
}
299+
}
300+
301+
sort.Slice(fields, func(i, j int) bool {
302+
x := fields
303+
// sort field by name, breaking ties with depth, then
304+
// breaking ties with "name came from json tag", then
305+
// breaking ties with index sequence.
306+
if x[i].name != x[j].name {
307+
return x[i].name < x[j].name
308+
}
309+
if len(x[i].index) != len(x[j].index) {
310+
return len(x[i].index) < len(x[j].index)
311+
}
312+
if x[i].tag != x[j].tag {
313+
return x[i].tag
314+
}
315+
return byIndex(x).Less(i, j)
316+
})
317+
318+
// Delete all fields that are hidden by the Go rules for embedded fields,
319+
// except that fields with JSON tags are promoted.
320+
321+
// The fields are sorted in primary order of name, secondary order
322+
// of field index length. Loop over names; for each name, delete
323+
// hidden fields by choosing the one dominant field that survives.
324+
out := fields[:0]
325+
for advance, i := 0, 0; i < len(fields); i += advance {
326+
// One iteration per name.
327+
// Find the sequence of fields with the name of this first field.
328+
fi := fields[i]
329+
name := fi.name
330+
for advance = 1; i+advance < len(fields); advance++ {
331+
fj := fields[i+advance]
332+
if fj.name != name {
333+
break
334+
}
335+
}
336+
if advance == 1 { // Only one field with this name
337+
out = append(out, fi)
338+
continue
339+
}
340+
dominant, ok := dominantField(fields[i : i+advance])
341+
if ok {
342+
out = append(out, dominant)
343+
}
344+
}
345+
346+
fields = out
347+
sort.Sort(byIndex(fields))
348+
349+
for i := range fields {
350+
f := &fields[i]
351+
err := enc.newTypeEncoder(typeByIndex(t, f.index), stack+pathByIndex(t, f.index))
352+
if err != nil {
353+
return err
354+
}
355+
}
356+
return nil
357+
}
358+
359+
// dominantField looks through the fields, all of which are known to
360+
// have the same name, to find the single field that dominates the
361+
// others using Go's embedding rules, modified by the presence of
362+
// JSON tags. If there are multiple top-level fields, the boolean
363+
// will be false: This condition is an error in Go and we skip all
364+
// the fields.
365+
func dominantField(fields []field) (field, bool) {
366+
// The fields are sorted in increasing index-length order, then by presence of tag.
367+
// That means that the first field is the dominant one. We need only check
368+
// for error cases: two fields at top level, either both tagged or neither tagged.
369+
if len(fields) > 1 && len(fields[0].index) == len(fields[1].index) && fields[0].tag == fields[1].tag {
370+
return field{}, false
371+
}
372+
return fields[0], true
373+
}

0 commit comments

Comments
 (0)