Skip to content

Commit 6e2c2aa

Browse files
committed
go.text/language: added function to parse the value of an HTTP Accept-Language
header. It supports a few non-standard language tags that appear relatively frequently in the Accept-Language headers. R=r CC=golang-dev, nigeltao https://golang.org/cl/13974043
1 parent 893a309 commit 6e2c2aa

File tree

4 files changed

+258
-65
lines changed

4 files changed

+258
-65
lines changed

language/maketables.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -673,7 +673,7 @@ func (b *builder) writeLanguage() {
673673
meta := b.supp.Metadata
674674

675675
b.writeConst("nonCanonicalUnd", b.lang.index("und"))
676-
b.writeConsts("lang_", b.lang.index, "de", "en", "fil", "mo", "nb", "no", "sh", "sr", "tl")
676+
b.writeConsts("lang_", b.lang.index, "de", "en", "fil", "fr", "it", "mo", "mul", "nb", "no", "sh", "sr", "tl")
677677
b.writeConst("langPrivateStart", b.langIndex("qaa"))
678678
b.writeConst("langPrivateEnd", b.langIndex("qtz"))
679679

language/parse.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ import (
88
"bytes"
99
"errors"
1010
"fmt"
11+
"regexp"
1112
"sort"
13+
"strconv"
1214
"strings"
1315
)
1416

@@ -550,3 +552,77 @@ func nextExtension(s string, p int) int {
550552
}
551553
return len(s)
552554
}
555+
556+
var (
557+
// acceptErr is returned by ParseAcceptLanguage when a comma-separated entry
// of the header value does not match acceptRe.
acceptErr = errors.New("ParseAcceptLanguage: syntax error")
558+
// acceptRe matches one comma-separated entry of an Accept-Language value.
// Surrounding spaces are allowed. Group 1 captures the language range (word
// characters and hyphens, or "*") and group 2 captures the optional weight
// that follows ";q=" (digits and dots only; it is validated numerically by
// the caller). An entry that is empty or consists only of spaces also
// matches, leaving both groups empty.
acceptRe = regexp.MustCompile(`^ *(?:([\w-]+|\*)(?: *; *q *= *([0-9\.]+))?)? *$`)
559+
)
560+
561+
// ParseAcceptLanguage parses the contents of a Accept-Language header as
562+
// defined in http://www.google.com/url?q=http://www.ietf.org/rfc/rfc2616.txt
563+
// and returns a list of Tags and a list of corresponding quality weights.
564+
// The Tags will be sorted by highest weight first and then by first occurrence.
565+
// Tags with a weight of zero will be dropped. An error will be returned if the
566+
// input could not be parsed.
567+
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
568+
for start, end := 0, 0; start < len(s); start = end + 1 {
569+
for end = start; end < len(s) && s[end] != ','; end++ {
570+
}
571+
m := acceptRe.FindStringSubmatch(s[start:end])
572+
if m == nil {
573+
return nil, nil, acceptErr
574+
}
575+
if len(m[1]) > 0 {
576+
w := 1.0
577+
if len(m[2]) > 0 {
578+
if w, err = strconv.ParseFloat(m[2], 32); err != nil {
579+
return nil, nil, err
580+
}
581+
// Drop tags with a quality weight of 0.
582+
if w <= 0 {
583+
continue
584+
}
585+
}
586+
t, err := Parse(m[1])
587+
if err != nil {
588+
id, ok := acceptFallback[m[1]]
589+
if !ok {
590+
return nil, nil, err
591+
}
592+
t = Tag{lang: id}
593+
}
594+
tag = append(tag, t)
595+
q = append(q, float32(w))
596+
}
597+
}
598+
sort.Stable(&tagSort{tag, q})
599+
return tag, q, nil
600+
}
601+
602+
// acceptFallback maps a small number of non-standard values that occur in
603+
// Accept-Language headers (with reasonable frequency) to language IDs.
// ParseAcceptLanguage consults it only when Parse rejects a value.
604+
var acceptFallback = map[string]langID{
605+
"english": lang_en,
606+
"deutsch": lang_de,
607+
"italian": lang_it,
608+
"french": lang_fr,
609+
"*": lang_mul, // defined in the spec to match all languages.
610+
}
611+
612+
// tagSort holds a slice of Tags and the parallel slice of their quality
// weights so that both can be reordered together. ParseAcceptLanguage passes
// it to sort.Stable.
type tagSort struct {
613+
tag []Tag
614+
q []float32
615+
}
616+
617+
// Len returns the number of weights held by s.
func (s *tagSort) Len() int {
618+
return len(s.q)
619+
}
620+
621+
// Less reports whether element i has a strictly higher weight than element j,
// so that sorting orders entries from highest to lowest weight.
func (s *tagSort) Less(i, j int) bool {
622+
return s.q[i] > s.q[j]
623+
}
624+
625+
// Swap exchanges elements i and j in both the tag and the weight slice,
// keeping each tag paired with its weight.
func (s *tagSort) Swap(i, j int) {
626+
s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
627+
s.q[i], s.q[j] = s.q[j], s.q[i]
628+
}

language/parse_test.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,3 +354,112 @@ func TestCompose2(t *testing.T) {
354354
}
355355
})
356356
}
357+
358+
// mk is a test helper that parses s and returns the resulting Tag.
func mk(s string) Tag {
359+
// The parse error is deliberately discarded; mk returns whatever Tag Parse
// produced. NOTE(review): callers appear to pass only well-formed tags --
// confirm, since a typo here would go unnoticed.
id, _ := Parse(s)
360+
return id
361+
}
362+
363+
func TestParseAcceptLanguage(t *testing.T) {
364+
type res struct {
365+
t Tag
366+
q float32
367+
}
368+
en := []res{{mk("en"), 1.0}}
369+
tests := []struct {
370+
out []res
371+
in string
372+
ok bool
373+
}{
374+
{en, "en", true},
375+
{en, " en", true},
376+
{en, "en ", true},
377+
{en, " en ", true},
378+
{en, "en,", true},
379+
{en, ",en", true},
380+
{en, ",,,en,,,", true},
381+
{en, ",en;q=1", true},
382+
// We allow an empty input, contrary to spec.
383+
{nil, "", true},
384+
385+
// errors
386+
{nil, ";", false},
387+
{nil, "$", false},
388+
{nil, "e;", false},
389+
{nil, "x;", false},
390+
{nil, "x", false},
391+
{nil, "ac", false}, // non-existing language
392+
{nil, "aa;", false},
393+
{nil, "aa;q", false},
394+
{nil, "aa;q=", false},
395+
{nil, "aa;q=.", false},
396+
397+
// odd fallbacks
398+
{
399+
[]res{{mk("en"), 0.1}},
400+
" english ;q=.1",
401+
true,
402+
},
403+
{
404+
[]res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
405+
" italian, deutsch, french",
406+
true,
407+
},
408+
409+
// lists
410+
{
411+
[]res{{mk("en"), 0.1}},
412+
"en;q=.1",
413+
true,
414+
},
415+
{
416+
[]res{{mk("mul"), 1.0}},
417+
"*",
418+
true,
419+
},
420+
{
421+
[]res{{mk("en"), 1.0}, {mk("de"), 1.0}},
422+
"en,de",
423+
true,
424+
},
425+
{
426+
[]res{{mk("en"), 1.0}, {mk("de"), .5}},
427+
"en,de;q=0.5",
428+
true,
429+
},
430+
{
431+
[]res{{mk("de"), 0.8}, {mk("en"), 0.5}},
432+
" en ; q = 0.5 , , de;q=0.8",
433+
true,
434+
},
435+
{
436+
[]res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
437+
"en,de,fr,i-klingon",
438+
true,
439+
},
440+
// sorting
441+
{
442+
[]res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
443+
"en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
444+
true,
445+
},
446+
// dropping
447+
{
448+
[]res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
449+
"en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
450+
true,
451+
},
452+
}
453+
for i, tt := range tests {
454+
tags, qs, e := ParseAcceptLanguage(tt.in)
455+
if e == nil != tt.ok {
456+
t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
457+
}
458+
for j, tag := range tags {
459+
if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
460+
t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
461+
break
462+
}
463+
}
464+
}
465+
}

0 commit comments

Comments
 (0)