Skip to content
This repository was archived by the owner on Jan 28, 2021. It is now read-only.

sql: function, added soundex #486

Merged
merged 3 commits into from
Oct 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ We support and actively test against certain third-party clients to ensure compa
- `FLOOR(number)`: Return the largest integer value that is less than or equal to `number`.
- `ROUND(number, decimals)`: Round the `number` to `decimals` decimal places.
- `CONNECTION_ID()`: Return the current connection ID.
- `SOUNDEX(str)`: Return the soundex string of `str`.

## Example

Expand Down
1 change: 1 addition & 0 deletions SUPPORTED.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
- ROUND
- COALESCE
- CONNECTION_ID
- SOUNDEX

## Time functions
- DAY
Expand Down
9 changes: 5 additions & 4 deletions sql/expression/function/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,9 @@ var Defaults = sql.Functions{
"coalesce": sql.FunctionN(NewCoalesce),
"json_extract": sql.FunctionN(NewJSONExtract),
"connection_id": sql.Function0(NewConnectionID),
"ln": sql.Function1(LogBaseMaker(float64(math.E))),
"log2": sql.Function1(LogBaseMaker(float64(2))),
"log10": sql.Function1(LogBaseMaker(float64(10))),
"log": sql.FunctionN(NewLog),
"soundex": sql.Function1(NewSoundex),
"ln": sql.Function1(LogBaseMaker(float64(math.E))),
"log2": sql.Function1(LogBaseMaker(float64(2))),
"log10": sql.Function1(LogBaseMaker(float64(10))),
"log": sql.FunctionN(NewLog),
}
102 changes: 102 additions & 0 deletions sql/expression/function/soundex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
package function

import (
"fmt"
"strings"
"unicode"

"gopkg.in/src-d/go-mysql-server.v0/sql"
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
)

// Soundex is the expression behind the SQL SOUNDEX(str) function. It maps a
// string to its soundex string, so that inputs which sound almost alike
// should yield the same result. Unlike a standard soundex string, which is
// exactly four characters long, the value produced here may be arbitrarily
// long.
type Soundex struct {
	expression.UnaryExpression
}

// NewSoundex wraps the expression e in a Soundex node, with e as its only
// child, and returns it as a sql.Expression.
func NewSoundex(e sql.Expression) sql.Expression {
	unary := expression.UnaryExpression{Child: e}
	return &Soundex{UnaryExpression: unary}
}

// Eval implements the Expression interface.
//
// It evaluates the child expression against row and returns the soundex
// string of the result. A nil child value yields nil. Any other value is
// first converted with sql.Text.Convert; errors from the child or from the
// conversion are returned unchanged. If the converted text contains no
// letter the result is the empty string; otherwise the result is padded with
// '0' so that it is at least four runes long.
func (s *Soundex) Eval(ctx *sql.Context, row sql.Row) (interface{}, error) {
v, err := s.Child.Eval(ctx, row)
if err != nil {
return nil, err
}

// SOUNDEX(NULL) is NULL.
if v == nil {
return nil, nil
}

v, err = sql.Text.Convert(v)
if err != nil {
return nil, err
}

var b strings.Builder
// last holds the code of the most recently emitted character. It stays at
// its zero value until the first letter has been written; code only ever
// returns '0'..'6', so zero is a safe "nothing emitted yet" sentinel.
var last rune
// NOTE(review): the type assertion assumes sql.Text.Convert yields a string
// on success; it panics otherwise — confirm against sql.Text.
for _, c := range strings.ToUpper(v.(string)) {
// Ignore everything that precedes the first letter.
if last == 0 && !unicode.IsLetter(c) {
continue
}
code := s.code(c)
// The first letter is written verbatim (upper-cased), but its code is
// still remembered so that an immediately repeated code is dropped.
if last == 0 {
b.WriteRune(c)
last = code
continue
}
// Skip characters without a code ('0') and characters whose code repeats
// the previously remembered one.
if code == '0' || code == last {
continue
}
b.WriteRune(code)
last = code
}
// No letter was found at all.
if b.Len() == 0 {
return "", nil
}
// Pad with '0' up to four characters, counted in runes because the first
// character may be multi-byte. Per the review discussion below, this mirrors
// MySQL returning at least four characters when the input has a letter.
for i := len([]rune(b.String())); i < 4; i++ {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why 4 (because type rune = int32)?

Copy link
Contributor Author

@bake bake Oct 22, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even though the reference doesn't say so, MySQL returns at least four characters, if one of them is a letter:

mysql> select soundex('');
+-------------+
| soundex('') |
+-------------+
|             |
+-------------+
1 row in set (0,00 sec)

mysql> select soundex('Q');
+--------------+
| soundex('Q') |
+--------------+
| Q000         |
+--------------+
1 row in set (0,00 sec)

mysql> select soundex('Quadratically');
+--------------------------+
| soundex('Quadratically') |
+--------------------------+
| Q36324                   |
+--------------------------+
1 row in set (0,00 sec)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure how close this should follow MySQL, since it is not a replacement. Always returning a string of 0 or 4 characters would be a possibility, too.

b.WriteRune('0')
}
return b.String(), nil
}

// code returns the soundex digit, as a rune in '1'..'6', for the upper-case
// letter c. Every other rune, including lower-case letters, vowels and
// non-letters, maps to '0'.
func (s *Soundex) code(c rune) rune {
	// groups[i] lists the letters that share the digit '1'+i.
	groups := [...]string{
		"BFPV",
		"CGJKQSXZ",
		"DT",
		"L",
		"MN",
		"R",
	}
	for i, letters := range groups {
		if strings.ContainsRune(letters, c) {
			return rune('1' + i)
		}
	}
	return '0'
}

// String implements the fmt.Stringer interface. It renders the expression as
// SOUNDEX(<child>), with the child formatted using the %s verb.
func (s *Soundex) String() string {
	const layout = "SOUNDEX(%s)"
	return fmt.Sprintf(layout, s.Child)
}

// TransformUp implements the Expression interface. The child is transformed
// first; f is then applied to a new Soundex node built around the transformed
// child. An error from the child's transformation is returned as is, without
// calling f.
func (s *Soundex) TransformUp(f sql.TransformExprFunc) (sql.Expression, error) {
	transformed, err := s.Child.TransformUp(f)
	if err == nil {
		return f(NewSoundex(transformed))
	}
	return nil, err
}

// Type implements the Expression interface. It reports the type of the child
// expression.
//
// NOTE(review): Eval always produces a string (or nil), even when the child
// is not a text type; confirm whether reporting the child's type is intended.
func (s *Soundex) Type() sql.Type {
	child := s.Child
	return child.Type()
}
49 changes: 49 additions & 0 deletions sql/expression/function/soundex_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package function

import (
"testing"

"github.com/stretchr/testify/require"
"gopkg.in/src-d/go-mysql-server.v0/sql"
"gopkg.in/src-d/go-mysql-server.v0/sql/expression"
)

// TestSoundex is a table-driven test for the Soundex expression. Each case
// evaluates SOUNDEX over a single-column row and compares the result with the
// expected value; it also checks that the expression is nullable and reports
// the column's type.
func TestSoundex(t *testing.T) {
	type soundexCase struct {
		name     string
		rowType  sql.Type
		row      sql.Row
		expected interface{}
	}

	cases := []soundexCase{
		{"text nil", sql.Text, sql.NewRow(nil), nil},
		{"text empty", sql.Text, sql.NewRow(""), ""},
		{"text ignored character", sql.Text, sql.NewRow("-"), ""},
		{"text runes", sql.Text, sql.NewRow("日本語"), "日000"},
		{"text Hello ok", sql.Text, sql.NewRow("Hello"), "H400"},
		{"text Quadratically ok", sql.Text, sql.NewRow("Quadratically"), "Q36324"},
		{"text Lee ok", sql.Text, sql.NewRow("Lee"), "L000"},
		{"text McKnockitter ok", sql.Text, sql.NewRow("McKnockitter"), "M25236"},
		{"text Honeyman ok", sql.Text, sql.NewRow("Honeyman"), "H500"},
		{"text Munn ok", sql.Text, sql.NewRow("Munn"), "M000"},
		{"text Poppett ok", sql.Text, sql.NewRow("Poppett"), "P300"},
		{"text Peachman ok", sql.Text, sql.NewRow("Peachman"), "P250"},
		{"text Cochrane ok", sql.Text, sql.NewRow("Cochrane"), "C650"},
		{"text Chesley ok", sql.Text, sql.NewRow("Chesley"), "C400"},
		{"text Tachenion ok", sql.Text, sql.NewRow("Tachenion"), "T250"},
		{"text Wilcox ok", sql.Text, sql.NewRow("Wilcox"), "W420"},
		{"binary ok", sql.Blob, sql.NewRow([]byte("Harvey")), "H610"},
		{"other type", sql.Int32, sql.NewRow(int32(1)), ""},
	}

	for _, tc := range cases {
		// Column 0 of the row, declared nullable with the case's type.
		field := expression.NewGetField(0, tc.rowType, "", true)
		f := NewSoundex(field)

		t.Run(tc.name, func(t *testing.T) {
			require.Equal(t, tc.expected, eval(t, f, tc.row))
		})

		// These properties are asserted on the parent test, outside the subtest.
		require.True(t, f.IsNullable())
		require.Equal(t, tc.rowType, f.Type())
	}
}