forked from x/icebergs
447 lines
12 KiB
Go
447 lines
12 KiB
Go
package yac
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
ice "shylinux.com/x/icebergs"
|
|
"shylinux.com/x/icebergs/base/cli"
|
|
"shylinux.com/x/icebergs/base/lex"
|
|
"shylinux.com/x/icebergs/base/mdb"
|
|
kit "shylinux.com/x/toolkits"
|
|
)
|
|
|
|
type Point struct {
|
|
s int
|
|
c int
|
|
}
|
|
type State struct {
|
|
star int
|
|
next int
|
|
hash int
|
|
}
|
|
|
|
type Matrix struct {
|
|
nlang int
|
|
ncell int
|
|
|
|
page map[string]int
|
|
hand map[int]string
|
|
hash map[string]int
|
|
word map[int]string
|
|
|
|
mat [][]*State
|
|
|
|
lex_key string
|
|
lex *lex.Matrix
|
|
}
|
|
|
|
func NewMatrix(m *ice.Message, nlang, ncell int) *Matrix {
|
|
mat := &Matrix{nlang: nlang, ncell: ncell}
|
|
mat.page = map[string]int{}
|
|
mat.hand = map[int]string{}
|
|
mat.hash = map[string]int{}
|
|
mat.word = map[int]string{}
|
|
|
|
m.Option("matrix.cb", func(key string, lex *lex.Matrix) { mat.lex, mat.lex_key = lex, key })
|
|
key := m.Cmdx("lex.matrix", mdb.CREATE, 32)
|
|
m.Cmd("lex.matrix", mdb.INSERT, key, "space", "space", "[\t \n]+")
|
|
|
|
mat.mat = make([][]*State, nlang)
|
|
return mat
|
|
}
|
|
|
|
func (mat *Matrix) name(which map[int]string, index int) string {
|
|
if name, ok := which[index]; ok {
|
|
return name
|
|
}
|
|
return fmt.Sprintf("m%d", index)
|
|
}
|
|
func (mat *Matrix) index(m *ice.Message, hash string, h string) int {
|
|
which, names := mat.hash, mat.word
|
|
if hash == NPAGE {
|
|
which, names = mat.page, mat.hand
|
|
}
|
|
|
|
if x, e := strconv.Atoi(h); e == nil {
|
|
if hash == NPAGE {
|
|
m.Assert(x <= len(mat.page))
|
|
} else {
|
|
mat.hash[h] = x
|
|
}
|
|
return x
|
|
}
|
|
|
|
if x, ok := which[h]; ok {
|
|
return x
|
|
}
|
|
|
|
m.Assert(hash != NPAGE || len(which)+1 < mat.nlang)
|
|
which[h], names[len(which)+1] = len(which)+1, h
|
|
return which[h]
|
|
}
|
|
func (mat *Matrix) isVoid(page int) bool {
|
|
for j := 1; j < len(mat.mat[page]); j++ {
|
|
if mat.mat[page][j] != nil {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
func (mat *Matrix) train(m *ice.Message, page, hash int, word []string, level int) (int, []*Point, []*Point) {
|
|
m.Debug("%s %s\\%d page: %v hash: %v word: %v", TRAIN, strings.Repeat("#", level), level, page, hash, word)
|
|
|
|
ss := []int{page}
|
|
sn := make([]bool, len(mat.mat))
|
|
points, ends := []*Point{}, []*Point{}
|
|
|
|
for i, mul := 0, false; i < len(word); i++ {
|
|
if !mul {
|
|
if hash <= 0 && word[i] == "}" {
|
|
m.Debug("%s %s/%d word: %d point: %d end: %d", TRAIN, strings.Repeat("#", level), level, len(word), len(points), len(ends))
|
|
return i + 2, points, ends
|
|
}
|
|
ends = ends[:0]
|
|
}
|
|
|
|
for _, s := range ss {
|
|
switch word[i] {
|
|
case "opt{", "rep{":
|
|
sn[s] = true
|
|
num, point, end := mat.train(m, s, 0, word[i+1:], level+1)
|
|
points = append(points, point...)
|
|
|
|
for _, p := range end {
|
|
state := mat.mat[p.s][p.c]
|
|
if len(sn) <= state.next {
|
|
sn = append(sn, make([]bool, state.next-len(sn)+1)...)
|
|
}
|
|
sn[state.next] = true
|
|
|
|
if points = append(points, p); word[i] == "rep{" {
|
|
state.star = s
|
|
m.Debug("REP(%d, %d): %v", p.s, p.c, state)
|
|
}
|
|
}
|
|
i += num - 1
|
|
case "mul{":
|
|
mul = true
|
|
goto next
|
|
case "}":
|
|
if mul {
|
|
mul = false
|
|
goto next
|
|
}
|
|
fallthrough
|
|
default:
|
|
c, ok := mat.page[word[i]]
|
|
if !ok {
|
|
if c, _ = mat.lex.Parse(m, mat.name(mat.hand, s), lex.NewStream(bytes.NewBufferString(word[i]))); c == 0 {
|
|
// c = mat.lex.Train(m, mat.name(s), fmt.Sprintf("%d", len(mat.mat[s])+1), []byte(word[i]))
|
|
c = kit.Int(m.Cmdx("lex.matrix", mdb.INSERT, mat.lex_key, mat.name(mat.hand, s), len(mat.mat[s]), word[i]))
|
|
mat.mat[s] = append(mat.mat[s], nil)
|
|
}
|
|
}
|
|
|
|
state := mat.mat[s][c]
|
|
if state == nil {
|
|
state = &State{}
|
|
}
|
|
m.Debug("GET(%d,%d): %#v", s, c, state)
|
|
|
|
if state.next == 0 {
|
|
state.next = len(mat.mat)
|
|
mat.mat = append(mat.mat, make([]*State, mat.ncell))
|
|
sn = append(sn, true)
|
|
} else {
|
|
sn[state.next] = true
|
|
}
|
|
|
|
mat.mat[s][c] = state
|
|
ends = append(ends, &Point{s, c})
|
|
points = append(points, &Point{s, c})
|
|
m.Debug("SET(%d,%d): %#v", s, c, state)
|
|
}
|
|
}
|
|
next:
|
|
if !mul {
|
|
ss = ss[:0]
|
|
for s, b := range sn {
|
|
if sn[s] = false; b {
|
|
ss = append(ss, s)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
trans := map[int]int{page: page}
|
|
for i := mat.nlang; i < len(mat.mat); i++ { // 去空
|
|
if !mat.isVoid(i) {
|
|
trans[i] = i
|
|
continue
|
|
}
|
|
|
|
for j := i + 1; j < len(mat.mat); j++ {
|
|
if !mat.isVoid(j) {
|
|
mat.mat[i] = mat.mat[j]
|
|
mat.mat[j] = nil
|
|
trans[j] = i
|
|
break
|
|
}
|
|
}
|
|
if mat.isVoid(i) {
|
|
mat.mat = mat.mat[:i]
|
|
break
|
|
}
|
|
}
|
|
m.Debug("DEL: %v", trans)
|
|
|
|
for _, p := range points { // 去尾
|
|
p.s = trans[p.s]
|
|
state := mat.mat[p.s][p.c]
|
|
m.Debug("GET(%d, %d): %#v", p.s, p.c, state)
|
|
if state.next = trans[state.next]; state.next == 0 {
|
|
state.hash = hash
|
|
}
|
|
m.Debug("SET(%d, %d): %#v", p.s, p.c, state)
|
|
}
|
|
|
|
m.Debug("%s %s/%d word: %d point: %d end: %d", TRAIN, strings.Repeat("#", level), level, len(word), len(points), len(ends))
|
|
return len(word), points, ends
|
|
}
|
|
|
|
func (mat *Matrix) Parse(m *ice.Message, rewrite Rewrite, page int, stream *lex.Stream, level int) (hash int, word []string) {
|
|
// m.Debug("%s %s\\%d %s(%d)", PARSE, strings.Repeat("#", level), level, mat.name(mat.hand, page), page)
|
|
|
|
begin := stream.P
|
|
h, w := 0, []byte{}
|
|
for p, i := 0, page; stream.Scan() && i > 0; {
|
|
// 解析空白
|
|
h, w = mat.lex.Parse(m, "space", stream)
|
|
// 解析单词
|
|
begin := stream.P
|
|
h, w = mat.lex.Parse(m, mat.name(mat.hand, i), stream)
|
|
// 解析状态
|
|
var s *State
|
|
if h < len(mat.mat[i]) {
|
|
s = mat.mat[i][h]
|
|
}
|
|
|
|
if s != nil { // 全局语法检查
|
|
hold := stream.P
|
|
stream.P = begin
|
|
if hh, ww := mat.lex.Parse(m, "key", stream); hh == 0 || len(ww) <= len(w) {
|
|
stream.P = hold
|
|
word = append(word, string(w))
|
|
} else {
|
|
s = nil
|
|
}
|
|
}
|
|
|
|
if s == nil { // 嵌套语法递归解析
|
|
for j := 1; j < len(mat.mat[i]); j++ {
|
|
if n := mat.mat[i][j]; j < mat.nlang && n != nil {
|
|
if h, w := mat.Parse(m, rewrite, j, stream, level+1); h != 0 {
|
|
s, word = n, append(word, w...)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// m.Debug("%s %s|%d GET \033[33m%s\033[0m %#v", PARSE, strings.Repeat("#", level), level, w, s)
|
|
}
|
|
|
|
//语法切换
|
|
if s == nil {
|
|
i, p = p, 0
|
|
} else if i, p, hash = s.next, s.star, s.hash; i == 0 {
|
|
i, p = p, 0
|
|
}
|
|
}
|
|
|
|
if hash == 0 {
|
|
word, stream.P = word[:0], begin
|
|
} else {
|
|
hash, word = rewrite(m, mat.word[hash], hash, word, begin, stream)
|
|
}
|
|
|
|
// m.Debug("%s %s/%d %s(%d): %v %v", PARSE, strings.Repeat("#", level), level, mat.hand[hash], hash, word, stream.P)
|
|
return hash, word
|
|
}
|
|
func (mat *Matrix) show(m *ice.Message) {
|
|
showCol := map[int]bool{} // 有效列
|
|
for i := 1; i < len(mat.mat); i++ {
|
|
for j := 1; j < len(mat.mat[i]); j++ {
|
|
if node := mat.mat[i][j]; node != nil {
|
|
showCol[j] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
for i := 1; i < len(mat.mat); i++ {
|
|
if mat.isVoid(i) { // 无效行
|
|
continue
|
|
}
|
|
|
|
m.Push("00", kit.Select(kit.Format("%02d", i), mat.hand[i]))
|
|
for j := 1; j < len(mat.mat[i]); j++ {
|
|
if !showCol[j] { // 无效列
|
|
continue
|
|
}
|
|
key, value := kit.Format("%v", mat.name(mat.hand, j)), []string{}
|
|
if node := mat.mat[i][j]; node != nil {
|
|
if node.star > 0 {
|
|
value = append(value, cli.ColorYellow(m, mat.name(mat.hand, node.star)))
|
|
}
|
|
if node.next > 0 {
|
|
value = append(value, cli.ColorGreen(m, node.next))
|
|
}
|
|
if node.hash > 0 {
|
|
value = append(value, cli.ColorRed(m, mat.name(mat.hand, node.hash)))
|
|
}
|
|
}
|
|
m.Push(key, strings.Join(value, ","))
|
|
}
|
|
}
|
|
|
|
m.Status(NLANG, mat.nlang, NCELL, mat.ncell, NPAGE, len(mat.page), NHASH, len(mat.hash))
|
|
}
|
|
|
|
func _yac_load(m *ice.Message) {
|
|
mdb.Richs(m, m.PrefixKey(), "", mdb.FOREACH, func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
|
|
mat := NewMatrix(m, kit.Int(kit.Select("32", value[NLANG])), kit.Int(kit.Select("32", value[NCELL])))
|
|
mdb.Grows(m, m.PrefixKey(), kit.Keys(mdb.HASH, key), "", "", func(index int, value ice.Map) {
|
|
page := mat.index(m, NPAGE, kit.Format(value[NPAGE]))
|
|
hash := mat.index(m, NHASH, kit.Format(value[NHASH]))
|
|
if mat.mat[page] == nil {
|
|
mat.mat[page] = make([]*State, mat.ncell)
|
|
}
|
|
|
|
mat.train(m, page, hash, kit.Simple(value[mdb.TEXT]), 1)
|
|
})
|
|
value[MATRIX] = mat
|
|
})
|
|
}
|
|
|
|
type Rewrite func(m *ice.Message, nhash string, hash int, word []string, begin int, stream *lex.Stream) (int, []string)
|
|
|
|
const (
|
|
NLANG = "nlang"
|
|
NCELL = "ncell"
|
|
)
|
|
const (
|
|
NPAGE = "npage"
|
|
NHASH = "nhash"
|
|
)
|
|
const (
|
|
TRAIN = "train"
|
|
PARSE = "parse"
|
|
)
|
|
const MATRIX = "matrix"
|
|
|
|
func init() {
|
|
Index.Merge(&ice.Context{Configs: ice.Configs{
|
|
MATRIX: {Name: MATRIX, Help: "魔方矩阵", Value: kit.Data(mdb.SHORT, mdb.NAME)},
|
|
}, Commands: ice.Commands{
|
|
MATRIX: {Name: "matrix name npage text auto", Help: "魔方矩阵", Actions: ice.Actions{
|
|
ice.CTX_INIT: {Hand: func(m *ice.Message, arg ...string) { _yac_load(m) }},
|
|
mdb.CREATE: {Name: "create name=shy nlang=32 ncell=32", Help: "创建", Hand: func(m *ice.Message, arg ...string) {
|
|
mat := NewMatrix(m, kit.Int(kit.Select("32", m.Option(NLANG))), kit.Int(kit.Select("32", m.Option(NCELL))))
|
|
h := mdb.Rich(m, m.PrefixKey(), "", kit.Data(
|
|
mdb.TIME, m.Time(), mdb.NAME, m.Option(mdb.NAME),
|
|
MATRIX, mat, NLANG, mat.nlang, NCELL, mat.ncell,
|
|
))
|
|
switch cb := m.OptionCB(MATRIX).(type) {
|
|
case func(string, *Matrix):
|
|
cb(h, mat)
|
|
default:
|
|
m.ErrorNotImplement(cb)
|
|
}
|
|
m.Echo(h)
|
|
}},
|
|
mdb.INSERT: {Name: "insert name=shy npage=num nhash=num text=123", Help: "添加", Hand: func(m *ice.Message, arg ...string) {
|
|
mdb.Richs(m, m.PrefixKey(), "", m.Option(mdb.NAME), func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
|
|
mat, _ := value[MATRIX].(*Matrix)
|
|
|
|
page := mat.index(m, NPAGE, m.Option(NPAGE))
|
|
hash := mat.index(m, NHASH, m.Option(NHASH))
|
|
if len(mat.mat[page]) == 0 {
|
|
mat.mat[page] = make([]*State, mat.ncell)
|
|
}
|
|
|
|
m.Option(mdb.TEXT, strings.ReplaceAll(m.Option(mdb.TEXT), "\\", "\\\\"))
|
|
text := kit.Split(m.Option(mdb.TEXT), " ", " ", " ")
|
|
mat.train(m, page, hash, text, 1)
|
|
mdb.Grow(m, m.PrefixKey(), kit.Keys(mdb.HASH, key), kit.Dict(
|
|
mdb.TIME, m.Time(), NPAGE, m.Option(NPAGE), NHASH, m.Option(NHASH), mdb.TEXT, text,
|
|
))
|
|
|
|
value[NPAGE] = len(mat.page)
|
|
value[NHASH] = len(mat.hash)
|
|
})
|
|
}},
|
|
mdb.REMOVE: {Name: "create", Help: "删除", Hand: func(m *ice.Message, arg ...string) {
|
|
m.Cmdy(mdb.DELETE, m.PrefixKey(), "", mdb.HASH, mdb.NAME, m.Option(mdb.NAME))
|
|
}},
|
|
PARSE: {Name: "parse name npage text=123", Help: "解析", Hand: func(m *ice.Message, arg ...string) {
|
|
mdb.Richs(m, m.PrefixKey(), "", m.Option(mdb.NAME), func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
mat, _ := value[MATRIX].(*Matrix)
|
|
|
|
for stream := lex.NewStream(bytes.NewBufferString(m.Option(mdb.TEXT))); stream.Scan(); {
|
|
hash, _ := mat.Parse(m, func(m *ice.Message, nhash string, hash int, word []string, begin int, stream *lex.Stream) (int, []string) {
|
|
switch cb := m.OptionCB(MATRIX).(type) {
|
|
case func(string, int, []string, int, *lex.Stream) (int, []string):
|
|
return cb(nhash, hash, word, begin, stream)
|
|
default:
|
|
m.ErrorNotImplement(cb)
|
|
}
|
|
return hash, word
|
|
}, mat.index(m, NPAGE, m.Option(NPAGE)), stream, 1)
|
|
|
|
if hash == 0 {
|
|
break
|
|
}
|
|
}
|
|
})
|
|
m.ProcessInner()
|
|
}},
|
|
"show": {Name: "show", Help: "矩阵", Hand: func(m *ice.Message, arg ...string) {
|
|
mdb.Richs(m, m.PrefixKey(), "", kit.Select(m.Option(mdb.NAME), arg, 0), func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
value[MATRIX].(*Matrix).show(m)
|
|
})
|
|
m.ProcessInner()
|
|
}},
|
|
}, Hand: func(m *ice.Message, arg ...string) {
|
|
m.Option(mdb.CACHE_LIMIT, -1)
|
|
if m.Action(mdb.CREATE); len(arg) == 0 { // 矩阵列表
|
|
m.Fields(len(arg), "time,name,npage,nhash")
|
|
m.Cmdy(mdb.SELECT, m.PrefixKey(), "", mdb.HASH)
|
|
m.PushAction(mdb.INSERT, "show", mdb.REMOVE)
|
|
return
|
|
}
|
|
|
|
if m.Action(mdb.INSERT, "show"); len(arg) == 1 { // 词法列表
|
|
m.Fields(len(arg[1:]), "time,npage,nhash,text")
|
|
m.Cmdy(mdb.SELECT, m.PrefixKey(), kit.Keys(mdb.HASH, kit.Hashs(arg[0])), mdb.LIST)
|
|
m.PushAction(PARSE)
|
|
return
|
|
}
|
|
|
|
// 词法矩阵
|
|
mdb.Richs(m, m.PrefixKey(), "", arg[0], func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
value[MATRIX].(*Matrix).show(m)
|
|
})
|
|
}},
|
|
},
|
|
})
|
|
}
|