// Forked from x/icebergs (540 lines, 13 KiB, Go).
package lex
|
|
|
|
import (
|
|
"bytes"
|
|
"io"
|
|
"strconv"
|
|
"strings"
|
|
|
|
ice "shylinux.com/x/icebergs"
|
|
"shylinux.com/x/icebergs/base/mdb"
|
|
kit "shylinux.com/x/toolkits"
|
|
)
|
|
|
|
// Stream is a forward-only byte cursor over an io.Reader. Bytes are pulled
// from the reader on demand and kept in b, so P may be moved backwards to
// re-scan input that has already been read.
type Stream struct {
	r io.Reader // underlying source of bytes
	b []byte    // every byte read from r so far
	P int       // index into b of the current byte
}

// NewStream returns a Stream positioned at the first byte of r.
func NewStream(r io.Reader) *Stream {
	return &Stream{r: r}
}

// Scan reports whether a byte is available at the current position. When the
// buffered data is exhausted it performs one Read of up to 1024 bytes on the
// underlying reader.
func (s *Stream) Scan() bool {
	if s.P < len(s.b) {
		return true
	}

	buf := make([]byte, 1024)
	// An io.Reader may return n > 0 together with a non-nil error (typically
	// io.EOF on the final chunk); those bytes are valid and must be kept.
	// The error itself is not reported: a failed read simply shows up as
	// Scan returning false.
	if n, _ := s.r.Read(buf); n > 0 {
		s.b = append(s.b, buf[:n]...)
	}
	return s.P < len(s.b)
}

// Next advances the cursor by one byte without checking for available input.
func (s *Stream) Next() {
	s.P++
}

// Char returns the byte at the current position, or 0 when no more input is
// available.
func (s *Stream) Char() byte {
	if s.Scan() {
		return s.b[s.P]
	}
	return 0
}
|
|
|
|
type (
	// Point names one cell of the transition table: row s, column c.
	// Train records a Point for every cell it creates or updates.
	Point struct {
		s int  // row (state) index into Matrix.mat
		c byte // input byte selecting the column
	}

	// State is the content of one transition-table cell.
	State struct {
		star bool // set by Train for cells created under a '+' or '*' quantifier
		next int  // index of the row to continue in; 0 means there is none
		hash int  // token id, assigned by Train to cells whose next is 0
	}

	// Matrix is the lexer's transition table together with the name tables
	// used to address its pages (start rows) and hashes (token ids).
	Matrix struct {
		nlang, ncell int // number of rows reserved at creation / size of the character-class table

		page map[string]int // page name -> row index
		hand map[int]string // row index -> page name
		hash map[string]int // token name -> token id
		word map[int]string // token id -> token name

		trans map[byte][]byte      // escape letter -> bytes it stands for
		mat   []map[byte]*State // rows of the table, each keyed by input byte
	}
)
|
|
|
|
func NewMatrix(m *ice.Message, nlang, ncell int) *Matrix {
|
|
mat := &Matrix{nlang: nlang, ncell: ncell}
|
|
mat.page = map[string]int{}
|
|
mat.hand = map[int]string{}
|
|
mat.hash = map[string]int{}
|
|
mat.word = map[int]string{}
|
|
|
|
mat.trans = map[byte][]byte{}
|
|
for k, v := range map[byte]string{
|
|
't': "\t", 'n': "\n", 'b': "\t ", 's': "\t \n",
|
|
'd': "0123456789", 'x': "0123456789ABCDEFabcdef",
|
|
} {
|
|
mat.trans[k] = []byte(v)
|
|
}
|
|
|
|
mat.mat = make([]map[byte]*State, nlang)
|
|
return mat
|
|
}
|
|
func (mat *Matrix) char(c byte) []byte {
|
|
if cs, ok := mat.trans[c]; ok {
|
|
return cs
|
|
}
|
|
return []byte{c}
|
|
}
|
|
func (mat *Matrix) index(m *ice.Message, hash string, h string) int {
|
|
which, names := mat.hash, mat.word
|
|
if hash == NPAGE {
|
|
which, names = mat.page, mat.hand
|
|
}
|
|
|
|
if x, e := strconv.Atoi(h); e == nil {
|
|
if hash == NPAGE {
|
|
m.Assert(x <= len(mat.page))
|
|
} else {
|
|
mat.hash[h] = x
|
|
}
|
|
return x
|
|
}
|
|
|
|
if x, ok := which[h]; ok {
|
|
return x
|
|
}
|
|
|
|
m.Assert(hash != NPAGE || len(which)+1 < mat.nlang)
|
|
which[h], names[len(which)+1] = len(which)+1, h
|
|
return which[h]
|
|
}
|
|
func (mat *Matrix) Train(m *ice.Message, npage, nhash string, seed string) int {
|
|
// m.Debug("%s %s page: %v hash: %v seed: %v", TRAIN, LEX, npage, nhash, seed)
|
|
|
|
page := mat.index(m, NPAGE, npage)
|
|
hash := mat.index(m, NHASH, nhash)
|
|
if mat.mat[page] == nil {
|
|
mat.mat[page] = map[byte]*State{}
|
|
}
|
|
|
|
ss := []int{page}
|
|
cn := make([]bool, mat.ncell)
|
|
cc := make([]byte, 0, mat.ncell)
|
|
sn := make([]bool, len(mat.mat))
|
|
|
|
points := []*Point{}
|
|
for i := 0; i < len(seed); i++ {
|
|
switch seed[i] { // 字符集
|
|
case '[':
|
|
set := true
|
|
if i++; seed[i] == '^' { // 补集
|
|
set, i = false, i+1
|
|
}
|
|
|
|
for ; seed[i] != ']'; i++ {
|
|
if seed[i] == '\\' { // 转义
|
|
i++
|
|
for _, c := range mat.char(seed[i]) {
|
|
cn[c] = true
|
|
}
|
|
continue
|
|
}
|
|
|
|
if seed[i+1] == '-' { // 区间
|
|
begin, end := seed[i], seed[i+2]
|
|
if begin > end {
|
|
begin, end = end, begin
|
|
}
|
|
for c := begin; c <= end; c++ {
|
|
cn[c] = true
|
|
}
|
|
i += 2
|
|
continue
|
|
}
|
|
|
|
cn[seed[i]] = true // 单个
|
|
}
|
|
|
|
for c := 1; c < len(cn); c++ { // 序列
|
|
if (set && cn[c]) || (!set && !cn[c]) {
|
|
cc = append(cc, byte(c))
|
|
}
|
|
cn[c] = false
|
|
}
|
|
|
|
case '.':
|
|
for c := 1; c < len(cn); c++ { // 全集
|
|
cc = append(cc, byte(c))
|
|
}
|
|
|
|
case '\\':
|
|
i++
|
|
for _, c := range mat.char(seed[i]) { // 转义
|
|
cc = append(cc, c)
|
|
}
|
|
default:
|
|
cc = append(cc, seed[i]) // 普通字符
|
|
}
|
|
|
|
// m.Debug("page: \033[31m%d %v\033[0m", len(ss), ss)
|
|
// m.Debug("cell: \033[32m%d %v\033[0m", len(cc), cc)
|
|
|
|
flag := '\000'
|
|
if i+1 < len(seed) { // 次数
|
|
switch flag = rune(seed[i+1]); flag {
|
|
case '?', '+', '*':
|
|
i++
|
|
}
|
|
}
|
|
|
|
add := func(s int, c byte, cb func(*State)) { // 添加节点
|
|
state := mat.mat[s][c]
|
|
if state == nil {
|
|
state = &State{}
|
|
}
|
|
// m.Debug("GET(%d,%d): %#v", s, c, state)
|
|
|
|
if cb(state); state.next == 0 {
|
|
sn = append(sn, true)
|
|
state.next = len(mat.mat)
|
|
mat.mat = append(mat.mat, make(map[byte]*State))
|
|
} else {
|
|
sn[state.next] = true
|
|
}
|
|
|
|
mat.mat[s][c] = state
|
|
points = append(points, &Point{s, c})
|
|
// m.Debug("SET(%d,%d): %#v", s, c, state)
|
|
}
|
|
|
|
for _, s := range ss {
|
|
for _, c := range cc {
|
|
add(s, c, func(state *State) {
|
|
switch flag { // 次数
|
|
case '+':
|
|
state.star = true
|
|
sn[s] = true
|
|
break
|
|
|
|
sn = append(sn, true)
|
|
state.next = len(mat.mat)
|
|
mat.mat = append(mat.mat, make(map[byte]*State))
|
|
for _, c := range cc {
|
|
add(state.next, c, func(state *State) { state.star = true })
|
|
}
|
|
case '*':
|
|
state.star = true
|
|
sn[s] = true
|
|
case '?':
|
|
sn[s] = true
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
cc, ss = cc[:0], ss[:0]
|
|
for s, b := range sn { // 迭代
|
|
if sn[s] = false; b && s > 0 {
|
|
ss = append(ss, s)
|
|
}
|
|
}
|
|
}
|
|
|
|
trans := map[int]int{page: page}
|
|
for i := mat.nlang; i < len(mat.mat); i++ { // 去空
|
|
if len(mat.mat[i]) > 0 {
|
|
trans[i] = i
|
|
continue
|
|
}
|
|
|
|
for j := i + 1; j < len(mat.mat); j++ {
|
|
if len(mat.mat[j]) > 0 {
|
|
mat.mat[i] = mat.mat[j]
|
|
mat.mat[j] = nil
|
|
trans[j] = i
|
|
break
|
|
}
|
|
}
|
|
if len(mat.mat[i]) == 0 {
|
|
mat.mat = mat.mat[:i]
|
|
break
|
|
}
|
|
}
|
|
// m.Debug("DEL: %v", trans)
|
|
|
|
for _, p := range points { // 去尾
|
|
p.s = trans[p.s]
|
|
state := mat.mat[p.s][p.c]
|
|
if state.next = trans[state.next]; state.next == 0 {
|
|
// m.Debug("GET(%d, %d): %#v", p.s, p.c, state)
|
|
state.hash = hash
|
|
// m.Debug("SET(%d, %d): %#v", p.s, p.c, state)
|
|
}
|
|
}
|
|
|
|
// m.Debug("%s %s npage: %v nhash: %v", TRAIN, LEX, len(mat.page), len(mat.hash))
|
|
return hash
|
|
}
|
|
func (mat *Matrix) Parse(m *ice.Message, npage string, stream *Stream) (hash int, word []byte) {
|
|
// m.Debug("%s %s page: %v pos: %v", LEX, PARSE, npage, stream.P)
|
|
page := mat.index(m, NPAGE, npage)
|
|
|
|
pos := stream.P
|
|
for star, s := 0, page; stream.Scan() && s != 0; stream.Next() {
|
|
c := stream.Char()
|
|
if c == '\\' { //跳过转义
|
|
if stream.Next(); !stream.Scan() {
|
|
break
|
|
}
|
|
c = mat.char(stream.Char())[0]
|
|
}
|
|
if c > 127 { //跳过中文
|
|
word = append(word, c)
|
|
continue
|
|
}
|
|
|
|
state := mat.mat[s][c]
|
|
if state == nil {
|
|
s, star, stream.P = star, 0, stream.P-1
|
|
continue
|
|
}
|
|
// m.Debug("GET (%d,%d): %v", s, c, state)
|
|
|
|
if word = append(word, c); state.star {
|
|
star = s
|
|
} else if x, ok := mat.mat[star][c]; !ok || !x.star {
|
|
star = 0
|
|
}
|
|
|
|
if s, hash = state.next, state.hash; s == 0 {
|
|
s, star = star, 0
|
|
}
|
|
}
|
|
|
|
if hash == 0 {
|
|
stream.P, word = pos, word[:0]
|
|
}
|
|
|
|
// m.Debug("%s %s hash: %v word: %v", LEX, PARSE, mat.word[hash], string(word))
|
|
return
|
|
}
|
|
func (mat *Matrix) show(m *ice.Message) {
|
|
showCol := map[int]bool{} // 有效列
|
|
for j := 1; j < mat.ncell; j++ {
|
|
for i := 1; i < len(mat.mat); i++ {
|
|
if node := mat.mat[i][byte(j)]; node != nil {
|
|
showCol[j] = true
|
|
}
|
|
}
|
|
}
|
|
|
|
for i := 1; i < len(mat.mat); i++ {
|
|
if len(mat.mat[i]) == 0 { // 无效行
|
|
continue
|
|
}
|
|
|
|
m.Push("00", kit.Select(kit.Format("%02d", i), mat.hand[i]))
|
|
for j := 1; j < mat.ncell; j++ {
|
|
if !showCol[j] { // 无效列
|
|
continue
|
|
}
|
|
key, value := kit.Format("%c", j), []string{}
|
|
if node := mat.mat[i][byte(j)]; node != nil {
|
|
if node.star {
|
|
value = append(value, "*")
|
|
}
|
|
if node.next > 0 {
|
|
// value = append(value, cli.ColorGreen(m, node.next))
|
|
}
|
|
if node.hash > 0 {
|
|
// value = append(value, cli.ColorRed(m, kit.Select(kit.Format("%d", node.hash), mat.word[node.hash])))
|
|
}
|
|
}
|
|
m.Push(key, strings.Join(value, ","))
|
|
}
|
|
}
|
|
|
|
m.Status(NLANG, mat.nlang, NCELL, mat.ncell, NPAGE, len(mat.page), NHASH, len(mat.hash))
|
|
}
|
|
func _lex_load(m *ice.Message) {
|
|
mdb.Richs(m, m.PrefixKey(), "", mdb.FOREACH, func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
|
|
mat := NewMatrix(m, kit.Int(kit.Select("32", value[NLANG])), kit.Int(kit.Select("256", value[NCELL])))
|
|
mdb.Grows(m, m.PrefixKey(), kit.Keys(mdb.HASH, key), "", "", func(index int, value ice.Map) {
|
|
mat.Train(m, kit.Format(value[NPAGE]), kit.Format(value[NHASH]), kit.Format(value[mdb.TEXT]))
|
|
})
|
|
value[MATRIX] = mat
|
|
})
|
|
}
|
|
|
|
// Option, field and command names used by the matrix command.
const (
	// Matrix dimensions.
	NLANG = "nlang"
	NCELL = "ncell"

	// Page (start row) and hash (token id) selectors.
	NPAGE = "npage"
	NHASH = "nhash"

	// Operation names.
	TRAIN = "train"
	PARSE = "parse"

	// Command name, also the meta key under which a *Matrix is stored.
	MATRIX = "matrix"
)
|
|
|
|
func init() {
|
|
Index.MergeCommands(ice.Commands{
|
|
MATRIX: {Name: "matrix hash npage text auto", Help: "魔方矩阵", Actions: ice.Actions{
|
|
ice.CTX_INIT: {Hand: func(m *ice.Message, arg ...string) {
|
|
// _lex_load(m.Load())
|
|
}},
|
|
mdb.CREATE: {Name: "create nlang=32 ncell=128", Help: "创建", Hand: func(m *ice.Message, arg ...string) {
|
|
mat := NewMatrix(m, kit.Int(kit.Select("32", m.Option(NLANG))), kit.Int(kit.Select("128", m.Option(NCELL))))
|
|
h := mdb.Rich(m, m.PrefixKey(), "", kit.Data(mdb.TIME, m.Time(), MATRIX, mat, NLANG, mat.nlang, NCELL, mat.ncell))
|
|
switch cb := m.OptionCB(MATRIX).(type) {
|
|
case func(string, *Matrix):
|
|
cb(h, mat)
|
|
default:
|
|
m.ErrorNotImplement(cb)
|
|
}
|
|
m.Echo(h)
|
|
}},
|
|
mdb.INSERT: {Name: "insert hash npage=num nhash=num text=123", Help: "添加", Hand: func(m *ice.Message, arg ...string) {
|
|
mdb.Richs(m, m.PrefixKey(), "", m.Option(mdb.HASH), func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
|
|
mat, _ := value[MATRIX].(*Matrix)
|
|
m.Echo("%d", mat.Train(m, m.Option(NPAGE), m.Option(NHASH), m.Option(mdb.TEXT)))
|
|
mdb.Grow(m, m.PrefixKey(), kit.Keys(mdb.HASH, key), kit.Dict(
|
|
mdb.TIME, m.Time(), NPAGE, m.Option(NPAGE), NHASH, m.Option(NHASH), mdb.TEXT, m.Option(mdb.TEXT),
|
|
))
|
|
|
|
value[NPAGE] = len(mat.page)
|
|
value[NHASH] = len(mat.hash)
|
|
})
|
|
}},
|
|
mdb.REMOVE: {Name: "create", Help: "删除", Hand: func(m *ice.Message, arg ...string) {
|
|
m.Cmdy(mdb.DELETE, m.PrefixKey(), "", mdb.HASH, mdb.HASH, m.Option(mdb.HASH))
|
|
}},
|
|
PARSE: {Name: "parse hash npage text=123", Help: "解析", Hand: func(m *ice.Message, arg ...string) {
|
|
mdb.Richs(m, m.PrefixKey(), "", m.Option(mdb.HASH), func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
mat, _ := value[MATRIX].(*Matrix)
|
|
|
|
stream := NewStream(bytes.NewBufferString(m.Option(mdb.TEXT)))
|
|
hash, word := mat.Parse(m, m.Option(NPAGE), stream)
|
|
m.Push(NHASH, kit.Select(kit.Format("%d", hash), mat.word[hash]))
|
|
m.Push("word", string(word))
|
|
m.Push("rest", string(stream.b[stream.P:]))
|
|
})
|
|
m.ProcessInner()
|
|
}},
|
|
"show": {Name: "show", Help: "矩阵", Hand: func(m *ice.Message, arg ...string) {
|
|
mdb.Richs(m, m.PrefixKey(), "", kit.Select(m.Option(mdb.HASH), arg, 0), func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
value[MATRIX].(*Matrix).show(m)
|
|
})
|
|
m.ProcessInner()
|
|
}},
|
|
"location": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessLocation("https://baidu.com")
|
|
}},
|
|
"replace": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessReplace("https://baidu.com")
|
|
}},
|
|
"history": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessHistory()
|
|
}},
|
|
"confirms": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.Option("hi", "hello")
|
|
m.ProcessConfirm("hello world")
|
|
}},
|
|
"refresh": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessRefresh("3s")
|
|
}},
|
|
"rewrite": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessRewrite("hash", "1", "npage", "2")
|
|
}},
|
|
"display": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.Push("value", 1)
|
|
m.Push("value", 2)
|
|
m.ProcessDisplay("/plugin/story/pie.js")
|
|
}},
|
|
// "field": {Hand: func(m *ice.Message, arg ...string) {
|
|
// m.ProcessCommand("cli.system", []string{"pwd"}, arg...)
|
|
// }},
|
|
"inner": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.Push("value", 1)
|
|
m.Push("value", 2)
|
|
m.Push("good", 2)
|
|
m.Push("good", 2)
|
|
m.ProcessInner()
|
|
}},
|
|
"hold": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessHold()
|
|
}},
|
|
"back": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessBack()
|
|
}},
|
|
"rich": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.Push("hi", "hello")
|
|
m.Push("he", "world")
|
|
m.Push("value", "good")
|
|
m.ProcessRich("hello world\n")
|
|
}},
|
|
"grow": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessGrow("hello world\n")
|
|
}},
|
|
"open": {Hand: func(m *ice.Message, arg ...string) {
|
|
m.ProcessOpen("https://baidu.com")
|
|
}},
|
|
}, Hand: func(m *ice.Message, arg ...string) {
|
|
m.Action(
|
|
"location", "replace", "history",
|
|
"confirms", "refresh", "rewrite", "display", "field", "inner",
|
|
"hold", "back", "rich", "grow", "open",
|
|
"openLocation", "getLocation", "getClipboardData",
|
|
)
|
|
m.Push("hi", "hello")
|
|
m.Push("he", "world")
|
|
m.Push("value", "good")
|
|
m.Echo("hello world")
|
|
m.StatusTimeCount()
|
|
m.EchoButton("clear")
|
|
m.PushButton("close")
|
|
m.EchoButton("upload")
|
|
m.EchoButton("actions")
|
|
return
|
|
if m.Action(mdb.CREATE); len(arg) == 0 { // 矩阵列表
|
|
m.Fields(len(arg), "time,hash,npage,nhash")
|
|
m.Cmdy(mdb.SELECT, m.PrefixKey(), "", mdb.HASH)
|
|
m.PushAction(mdb.INSERT, "show", mdb.REMOVE)
|
|
return
|
|
}
|
|
|
|
if m.Action(mdb.INSERT, "show"); len(arg) == 1 { // 词法列表
|
|
m.Fields(len(arg[1:]), "time,npage,nhash,text")
|
|
m.Cmdy(mdb.SELECT, m.PrefixKey(), kit.Keys(mdb.HASH, arg[0]), mdb.LIST)
|
|
m.PushAction(PARSE)
|
|
return
|
|
}
|
|
|
|
mdb.Richs(m, m.PrefixKey(), "", arg[0], func(key string, value ice.Map) {
|
|
value = kit.GetMeta(value)
|
|
mat, _ := value[MATRIX].(*Matrix)
|
|
|
|
if len(arg) == 2 { // 词法矩阵
|
|
mat.show(m)
|
|
return
|
|
}
|
|
|
|
hash, word := mat.Parse(m, arg[1], NewStream(bytes.NewBufferString(arg[2])))
|
|
m.Push(mdb.TIME, m.Time())
|
|
m.Push(mdb.HASH, mat.word[hash])
|
|
m.Push("word", string(word))
|
|
})
|
|
}},
|
|
})
|
|
}
|