1
0
mirror of https://shylinux.com/x/ContextOS synced 2025-04-26 17:24:05 +08:00
2018-10-28 09:26:24 +08:00

441 lines
10 KiB
Go

package lex
import (
"contexts/ctx"
"fmt"
"strconv"
"strings"
)
// Seed records one pattern that was handed to the train command, so that the
// "show seed" command can list everything the lexer has been taught.
type Seed struct {
// page is the index of the lexical set (start row in the matrix) the pattern was trained into.
page int
// hash is the word-type code attached to the pattern.
hash int
// word is the pattern text exactly as it was passed to train.
word string
}
// State is one transition cell of the matrix: what happens when a given byte
// is read in a given state row. It is comparable and is used as a map key to
// intern identical transitions.
type State struct {
// star is set by train for elements quantified with '+' or '*'; parse
// remembers such a row as its fallback state.
star bool
// next is the row to move to after this transition; 0 means there is none.
next int
// hash is the word-type code reported when a pattern ends on this transition; 0 means none.
hash int
}
// Point addresses one cell of the transition matrix as (row, byte). train
// collects the points it writes so it can revisit them in its final pass.
type Point struct {
// s is the state row index.
s int
// c is the input byte.
c byte
}
// LEX is the lexer server. It owns a transition matrix that train fills from
// seed patterns and that parse walks to split a word off the front of a line.
type LEX struct {
// seed lists every pattern passed to the train command, in order.
seed []*Seed
// page maps lexical-set names to their row index in mat.
page map[string]int
// hash maps word-type names to their numeric code.
hash map[string]int
// mat is the transition matrix: mat[row][byte] is the transition taken on
// that byte, or nil when there is none.
mat []map[byte]*State
// state interns State values so identical transitions share one pointer.
state map[State]*State
// char maps an escape letter (the byte following '\') to the bytes it stands for.
char map[byte][]byte
*ctx.Message
*ctx.Context
}
// index resolves the name h to a numeric index in the table selected by hash:
// the word-type table when hash is "nhash", the lexical-set table otherwise.
//
// A decimal h is returned as-is (for "npage" it is asserted to be below the
// current npage counter). A known name returns its stored index. An unknown
// name is registered under the value returned by lex.Capi(hash, 1) —
// presumably the incremented counter — and for "npage" the counter is then
// asserted to stay below nlang.
func (lex *LEX) index(hash string, h string) int {
	// Numeric names bypass the tables entirely.
	if n, err := strconv.Atoi(h); err == nil {
		lex.Assert(hash != "npage" || n < lex.Capi("npage"))
		return n
	}

	table := lex.page
	if hash == "nhash" {
		table = lex.hash
	}

	id, known := table[h]
	if known {
		return id
	}

	// First sighting of this name: allocate the next index for it.
	table[h] = lex.Capi(hash, 1)
	lex.Assert(hash != "npage" || lex.Capi("npage") < lex.Capi("nlang"))
	return table[h]
}
// charset expands the escape letter c into the bytes it stands for, as
// configured in lex.char. A byte with no entry stands for itself, so the
// result always holds at least the byte c in that case.
func (lex *LEX) charset(c byte) []byte {
	expanded, known := lex.char[c]
	if !known {
		return []byte{c}
	}
	return expanded
}
// train compiles one seed pattern into the transition matrix lex.mat, starting
// from state row page, and tags the transitions that complete the pattern with
// hash. It returns hash unchanged.
//
// Pattern elements handled below: "[...]" character class (a leading '^'
// negates it, "a-b" is a range, "\x" is an escape), "." for every cell, "\x"
// for an escape expanded through charset, and any other byte as a literal.
// Each element may be followed by one quantifier byte: '+', '*' or '?'.
//
// NOTE(review): the seed is not validated. An unterminated "[" or a trailing
// "\" reads past the end of seed, and a class byte that is not below the
// length of cn (taken from "ncell") indexes past cn — confirm callers only
// pass well-formed patterns.
func (lex *LEX) train(page int, hash int, seed []byte) int {
// ss: state rows the next pattern element starts from.
ss := []int{page}
// cn: scratch membership flags for the character class being read.
cn := make([]bool, lex.Capi("ncell"))
// cc: bytes matched by the current pattern element.
cc := make([]byte, 0, lex.Capi("ncell"))
// sn: per-row flags collecting the start rows for the next element.
sn := make([]bool, lex.Capi("nline"))
// points: every (row, byte) transition written while training this seed.
points := []*Point{}
for p := 0; p < len(seed); p++ {
// Step 1: expand the current pattern element into the byte list cc.
switch seed[p] {
case '[':
// set is false for a negated class "[^...]".
set := true
if p++; seed[p] == '^' {
set, p = false, p+1
}
for ; seed[p] != ']'; p++ {
if seed[p] == '\\' {
// Escape inside a class: mark every byte it expands to.
p++
for _, c := range lex.charset(seed[p]) {
cn[c] = true
}
continue
}
if seed[p+1] == '-' {
// Range "a-b"; the bounds are swapped if given in reverse.
begin, end := seed[p], seed[p+2]
if begin > end {
begin, end = end, begin
}
for c := begin; c <= end; c++ {
cn[c] = true
}
p += 2
continue
}
cn[seed[p]] = true
}
// Turn the flags into a byte list (inverted for a negated class) and
// clear them for the next class.
for c := 0; c < len(cn); c++ {
if (set && cn[c]) || (!set && !cn[c]) {
cc = append(cc, byte(c))
}
cn[c] = false
}
case '.':
// Wildcard: every cell index below len(cn).
for c := 0; c < len(cn); c++ {
cc = append(cc, byte(c))
}
case '\\':
p++
for _, c := range lex.charset(seed[p]) {
cc = append(cc, c)
}
default:
cc = append(cc, seed[p])
}
lex.Log("debug", "page: \033[31m%d %v\033[0m", len(ss), ss)
lex.Log("debug", "cell: \033[32m%d %v\033[0m", len(cc), cc)
// Step 2: look ahead for a quantifier and consume it if present.
flag := '\000'
if p+1 < len(seed) {
flag = rune(seed[p+1])
switch flag {
case '+', '*', '?':
p++
}
}
// Step 3: add a transition on every byte of cc from every start row.
for _, s := range ss {
// line is the target row allocated for this start row; 0 means none yet.
line := 0
for _, c := range cc {
// Work on a copy so interned states shared with other cells are not mutated.
state := &State{}
if lex.mat[s][c] != nil {
*state = *lex.mat[s][c]
} else {
// A previously empty cell — presumably bumps the "nnode" counter.
lex.Capi("nnode", 1)
}
lex.Log("debug", "GET(%d,%d): %v", s, c, state)
switch flag {
case '+':
state.star = true
case '*':
// The element is optional: the start row stays a start row for the next element.
state.star = true
sn[s] = true
case '?':
// The element is optional: the start row stays a start row for the next element.
sn[s] = true
}
if state.next == 0 {
// No target row yet. With "compact" on, one new row is shared by all
// bytes of this start row; otherwise each byte gets its own row.
if line == 0 || !lex.Confs("compact") {
lex.mat = append(lex.mat, make(map[byte]*State))
line = lex.Capi("nline", 1) - 1
sn = append(sn, false)
}
state.next = line
}
sn[state.next] = true
lex.mat[s][c] = state
points = append(points, &Point{s, c})
lex.Log("debug", "SET(%d,%d): %v(%s,%s)", s, c, state, lex.Cap("nnode"), lex.Cap("nreal"))
}
}
// Step 4: the flagged rows become the start rows of the next element;
// cc and sn are reset for reuse.
cc, ss = cc[:0], ss[:0]
for s, b := range sn {
if sn[s] = false; b {
ss = append(ss, s)
}
}
}
// After the last element ss holds the end rows of the pattern. Rows below
// "nlang" or outside the matrix are skipped; an end row with no outgoing
// transitions is dropped by truncating the matrix at it.
for _, s := range ss {
if s < lex.Capi("nlang") || s >= len(lex.mat) {
continue
}
if len(lex.mat[s]) == 0 {
// lex.Capi("nline", 0, s) presumably resets the "nline" counter to s — TODO confirm.
lex.Log("debug", "DEL: %d-%d", lex.Capi("nline")-1, lex.Capi("nline", 0, s))
lex.mat = lex.mat[:s]
}
}
// Final pass over every transition written above, once per end row.
for _, s := range ss {
for _, p := range points {
state := &State{}
*state = *lex.mat[p.s][p.c]
if state.next == s {
// This transition completes the pattern: tag it with hash, and clear
// next when its target row was truncated away above.
lex.Log("debug", "GET(%d, %d): %v", p.s, p.c, state)
if state.hash = hash; state.next >= len(lex.mat) {
state.next = 0
}
lex.mat[p.s][p.c] = state
lex.Log("debug", "SET(%d, %d): %v", p.s, p.c, state)
}
// Intern the state: identical State values share a single pointer.
if x, ok := lex.state[*state]; !ok {
lex.state[*state] = lex.mat[p.s][p.c]
lex.Capi("nreal", 1)
} else {
lex.mat[p.s][p.c] = x
}
}
}
return hash
}
// parse walks line through the transition matrix starting at row page and
// splits one word off its front.
//
// It returns the type code of the last transition taken (hash), the
// unconsumed remainder of line (rest) and the bytes accepted (word). When the
// walk stops before the end of line with hash still 0, nothing is consumed:
// rest is the whole line and word is empty. The m parameter is not used.
func (lex *LEX) parse(m *ctx.Message, page int, line []byte) (hash int, rest []byte, word []byte) {
pos := 0
// s is the current row; star is the fallback row (0 when there is none).
for star, s := 0, page; s != 0 && pos < len(line); pos++ {
c := line[pos]
if c == '\\' && pos < len(line)-1 { // skip the escape: use the first byte the escaped letter expands to
pos++
c = lex.charset(line[pos])[0]
}
if c > 127 { // pass non-ASCII bytes (e.g. Chinese text) through without consulting the matrix
word = append(word, c)
continue
}
state := lex.mat[s][c]
lex.Log("debug", "(%d,%d): %v", s, c, state)
if state == nil {
// No transition: retry the same byte from the fallback row. pos-1 cancels
// the loop's pos++; if star is 0 the loop condition ends the walk instead.
s, star, pos = star, 0, pos-1
continue
}
word = append(word, c)
if state.star {
// A starred transition makes the current row the new fallback.
star = s
} else if x, ok := lex.mat[star][c]; !ok || !x.star {
// Drop the fallback unless it has a starred transition on this byte.
star = 0
}
// Advance; when the transition has no next row, resume from the fallback.
if s, hash = state.next, state.hash; s == 0 {
s, star = star, 0
}
}
if pos == len(line) {
// The whole line was consumed; the result is returned as it stands.
// hash, pos, word = -1, 0, word[:0]
} else if hash == 0 {
// Stopped early without reaching a typed transition: consume nothing.
pos, word = 0, word[:0]
}
rest = line[pos:]
return
}
// Spawn creates the lexer server for a new child context c. The message is
// recorded on the receiver, c is given fresh, empty cache and config tables,
// and a new LEX bound to c is returned. The arg values are not used.
func (lex *LEX) Spawn(m *ctx.Message, c *ctx.Context, arg ...string) ctx.Server {
	lex.Message = m

	// The child starts with empty tables of its own.
	c.Caches = make(map[string]*ctx.Cache)
	c.Configs = make(map[string]*ctx.Config)

	return &LEX{Context: c}
}
// Begin initialises the lexer: it installs the default caches and the
// "compact" config, optionally overrides "nlang" and "nline" with arg[0]
// (which must parse as an integer), and allocates the name tables, the
// transition matrix with "nlang" rows, the state-interning map and the
// escape-letter table. It returns the receiver.
func (lex *LEX) Begin(m *ctx.Message, arg ...string) ctx.Server {
	lex.Message = m

	// Default limits and counters.
	defaults := map[string]*ctx.Cache{
		"ncell": {Name: "字符上限", Value: "128", Help: "字符集合的最大数量"},
		"nlang": {Name: "词法上限", Value: "64", Help: "词法集合的最大数量"},
		"nseed": {Name: "种子数量", Value: "0", Help: "词法模板的数量"},
		"npage": {Name: "集合数量", Value: "0", Help: "词法集合的数量"},
		"nhash": {Name: "类型数量", Value: "0", Help: "单词类型的数量"},
		"nline": {Name: "状态数量", Value: "64", Help: "状态机状态的数量"},
		"nnode": {Name: "节点数量", Value: "0", Help: "状态机连接的逻辑数量"},
		"nreal": {Name: "实点数量", Value: "0", Help: "状态机连接的存储数量"},
	}
	for key, cache := range defaults {
		lex.Caches[key] = cache
	}
	lex.Configs["compact"] = &ctx.Config{Name: "紧凑模式", Value: "true", Help: "词法状态的共用"}

	// An optional first argument replaces both the set limit and the row count.
	if len(arg) > 0 {
		_, err := strconv.Atoi(arg[0])
		if lex.Assert(err) {
			lex.Cap("nlang", arg[0])
			lex.Cap("nline", arg[0])
		}
	}

	// Index 0 is reserved for the name "nil" in both tables.
	lex.page = map[string]int{"nil": 0}
	lex.hash = map[string]int{"nil": 0}
	lex.mat = make([]map[byte]*State, lex.Capi("nlang"))
	lex.state = make(map[State]*State)

	// Escape letters and the bytes they expand to.
	lex.char = map[byte][]byte{
		't': []byte("\t"),
		'n': []byte("\n"),
		'b': []byte("\t "),
		's': []byte("\t \n"),
		'd': []byte("0123456789"),
		'x': []byte("0123456789abcdefABCDEF"),
	}
	return lex
}
// Start records the message on the lexer and returns false. The arg values
// are not used.
// NOTE(review): what the false result means to the ctx framework is not
// visible in this file — confirm against the ctx.Server contract.
func (lex *LEX) Start(m *ctx.Message, arg ...string) bool {
lex.Message = m
return false
}
// Close always returns true. The switch distinguishes whether this context is
// the target or the source of the message, but neither case does anything yet.
func (lex *LEX) Close(m *ctx.Message, arg ...string) bool {
switch lex.Context {
case m.Target():
// Nothing to do when this context is the message target.
case m.Source():
// Nothing to do when this context is the message source.
}
return true
}
// Index is the root context of the lex module. It declares the "nmat" cache
// and the commands spawn, train, parse and show. Every command first asserts
// that the message target's server is a *LEX.
//
// NOTE(review): train, parse and show read arg[0] without checking len(arg),
// so invoking them with no arguments indexes an empty slice.
var Index = &ctx.Context{Name: "lex", Help: "词法中心",
Caches: map[string]*ctx.Cache{
"nmat": &ctx.Cache{Name: "nmat", Value: "0", Help: "nmat"},
},
Configs: map[string]*ctx.Config{},
Commands: map[string]*ctx.Command{
// spawn: start a new child named "matrix<N>", where N comes from
// m.Capi("nmat", 1) (presumably the incremented "nmat" counter).
"spawn": &ctx.Command{Name: "spawn", Help: "添加词法规则", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) {
if _, ok := m.Target().Server.(*LEX); m.Assert(ok) {
m.Start(fmt.Sprintf("matrix%d", m.Capi("nmat", 1)), "matrix")
}
}},
// train: arg[0] is the seed pattern, optional arg[1] the word-type name and
// optional arg[2] the lexical-set name; both default to index 1.
// NOTE(review): the usage string below is missing its final ']'.
"train": &ctx.Command{Name: "train seed [hash [page]", Help: "添加词法规则", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) {
if lex, ok := m.Target().Server.(*LEX); m.Assert(ok) {
page, hash := 1, 1
if len(arg) > 2 {
page = lex.index("npage", arg[2])
m.Assert(page < m.Capi("nlang"), "词法集合过多")
}
if len(arg) > 1 {
hash = lex.index("nhash", arg[1])
}
// The start row of a lexical set is created on first use.
if lex.mat[page] == nil {
lex.mat[page] = map[byte]*State{}
}
m.Result(0, lex.train(page, hash, []byte(arg[0])))
// Remember the seed for "show seed" and publish the seed/page/hash counters under "stream".
lex.seed = append(lex.seed, &Seed{page, hash, arg[0]})
lex.Cap("stream", fmt.Sprintf("%d,%s,%s", lex.Capi("nseed", 1), lex.Cap("npage"), lex.Cap("nhash")))
}
}},
// parse: arg[0] is the line, optional arg[1] the lexical-set name (default
// index 1). The result carries the type code, the rest of the line and the word.
"parse": &ctx.Command{Name: "parse line [page]", Help: "解析单词", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) {
if lex, ok := m.Target().Server.(*LEX); m.Assert(ok) {
page := 1
if len(arg) > 1 {
page = lex.index("npage", arg[1])
}
hash, rest, word := lex.parse(m, page, []byte(arg[0]))
m.Result(0, hash, string(rest), string(word))
}
}},
// show: arg[0] selects what to tabulate — "seed", "page", "hash" or "mat".
"show": &ctx.Command{Name: "show info", Help: "查看信息", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) {
if lex, ok := m.Target().Server.(*LEX); m.Assert(ok) {
switch arg[0] {
case "seed":
// One row per trained seed; newlines and tabs in the pattern are shown escaped.
for _, v := range lex.seed {
m.Add("append", "page", fmt.Sprintf("%d", v.page))
m.Add("append", "hash", fmt.Sprintf("%d", v.hash))
m.Add("append", "word", fmt.Sprintf("%s", strings.Replace(strings.Replace(v.word, "\n", "\\n", -1), "\t", "\\t", -1)))
}
m.Table()
case "page":
for k, v := range lex.page {
m.Add("append", "page", k)
m.Add("append", "code", fmt.Sprintf("%d", v))
}
m.Sort("code", "int").Table()
case "hash":
for k, v := range lex.hash {
m.Add("append", "hash", k)
m.Add("append", "code", fmt.Sprintf("%d", v))
}
m.Table()
case "mat":
// One table row per matrix row and one column per byte below "ncell";
// a cell is "star,next,hash" or empty when there is no transition.
for _, v := range lex.mat {
for j := byte(0); j < byte(m.Capi("ncell")); j++ {
s := v[j]
if s == nil {
m.Add("append", fmt.Sprintf("%c", j), "")
} else {
// m.Add("append", fmt.Sprintf("%c", j), fmt.Sprintf("(%t,%d,%d)", s.star, s.next, s.hash))
star := 0
if s.star {
star = 1
}
m.Add("append", fmt.Sprintf("%c", j), fmt.Sprintf("%d,%d,%d", star, s.next, s.hash))
}
}
}
// Merge columns whose contents are identical in every row: column i is
// renamed to the concatenation of both names and column j is removed.
ncol := len(m.Meta["append"])
nrow := len(m.Meta[m.Meta["append"][0]])
for i := 0; i < ncol-1; i++ {
for j := i + 1; j < ncol; j++ {
same := true
for n := 0; n < nrow; n++ {
if m.Meta[m.Meta["append"][i]][n] != m.Meta[m.Meta["append"][j]][n] {
same = false
break
}
}
if same {
key = m.Meta["append"][i] + m.Meta["append"][j]
m.Meta[key] = m.Meta[m.Meta["append"][i]]
m.Meta["append"][i] = key
// Close the gap left by column j, then re-test the column that moved into slot j.
for k := j; k < ncol-1; k++ {
m.Meta["append"][k] = m.Meta["append"][k+1]
}
ncol--
j--
}
}
}
m.Meta["append"] = m.Meta["append"][:ncol]
m.Table()
}
}
}},
},
}
func init() {
lex := &LEX{}
lex.Context = Index
ctx.Index.Register(Index, lex)
}