1
0
forked from x/ContextOS

mac syn 0.4.0

This commit is contained in:
shaoying 2017-12-08 08:50:42 +08:00
parent f43435b7ef
commit 11a1d5e6ea
2 changed files with 280 additions and 292 deletions

View File

@ -3,7 +3,6 @@ package lex // {{{
import ( // {{{ import ( // {{{
"context" "context"
"fmt" "fmt"
"strconv"
) )
// }}} // }}}
@ -21,22 +20,47 @@ type Seed struct {
} }
type LEX struct { type LEX struct {
page int
cell int
seed []*Seed seed []*Seed
page map[string]int
hash map[string]int
state map[State]*State
mat []map[byte]*State
mat []map[byte]*State
*ctx.Message *ctx.Message
*ctx.Context *ctx.Context
} }
func (lex *LEX) train(page int, hash int, seed []byte) { // {{{ func (lex *LEX) train(seed []byte, arg ...string) { // {{{
cell, page, hash := 128, 1, 1 // {{{
if len(arg) > 0 {
if x, ok := lex.hash[arg[0]]; ok {
hash = x
} else {
hash = lex.Capi("nhash", 1)
lex.hash[arg[0]] = hash
}
}
if len(arg) > 1 {
if x, ok := lex.page[arg[1]]; ok {
page = x
} else {
lex.mat = append(lex.mat, make(map[byte]*State))
page = lex.Capi("nline", 1)
lex.page[arg[1]] = page
lex.Capi("npage", 1)
}
}
lex.Log("debug", "%s: %d %d %v", lex.Context.Name, page, hash, seed)
lex.seed = append(lex.seed, &Seed{page, hash, string(seed)})
lex.Capi("nseed", 1)
// }}}
s := []int{page} s := []int{page}
c := make([]byte, 0, lex.cell) c := make([]byte, 0, cell)
cn := make([]bool, lex.cell)
sn := make([]bool, len(lex.mat)) sn := make([]bool, len(lex.mat))
cn := make([]bool, cell)
ends := make([]*State, 0, len(seed)) ends := make([]*State, 0, len(seed))
for p := 0; p < len(seed); p++ { for p := 0; p < len(seed); p++ {
@ -78,7 +102,7 @@ func (lex *LEX) train(page int, hash int, seed []byte) { // {{{
} }
case '.': case '.':
for i := 0; i < lex.cell; i++ { for i := 0; i < cell; i++ {
c = append(c, byte(i)) c = append(c, byte(i))
} }
case '\\': case '\\':
@ -88,8 +112,8 @@ func (lex *LEX) train(page int, hash int, seed []byte) { // {{{
c = append(c, seed[p]) c = append(c, seed[p])
} }
lex.Log("debug", "page: %v", s) lex.Log("debug", "page: \033[31m%v\033[0m", s)
lex.Log("debug", "cell: %v", c) lex.Log("debug", "cell: \033[32m%v\033[0m", c)
flag := '\000' flag := '\000'
if p+1 < len(seed) { if p+1 < len(seed) {
@ -101,87 +125,60 @@ func (lex *LEX) train(page int, hash int, seed []byte) { // {{{
} }
for i := 0; i < len(s); i++ { for i := 0; i < len(s); i++ {
line := 0 for line, j := 0, byte(0); int(j) < len(c); j++ {
for j := byte(0); int(j) < len(c); j++ { state := lex.mat[s[i]][c[j]]
si := s[i] if state == nil {
state = new(State)
lex.Capi("nnode", 1)
}
lex.Log("debug", "GET(%d,%d): %v", s[i], c[j], state)
begin, end := j, j+1 switch flag {
case '+':
if false && flag == '+' { state.star = true
if lex.mat[si][c[j]] == nil { case '*':
lex.mat[si][c[j]] = new(State) state.star = true
fallthrough
case '?':
if sn[s[i]] = true; p == len(seed)-1 {
for _, n := range ends {
if n.next == s[i] && n.hash == 0 {
lex.Log("debug", "GET() state:%v", n)
n.hash = hash
lex.Log("debug", "END() state:%v", n)
}
}
} }
state := lex.mat[si][c[j]] }
lex.Log("debug", "GET(%d,%d) state:%v", si, c[j], state)
if p == len(seed)-1 {
state.hash = hash
} else {
if state.next == 0 { if state.next == 0 {
sn = append(sn, false) if line == 0 {
state.next = len(lex.mat) line = len(lex.mat)
lex.mat = append(lex.mat, make(map[byte]*State)) sn = append(sn, false)
lex.mat = append(lex.mat, make(map[byte]*State))
lex.Capi("nline", 1)
}
state.next = line
} }
if p == len(seed)-1 { sn[state.next] = true
state.hash = hash
}
ends = append(ends, state)
lex.Log("debug", "SET(%d,%d) state:%v", si, c[j], state)
si = state.next
begin, end = byte(0), byte(len(c))
} }
next := true if s, ok := lex.state[*state]; ok {
state = s
for j := begin; j < end; j++ {
if lex.mat[si][c[j]] == nil {
lex.mat[si][c[j]] = new(State)
}
state := lex.mat[si][c[j]]
lex.Log("debug", "GET(%d,%d) state:%v", si, c[j], state)
switch flag {
case '+', '*':
state.star = true
fallthrough
case '?':
sn[si] = true
if p < len(seed)-1 {
break
}
for _, s := range ends {
if s.next == si && s.hash == 0 {
lex.Log("debug", "GET() state:%v", s)
s.hash = hash
lex.Log("debug", "END() state:%v", s)
}
}
fallthrough
case '\000':
next = false
}
if next {
if state.next == 0 {
if line == 0 {
sn = append(sn, false)
line = len(lex.mat)
lex.mat = append(lex.mat, make(map[byte]*State))
}
state.next = line
}
sn[state.next] = true
} else {
state.hash = hash
}
ends = append(ends, state)
lex.Log("debug", "SET(%d,%d) state:%v", si, c[j], state)
} }
lex.state[*state] = state
lex.mat[s[i]][c[j]] = state
lex.Log("debug", "SET(%d,%d): %v", s[i], c[j], state)
ends = append(ends, state)
} }
} }
c = c[:0] c, s = c[:0], s[:0]
s = s[:0]
for i := 0; i < len(sn); i++ { for i := 0; i < len(sn); i++ {
if sn[i] { if sn[i] {
s = append(s, i) s = append(s, i)
@ -192,14 +189,19 @@ func (lex *LEX) train(page int, hash int, seed []byte) { // {{{
} }
// }}} // }}}
func (lex *LEX) parse(page int, line []byte) (word []byte, hash int, rest []byte) { // {{{ func (lex *LEX) parse(line []byte, arg ...string) (word []byte, hash int, rest []byte) { // {{{
page, begin, end := 1, 0, 0 // {{{
if len(arg) > 0 {
if x, ok := lex.page[arg[0]]; ok {
page = x
} else {
return line, 0, nil
}
}
// }}}
s := page for star, s, i := 0, page, 0; s != 0 && i < len(line); i++ {
star := 0
begin, end := 0, 0
for i := 0; s != 0 && i < len(line); i++ {
c := line[i] c := line[i]
if c == '\\' && i < len(line)-1 { if c == '\\' && i < len(line)-1 {
c = 'a' c = 'a'
@ -222,14 +224,11 @@ func (lex *LEX) parse(page int, line []byte) (word []byte, hash int, rest []byte
star = 0 star = 0
} }
end++ if end++; state.star {
hash = state.hash
if state.star {
star = s star = s
} }
s = state.next if s, hash = state.next, state.hash; s == 0 {
if s == 0 {
s, star = star, 0 s, star = star, 0
} }
} }
@ -238,47 +237,13 @@ func (lex *LEX) parse(page int, line []byte) (word []byte, hash int, rest []byte
begin, end = 0, 0 begin, end = 0, 0
} }
word = line[begin:end] word, rest = line[begin:end], line[end:]
rest = line[end:] lex.Log("debug", "\033[31m[%v]\033[0m %d [%v]", string(word), hash, string(rest))
lex.Log("debug", "%d %v %v", hash, word, rest)
return return
} }
// }}} // }}}
// Begin registers the lexer's two config entries and returns the lexer.
//
// "page" (labelled 词法集合, "lexical set") is installed with the value "16"
// and "cell" (labelled 字符集合, "character set") with the value "128".
// When positional arguments are supplied, arg[0] replaces the "page" value
// and arg[1] replaces the "cell" value. m is not read in this body.
func (lex *LEX) Begin(m *ctx.Message, arg ...string) ctx.Server { // {{{
// Install both entries with their default values first.
lex.Configs["page"] = &ctx.Config{Name: "词法集合", Value: "16", Help: "词法集合"}
lex.Configs["cell"] = &ctx.Config{Name: "字符集合", Value: "128", Help: "字符集合"}
// Optional override of the "page" value.
if len(arg) > 0 {
lex.Configs["page"].Value = arg[0]
}
// Optional override of the "cell" value.
if len(arg) > 1 {
lex.Configs["cell"].Value = arg[1]
}
return lex
}
// }}}
// Start binds the message to the lexer and allocates its tables from config.
//
// The "page" and "cell" config values are read through m.Confi and stored in
// lex.page and lex.cell. lex.mat is built with one empty byte -> *State map
// per page, and lex.seed is reset to an empty slice with capacity 10.
// arg is not read. The function always returns false.
// NOTE(review): what a false return means to the caller is defined by the
// ctx.Server contract, which is not visible here.
func (lex *LEX) Start(m *ctx.Message, arg ...string) bool { // {{{
lex.Message = m
lex.page = m.Confi("page")
lex.cell = m.Confi("cell")
// One row per page; every row starts as an empty transition map so later
// writes never hit a nil map.
lex.mat = make([]map[byte]*State, m.Confi("page"))
for i := 0; i < len(lex.mat); i++ {
lex.mat[i] = make(map[byte]*State)
}
lex.seed = make([]*Seed, 0, 10)
return false
}
// }}}
func (lex *LEX) Spawn(m *ctx.Message, c *ctx.Context, arg ...string) ctx.Server { // {{{ func (lex *LEX) Spawn(m *ctx.Message, c *ctx.Context, arg ...string) ctx.Server { // {{{
c.Caches = map[string]*ctx.Cache{} c.Caches = map[string]*ctx.Cache{}
c.Configs = map[string]*ctx.Config{} c.Configs = map[string]*ctx.Config{}
@ -288,86 +253,109 @@ func (lex *LEX) Spawn(m *ctx.Message, c *ctx.Context, arg ...string) ctx.Server
return s return s
} }
// }}}
// Begin binds the message to the lexer and registers its counter caches.
//
// Registered caches (label translation in parentheses) and initial values:
//   - nseed (seed count): "0"
//   - npage (set count): "1"
//   - nhash (type count): "1"
//   - nline (state count): "1"
//   - nnode (node count): "0"
//   - npush: "0", with a handler that reports len(lex.state) of the
//     message's target server instead of a stored value
//
// arg is not read. The lexer itself is returned.
func (lex *LEX) Begin(m *ctx.Message, arg ...string) ctx.Server { // {{{
	lex.Message = m

	lex.Caches["nseed"] = &ctx.Cache{Name: "种子数量", Value: "0", Help: "种子数量"}
	lex.Caches["npage"] = &ctx.Cache{Name: "集合数量", Value: "1", Help: "集合数量"}
	lex.Caches["nhash"] = &ctx.Cache{Name: "类型数量", Value: "1", Help: "类型数量"}
	lex.Caches["nline"] = &ctx.Cache{Name: "状态数量", Value: "1", Help: "状态数量"}
	lex.Caches["nnode"] = &ctx.Cache{Name: "节点数量", Value: "0", Help: "节点数量"}
	// NOTE(review): npush carries the same label as nnode ("node count")
	// although it reports the size of lex.state; the label looks copy-pasted
	// and should be confirmed by the author.
	lex.Caches["npush"] = &ctx.Cache{Name: "节点数量", Value: "0", Help: "节点数量", Hand: func(m *ctx.Message, x *ctx.Cache, arg ...string) string {
		// Unchecked assertion: this panics if the target server is not a *LEX.
		lex := m.Target.Server.(*LEX) // {{{
		return fmt.Sprintf("%d", len(lex.state))
		// }}}
	}}
	return lex
}
// }}}
// Start binds the message and resets every lexer table to its initial state.
//
// After the call: lex.seed is empty with capacity 10, lex.page and lex.hash
// each hold only the entry "nil" -> 0, lex.state is empty, and lex.mat has
// two empty rows with capacity for ten. arg is not read. The function always
// returns false.
// NOTE(review): two rows are presumably pre-allocated because row 0 is
// reserved as "no state" and row 1 is the default page; confirm against
// train and parse.
func (lex *LEX) Start(m *ctx.Message, arg ...string) bool { // {{{
	lex.Message = m

	// Name tables and the state de-duplication map.
	lex.seed = make([]*Seed, 0, 10)
	lex.page = map[string]int{"nil": 0}
	lex.hash = map[string]int{"nil": 0}
	lex.state = map[State]*State{}

	// Transition matrix: every pre-allocated row gets its own empty map.
	lex.mat = make([]map[byte]*State, 2, 10)
	for row := range lex.mat {
		lex.mat[row] = map[byte]*State{}
	}
	return false
}
// }}} // }}}
func (lex *LEX) Close(m *ctx.Message, arg ...string) bool { // {{{ func (lex *LEX) Close(m *ctx.Message, arg ...string) bool { // {{{
return true return false
} }
// }}} // }}}
var Index = &ctx.Context{Name: "lex", Help: "词法解析", var Index = &ctx.Context{Name: "lex", Help: "词法解析",
Caches: map[string]*ctx.Cache{}, Caches: map[string]*ctx.Cache{},
Configs: map[string]*ctx.Config{ Configs: map[string]*ctx.Config{},
"page": &ctx.Config{Name: "词法集合", Value: "16", Help: "词法集合"},
"cell": &ctx.Config{Name: "字符集合", Value: "128", Help: "字符集合"},
},
Commands: map[string]*ctx.Command{ Commands: map[string]*ctx.Command{
"train": &ctx.Command{Name: "train seed [hash [page]", Help: "添加词法规则", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) string { "train": &ctx.Command{Name: "train seed [hash [page]", Help: "添加词法规则", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) string {
lex, ok := m.Target.Server.(*LEX) // {{{ lex, ok := m.Target.Server.(*LEX) // {{{
if !ok { m.Assert(ok, "模块类型错误")
return "" m.Assert(len(arg) > 0, "参数错误")
}
hash := 1
if len(arg) > 1 {
hash, _ = strconv.Atoi(arg[1])
}
page := 1
if len(arg) > 2 {
page, _ = strconv.Atoi(arg[2])
}
lex.train(page, hash, []byte(arg[0]))
lex.seed = append(lex.seed, &Seed{page, hash, arg[0]})
lex.train([]byte(arg[0]), arg[1:]...)
return "" return ""
// }}} // }}}
}}, }},
"parse": &ctx.Command{Name: "parse line [page]", Help: "解析单词", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) string { "parse": &ctx.Command{Name: "parse line [page]", Help: "解析单词", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) string {
lex, ok := m.Target.Server.(*LEX) // {{{ lex, ok := m.Target.Server.(*LEX) // {{{
if !ok { m.Assert(ok, "模块类型错误")
return "" m.Assert(len(arg) > 0, "参数错误")
}
page := 1 word, hash, rest := lex.parse([]byte(arg[0]), arg[1:]...)
if len(arg) > 1 { m.Add("result", string(word), fmt.Sprintf("%d", hash), string(rest))
page, _ = strconv.Atoi(arg[1])
}
word, hash, rest := lex.parse(page, []byte(arg[0]))
m.Echo(string(word))
m.Echo(fmt.Sprintf("%d", hash))
m.Echo(string(rest))
m.Log("debug", "%s %d %s", string(word), hash, string(rest))
return "" return ""
// }}} // }}}
}}, }},
"split": &ctx.Command{Name: "split line [page1 [page2]]", Help: "分割语句", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) string { "split": &ctx.Command{Name: "split line page1 [page2]", Help: "分割语句", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) string {
lex, ok := m.Target.Server.(*LEX) // {{{ lex, ok := m.Target.Server.(*LEX) // {{{
if !ok { m.Assert(ok, "模块类型错误")
return "" m.Assert(len(arg) > 1, "参数错误")
}
line := arg[0] for line := arg[0]; len(line) > 0; {
page1 := 1 word, hash, rest := lex.parse([]byte(line), arg[1:]...)
page2 := 2 line = string(rest)
if len(arg) > 1 { word, hash, rest = lex.parse([]byte(line), arg[2:]...)
page1, _ = strconv.Atoi(arg[1])
}
if len(arg) > 2 {
page2, _ = strconv.Atoi(arg[2])
}
for len(line) > 0 {
word, hash, rest := lex.parse(page1, []byte(line))
m.Log("debug", "\033[31mvoid [%s]\033[0m\n", string(word))
line = string(rest) line = string(rest)
word, hash, rest = lex.parse(page2, []byte(line))
m.Log("debug", "\033[31mword [%s]\033[0m\n", string(word))
if hash == 0 { if hash == 0 {
break break
} }
m.Echo(string(word)) m.Echo(string(word))
line = string(rest) }
return ""
// }}}
}},
"cache": &ctx.Command{Name: "cache", Help: "显示缓存", Hand: func(m *ctx.Message, c *ctx.Context, key string, arg ...string) string {
lex, ok := m.Target.Server.(*LEX) // {{{
m.Assert(ok, "模块类型错误")
for i, v := range lex.seed {
m.Echo("seed: %d %v\n", i, v)
}
for i, v := range lex.page {
m.Echo("page: %s %d\n", i, v)
}
for i, v := range lex.hash {
m.Echo("hash: %s %d\n", i, v)
}
for i, v := range lex.state {
m.Echo("node: %v %v\n", i, v)
}
for i, v := range lex.mat {
for k, v := range v {
m.Echo("node: %v %v %v\n", i, k, v)
}
} }
return "" return ""
// }}} // }}}
@ -375,9 +363,7 @@ var Index = &ctx.Context{Name: "lex", Help: "词法解析",
}, },
Index: map[string]*ctx.Context{ Index: map[string]*ctx.Context{
"void": &ctx.Context{Name: "void", "void": &ctx.Context{Name: "void",
Commands: map[string]*ctx.Command{ Commands: map[string]*ctx.Command{"split": &ctx.Command{}},
"split": &ctx.Command{},
},
}, },
}, },
} }

View File

@ -9,116 +9,118 @@ func TestLEX(t *testing.T) {
m := ctx.Pulse.Spawn(Index) m := ctx.Pulse.Spawn(Index)
seed := map[string]map[string]string{ seed := map[string]map[string]string{
// "shy?": map[string]string{ // "shy?": map[string]string{
// "s": "", // "s": "",
// "sh": "sh", // "sh": "sh",
// "she": "sh", // "she": "sh",
// "shy": "shy", // "shy": "shy",
// "shyyy": "shy", // "shyyy": "shy",
// }, // },
// "shy*": map[string]string{ // "shy*": map[string]string{
// "s": "", // "s": "",
// "sh": "sh", // "sh": "sh",
// "she": "sh", // "she": "sh",
// "shy": "shy", // "shy": "shy",
// "shyyy": "shyyy", // "shyyy": "shyyy",
// }, // },
// "shy+": map[string]string{ // "shy+": map[string]string{
// "s": "", // "s": "",
// "sh": "", // "sh": "",
// "she": "", // "she": "",
// "shy": "shy", // "shy": "shy",
// "shyyy": "shyyy", // "shyyy": "shyyy",
// }, // },
// "s?hy": map[string]string{ // "s?hy": map[string]string{
// "s": "", // "s": "",
// "sh": "", // "sh": "",
// "she": "", // "she": "",
// "shy": "shy", // "shy": "shy",
// "hy": "hy", // "hy": "hy",
// }, // },
// "s*hy": map[string]string{ // "s*hy": map[string]string{
// "s": "", // "s": "",
// "sh": "", // "sh": "",
// "she": "", // "she": "",
// "shy": "shy", // "shy": "shy",
// "ssshy": "ssshy", // "ssshy": "ssshy",
// "hy": "hy", // "hy": "hy",
// }, // },
// "s+hy": map[string]string{ // "s+hy": map[string]string{
// "s": "", // "s": "",
// "sh": "", // "sh": "",
// "she": "", // "she": "",
// "shy": "shy", // "shy": "shy",
// "ssshy": "ssshy", // "ssshy": "ssshy",
// "hy": "", // "hy": "",
// }, // },
// "sh[xyz]?": map[string]string{ // "sh[xyz]?": map[string]string{
// "s": "", // "s": "",
// "sh": "sh", // "sh": "sh",
// "she": "sh", // "she": "sh",
// "shy": "shy", // "shy": "shy",
// "shyyy": "shy", // "shyyy": "shy",
// }, // },
// "sh[xyz]*": map[string]string{ // "sh[xyz]*": map[string]string{
// "s": "", // "s": "",
// "sh": "sh", // "sh": "sh",
// "she": "sh", // "she": "sh",
// "shy": "shy", // "shy": "shy",
// "shyyy": "shyyy", // "shyyy": "shyyy",
// "shyxz": "shyxz", // "shyxz": "shyxz",
// }, // },
// "sh[xyz]+": map[string]string{ // "sh[xyz]+": map[string]string{
// "s": "", // "s": "",
// "sh": "", // "sh": "",
// "she": "", // "she": "",
// "shy": "shy", // "shy": "shy",
// "shyyy": "shyyy", // "shyyy": "shyyy",
// "shyxzy": "shyxzy", // "shyxzy": "shyxzy",
// }, // },
// "[xyz]?sh": map[string]string{ // "[xyz]?sh": map[string]string{
// "s": "", // "s": "",
// "sh": "sh", // "sh": "sh",
// "zsh": "zsh", // "zsh": "zsh",
// "zxyshy": "", // "zxyshy": "",
// }, // },
// "[xyz]*sh": map[string]string{ // "[xyz]*sh": map[string]string{
// "s": "", // "s": "",
// "sh": "sh", // "sh": "sh",
// "zsh": "zsh", // "zsh": "zsh",
// "zxyshy": "zxysh", // "zxyshy": "zxysh",
// }, // },
// "[xyz]+sh": map[string]string{ // "[xyz]+sh": map[string]string{
// "s": "", // "s": "",
// "sh": "", // "sh": "",
// "zsh": "zsh", // "zsh": "zsh",
// "zxyshy": "zxysh", // "zxyshy": "zxysh",
// }, // },
// "[0-9]+": map[string]string{ // "[0-9]+": map[string]string{
// "hello": "", // "hello": "",
// "hi123": "", // "hi123": "",
// "123": "123", // "123": "123",
// "123hi": "123", // "123hi": "123",
// }, // },
// "0x[0-9a-fA-F]+": map[string]string{ // "0x[0-9a-fA-F]+": map[string]string{
// "hello": "", // "hello": "",
// "0xhi123": "", // "0xhi123": "",
// "0x123": "0x123", // "0x123": "0x123",
// "0xab123ab": "0xab123ab", // "0xab123ab": "0xab123ab",
// "0x123ab": "0x123ab", // "0x123ab": "0x123ab",
// }, // },
// "[a-zA-Z][a-zA-Z0-9]*": map[string]string{ "[a-zA-Z][a-zA-Z0-9]*": map[string]string{
// "hello": "hello", "hello": "hello",
// "hi123": "hi123", "hi123": "hi123",
// "123": "", "123": "",
// }, },
// "\"[^\"]*\"": map[string]string{ "\"[^\"]*\"": map[string]string{
// "hello": "", "hello": "",
// "0xhi123": "", "0xhi123": "",
// "\"hi\"": "\"hi\"", "\"hi\"": "\"hi\"",
// "\"\\\"hi\"": "\"\\\"hi\"", "\"\\\"hi\"": "\"\\\"hi\"",
// }, },
} }
m.Conf("debug", "on")
Index.Begin(m)
for k, s := range seed { for k, s := range seed {
Index.Start(m) Index.Start(m)
m.Cmd("train", k) m.Cmd("train", k)