一般 parse tree 是一個有序(ordered)、有唯一根(rooted)的樹。
因此,之前的 grammar 要再改變一下,加上<log> ::= <msg> {<msg>} 在最前面。
<log> ::= <msg> {<msg>}
<msg> ::= timestamp <secs_type_msg>
| timestamp <host_type_msg>
<secs_type_msg> ::= <desc> secs_comment <secs_msg>
<host_type_msg> ::= eap_log_1 {xml_transaction}
<desc> ::= <secs_msg_name> colon <secs_SxFy_sym>
| normal_word {normal_word}
<secs_SxFy_sym> ::= secs_SxFy {<secs_w_bit>}
<secs_msg_name> ::= normal_word
<secs_w_bit> ::= normal_word
<secs_msg> ::= {<secs_msg_body>} secs_end
<secs_msg_body> ::= <secs_msg_item> {secs_comment }
<secs_msg_item> ::= secs_item_a
| secs_item_u
| secs_item_b
| secs_item_bool
| secs_item_f
| secs_item_i
| secs_list_start {secs_comment} {<secs_msg_item>} secs_list_end
接下來是設計 tree node 存放物件。
之前的程式實際上只有判斷 input 是否為可接受。
所以每一個函式傳回 true 的地方都改為傳回 node,傳回 false 的地方則改為傳回 None。
最基本的一個 node,就是只有一個 name 屬性的物件。
把它當做抽象類別。為了方便序列化,我們把其他的屬性都用一個 dict 裝起來,
為了讓屬性按加入順序序列化,把屬性名字加到一個 list 中。
class c_node:
    """Base parse-tree node: a bag of named attributes that remembers order.

    Values live in ``keyvalue``; ``keyarray`` records each attribute name the
    first time it is stored so that ``__str__`` can serialize the attributes
    in insertion order.
    """

    def __init__(self):
        self.keyvalue = {}  # attribute name -> value
        self.keyarray = []  # attribute names, in first-insertion order

    def add_attr(self, k, v):
        """Store ``v`` under ``k``; a new ``k`` is appended to the ordering."""
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        if k not in self.keyvalue:
            self.keyarray.append(k)
        self.keyvalue[k] = v

    def set_attr(self, k, v):
        """Store ``v`` under ``k``; behaves exactly like ``add_attr``."""
        self.add_attr(k, v)

    def get_attr(self, k):
        """Return the value stored under ``k``, or None if ``k`` is unknown."""
        return self.keyvalue.get(k)

    def __str__(self):
        """Serialize as ``[key:value,...]``; list values become ``[a,b,...]``."""
        parts = []
        for key in self.keyarray:
            value = self.keyvalue[key]
            if isinstance(value, list):
                rendered = "[" + ",".join([str(item) for item in value]) + "]"
            else:
                # str(None) is "None", so a missing value needs no special case.
                rendered = str(value)
            parts.append(key + ":" + rendered)
        return "[" + ",".join(parts) + "]"
我們為 log 設計一個 c_log 的類別。
class c_log(c_node):
    """Root node for ``<log> ::= <msg> {<msg>}``; ``msg`` starts out as None."""

    def __init__(self):
        c_node.__init__(self)
        for key, value in (('name', 'msgs'), ('msg', None)):
            self.add_attr(key, value)
其他的類別也很單純,只要按 grammar 中的順序把 symbol (terminal 及 non-terminal) 當做屬性加入即可。
但是到了 c_secs_msg_item,就不太一樣,因為他有可能是 terminal 也可能是 non-terminal 的 symbol,
所以屬性除了當做 terminal 存放值的 value,也有當做 non-terminal 存放值的 children。
class c_secs_msg_item(c_node):
    """Node for ``<secs_msg_item>``.

    The rule is either a single item token or a bracketed list, so the node
    carries both a ``value`` slot and a ``children`` slot, plus the list
    delimiters and an optional comment.
    """

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'secs_msg_item')
        # Remaining slots are empty until the parser fills them in.
        for key in ('list_start', 'secs_comment', 'children', 'value',
                    'list_end'):
            self.add_attr(key, None)
在 parser 的程式裡,遇到 secs_msg_item 的時候,
如果不是遇到 secs_list_start,就把值放到 value 去。
否則就把值放到 children 裡面,就算該值是空的。(因為允許空的 list)
(secs_msg_item 的程式碼請參考底下)
老實說,我也沒辦法一開始就知道需要 c_node,也沒辦法知道 c_node 就要長這個樣子。
我是邊寫邊改的。因為要 debug 才生出 __str__ 這個方法。
因為要 __str__ 才知道要把屬性放到 list 及 dict 裡面方便列舉。
在寫的過程中,參考了 google code 裡面的 traceur。(它是用 javascript 寫的)
因為難以預料之後會遇到什麼,所以花了一點時間研究,才會隔了兩個星期都沒有新的產出。(有一點找藉口)
完整的程式碼我分成兩個,一個是 node 們的定義。一個是 parser 的程式。
node_def.py
class c_node:
    """Base parse-tree node: a bag of named attributes that remembers order.

    Values live in ``keyvalue``; ``keyarray`` records each attribute name the
    first time it is stored so that ``__str__`` can serialize the attributes
    in insertion order.
    """

    def __init__(self):
        self.keyvalue = {}  # attribute name -> value
        self.keyarray = []  # attribute names, in first-insertion order

    def add_attr(self, k, v):
        """Store ``v`` under ``k``; a new ``k`` is appended to the ordering."""
        # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
        if k not in self.keyvalue:
            self.keyarray.append(k)
        self.keyvalue[k] = v

    def set_attr(self, k, v):
        """Store ``v`` under ``k``; behaves exactly like ``add_attr``."""
        self.add_attr(k, v)

    def get_attr(self, k):
        """Return the value stored under ``k``, or None if ``k`` is unknown."""
        return self.keyvalue.get(k)

    def __str__(self):
        """Serialize as ``[key:value,...]``; list values become ``[a,b,...]``."""
        parts = []
        for key in self.keyarray:
            value = self.keyvalue[key]
            if isinstance(value, list):
                rendered = "[" + ",".join([str(item) for item in value]) + "]"
            else:
                # str(None) is "None", so a missing value needs no special case.
                rendered = str(value)
            parts.append(key + ":" + rendered)
        return "[" + ",".join(parts) + "]"
class c_log(c_node):
    """Root node for ``<log> ::= <msg> {<msg>}``; ``msg`` starts out as None."""

    def __init__(self):
        c_node.__init__(self)
        for key, value in (('name', 'msgs'), ('msg', None)):
            self.add_attr(key, value)
class c_msg(c_node):
    """Node for ``<msg>``: a timestamp plus a host-type or a SECS-type body."""

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'msg')
        # Slots are declared up front so the serialized order is fixed.
        for key in ('timestamp', 'host_type_msg', 'secs_type_msg'):
            self.add_attr(key, None)
class c_secs_type_msg(c_node):
    """Node for ``<secs_type_msg> ::= <desc> secs_comment <secs_msg>``."""

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'secs_type_msg')
        for key in ('desc', 'secs_comment', 'secs_msg'):
            self.add_attr(key, None)
class c_secs_SxFy_sym(c_node):
    """Node for ``<secs_SxFy_sym> ::= secs_SxFy {<secs_w_bit>}``."""

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'secs_SxFy_sym')
        for key in ('secs_SxFy', 'secs_w_bit'):
            self.add_attr(key, None)
class c_desc(c_node):
    """Node for ``<desc>``.

    Holds slots for both alternatives of the rule: the
    ``secs_msg_name colon secs_SxFy_sym`` form and the plain
    ``normal_word`` form.
    """

    def __init__(self):
        c_node.__init__(self)
        # Plain instance attribute; it is not part of the serialized output.
        self.name = 'desc'
        self.add_attr('name', 'desc')
        for key in ('secs_msg_name', 'colon', 'secs_SxFy_sym', 'normal_word'):
            self.add_attr(key, None)
class c_secs_msg_name(c_node):
    """Node for ``<secs_msg_name> ::= normal_word``."""

    def __init__(self):
        c_node.__init__(self)
        for key, value in (('name', 'secs_msg_name'), ('normal_word', None)):
            self.add_attr(key, value)
class c_colon(c_node):
    """Node for the ``colon`` terminal; its ``value`` is preset to ``':'``."""

    def __init__(self):
        c_node.__init__(self)
        for key, value in (('name', 'colon'), ('value', ':')):
            self.add_attr(key, value)
class c_secs_w_bit(c_node):
    """Node for ``<secs_w_bit> ::= normal_word``."""

    def __init__(self):
        c_node.__init__(self)
        # Plain instance attributes mirroring the initial serialized values;
        # only the add_attr() entries below appear in str(node).
        self.name = 'secs_w_bit'
        self.normal_word = None
        self.add_attr('name', 'secs_w_bit')
        self.add_attr('normal_word', None)
class c_secs_msg(c_node):
    """Node for ``<secs_msg> ::= {<secs_msg_body>} secs_end``."""

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'secs_msg')
        for key in ('secs_msg_body', 'secs_end'):
            self.add_attr(key, None)
class c_secs_msg_body(c_node):
    """Node for ``<secs_msg_body> ::= <secs_msg_item> {secs_comment}``."""

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'secs_msg_body')
        for key in ('secs_msg_item', 'secs_comment'):
            self.add_attr(key, None)
class c_secs_msg_item(c_node):
    """Node for ``<secs_msg_item>``.

    The rule is either a single item token or a bracketed list, so the node
    carries both a ``value`` slot and a ``children`` slot, plus the list
    delimiters and an optional comment.
    """

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'secs_msg_item')
        # Remaining slots are empty until the parser fills them in.
        for key in ('list_start', 'secs_comment', 'children', 'value',
                    'list_end'):
            self.add_attr(key, None)
class c_host_type_msg(c_node):
    """Node for ``<host_type_msg> ::= eap_log_1 {xml_transaction}``."""

    def __init__(self):
        c_node.__init__(self)
        self.add_attr('name', 'host_type_msg')
        for key in ('eap_log_1', 'xml_transaction'):
            self.add_attr(key, None)
------------------------------------------------------------------------------
ll_parser_v4.py # 看到這個 v4 就知道我大改了4次。
import node_def
# Load the whole token file; records are separated by two blank lines.
# ``with`` closes the file even if read() raises.
with open("tokens.txt", "r") as t_file:
    t_file_content = t_file.read()
tokens = t_file_content.split("\n\n\n")
tokenindex = 0
# Values of accepted tokens are pushed here by accept() and popped by the
# parse function that consumes them.
global_stack = []
def token_cut(token):
    """Split one raw token record into a list of exactly six strings.

    A record looks like ``tokens(41):timestamp,1,True,416,8,<value>``.
    Everything up to and including the first ``:`` is dropped. The rest is
    split on the first five commas only, so the sixth field (the value) may
    itself contain commas, colons or newlines.

    Malformed records are tolerated: if there is no ``:`` the whole string is
    split, and missing trailing fields are returned as empty strings. This
    avoids the ``find() == -1`` slicing that used to chop the last character
    off the remaining text.
    """
    field_count = 6
    _prefix, colon, rest = token.partition(":")
    if not colon:
        # No "tokens(n):" prefix present; treat the whole record as fields.
        rest = token
    fields = rest.split(",", field_count - 1)
    # Pad so callers can always index fields[0] .. fields[5].
    fields.extend([""] * (field_count - len(fields)))
    return fields
# Number of raw token records; computed once after the token file is split.
tokencount = len(tokens)
# Index into ``tokens`` of the current symbol; -1 until getsym() first runs.
input_index = -1
# Current token as the field list from token_cut(), or None when the input
# is exhausted or expect() has aborted the parse.
sym = None
def getsym():
    """Advance to the next token and publish it in the global ``sym``.

    ``sym`` becomes None when the index runs past the last record or when the
    record at the new index is an empty string.
    """
    global input_index, sym
    input_index += 1
    if input_index >= tokencount or len(tokens[input_index]) == 0:
        sym = None
        return
    sym = token_cut(tokens[input_index])
def lookahead():
    """Return the token after the current one without consuming anything.

    Returns None when there is no following record.
    """
    next_index = input_index + 1
    if next_index < tokencount:
        return token_cut(tokens[next_index])
    return None
def accept(s):
    """Consume the current token if its kind equals ``s``.

    On a match the token's value (field 5) is pushed onto ``global_stack``,
    the input advances, and True is returned. Otherwise nothing changes and
    False is returned.
    """
    if sym is None or sym[0] != s:
        return False
    global_stack.append(sym[5])
    getsym()
    return True
def expect(s):
    """Like ``accept(s)``, but a mismatch is reported and aborts the parse.

    On mismatch the expected kind and the offending token are printed, the
    global ``sym`` is set to None, and False is returned.
    """
    global sym
    if accept(s):
        return True
    print("expect: unexpected symbol")
    print("expect: %s" % (s,))
    print("encounter: %s" % (sym,))
    sym = None
    return False
def secs_w_bit():
    """Parse an optional ``<secs_w_bit>`` and return its node.

    The node is always returned; ``normal_word`` is filled in only when the
    current token is a ``normal_word``.
    """
    print('secs_w_bit()')
    node = node_def.c_secs_w_bit()
    if accept("normal_word"):
        node.set_attr('normal_word', global_stack.pop())
    print('secs_w_bit() complete')
    return node
def secs_SxFy_sym():
    """Parse ``<secs_SxFy_sym> ::= secs_SxFy {<secs_w_bit>}``; return its node."""
    print('secs_SxFy_sym()')
    node = node_def.c_secs_SxFy_sym()
    if expect("secs_SxFy"):
        node.set_attr("secs_SxFy", global_stack.pop())
    # NOTE(review): the w-bit node is attached even when the secs_SxFy token
    # was missing; the listing's indentation was lost, so confirm this
    # nesting against the author's original file.
    node.set_attr("secs_w_bit", secs_w_bit())
    print('secs_SxFy_sym() complete')
    return node
def secs_msg_name():
    """Parse ``<secs_msg_name> ::= normal_word`` and return its node.

    If the current token is not a ``normal_word``, ``expect`` reports the
    error and the node is returned with ``normal_word`` left as None.
    """
    print('secs_msg_name()')
    my = node_def.c_secs_msg_name()
    if expect("normal_word"):
        # Must go through set_attr(): str(node) serializes only the entries
        # in keyvalue, so a plain ``my.normal_word = ...`` assignment would
        # leave the serialized value as None.
        my.set_attr('normal_word', global_stack.pop())
    print('secs_msg_name() complete')
    return my
def desc():
    """Parse a ``<desc>`` and return a ``c_desc`` node.

    Grammar::

        <desc> ::= <secs_msg_name> colon <secs_SxFy_sym>
                 | normal_word {normal_word}

    One token of lookahead picks the alternative: if the token after the
    current one is a ``colon`` the first form is parsed, otherwise a run of
    ``normal_word`` tokens is collected. A single word is stored as a scalar;
    two or more are stored as a list; with none, ``normal_word`` stays None.
    """
    print('desc()')
    my = node_def.c_desc()
    _sym = lookahead()
    # lookahead() returns None at end of input; treat that as "no colon"
    # instead of failing on None[0].
    if _sym is not None and _sym[0] == "colon":
        my.set_attr("secs_msg_name", secs_msg_name())
        if expect("colon"):
            my.set_attr("colon", global_stack.pop())
        my.set_attr("secs_SxFy_sym", secs_SxFy_sym())
    else:
        words = []
        while accept("normal_word"):
            words.append(global_stack.pop())
        if len(words) == 1:
            my.set_attr("normal_word", words[0])
        elif words:
            my.set_attr("normal_word", words)
    print('desc() complete')
    return my
def secs_msg_item():
    """Parse one ``<secs_msg_item>``; return its node, or None if none starts here.

    A scalar item token stores its value in ``value``. A ``secs_list_start``
    opens a list whose nested items are parsed recursively into ``children``
    until no further item is found, after which ``secs_list_end`` is
    expected. ``children`` is only turned into a list once the first nested
    item has been parsed. Either form may be followed by one ``secs_comment``.
    """
    print('secs_msg_item()')
    node = node_def.c_secs_msg_item()
    scalar_kinds = ("secs_item_a", "secs_item_u", "secs_item_b",
                    "secs_item_bool", "secs_item_f", "secs_item_i")
    # any() stops at the first kind that accept() consumes, so the kinds are
    # tried in the listed order and at most one token is taken.
    if any(accept(kind) for kind in scalar_kinds):
        node.set_attr("value", global_stack.pop())
        if accept("secs_comment"):
            node.set_attr("secs_comment", global_stack.pop())
    elif accept("secs_list_start"):
        node.set_attr("list_start", global_stack.pop())
        if accept("secs_comment"):
            node.set_attr("secs_comment", global_stack.pop())
        child = secs_msg_item()
        if child is not None:
            node.set_attr("children", [])
        while child is not None:
            node.get_attr("children").append(child)
            child = secs_msg_item()
        if expect("secs_list_end"):
            node.set_attr("list_end", global_stack.pop())
    else:
        print('secs_msg_item() complete')
        return None
    print('secs_msg_item() complete')
    return node
def secs_msg_body():
    """Parse ``<secs_msg_body> ::= <secs_msg_item> {secs_comment}``.

    Returns a ``c_secs_msg_body`` node. ``secs_msg_item`` holds the parsed
    item (None when no item starts at the current token) and
    ``secs_comment`` holds a comment that follows the item, if any.
    """
    print('secs_msg_body()')
    my = node_def.c_secs_msg_body()
    # Both values go through set_attr() so that str(node) serializes them;
    # there is no ``self`` in this function, only the local node ``my``.
    my.set_attr('secs_msg_item', secs_msg_item())
    if accept("secs_comment"):
        my.set_attr('secs_comment', global_stack.pop())
    print('secs_msg_body() complete')
    return my
def secs_msg():
    """Parse a ``<secs_msg>`` and return a ``c_secs_msg`` node.

    If the current token is already ``secs_end`` the message has no body.
    Otherwise one body is parsed and then ``secs_end`` is accepted if
    present; a missing terminator is not reported here.
    """
    print('secs_msg()')
    my = node_def.c_secs_msg()
    if accept("secs_end"):
        my.set_attr("secs_end", global_stack.pop())
    else:
        # Stored through set_attr() so the body appears in str(node).
        my.set_attr("secs_msg_body", secs_msg_body())
        if accept("secs_end"):
            my.set_attr("secs_end", global_stack.pop())
    print('secs_msg() complete')
    return my
def secs_type_msg():
    """Parse ``<secs_type_msg>``: a desc, an optional comment, then a secs_msg."""
    print('secs_type_msg()')
    node = node_def.c_secs_type_msg()
    node.set_attr('desc', desc())
    if accept("secs_comment"):
        node.set_attr('secs_comment', global_stack.pop())
    node.set_attr('secs_msg', secs_msg())
    print('secs_type_msg() complete')
    return node
def host_type_msg():
    """Parse a ``<host_type_msg>`` and return its node.

    Consumes one expected ``eap_log_1`` token plus any that follow it, then
    an optional ``xml_transaction``. A single log line is stored as a
    scalar; several are stored as a list.
    """
    print('host_type_msg()')
    node = node_def.c_host_type_msg()
    if expect("eap_log_1"):
        log_lines = [global_stack.pop()]
        while accept("eap_log_1"):
            log_lines.append(global_stack.pop())
        if len(log_lines) == 1:
            node.set_attr('eap_log_1', log_lines[0])
        else:
            node.set_attr('eap_log_1', log_lines)
    if accept("xml_transaction"):
        node.set_attr('xml_transaction', global_stack.pop())
    print('host_type_msg() complete')
    return node
def msg():
    """Parse one ``<msg>`` and return a ``c_msg`` node.

    After the timestamp, the current token decides the alternative:
    ``eap_log_1`` starts a host-type message, anything else is parsed as a
    SECS-type message. If no token is left (end of input, or ``expect``
    aborted the parse) the node is returned with both bodies left as None.
    """
    print('msg()')
    my = node_def.c_msg()
    if expect("timestamp"):
        my.set_attr('timestamp', global_stack.pop())
    if sym is None:
        # Nothing to dispatch on; indexing sym[0] here would raise TypeError.
        pass
    elif sym[0] == "eap_log_1":
        my.set_attr('host_type_msg', host_type_msg())
    else:
        my.set_attr('secs_type_msg', secs_type_msg())
    print('msg() complete')
    return my
# Driver: parse messages until the input is exhausted (or expect() aborts by
# setting sym to None), collecting them under the root node's "msg" list.
ms = node_def.c_log()
getsym()
while sym is not None:
    print('token count %s' % (input_index,))
    a = msg()
    # "msg" starts as None; turn it into a list on the first message.
    if not isinstance(ms.get_attr("msg"), list):
        ms.set_attr("msg", [])
    ms.get_attr("msg").append(a)

# ``with`` closes the output file even if serialization or write() raises.
with open("v4output.txt", "w") as f:
    f.write(str(ms))
-------------------------
tokens_for_dev.txt # 測試用的輸入檔(parser 實際開啟的檔名是 tokens.txt,執行前請另存成這個檔名)
tokens(248743):timestamp,1,True,3302688,62420,2011-06-22-23:59:55
tokens(248745):normal_word,3,True,3302708,62420,AYT_Host
tokens(248746):colon,4,True,3302716,62420,:
tokens(248748):secs_SxFy,5,True,3302718,62420,'S1F1'
tokens(248750):normal_word,3,True,3302725,62420,W
tokens(248752):secs_comment,6,True,3302727,62420,/* Name=AreYouThere_Host Dir=2 Header=[00 00 81 01 00 00 00 01 CA 55] Rcvd=2 Time=23:59:55 TID=31094 */
tokens(248754):secs_end,8,True,3302831,62421,.
tokens(248756):timestamp,1,True,3302833,62422,2011-06-22-23:59:55
tokens(248758):normal_word,3,True,3302853,62422,OLD
tokens(248759):colon,4,True,3302856,62422,:
tokens(248761):secs_SxFy,5,True,3302858,62422,'S1F2'
tokens(248763):secs_comment,6,True,3302865,62422,/* Name=OnlineData Dir=1 Header=[00 00 01 02 00 00 00 01 CA 55] Rcvd=1 Time=23:59:55 TID=31094 */
tokens(248766):secs_list_start,9,True,3302967,62423,<L [2]
tokens(248769):secs_item_a,10,True,3302982,62424,<A [0] >
tokens(248771):secs_comment,6,True,3302991,62424,/* Name=MDLN Keyword=EquipmentModelType */
tokens(248774):secs_item_a,10,True,3303042,62425,<A [0] >
tokens(248776):secs_comment,6,True,3303051,62425,/* Name=SOFTREV Keyword=SoftwareRevision */
tokens(248779):secs_list_end,11,True,3303100,62426,>
tokens(248781):secs_end,8,True,3303102,62427,.
tokens(41):timestamp,1,True,416,8,2011-06-22-07:56:17
tokens(43):eap_log_1,12,True,436,8,Receiving PP_Upload request from TCS...
tokens(45):xml_transaction,13,True,476,9,<?xml version="1.0"?>
<Transaction TxName="PP_Upload" Type="Request" MessageKey="0629">
<Tool ToolID="CLB02" fromOPI="true" Type=""/>
<Recipes>
<Recipe RecipeID="APM_6MIN" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_00" FilePath="" FormatFlag="1" Type="" RecipeLevel="1" Option=""/>
<Recipe RecipeID="Production;MXIC_SC1_360;1" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_01" FilePath="" FormatFlag="1" Type="" RecipeLevel="2" Option=""/>
<Recipe RecipeID="Production;POR QDR-HCL;1" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_02" FilePath="" FormatFlag="1" Type="" RecipeLevel="2" Option=""/>
<Recipe RecipeID="Production;POR-philic;1" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_03" FilePath="" FormatFlag="1" Type="" RecipeLevel="2" Option=""/>
</Recipes>
</Transaction>
tokens(47):timestamp,1,True,1442,20,2011-06-22-07:56:17
tokens(49):eap_log_1,12,True,1462,20,[S7F5] Sent by Host
--------------------------------------------------------------
v4output.txt # 最後的結果給大家參考,有點像是 json。也許有天我會改成 json。
[name:msgs,msg:[[name:msg,timestamp:2011-06-22-23:59:55,host_type_msg:None,secs_type_msg:[name:secs_type_msg,desc:[name:desc,secs_msg_name:[name:secs_msg_name,normal_word:None],colon::,secs_SxFy_sym:[name:secs_SxFy_sym,secs_SxFy:'S1F1',secs_w_bit:[name:secs_w_bit,normal_word:W]],normal_word:None],secs_comment:/* Name=AreYouThere_Host Dir=2 Header=[00 00 81 01 00 00 00 01 CA 55] Rcvd=2 Time=23:59:55 TID=31094 */,secs_msg:[name:secs_msg,secs_msg_body:None,secs_end:.]]],[name:msg,timestamp:2011-06-22-23:59:55,host_type_msg:None,secs_type_msg:[name:secs_type_msg,desc:[name:desc,secs_msg_name:[name:secs_msg_name,normal_word:None],colon::,secs_SxFy_sym:[name:secs_SxFy_sym,secs_SxFy:'S1F2',secs_w_bit:[name:secs_w_bit,normal_word:None]],normal_word:None],secs_comment:/* Name=OnlineData Dir=1 Header=[00 00 01 02 00 00 00 01 CA 55] Rcvd=1 Time=23:59:55 TID=31094 */,secs_msg:[name:secs_msg,secs_msg_body:None,secs_end:.]]],[name:msg,timestamp:2011-06-22-07:56:17,host_type_msg:[name:host_type_msg,eap_log_1:Receiving PP_Upload request from TCS...,xml_transaction:<?xml version="1.0"?>
<Transaction TxName="PP_Upload" Type="Request" MessageKey="0629">
<Tool ToolID="CLB02" fromOPI="true" Type=""/>
<Recipes>
<Recipe RecipeID="APM_6MIN" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_00" FilePath="" FormatFlag="1" Type="" RecipeLevel="1" Option=""/>
<Recipe RecipeID="Production;MXIC_SC1_360;1" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_01" FilePath="" FormatFlag="1" Type="" RecipeLevel="2" Option=""/>
<Recipe RecipeID="Production;POR QDR-HCL;1" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_02" FilePath="" FormatFlag="1" Type="" RecipeLevel="2" Option=""/>
<Recipe RecipeID="Production;POR-philic;1" MachineRecipeID="CLB02.APM_6MIN.AA" LocalFilePath="D:\share\110622_075617_304_0082706463_03" FilePath="" FormatFlag="1" Type="" RecipeLevel="2" Option=""/>
</Recipes>
</Transaction>],secs_type_msg:None],[name:msg,timestamp:2011-06-22-07:56:17,host_type_msg:[name:host_type_msg,eap_log_1:[S7F5] Sent by Host
,xml_transaction:None],secs_type_msg:None]]]
--------------------------------------------------------------
能看到這的人不是正常人,恭喜你。
正常的編譯器接下來進到 code generation。
而我們這次的題目,接下來呢?
應該要上色了。