Mercurial > hg > Members > shinya > pyrect
view pyrect/regexp/lexer.py @ 53:1f8c474ca8b3
bug fix: modify escape character parsing rule.
author | Ryoma SHINYA <shinya@firefly.cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 25 Aug 2010 20:50:52 +0900 |
parents | abb0691e792a |
children | 5db856953793 |
line wrap: on
line source
#!/usr/bin/env python #-*- encoding: utf-8 -*- from ply import lex tokens = ( 'UNION', 'STAR', 'LPAREN', 'RPAREN', 'LBRACKET', 'RBRACKET', 'CARET', 'DOLLAR', 'NORMALCHAR', 'MBCHAR', 'DASH', 'ANYCHAR', 'ESCAPECHAR', 'PLUS', 'QMARK' ) def t_ESCAPECHAR(t): ur'\\[ -~]' t.value = t.value[-1] return t def t_UNION(t): ur'\|' return t def t_STAR(t): ur'\*' return t def t_QMARK(t): ur'\?' return t def t_PLUS(t): ur'\+' return t def t_LPAREN(t): ur'\(' return t def t_RPAREN(t): ur'\)' return t def t_DASH(t): ur'-' return t def t_CARET(t): ur'\^' return t def t_DOLLAR(t): ur'\$' return t def t_RBRACKET(t): ur'\[' return t def t_LBRACKET(t): ur'\]' return t def t_ANYCHAR(t): ur'\.' return t def t_NORMALCHAR(t): ur'[ -~]' # match ascii t.value return t def t_MBCHAR(t): # match multi byte code. -> see http://ja.wikipedia.org/wiki/UTF-8 u'(\xc2-\xdf].|[\xe0-\xef]..|[\xe0-\xef]..|[\xf0-\xf7]...|[\xf8-\xfb]....|[\xfc-\xfd].....)' return t def t_error(t): print "Illegal character '%s'" % t.value[0] raise t lex.lex()