Найти - Пользователи
Полная версия: PLY: >>> читается как >> и >
Начало » Python для экспертов » PLY: >>> читается как >> и >
1
nerezus
# -*- coding: cp1251 -*-
# HJava Programming Language, 2008 © nerezus


import sys
import ply.lex as lex

# Reserved words. Each entry is the token type of the keyword whose source
# spelling is the same word in lowercase.
reserved = tuple("""
    PUBLIC PRIVATE PROTECTED ABSTRACT FINAL
    SYNCHRONIZED TRANSIENT NATIVE
    ASSERT PACKAGE NEW THIS IMPLEMENTS EXTENDS INSTANCEOF
    IMPORT SUPER STRICTFP
    TRY CATCH FINALLY THROWS THROW

    BREAK CASE CHAR CONST CONTINUE DEFAULT DO DOUBLE
    ELSE ENUM FLOAT FOR GOTO IF INT LONG
    RETURN SHORT STATIC SWITCH
    VOID VOLATILE WHILE
""".split())

# Complete token list: the reserved words followed by every other token type,
# assembled group by group.
tokens = (
    reserved

    # Literals (identifier, integer constant, float constant, string constant,
    # char const)
    + ('ID', 'TYPEID', 'ICONST', 'FCONST', 'SCONST', 'CCONST')

    # Operators (+,-,*,/,%,|,&,~,^,<<,>>,>>>, ||, &&, !, <, <=, >, >=, ==, !=)
    + ('PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'MOD')
    + ('OR', 'AND', 'NOT', 'XOR', 'LSHIFT', 'RSHIFTRSHIFT', 'RSHIFT')
    + ('LOR', 'LAND', 'LNOT')
    + ('LT', 'LE', 'GT', 'GE', 'EQ', 'NE')

    # Assignment (=, *=, /=, %=, +=, -=, <<=, >>=, >>>=, &=, ^=, |=)
    + ('EQUALS', 'TIMESEQUAL', 'DIVEQUAL', 'MODEQUAL', 'PLUSEQUAL',
       'MINUSEQUAL')
    + ('LSHIFTEQUAL', 'RSHIFTRSHIFTEQUAL', 'RSHIFTEQUAL', 'ANDEQUAL',
       'XOREQUAL')
    + ('OREQUAL',)

    # Increment/decrement (++,--)
    + ('PLUSPLUS', 'MINUSMINUS')

    # Conditional operator (?)
    + ('CONDOP',)

    # Delimiters ( ) [ ] { } , . ; :
    + ('LPAREN', 'RPAREN')
    + ('LBRACKET', 'RBRACKET')
    + ('LBRACE', 'RBRACE')
    + ('COMMA', 'PERIOD', 'SEMI', 'COLON')
)

# Characters skipped between tokens: space, carriage return, tab, form feed
t_ignore = " \r\t\x0c"

# Newlines: keep the lexer's line counter in sync with the input.
def t_NEWLINE(t):
    r'\n+'
    # The raw string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement of the function.
    # Nothing is returned, so no NEWLINE token reaches the caller.
    t.lexer.lineno += t.value.count("\n")

# Operators
# PLY adds string-defined rules to its master regex in order of decreasing
# pattern length, so a longer operator such as '>>>' is tried before its
# prefixes '>>' and '>' regardless of the order of the assignments below.
t_PLUS = r'\+'
t_MINUS = r'-'
t_TIMES = r'\*'
t_DIVIDE = r'/'
t_MOD = r'%'
t_OR = r'\|'
t_AND = r'&'
t_NOT = r'~'
t_XOR = r'\^'
t_LSHIFT = r'<<'
t_RSHIFTRSHIFT = r'>>>'
t_RSHIFT = r'>>'
t_LOR = r'\|\|'
t_LAND = r'&&'
t_LNOT = r'!'
t_LT = r'<'
t_GT = r'>'
t_LE = r'<='
t_GE = r'>='
t_EQ = r'=='
t_NE = r'!='

# Assignment operators

t_EQUALS = r'='
t_TIMESEQUAL = r'\*='
t_DIVEQUAL = r'/='
t_MODEQUAL = r'%='
t_PLUSEQUAL = r'\+='
t_MINUSEQUAL = r'-='
t_LSHIFTEQUAL = r'<<='
t_RSHIFTRSHIFTEQUAL = r'>>>='
t_RSHIFTEQUAL = r'>>='
t_ANDEQUAL = r'&='
t_OREQUAL = r'\|='
# The caret must be escaped: a bare '^' is the start-of-string anchor, so the
# unescaped pattern could never match the two characters '^='.
t_XOREQUAL = r'\^='

# Increment/decrement
t_PLUSPLUS = r'\+\+'
t_MINUSMINUS = r'--'


# ?
t_CONDOP = r'\?'

# Delimiters
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LBRACKET = r'\['
t_RBRACKET = r'\]'
t_LBRACE = r'\{'
t_RBRACE = r'\}'
t_COMMA = r','
t_PERIOD = r'\.'
t_SEMI = r';'
t_COLON = r':'

# Identifiers and reserved words

# Maps each keyword's lowercase source spelling to its token type
# (e.g. 'while' -> 'WHILE'); t_ID() looks identifiers up here to tell
# keywords from ordinary names.
reserved_map = dict((name.lower(), name) for name in reserved)

def t_ID(t):
    r'[A-Za-z_][\w_]*'
    # The raw string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement of the function.
    # Keywords match the same pattern as identifiers: retag the token with the
    # keyword's type when its exact (case-sensitive) text is in reserved_map,
    # otherwise it is a plain ID.
    t.type = reserved_map.get(t.value, "ID")
    return t

# Integer literal: one or more decimal digits with an optional
# u/U, l/L, ul or lu suffix (any letter case)
t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

# Floating literal: either digits with a fractional part and an optional
# exponent, or digits with a mandatory exponent; an optional l/L or f/F
# suffix may follow. Only a lowercase 'e' introduces the exponent.
# NOTE(review): the spaces around '|' are part of the pattern text; this
# relies on PLY compiling rules with re.VERBOSE so that they are ignored --
# confirm against the PLY version in use.
t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

# String literal: double-quoted, may contain backslash escapes, and cannot
# contain a raw newline
t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

# Character constant 'c' or L'c'
t_CCONST = r'(L)?\'([^\\\n]|(\\.))*?\''

# Comments: /* ... */ blocks are consumed without producing a token.
def t_comment(t):
    r'/\*(.|\n)*?\*/'
    # The raw string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement of the function.
    # A comment may span several lines; count them so that later tokens still
    # report the correct line number.
    t.lexer.lineno += t.value.count('\n')

# Preprocessor directive (ignored): a '#' line is consumed up to and including
# its newline, and no token is produced.
def t_preprocessor(t):
    r'\#(.)*?\n'
    # The raw string above is the rule's regular expression (PLY reads it from
    # the docstring), so it must stay the first statement of the function.
    # The match always ends with exactly one newline, hence the fixed +1.
    t.lexer.lineno += 1

def t_error(t):
    # Error rule: report the first character of the unmatched input and skip
    # over it so that lexing resumes at the next character.
    # print is called with one parenthesised argument, which produces the same
    # output under Python 2 and Python 3.
    print("Illegal character %s" % repr(t.value[0]))
    t.lexer.skip(1)

# Sample program used to exercise the lexer.
# NOTE: this name shadows the builtin input(); it is kept unchanged so that
# anything referring to this module-level name keeps working.
input = """public class Test {

/**
* @param args
*/
public static void main(String[] args) {
int a = -8;
int b = a >>> 1;
System.out.println(b);

}

}"""

# Build the lexer from the rules in this module on every run.
# optimize=1 was removed on purpose: in optimized mode PLY loads previously
# generated tables from lextab.py instead of rebuilding them from the current
# rules. A lextab.py written before the '>>>' rule existed would therefore
# keep splitting '>>>' into '>>' and '>', which is the likely cause of the
# differing results on two machines with the same PLY version. If such a file
# exists next to this script, delete it.
lexer = lex.lex()
lexer.input(input)

# Print every token until the lexer reports end of input.
while True:
    tok = lexer.token()
    if not tok:
        break
    print(" " + str(tok))
Почему так?
shiza
почему что?
nerezus
>>> читается как >> и >
shiza
Запустил у себя:
….
LexToken(RSHIFTRSHIFT,'>>>',8,131)
….
Или я опять не понял вопроса =)
nerezus
LexToken(RSHIFT,'>>',8,131)
LexToken(GT,'>',8,133)

У меня так.
Какая версия библиотеки?
shiza
WinXP, python 2.5.2, ply 2.5.
nerezus
аналогично (только питон 2.5.1)
жесть)

что делать? кто юзал antlr?
shiza
интересно, а что он у тебя будет делать, если убрать из описания эти токены? (>> и >)
This is a "lo-fi" version of our main content. To view the full version with more information, formatting and images, please click here.
Powered by DjangoBB