Fix grammar for multiple entry points for compile mode

ncw · ncw · commit 0adc14aa2ede · 2014-11-10T18:03:36.000Z
diff --git a/ast/ast.go b/ast/ast.go
@@ -1,5 +1,8 @@
 package ast
 
+// FIXME make a base AstNode type which holds the position.
+// Also keep a list of children in the parent node to simplify walking the tree?
+
 import (
 	"fmt"
 
diff --git a/parser/grammar.y b/parser/grammar.y
@@ -12,21 +12,21 @@ import (
 %}
 
 %union{
-	str string
-	obj py.Object
-	ast ast.Ast
-	mod ast.Mod
-	stmt ast.Stmt
-	stmts []ast.Stmt
-	stmts1 []ast.Stmt // nl_or_stmt accumulator
-	stmts2 []ast.Stmt // small_stmts accumulator
-	stmts3 []ast.Stmt // stmts accumulator
-	pos ast.Pos // kept up to date by the lexer
+	str	string
+	obj	py.Object
+	ast	ast.Ast
+	mod	ast.Mod
+	stmt	ast.Stmt
+	stmts	[]ast.Stmt
+	stmts1	[]ast.Stmt	// nl_or_stmt accumulator
+	stmts2	[]ast.Stmt	// small_stmts accumulator
+	stmts3	[]ast.Stmt	// stmts accumulator
+	pos	ast.Pos		// kept up to date by the lexer
 }
 
 %type <str> strings
 %type <ast> atom
-%type <mod> inputs file_input
+%type <mod> inputs file_input single_input eval_input
 %type <stmts> simple_stmt stmt 
 %type <stmts1> nl_or_stmt 
 %type <stmts2> small_stmts
@@ -101,6 +101,8 @@ import (
 
 %token '(' ')' '[' ']' ':' ',' ';' '+' '-' '*' '/' '|' '&' '<' '>' '=' '.' '%' '{' '}' '^' '~' '@'
 
+%token SINGLE_INPUT FILE_INPUT EVAL_INPUT
+
 // Note: Changing the grammar specified in this file will most likely
 // require corresponding changes in the parser module
 // (../Modules/parsermodule.c). If you can't make the changes to
@@ -119,18 +121,45 @@ import (
 
 %%
 
-// FIXME figure out how to tell the parser to start from a given node
-// inputs: single_input | file_input | eval_input
-// In the mean time just do file_input
-// inputs: single_input | file_input | eval_input
+// Start of grammar. The lexer emits one of 3 pseudo tokens first, which
+// selects the entry point (single, file or eval input) we parse from.
 inputs:
-	file_input
+	SINGLE_INPUT single_input
+	{
+		yylex.(*yyLex).mod = $2
+		return 0
+	}
+|	FILE_INPUT file_input
+	{
+		yylex.(*yyLex).mod = $2
+		return 0
+	}
+|	EVAL_INPUT eval_input
 	{
-		yylex.(*yyLex).mod = $1
+		yylex.(*yyLex).mod = $2
 		return 0
 	}
 
-single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
+single_input:
+	NEWLINE
+	{
+		$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}}
+	}
+|	simple_stmt
+	{
+		$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}, Body: $1}
+	}
+|	compound_stmt NEWLINE
+	{
+		$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}, Body: []ast.Stmt{$1}}
+	}
+
+//file_input: (NEWLINE | stmt)* ENDMARKER
+file_input:
+	nl_or_stmt ENDMARKER
+	{
+		$$ = &ast.Module{ModBase: ast.ModBase{$<pos>$}, Body: $1}
+	}
 
 // (NEWLINE | stmt)*
 nl_or_stmt:
@@ -145,19 +174,15 @@ nl_or_stmt:
 		$$ = append($$, $2...)
 	}
 
-//file_input: (NEWLINE | stmt)* ENDMARKER
-file_input:
-	nl_or_stmt ENDMARKER
+//eval_input: testlist NEWLINE* ENDMARKER
+eval_input:
+	testlist nls ENDMARKER
 	{
-		$$ = &ast.Module{ModBase: ast.ModBase{$<pos>$}, Body: $1}
 	}
 
 // NEWLINE*
 nls: | nls NEWLINE
 
-//eval_input: testlist NEWLINE* ENDMARKER
-eval_input: testlist nls ENDMARKER
-
 optional_arglist: | arglist
 
 optional_arglist_call: | '(' optional_arglist ')'
diff --git a/parser/lexer.go b/parser/lexer.go
@@ -46,15 +46,33 @@ type yyLex struct{
 	parenthesis int // number of open ( )
 	brace int // number of open{}
 	mod ast.Mod // output
+	startToken int // initial token to output
 }
 
-func NewLex(r io.Reader) *yyLex{
+// NewLex creates a new lexer reading from r.
+//
+// The mode argument specifies what kind of code is to be compiled:
+// 'exec' if the source consists of a sequence of statements, 'eval'
+// if it consists of a single expression, or 'single' if it consists
+// of a single interactive statement. Other modes return a ValueError.
+func NewLex(r io.Reader, mode string) (*yyLex, error){
 	x := &yyLex{
 		reader: bufio.NewReader(r),
 		indentStack: []int{0},
 		state: readString,
 	}
-	return x
+	switch mode{
+	case "exec":
+		x.startToken = FILE_INPUT
+	case "eval":
+		x.startToken = EVAL_INPUT
+	case "single":
+		x.startToken = SINGLE_INPUT
+		x.interactive = true
+	default:
+		return nil, py.ExceptionNewf(py.ValueError, "compile mode must be 'exec', 'eval' or 'single'")
+	}
+	return x, nil
 }
 
 // Refill line
@@ -222,6 +240,9 @@ func init(){
 	tokenToString[DEDENT] = "DEDENT"
 	tokenToString[STRING] = "STRING"
 	tokenToString[NUMBER] = "NUMBER"
+	tokenToString[FILE_INPUT] = "FILE_INPUT"
+	tokenToString[SINGLE_INPUT] = "SINGLE_INPUT"
+	tokenToString[EVAL_INPUT] = "EVAL_INPUT"
 }
 
 // True if there are any open brackets
@@ -309,6 +330,13 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int){
 		defer func(){fmt.Printf("LEX> %v\n", newLexToken(ret, yylval)) }()
 	}
 
+	// Return initial token
+	if x.startToken != eof{
+		token := x.startToken
+		x.startToken = eof
+		return token
+	}
+
 	// FIXME keep x.pos up to date
 	x.pos.Lineno = 42
 	x.pos.ColOffset = 43
@@ -318,13 +346,14 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int){
 			// Read x.line
 			x.refill()
 			x.state++
-			// an empty line while reading interactive input should return a NEWLINE
-			if x.interactive && (x.line == "" || x.line == "\n"){
+			if x.line == "" && x.eof{
+				x.state = checkEof
+				// reaching the end of interactive input should return a final NEWLINE
 				// Don't output NEWLINE if brackets are open
-				if x.openBrackets(){
-					continue
+				if x.interactive && !x.openBrackets(){
+					return NEWLINE
 				}
-				return NEWLINE
+				continue
 			}
 		case readIndent:
 			// Read the initial indent and get rid of it
@@ -799,20 +828,26 @@ func SetDebug(level int){
 }
 
 // Parse a file
-func Parse(in io.Reader) (ast.Mod, error){
-	lex := NewLex(in)
+func Parse(in io.Reader, mode string) (ast.Mod, error){
+	lex, err := NewLex(in, mode)
+	if err != nil{
+		return nil, err
+	}
 	yyParse(lex)
 	return lex.mod, lex.ErrorReturn()
 }
 
 // Parse a string
-func ParseString(in string) (ast.Ast, error){
-	return Parse(bytes.NewBufferString(in))
+func ParseString(in string, mode string) (ast.Ast, error){
+	return Parse(bytes.NewBufferString(in), mode)
 }
 
 // Lex a file only, returning a sequence of tokens
-func Lex(in io.Reader) (lts LexTokens, err error){
-	lex := NewLex(in)
+func Lex(in io.Reader, mode string) (lts LexTokens, err error){
+	lex, err := NewLex(in, mode)
+	if err != nil{
+		return nil, err
+	}
 	yylval := yySymType{}
 	for{
 		ret := lex.Lex(&yylval)
@@ -827,6 +862,6 @@ func Lex(in io.Reader) (lts LexTokens, err error){
 }
 
 // Lex a string
-func LexString(in string) (lts LexTokens, err error){
-	return Lex(bytes.NewBufferString(in))
+func LexString(in string, mode string) (lts LexTokens, err error){
+	return Lex(bytes.NewBufferString(in), mode)
 }
diff --git a/parser/lexer_test.go b/parser/lexer_test.go

-Original file line number
+Diff line change
 %}
 %union{
 - strstring
 - objpy.Object
 - astast.Ast
 - modast.Mod
 - stmtast.Stmt
 - stmts[]ast.Stmt
 - stmts1[]ast.Stmt// nl_or_stmt accumulator
 - stmts2[]ast.Stmt// small_stmts accumulator
 - stmts3[]ast.Stmt// stmts accumulator
 - posast.Pos// kept up to date by the lexer
 + strstring
 + objpy.Object
 + astast.Ast
 + modast.Mod
 + stmtast.Stmt
 + stmts[]ast.Stmt
 + stmts1[]ast.Stmt// nl_or_stmt accumulator
 + stmts2[]ast.Stmt// small_stmts accumulator
 + stmts3[]ast.Stmt// stmts accumulator
 + posast.Pos// kept up to date by the lexer
+}
 %type<str>strings
 %type<ast>atom
 -%type<mod>inputsfile_input
 +%type<mod>inputsfile_inputsingle_inputeval_input
 %type<stmts>simple_stmtstmt
 %type<stmts1>nl_or_stmt
 %type<stmts2>small_stmts
 %token'('')''['']'':'','';''+''-''*''/''|''&''<''>''=''.''%''{''}''^''~''@'
 +%tokenSINGLE_INPUTFILE_INPUTEVAL_INPUT
++
 // Note: Changing the grammar specified in this file will most likely
 // require corresponding changes in the parser module
 // (../Modules/parsermodule.c). If you can't make the changes to
 %%
 -// FIXME figure out how to tell the parser to start from a given node
 -// inputs: single_input | file_input | eval_input
 -// In the mean time just do file_input
 -// inputs: single_input | file_input | eval_input
 +// Start of grammar. This has 3 pseudo tokens which say which
 +// direction through the rest of the grammar we take.
 inputs:
 -file_input
 +SINGLE_INPUTsingle_input
 +{
 + yylex.(*yyLex).mod = $2
 +return0
 + }
 +|FILE_INPUTfile_input
 +{
 + yylex.(*yyLex).mod = $2
 +return0
 + }
 +|EVAL_INPUTeval_input
+{
 - yylex.(*yyLex).mod = $1
 + yylex.(*yyLex).mod = $2
 return0
+ }
 -single_input: NEWLINE|simple_stmt|compound_stmtNEWLINE
 +single_input:
 +NEWLINE
 +{
 +$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}}
 + }
 +|simple_stmt
 +{
 +$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}, Body: $1}
 + }
 +|compound_stmtNEWLINE
 +{
 +$$ = &ast.Interactive{ModBase: ast.ModBase{$<pos>$}, Body: []ast.Stmt{$1}}
 + }
++
 +//file_input: (NEWLINE|stmt)* ENDMARKER
 +file_input:
 +nl_or_stmtENDMARKER
 +{
 +$$ = &ast.Module{ModBase: ast.ModBase{$<pos>$}, Body: $1}
 + }
 // (NEWLINE|stmt)*
 nl_or_stmt:
 $$ = append($$, $2...)
+ }
 -//file_input: (NEWLINE|stmt)* ENDMARKER
 -file_input:
 -nl_or_stmtENDMARKER
 +//eval_input: testlistNEWLINE* ENDMARKER
 +eval_input:
 +testlistnlsENDMARKER
+{
 -$$ = &ast.Module{ModBase: ast.ModBase{$<pos>$}, Body: $1}
+ }
 // NEWLINE*
 nls: |nlsNEWLINE
 -//eval_input: testlistNEWLINE* ENDMARKER
 -eval_input: testlistnlsENDMARKER
+-
 optional_arglist: |arglist
 optional_arglist_call: |'('optional_arglist')'
-Original file line number
+Diff line change
 parenthesisint// number of open ( )
 braceint// number of open{}
 mod ast.Mod// output
 +startTokenint// initial token to output
+}
 -funcNewLex(r io.Reader) *yyLex{
 +// Create a new lexer
 +//
 +// The mode argument specifies what kind of code must be compiled; it
 +// can be 'exec' if source consists of a sequence of statements,
 +// 'eval' if it consists of a single expression, or 'single' if it
 +// consists of a single interactive statement
 +funcNewLex(r io.Reader, modestring) (*yyLex, error){
 x:=&yyLex{
 reader: bufio.NewReader(r),
 indentStack: []int{0},
 state: readString,
+ }
 -returnx
 +switchmode{
 +case"exec":
 +x.startToken=FILE_INPUT
 +case"eval":
 +x.startToken=EVAL_INPUT
 +case"single":
 +x.startToken=SINGLE_INPUT
 +x.interactive=true
 +default:
 +returnnil, py.ExceptionNewf(py.ValueError, "compile mode must be 'exec', 'eval' or 'single'")
 + }
 +returnx, nil
+}
 // Refill line
 tokenToString[DEDENT] ="DEDENT"
 tokenToString[STRING] ="STRING"
 tokenToString[NUMBER] ="NUMBER"
 +tokenToString[FILE_INPUT] ="FILE_INPUT"
 +tokenToString[SINGLE_INPUT] ="SINGLE_INPUT"
 +tokenToString[EVAL_INPUT] ="EVAL_INPUT"
+}
 // True if there are any open brackets
 deferfunc(){fmt.Printf("LEX> %v\n", newLexToken(ret, yylval)) }()
+ }
 +// Return initial token
 +ifx.startToken!=eof{
 +token:=x.startToken
 +x.startToken=eof
 +returntoken
 + }
++
 // FIXME keep x.pos up to date
 x.pos.Lineno=42
 x.pos.ColOffset=43
 // Read x.line
 x.refill()
 x.state++
 -// an empty line while reading interactive input should return a NEWLINE
 -ifx.interactive&& (x.line==""||x.line=="\n"){
 +ifx.line==""&&x.eof{
 +x.state=checkEof
 +// an empty line while reading interactive input should return a NEWLINE
 // Don't output NEWLINE if brackets are open
 -ifx.openBrackets(){
 -continue
 +ifx.interactive&&!x.openBrackets(){
 +returnNEWLINE
+ }
 -returnNEWLINE
 +continue
+ }
 casereadIndent:
 // Read the initial indent and get rid of it
+}
 // Parse a file
 -funcParse(in io.Reader) (ast.Mod, error){
 -lex:=NewLex(in)
 +funcParse(in io.Reader, modestring) (ast.Mod, error){
 +lex, err:=NewLex(in, mode)
 +iferr!=nil{
 +returnnil, err
 + }
 yyParse(lex)
 returnlex.mod, lex.ErrorReturn()
+}
 // Parse a string
 -funcParseString(instring) (ast.Ast, error){
 -returnParse(bytes.NewBufferString(in))
 +funcParseString(instring, modestring) (ast.Ast, error){
 +returnParse(bytes.NewBufferString(in), mode)
+}
 // Lex a file only, returning a sequence of tokens
 -funcLex(in io.Reader) (ltsLexTokens, errerror){
 -lex:=NewLex(in)
 +funcLex(in io.Reader, modestring) (ltsLexTokens, errerror){
 +lex, err:=NewLex(in, mode)
 +iferr!=nil{
 +returnnil, err
 + }
 yylval:=yySymType{}
 for{
 ret:=lex.Lex(&yylval)
+}
 // Lex a string
 -funcLexString(instring) (ltsLexTokens, errerror){
 -returnLex(bytes.NewBufferString(in))
 +funcLexString(instring, modestring) (ltsLexTokens, errerror){
 +returnLex(bytes.NewBufferString(in), mode)
+}