mirror of https://github.com/ethereum/go-ethereum
cmd/evm, core/asm: add EVM assembler (#3686)
The evm compile command implements a simple assembly language that compiles to EVM bytecode.pull/3729/head
parent
7ff75ac2f2
commit
230cf2ec91
@ -0,0 +1,55 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of go-ethereum.
|
||||
//
|
||||
// go-ethereum is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// go-ethereum is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
"io/ioutil" |
||||
|
||||
"github.com/ethereum/go-ethereum/cmd/evm/internal/compiler" |
||||
|
||||
cli "gopkg.in/urfave/cli.v1" |
||||
) |
||||
|
||||
var compileCommand = cli.Command{ |
||||
Action: compileCmd, |
||||
Name: "compile", |
||||
Usage: "compiles easm source to evm binary", |
||||
ArgsUsage: "<file>", |
||||
} |
||||
|
||||
func compileCmd(ctx *cli.Context) error { |
||||
debug := ctx.GlobalBool(DebugFlag.Name) |
||||
|
||||
if len(ctx.Args().First()) == 0 { |
||||
return errors.New("filename required") |
||||
} |
||||
|
||||
fn := ctx.Args().First() |
||||
src, err := ioutil.ReadFile(fn) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
|
||||
bin, err := compiler.Compile(fn, src, debug) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
fmt.Println(bin) |
||||
return nil |
||||
} |
@ -0,0 +1,39 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of go-ethereum.
|
||||
//
|
||||
// go-ethereum is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// go-ethereum is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package compiler |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
|
||||
"github.com/ethereum/go-ethereum/core/asm" |
||||
) |
||||
|
||||
func Compile(fn string, src []byte, debug bool) (string, error) { |
||||
compiler := asm.NewCompiler(debug) |
||||
compiler.Feed(asm.Lex(fn, src, debug)) |
||||
|
||||
bin, compileErrors := compiler.Compile() |
||||
if len(compileErrors) > 0 { |
||||
// report errors
|
||||
for _, err := range compileErrors { |
||||
fmt.Printf("%s:%v\n", fn, err) |
||||
} |
||||
return "", errors.New("compiling failed") |
||||
} |
||||
return bin, nil |
||||
} |
@ -0,0 +1,145 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of go-ethereum.
|
||||
//
|
||||
// go-ethereum is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// go-ethereum is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with go-ethereum. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package main |
||||
|
||||
import ( |
||||
"bytes" |
||||
"fmt" |
||||
"io/ioutil" |
||||
"os" |
||||
"time" |
||||
|
||||
goruntime "runtime" |
||||
|
||||
"github.com/ethereum/go-ethereum/cmd/utils" |
||||
"github.com/ethereum/go-ethereum/common" |
||||
"github.com/ethereum/go-ethereum/core/state" |
||||
"github.com/ethereum/go-ethereum/core/vm" |
||||
"github.com/ethereum/go-ethereum/core/vm/runtime" |
||||
"github.com/ethereum/go-ethereum/ethdb" |
||||
"github.com/ethereum/go-ethereum/log" |
||||
cli "gopkg.in/urfave/cli.v1" |
||||
) |
||||
|
||||
var runCommand = cli.Command{ |
||||
Action: runCmd, |
||||
Name: "run", |
||||
Usage: "run arbitrary evm binary", |
||||
ArgsUsage: "<code>", |
||||
Description: `The run command runs arbitrary EVM code.`, |
||||
} |
||||
|
||||
func runCmd(ctx *cli.Context) error { |
||||
glogger := log.NewGlogHandler(log.StreamHandler(os.Stderr, log.TerminalFormat(false))) |
||||
glogger.Verbosity(log.Lvl(ctx.GlobalInt(VerbosityFlag.Name))) |
||||
log.Root().SetHandler(glogger) |
||||
|
||||
var ( |
||||
db, _ = ethdb.NewMemDatabase() |
||||
statedb, _ = state.New(common.Hash{}, db) |
||||
sender = common.StringToAddress("sender") |
||||
logger = vm.NewStructLogger(nil) |
||||
tstart = time.Now() |
||||
) |
||||
statedb.CreateAccount(sender) |
||||
|
||||
var ( |
||||
code []byte |
||||
ret []byte |
||||
err error |
||||
) |
||||
if ctx.GlobalString(CodeFlag.Name) != "" { |
||||
code = common.Hex2Bytes(ctx.GlobalString(CodeFlag.Name)) |
||||
} else { |
||||
var hexcode []byte |
||||
if ctx.GlobalString(CodeFileFlag.Name) != "" { |
||||
var err error |
||||
hexcode, err = ioutil.ReadFile(ctx.GlobalString(CodeFileFlag.Name)) |
||||
if err != nil { |
||||
fmt.Printf("Could not load code from file: %v\n", err) |
||||
os.Exit(1) |
||||
} |
||||
} else { |
||||
var err error |
||||
hexcode, err = ioutil.ReadAll(os.Stdin) |
||||
if err != nil { |
||||
fmt.Printf("Could not load code from stdin: %v\n", err) |
||||
os.Exit(1) |
||||
} |
||||
} |
||||
code = common.Hex2Bytes(string(bytes.TrimRight(hexcode, "\n"))) |
||||
} |
||||
|
||||
if ctx.GlobalBool(CreateFlag.Name) { |
||||
input := append(code, common.Hex2Bytes(ctx.GlobalString(InputFlag.Name))...) |
||||
ret, _, err = runtime.Create(input, &runtime.Config{ |
||||
Origin: sender, |
||||
State: statedb, |
||||
GasLimit: ctx.GlobalUint64(GasFlag.Name), |
||||
GasPrice: utils.GlobalBig(ctx, PriceFlag.Name), |
||||
Value: utils.GlobalBig(ctx, ValueFlag.Name), |
||||
EVMConfig: vm.Config{ |
||||
Tracer: logger, |
||||
Debug: ctx.GlobalBool(DebugFlag.Name), |
||||
DisableGasMetering: ctx.GlobalBool(DisableGasMeteringFlag.Name), |
||||
}, |
||||
}) |
||||
} else { |
||||
receiver := common.StringToAddress("receiver") |
||||
statedb.SetCode(receiver, code) |
||||
|
||||
ret, err = runtime.Call(receiver, common.Hex2Bytes(ctx.GlobalString(InputFlag.Name)), &runtime.Config{ |
||||
Origin: sender, |
||||
State: statedb, |
||||
GasLimit: ctx.GlobalUint64(GasFlag.Name), |
||||
GasPrice: utils.GlobalBig(ctx, PriceFlag.Name), |
||||
Value: utils.GlobalBig(ctx, ValueFlag.Name), |
||||
EVMConfig: vm.Config{ |
||||
Tracer: logger, |
||||
Debug: ctx.GlobalBool(DebugFlag.Name), |
||||
DisableGasMetering: ctx.GlobalBool(DisableGasMeteringFlag.Name), |
||||
}, |
||||
}) |
||||
} |
||||
vmdone := time.Since(tstart) |
||||
|
||||
if ctx.GlobalBool(DumpFlag.Name) { |
||||
statedb.Commit(true) |
||||
fmt.Println(string(statedb.Dump())) |
||||
} |
||||
vm.StdErrFormat(logger.StructLogs()) |
||||
|
||||
if ctx.GlobalBool(SysStatFlag.Name) { |
||||
var mem goruntime.MemStats |
||||
goruntime.ReadMemStats(&mem) |
||||
fmt.Printf("vm took %v\n", vmdone) |
||||
fmt.Printf(`alloc: %d |
||||
tot alloc: %d |
||||
no. malloc: %d |
||||
heap alloc: %d |
||||
heap objs: %d |
||||
num gc: %d |
||||
`, mem.Alloc, mem.TotalAlloc, mem.Mallocs, mem.HeapAlloc, mem.HeapObjects, mem.NumGC) |
||||
} |
||||
|
||||
fmt.Printf("OUT: 0x%x", ret) |
||||
if err != nil { |
||||
fmt.Printf(" error: %v", err) |
||||
} |
||||
fmt.Println() |
||||
return nil |
||||
} |
@ -0,0 +1,281 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import ( |
||||
"errors" |
||||
"fmt" |
||||
"math/big" |
||||
"os" |
||||
"strings" |
||||
|
||||
"github.com/ethereum/go-ethereum/common/math" |
||||
"github.com/ethereum/go-ethereum/core/vm" |
||||
) |
||||
|
||||
// Compiler contains information about the parsed source
|
||||
// and holds the tokens for the program.
|
||||
type Compiler struct { |
||||
tokens []token |
||||
binary []interface{} |
||||
|
||||
labels map[string]int |
||||
|
||||
pc, pos int |
||||
|
||||
debug bool |
||||
} |
||||
|
||||
// newCompiler returns a new allocated compiler.
|
||||
func NewCompiler(debug bool) *Compiler { |
||||
return &Compiler{ |
||||
labels: make(map[string]int), |
||||
debug: debug, |
||||
} |
||||
} |
||||
|
||||
// Feed feeds tokens in to ch and are interpreted by
|
||||
// the compiler.
|
||||
//
|
||||
// feed is the first pass in the compile stage as it
|
||||
// collect the used labels in the program and keeps a
|
||||
// program counter which is used to determine the locations
|
||||
// of the jump dests. The labels can than be used in the
|
||||
// second stage to push labels and determine the right
|
||||
// position.
|
||||
func (c *Compiler) Feed(ch <-chan token) { |
||||
for i := range ch { |
||||
switch i.typ { |
||||
case number: |
||||
num := math.MustParseBig256(i.text).Bytes() |
||||
if len(num) == 0 { |
||||
num = []byte{0} |
||||
} |
||||
c.pc += len(num) |
||||
case stringValue: |
||||
c.pc += len(i.text) - 2 |
||||
case element: |
||||
c.pc++ |
||||
case labelDef: |
||||
c.labels[i.text] = c.pc |
||||
c.pc++ |
||||
case label: |
||||
c.pc += 5 |
||||
} |
||||
|
||||
c.tokens = append(c.tokens, i) |
||||
} |
||||
if c.debug { |
||||
fmt.Fprintln(os.Stderr, "found", len(c.labels), "labels") |
||||
} |
||||
} |
||||
|
||||
// Compile compiles the current tokens and returns a
|
||||
// binary string that can be interpreted by the EVM
|
||||
// and an error if it failed.
|
||||
//
|
||||
// compile is the second stage in the compile phase
|
||||
// which compiles the tokens to EVM instructions.
|
||||
func (c *Compiler) Compile() (string, []error) { |
||||
var errors []error |
||||
// continue looping over the tokens until
|
||||
// the stack has been exhausted.
|
||||
for c.pos < len(c.tokens) { |
||||
if err := c.compileLine(); err != nil { |
||||
errors = append(errors, err) |
||||
} |
||||
} |
||||
|
||||
// turn the binary to hex
|
||||
var bin string |
||||
for _, v := range c.binary { |
||||
switch v := v.(type) { |
||||
case vm.OpCode: |
||||
bin += fmt.Sprintf("%x", []byte{byte(v)}) |
||||
case []byte: |
||||
bin += fmt.Sprintf("%x", v) |
||||
} |
||||
} |
||||
return bin, errors |
||||
} |
||||
|
||||
// next returns the next token and increments the
|
||||
// posititon.
|
||||
func (c *Compiler) next() token { |
||||
token := c.tokens[c.pos] |
||||
c.pos++ |
||||
return token |
||||
} |
||||
|
||||
// compile line compiles a single line instruction e.g.
|
||||
// "push 1", "jump @labal".
|
||||
func (c *Compiler) compileLine() error { |
||||
n := c.next() |
||||
if n.typ != lineStart { |
||||
return compileErr(n, n.typ.String(), lineStart.String()) |
||||
} |
||||
|
||||
lvalue := c.next() |
||||
switch lvalue.typ { |
||||
case eof: |
||||
return nil |
||||
case element: |
||||
if err := c.compileElement(lvalue); err != nil { |
||||
return err |
||||
} |
||||
case labelDef: |
||||
c.compileLabel() |
||||
case lineEnd: |
||||
return nil |
||||
default: |
||||
return compileErr(lvalue, lvalue.text, fmt.Sprintf("%v or %v", labelDef, element)) |
||||
} |
||||
|
||||
if n := c.next(); n.typ != lineEnd { |
||||
return compileErr(n, n.text, lineEnd.String()) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// compileNumber compiles the number to bytes
|
||||
func (c *Compiler) compileNumber(element token) (int, error) { |
||||
num := math.MustParseBig256(element.text).Bytes() |
||||
if len(num) == 0 { |
||||
num = []byte{0} |
||||
} |
||||
c.pushBin(num) |
||||
return len(num), nil |
||||
} |
||||
|
||||
// compileElement compiles the element (push & label or both)
|
||||
// to a binary representation and may error if incorrect statements
|
||||
// where fed.
|
||||
func (c *Compiler) compileElement(element token) error { |
||||
// check for a jump. jumps must be read and compiled
|
||||
// from right to left.
|
||||
if isJump(element.text) { |
||||
rvalue := c.next() |
||||
switch rvalue.typ { |
||||
case number: |
||||
// TODO figure out how to return the error properly
|
||||
c.compileNumber(rvalue) |
||||
case stringValue: |
||||
// strings are quoted, remove them.
|
||||
c.pushBin(rvalue.text[1 : len(rvalue.text)-2]) |
||||
case label: |
||||
c.pushBin(vm.PUSH4) |
||||
pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes() |
||||
pos = append(make([]byte, 4-len(pos)), pos...) |
||||
c.pushBin(pos) |
||||
default: |
||||
return compileErr(rvalue, rvalue.text, "number, string or label") |
||||
} |
||||
// push the operation
|
||||
c.pushBin(toBinary(element.text)) |
||||
return nil |
||||
} else if isPush(element.text) { |
||||
// handle pushes. pushes are read from left to right.
|
||||
var value []byte |
||||
|
||||
rvalue := c.next() |
||||
switch rvalue.typ { |
||||
case number: |
||||
value = math.MustParseBig256(rvalue.text).Bytes() |
||||
if len(value) == 0 { |
||||
value = []byte{0} |
||||
} |
||||
case stringValue: |
||||
value = []byte(rvalue.text[1 : len(rvalue.text)-1]) |
||||
case label: |
||||
value = make([]byte, 4) |
||||
copy(value, big.NewInt(int64(c.labels[rvalue.text])).Bytes()) |
||||
default: |
||||
return compileErr(rvalue, rvalue.text, "number, string or label") |
||||
} |
||||
|
||||
if len(value) > 32 { |
||||
return fmt.Errorf("%d type error: unsupported string or number with size > 32", rvalue.lineno) |
||||
} |
||||
|
||||
c.pushBin(vm.OpCode(int(vm.PUSH1) - 1 + len(value))) |
||||
c.pushBin(value) |
||||
} else { |
||||
c.pushBin(toBinary(element.text)) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// compileLabel pushes a jumpdest to the binary slice.
|
||||
func (c *Compiler) compileLabel() { |
||||
c.pushBin(vm.JUMPDEST) |
||||
} |
||||
|
||||
// pushBin pushes the value v to the binary stack.
|
||||
func (c *Compiler) pushBin(v interface{}) { |
||||
if c.debug { |
||||
fmt.Printf("%d: %v\n", len(c.binary), v) |
||||
} |
||||
c.binary = append(c.binary, v) |
||||
} |
||||
|
||||
// isPush returns whether the string op is either any of
|
||||
// push(N).
|
||||
func isPush(op string) bool { |
||||
if op == "push" { |
||||
return true |
||||
} |
||||
return false |
||||
} |
||||
|
||||
// isJump returns whether the string op is jump(i)
|
||||
func isJump(op string) bool { |
||||
return op == "jumpi" || op == "jump" |
||||
} |
||||
|
||||
// toBinary converts text to a vm.OpCode
|
||||
func toBinary(text string) vm.OpCode { |
||||
if isPush(text) { |
||||
text = "push1" |
||||
} |
||||
return vm.StringToOp(strings.ToUpper(text)) |
||||
} |
||||
|
||||
type compileError struct { |
||||
got string |
||||
want string |
||||
|
||||
lineno int |
||||
} |
||||
|
||||
func (err compileError) Error() string { |
||||
return fmt.Sprintf("%d syntax error: unexpected %v, expected %v", err.lineno, err.got, err.want) |
||||
} |
||||
|
||||
var ( |
||||
errExpBol = errors.New("expected beginning of line") |
||||
errExpElementOrLabel = errors.New("expected beginning of line") |
||||
) |
||||
|
||||
func compileErr(c token, got, want string) error { |
||||
return compileError{ |
||||
got: got, |
||||
want: want, |
||||
lineno: c.lineno, |
||||
} |
||||
} |
@ -0,0 +1,36 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import "testing" |
||||
|
||||
func lexAll(src string) []token { |
||||
ch := Lex("test.asm", []byte(src), false) |
||||
|
||||
var tokens []token |
||||
for i := range ch { |
||||
tokens = append(tokens, i) |
||||
} |
||||
return tokens |
||||
} |
||||
|
||||
func TestComment(t *testing.T) { |
||||
tokens := lexAll(";; this is a comment") |
||||
if len(tokens) != 2 { // {new line, EOF}
|
||||
t.Error("expected no tokens") |
||||
} |
||||
} |
@ -0,0 +1,291 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import ( |
||||
"fmt" |
||||
"os" |
||||
"strings" |
||||
"unicode" |
||||
"unicode/utf8" |
||||
) |
||||
|
||||
// stateFn is used through the lifetime of the
|
||||
// lexer to parse the different values at the
|
||||
// current state.
|
||||
type stateFn func(*lexer) stateFn |
||||
|
||||
// token is emitted when the lexer has discovered
|
||||
// a new parsable token. These are delivered over
|
||||
// the tokens channels of the lexer
|
||||
type token struct { |
||||
typ tokenType |
||||
lineno int |
||||
text string |
||||
} |
||||
|
||||
// tokenType are the different types the lexer
|
||||
// is able to parse and return.
|
||||
type tokenType int |
||||
|
||||
const ( |
||||
eof tokenType = iota // end of file
|
||||
lineStart // emitted when a line starts
|
||||
lineEnd // emitted when a line ends
|
||||
invalidStatement // any invalid statement
|
||||
element // any element during element parsing
|
||||
label // label is emitted when a labal is found
|
||||
labelDef // label definition is emitted when a new label is found
|
||||
number // number is emitted when a number is found
|
||||
stringValue // stringValue is emitted when a string has been found
|
||||
|
||||
Numbers = "1234567890" // characters representing any decimal number
|
||||
HexadecimalNumbers = Numbers + "aAbBcCdDeEfF" // characters representing any hexadecimal
|
||||
Alpha = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // characters representing alphanumeric
|
||||
) |
||||
|
||||
// String implements stringer
|
||||
func (it tokenType) String() string { |
||||
if int(it) > len(stringtokenTypes) { |
||||
return "invalid" |
||||
} |
||||
return stringtokenTypes[it] |
||||
} |
||||
|
||||
var stringtokenTypes = []string{ |
||||
eof: "EOF", |
||||
invalidStatement: "invalid statement", |
||||
element: "element", |
||||
lineEnd: "end of line", |
||||
lineStart: "new line", |
||||
label: "label", |
||||
labelDef: "label definition", |
||||
number: "number", |
||||
stringValue: "string", |
||||
} |
||||
|
||||
// lexer is the basic construct for parsing
|
||||
// source code and turning them in to tokens.
|
||||
// Tokens are interpreted by the compiler.
|
||||
type lexer struct { |
||||
input string // input contains the source code of the program
|
||||
|
||||
tokens chan token // tokens is used to deliver tokens to the listener
|
||||
state stateFn // the current state function
|
||||
|
||||
lineno int // current line number in the source file
|
||||
start, pos, width int // positions for lexing and returning value
|
||||
|
||||
debug bool // flag for triggering debug output
|
||||
} |
||||
|
||||
// lex lexes the program by name with the given source. It returns a
|
||||
// channel on which the tokens are delivered.
|
||||
func Lex(name string, source []byte, debug bool) <-chan token { |
||||
ch := make(chan token) |
||||
l := &lexer{ |
||||
input: string(source), |
||||
tokens: ch, |
||||
state: lexLine, |
||||
debug: debug, |
||||
} |
||||
go func() { |
||||
l.emit(lineStart) |
||||
for l.state != nil { |
||||
l.state = l.state(l) |
||||
} |
||||
l.emit(eof) |
||||
close(l.tokens) |
||||
}() |
||||
|
||||
return ch |
||||
} |
||||
|
||||
// next returns the next rune in the program's source.
|
||||
func (l *lexer) next() (rune rune) { |
||||
if l.pos >= len(l.input) { |
||||
l.width = 0 |
||||
return 0 |
||||
} |
||||
rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) |
||||
l.pos += l.width |
||||
return rune |
||||
} |
||||
|
||||
// backup backsup the last parsed element (multi-character)
|
||||
func (l *lexer) backup() { |
||||
l.pos -= l.width |
||||
} |
||||
|
||||
// peek returns the next rune but does not advance the seeker
|
||||
func (l *lexer) peek() rune { |
||||
r := l.next() |
||||
l.backup() |
||||
return r |
||||
} |
||||
|
||||
// ignore advances the seeker and ignores the value
|
||||
func (l *lexer) ignore() { |
||||
l.start = l.pos |
||||
} |
||||
|
||||
// Accepts checks whether the given input matches the next rune
|
||||
func (l *lexer) accept(valid string) bool { |
||||
if strings.IndexRune(valid, l.next()) >= 0 { |
||||
return true |
||||
} |
||||
|
||||
l.backup() |
||||
|
||||
return false |
||||
} |
||||
|
||||
// acceptRun will continue to advance the seeker until valid
|
||||
// can no longer be met.
|
||||
func (l *lexer) acceptRun(valid string) { |
||||
for strings.IndexRune(valid, l.next()) >= 0 { |
||||
} |
||||
l.backup() |
||||
} |
||||
|
||||
// acceptRunUntil is the inverse of acceptRun and will continue
|
||||
// to advance the seeker until the rune has been found.
|
||||
func (l *lexer) acceptRunUntil(until rune) bool { |
||||
// Continues running until a rune is found
|
||||
for i := l.next(); strings.IndexRune(string(until), i) == -1; i = l.next() { |
||||
if i == 0 { |
||||
return false |
||||
} |
||||
} |
||||
|
||||
return true |
||||
} |
||||
|
||||
// blob returns the current value
|
||||
func (l *lexer) blob() string { |
||||
return l.input[l.start:l.pos] |
||||
} |
||||
|
||||
// Emits a new token on to token channel for processing
|
||||
func (l *lexer) emit(t tokenType) { |
||||
token := token{t, l.lineno, l.blob()} |
||||
|
||||
if l.debug { |
||||
fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text) |
||||
} |
||||
|
||||
l.tokens <- token |
||||
l.start = l.pos |
||||
} |
||||
|
||||
// lexLine is state function for lexing lines
|
||||
func lexLine(l *lexer) stateFn { |
||||
for { |
||||
switch r := l.next(); { |
||||
case r == '\n': |
||||
l.emit(lineEnd) |
||||
l.ignore() |
||||
l.lineno++ |
||||
|
||||
l.emit(lineStart) |
||||
case r == ';' && l.peek() == ';': |
||||
return lexComment |
||||
case isSpace(r): |
||||
l.ignore() |
||||
case isAlphaNumeric(r) || r == '_': |
||||
return lexElement |
||||
case isNumber(r): |
||||
return lexNumber |
||||
case r == '@': |
||||
l.ignore() |
||||
return lexLabel |
||||
case r == '"': |
||||
return lexInsideString |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
} |
||||
|
||||
// lexComment parses the current position until the end
|
||||
// of the line and discards the text.
|
||||
func lexComment(l *lexer) stateFn { |
||||
l.acceptRunUntil('\n') |
||||
l.ignore() |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
// lexLabel parses the current label, emits and returns
|
||||
// the lex text state function to advance the parsing
|
||||
// process.
|
||||
func lexLabel(l *lexer) stateFn { |
||||
l.acceptRun(Alpha + "_") |
||||
|
||||
l.emit(label) |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
// lexInsideString lexes the inside of a string until
|
||||
// until the state function finds the closing quote.
|
||||
// It returns the lex text state function.
|
||||
func lexInsideString(l *lexer) stateFn { |
||||
if l.acceptRunUntil('"') { |
||||
l.emit(stringValue) |
||||
} |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
func lexNumber(l *lexer) stateFn { |
||||
acceptance := Numbers |
||||
if l.accept("0") && l.accept("xX") { |
||||
acceptance = HexadecimalNumbers |
||||
} |
||||
l.acceptRun(acceptance) |
||||
|
||||
l.emit(number) |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
func lexElement(l *lexer) stateFn { |
||||
l.acceptRun(Alpha + "_" + Numbers) |
||||
|
||||
if l.peek() == ':' { |
||||
l.emit(labelDef) |
||||
|
||||
l.accept(":") |
||||
l.ignore() |
||||
} else { |
||||
l.emit(element) |
||||
} |
||||
return lexLine |
||||
} |
||||
|
||||
func isAlphaNumeric(t rune) bool { |
||||
return unicode.IsLetter(t) |
||||
} |
||||
|
||||
func isSpace(t rune) bool { |
||||
return unicode.IsSpace(t) |
||||
} |
||||
|
||||
func isNumber(t rune) bool { |
||||
return unicode.IsNumber(t) |
||||
} |
Loading…
Reference in new issue