mirror of https://github.com/ethereum/go-ethereum
core/asm: delete assembler/disassembler (#31211)
I maintain an improved version of the go-ethereum assembler at https://github.com/fjl/geas. We don't really use core/asm in our tests, and it has some bugs that prevent it from being useful, so I'm removing the package.
pull/31217/head
parent
07d7fe2b33
commit
aac621987e
@ -1,156 +0,0 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
// Package asm provides support for dealing with EVM assembly instructions (e.g., disassembling them).
|
||||
package asm |
||||
|
||||
import ( |
||||
"encoding/hex" |
||||
"fmt" |
||||
|
||||
"github.com/ethereum/go-ethereum/core/vm" |
||||
) |
||||
|
||||
// Iterator for disassembled EVM instructions
|
||||
type instructionIterator struct { |
||||
code []byte |
||||
pc uint64 |
||||
arg []byte |
||||
op vm.OpCode |
||||
error error |
||||
started bool |
||||
eofEnabled bool |
||||
} |
||||
|
||||
// NewInstructionIterator creates a new instruction iterator.
|
||||
func NewInstructionIterator(code []byte) *instructionIterator { |
||||
it := new(instructionIterator) |
||||
it.code = code |
||||
return it |
||||
} |
||||
|
||||
// NewEOFInstructionIterator creates a new instruction iterator for EOF-code.
|
||||
func NewEOFInstructionIterator(code []byte) *instructionIterator { |
||||
it := NewInstructionIterator(code) |
||||
it.eofEnabled = true |
||||
return it |
||||
} |
||||
|
||||
// Next returns true if there is a next instruction and moves on.
|
||||
func (it *instructionIterator) Next() bool { |
||||
if it.error != nil || uint64(len(it.code)) <= it.pc { |
||||
// We previously reached an error or the end.
|
||||
return false |
||||
} |
||||
|
||||
if it.started { |
||||
// Since the iteration has been already started we move to the next instruction.
|
||||
if it.arg != nil { |
||||
it.pc += uint64(len(it.arg)) |
||||
} |
||||
it.pc++ |
||||
} else { |
||||
// We start the iteration from the first instruction.
|
||||
it.started = true |
||||
} |
||||
|
||||
if uint64(len(it.code)) <= it.pc { |
||||
// We reached the end.
|
||||
return false |
||||
} |
||||
it.op = vm.OpCode(it.code[it.pc]) |
||||
var a int |
||||
if !it.eofEnabled { // Legacy code
|
||||
if it.op.IsPush() { |
||||
a = int(it.op) - int(vm.PUSH0) |
||||
} |
||||
} else { // EOF code
|
||||
if it.op == vm.RJUMPV { |
||||
// RJUMPV is unique as it has a variable sized operand. The total size is
|
||||
// determined by the count byte which immediately follows RJUMPV.
|
||||
maxIndex := int(it.code[it.pc+1]) |
||||
a = (maxIndex+1)*2 + 1 |
||||
} else { |
||||
a = vm.Immediates(it.op) |
||||
} |
||||
} |
||||
if a > 0 { |
||||
u := it.pc + 1 + uint64(a) |
||||
if uint64(len(it.code)) <= it.pc || uint64(len(it.code)) < u { |
||||
it.error = fmt.Errorf("incomplete instruction at %v", it.pc) |
||||
return false |
||||
} |
||||
it.arg = it.code[it.pc+1 : u] |
||||
} else { |
||||
it.arg = nil |
||||
} |
||||
return true |
||||
} |
||||
|
||||
// Error returns any error that may have been encountered.
|
||||
func (it *instructionIterator) Error() error { |
||||
return it.error |
||||
} |
||||
|
||||
// PC returns the PC of the current instruction.
|
||||
func (it *instructionIterator) PC() uint64 { |
||||
return it.pc |
||||
} |
||||
|
||||
// Op returns the opcode of the current instruction.
|
||||
func (it *instructionIterator) Op() vm.OpCode { |
||||
return it.op |
||||
} |
||||
|
||||
// Arg returns the argument of the current instruction.
|
||||
func (it *instructionIterator) Arg() []byte { |
||||
return it.arg |
||||
} |
||||
|
||||
// PrintDisassembled pretty-print all disassembled EVM instructions to stdout.
|
||||
func PrintDisassembled(code string) error { |
||||
script, err := hex.DecodeString(code) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
it := NewInstructionIterator(script) |
||||
for it.Next() { |
||||
if it.Arg() != nil && 0 < len(it.Arg()) { |
||||
fmt.Printf("%05x: %v %#x\n", it.PC(), it.Op(), it.Arg()) |
||||
} else { |
||||
fmt.Printf("%05x: %v\n", it.PC(), it.Op()) |
||||
} |
||||
} |
||||
return it.Error() |
||||
} |
||||
|
||||
// Disassemble returns all disassembled EVM instructions in human-readable format.
|
||||
func Disassemble(script []byte) ([]string, error) { |
||||
instrs := make([]string, 0) |
||||
|
||||
it := NewInstructionIterator(script) |
||||
for it.Next() { |
||||
if it.Arg() != nil && 0 < len(it.Arg()) { |
||||
instrs = append(instrs, fmt.Sprintf("%05x: %v %#x\n", it.PC(), it.Op(), it.Arg())) |
||||
} else { |
||||
instrs = append(instrs, fmt.Sprintf("%05x: %v\n", it.PC(), it.Op())) |
||||
} |
||||
} |
||||
if err := it.Error(); err != nil { |
||||
return nil, err |
||||
} |
||||
return instrs, nil |
||||
} |
@ -1,94 +0,0 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import ( |
||||
"encoding/hex" |
||||
"fmt" |
||||
"strings" |
||||
"testing" |
||||
) |
||||
|
||||
// Tests disassembling instructions
|
||||
func TestInstructionIterator(t *testing.T) { |
||||
for i, tc := range []struct { |
||||
code string |
||||
legacyWant string |
||||
eofWant string |
||||
}{ |
||||
{"", "", ""}, // empty
|
||||
{"6100", `err: incomplete instruction at 0`, `err: incomplete instruction at 0`}, |
||||
{"61000000", ` |
||||
00000: PUSH2 0x0000 |
||||
00003: STOP`, ` |
||||
00000: PUSH2 0x0000 |
||||
00003: STOP`}, |
||||
{"5F00", ` |
||||
00000: PUSH0 |
||||
00001: STOP`, ` |
||||
00000: PUSH0 |
||||
00001: STOP`}, |
||||
{"d1aabb00", `00000: DATALOADN |
||||
00001: opcode 0xaa not defined |
||||
00002: opcode 0xbb not defined |
||||
00003: STOP`, ` |
||||
00000: DATALOADN 0xaabb |
||||
00003: STOP`}, // DATALOADN(aabb),STOP
|
||||
{"d1aa", ` |
||||
00000: DATALOADN |
||||
00001: opcode 0xaa not defined`, "err: incomplete instruction at 0\n"}, // DATALOADN(aa) invalid
|
||||
{"e20211223344556600", ` |
||||
00000: RJUMPV |
||||
00001: MUL |
||||
00002: GT |
||||
00003: opcode 0x22 not defined |
||||
00004: CALLER |
||||
00005: DIFFICULTY |
||||
00006: SSTORE |
||||
err: incomplete instruction at 7`, ` |
||||
00000: RJUMPV 0x02112233445566 |
||||
00008: STOP`}, // RJUMPV( 6 bytes), STOP
|
||||
|
||||
} { |
||||
var ( |
||||
code, _ = hex.DecodeString(tc.code) |
||||
legacy = strings.TrimSpace(disassembly(NewInstructionIterator(code))) |
||||
eof = strings.TrimSpace(disassembly(NewEOFInstructionIterator(code))) |
||||
) |
||||
if want := strings.TrimSpace(tc.legacyWant); legacy != want { |
||||
t.Errorf("test %d: wrong (legacy) output. have:\n%q\nwant:\n%q\n", i, legacy, want) |
||||
} |
||||
if want := strings.TrimSpace(tc.eofWant); eof != want { |
||||
t.Errorf("test %d: wrong (eof) output. have:\n%q\nwant:\n%q\n", i, eof, want) |
||||
} |
||||
} |
||||
} |
||||
|
||||
func disassembly(it *instructionIterator) string { |
||||
var out = new(strings.Builder) |
||||
for it.Next() { |
||||
if it.Arg() != nil && 0 < len(it.Arg()) { |
||||
fmt.Fprintf(out, "%05x: %v %#x\n", it.PC(), it.Op(), it.Arg()) |
||||
} else { |
||||
fmt.Fprintf(out, "%05x: %v\n", it.PC(), it.Op()) |
||||
} |
||||
} |
||||
if err := it.Error(); err != nil { |
||||
fmt.Fprintf(out, "err: %v\n", err) |
||||
} |
||||
return out.String() |
||||
} |
@ -1,292 +0,0 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import ( |
||||
"encoding/hex" |
||||
"errors" |
||||
"fmt" |
||||
"math/big" |
||||
"os" |
||||
"strings" |
||||
|
||||
"github.com/ethereum/go-ethereum/common/math" |
||||
"github.com/ethereum/go-ethereum/core/vm" |
||||
) |
||||
|
||||
// Compiler contains information about the parsed source
|
||||
// and holds the tokens for the program.
|
||||
type Compiler struct { |
||||
tokens []token |
||||
out []byte |
||||
|
||||
labels map[string]int |
||||
|
||||
pc, pos int |
||||
|
||||
debug bool |
||||
} |
||||
|
||||
// NewCompiler returns a new allocated compiler.
|
||||
func NewCompiler(debug bool) *Compiler { |
||||
return &Compiler{ |
||||
labels: make(map[string]int), |
||||
debug: debug, |
||||
} |
||||
} |
||||
|
||||
// Feed feeds tokens into ch and are interpreted by
|
||||
// the compiler.
|
||||
//
|
||||
// feed is the first pass in the compile stage as it collects the used labels in the
|
||||
// program and keeps a program counter which is used to determine the locations of the
|
||||
// jump dests. The labels can than be used in the second stage to push labels and
|
||||
// determine the right position.
|
||||
func (c *Compiler) Feed(ch <-chan token) { |
||||
var prev token |
||||
for i := range ch { |
||||
switch i.typ { |
||||
case number: |
||||
num := math.MustParseBig256(i.text).Bytes() |
||||
if len(num) == 0 { |
||||
num = []byte{0} |
||||
} |
||||
c.pc += len(num) |
||||
case stringValue: |
||||
c.pc += len(i.text) - 2 |
||||
case element: |
||||
c.pc++ |
||||
case labelDef: |
||||
c.labels[i.text] = c.pc |
||||
c.pc++ |
||||
case label: |
||||
c.pc += 4 |
||||
if prev.typ == element && isJump(prev.text) { |
||||
c.pc++ |
||||
} |
||||
} |
||||
c.tokens = append(c.tokens, i) |
||||
prev = i |
||||
} |
||||
if c.debug { |
||||
fmt.Fprintln(os.Stderr, "found", len(c.labels), "labels") |
||||
} |
||||
} |
||||
|
||||
// Compile compiles the current tokens and returns a binary string that can be interpreted
|
||||
// by the EVM and an error if it failed.
|
||||
//
|
||||
// compile is the second stage in the compile phase which compiles the tokens to EVM
|
||||
// instructions.
|
||||
func (c *Compiler) Compile() (string, []error) { |
||||
var errors []error |
||||
// continue looping over the tokens until
|
||||
// the stack has been exhausted.
|
||||
for c.pos < len(c.tokens) { |
||||
if err := c.compileLine(); err != nil { |
||||
errors = append(errors, err) |
||||
} |
||||
} |
||||
|
||||
// turn the binary to hex
|
||||
h := hex.EncodeToString(c.out) |
||||
return h, errors |
||||
} |
||||
|
||||
// next returns the next token and increments the
|
||||
// position.
|
||||
func (c *Compiler) next() token { |
||||
token := c.tokens[c.pos] |
||||
c.pos++ |
||||
return token |
||||
} |
||||
|
||||
// compileLine compiles a single line instruction e.g.
|
||||
// "push 1", "jump @label".
|
||||
func (c *Compiler) compileLine() error { |
||||
n := c.next() |
||||
if n.typ != lineStart { |
||||
return compileErr(n, n.typ.String(), lineStart.String()) |
||||
} |
||||
|
||||
lvalue := c.next() |
||||
switch lvalue.typ { |
||||
case eof: |
||||
return nil |
||||
case element: |
||||
if err := c.compileElement(lvalue); err != nil { |
||||
return err |
||||
} |
||||
case labelDef: |
||||
c.compileLabel() |
||||
case lineEnd: |
||||
return nil |
||||
default: |
||||
return compileErr(lvalue, lvalue.text, fmt.Sprintf("%v or %v", labelDef, element)) |
||||
} |
||||
|
||||
if n := c.next(); n.typ != lineEnd { |
||||
return compileErr(n, n.text, lineEnd.String()) |
||||
} |
||||
|
||||
return nil |
||||
} |
||||
|
||||
// parseNumber compiles the number to bytes
|
||||
func parseNumber(tok token) ([]byte, error) { |
||||
if tok.typ != number { |
||||
panic("parseNumber of non-number token") |
||||
} |
||||
num, ok := math.ParseBig256(tok.text) |
||||
if !ok { |
||||
return nil, errors.New("invalid number") |
||||
} |
||||
bytes := num.Bytes() |
||||
if len(bytes) == 0 { |
||||
bytes = []byte{0} |
||||
} |
||||
return bytes, nil |
||||
} |
||||
|
||||
// compileElement compiles the element (push & label or both)
|
||||
// to a binary representation and may error if incorrect statements
|
||||
// where fed.
|
||||
func (c *Compiler) compileElement(element token) error { |
||||
switch { |
||||
case isJump(element.text): |
||||
return c.compileJump(element.text) |
||||
case isPush(element.text): |
||||
return c.compilePush() |
||||
default: |
||||
c.outputOpcode(toBinary(element.text)) |
||||
return nil |
||||
} |
||||
} |
||||
|
||||
func (c *Compiler) compileJump(jumpType string) error { |
||||
rvalue := c.next() |
||||
switch rvalue.typ { |
||||
case number: |
||||
numBytes, err := parseNumber(rvalue) |
||||
if err != nil { |
||||
return err |
||||
} |
||||
c.outputBytes(numBytes) |
||||
|
||||
case stringValue: |
||||
// strings are quoted, remove them.
|
||||
str := rvalue.text[1 : len(rvalue.text)-2] |
||||
c.outputBytes([]byte(str)) |
||||
|
||||
case label: |
||||
c.outputOpcode(vm.PUSH4) |
||||
pos := big.NewInt(int64(c.labels[rvalue.text])).Bytes() |
||||
pos = append(make([]byte, 4-len(pos)), pos...) |
||||
c.outputBytes(pos) |
||||
|
||||
case lineEnd: |
||||
// push without argument is supported, it just takes the destination from the stack.
|
||||
c.pos-- |
||||
|
||||
default: |
||||
return compileErr(rvalue, rvalue.text, "number, string or label") |
||||
} |
||||
// push the operation
|
||||
c.outputOpcode(toBinary(jumpType)) |
||||
return nil |
||||
} |
||||
|
||||
func (c *Compiler) compilePush() error { |
||||
// handle pushes. pushes are read from left to right.
|
||||
var value []byte |
||||
rvalue := c.next() |
||||
switch rvalue.typ { |
||||
case number: |
||||
value = math.MustParseBig256(rvalue.text).Bytes() |
||||
if len(value) == 0 { |
||||
value = []byte{0} |
||||
} |
||||
case stringValue: |
||||
value = []byte(rvalue.text[1 : len(rvalue.text)-1]) |
||||
case label: |
||||
value = big.NewInt(int64(c.labels[rvalue.text])).Bytes() |
||||
value = append(make([]byte, 4-len(value)), value...) |
||||
default: |
||||
return compileErr(rvalue, rvalue.text, "number, string or label") |
||||
} |
||||
if len(value) > 32 { |
||||
return fmt.Errorf("%d: string or number size > 32 bytes", rvalue.lineno+1) |
||||
} |
||||
c.outputOpcode(vm.OpCode(int(vm.PUSH1) - 1 + len(value))) |
||||
c.outputBytes(value) |
||||
return nil |
||||
} |
||||
|
||||
// compileLabel pushes a jumpdest to the binary slice.
|
||||
func (c *Compiler) compileLabel() { |
||||
c.outputOpcode(vm.JUMPDEST) |
||||
} |
||||
|
||||
func (c *Compiler) outputOpcode(op vm.OpCode) { |
||||
if c.debug { |
||||
fmt.Printf("%d: %v\n", len(c.out), op) |
||||
} |
||||
c.out = append(c.out, byte(op)) |
||||
} |
||||
|
||||
// output pushes the value v to the binary stack.
|
||||
func (c *Compiler) outputBytes(b []byte) { |
||||
if c.debug { |
||||
fmt.Printf("%d: %x\n", len(c.out), b) |
||||
} |
||||
c.out = append(c.out, b...) |
||||
} |
||||
|
||||
// isPush reports whether op is the bare "PUSH" mnemonic, compared without
// regard to case. Sized forms such as "PUSH1" do not match.
func isPush(op string) bool {
	const mnemonic = "PUSH"
	return strings.EqualFold(op, mnemonic)
}
||||
|
||||
// isJump reports whether op is "JUMP" or "JUMPI", compared without regard to
// case.
func isJump(op string) bool {
	switch {
	case strings.EqualFold(op, "JUMPI"), strings.EqualFold(op, "JUMP"):
		return true
	}
	return false
}
||||
|
||||
// toBinary converts text to a vm.OpCode
|
||||
func toBinary(text string) vm.OpCode { |
||||
return vm.StringToOp(strings.ToUpper(text)) |
||||
} |
||||
|
||||
// compileError describes a syntax error found while compiling: which token
// text was encountered, what was expected instead, and on which line.
type compileError struct {
	got  string // text that was encountered
	want string // description of what was expected

	lineno int // line number included in the message
}

// Error formats the error as "<line>: syntax error: unexpected <got>,
// expected <want>".
func (e compileError) Error() string {
	return fmt.Sprintf("%d: syntax error: unexpected %v, expected %v", e.lineno, e.got, e.want)
}
||||
|
||||
func compileErr(c token, got, want string) error { |
||||
return compileError{ |
||||
got: got, |
||||
want: want, |
||||
lineno: c.lineno + 1, |
||||
} |
||||
} |
@ -1,79 +0,0 @@ |
||||
// Copyright 2019 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import ( |
||||
"testing" |
||||
) |
||||
|
||||
func TestCompiler(t *testing.T) { |
||||
tests := []struct { |
||||
input, output string |
||||
}{ |
||||
{ |
||||
input: ` |
||||
GAS |
||||
label: |
||||
PUSH @label |
||||
`, |
||||
output: "5a5b6300000001", |
||||
}, |
||||
{ |
||||
input: ` |
||||
PUSH @label |
||||
label: |
||||
`, |
||||
output: "63000000055b", |
||||
}, |
||||
{ |
||||
input: ` |
||||
PUSH @label |
||||
JUMP |
||||
label: |
||||
`, |
||||
output: "6300000006565b", |
||||
}, |
||||
{ |
||||
input: ` |
||||
JUMP @label |
||||
label: |
||||
`, |
||||
output: "6300000006565b", |
||||
}, |
||||
{ |
||||
input: ` |
||||
JUMP @label |
||||
label: ;; comment |
||||
ADD ;; comment |
||||
`, |
||||
output: "6300000006565b01", |
||||
}, |
||||
} |
||||
for _, test := range tests { |
||||
ch := Lex([]byte(test.input), false) |
||||
c := NewCompiler(false) |
||||
c.Feed(ch) |
||||
output, err := c.Compile() |
||||
if len(err) != 0 { |
||||
t.Errorf("compile error: %v\ninput: %s", err, test.input) |
||||
continue |
||||
} |
||||
if output != test.output { |
||||
t.Errorf("incorrect output\ninput: %sgot: %s\nwant: %s\n", test.input, output, test.output) |
||||
} |
||||
} |
||||
} |
@ -1,93 +0,0 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import ( |
||||
"reflect" |
||||
"testing" |
||||
) |
||||
|
||||
func lexAll(src string) []token { |
||||
ch := Lex([]byte(src), false) |
||||
|
||||
var tokens []token |
||||
for i := range ch { |
||||
tokens = append(tokens, i) |
||||
} |
||||
return tokens |
||||
} |
||||
|
||||
func TestLexer(t *testing.T) { |
||||
tests := []struct { |
||||
input string |
||||
tokens []token |
||||
}{ |
||||
{ |
||||
input: ";; this is a comment", |
||||
tokens: []token{{typ: lineStart}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "0x12345678", |
||||
tokens: []token{{typ: lineStart}, {typ: number, text: "0x12345678"}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "0x123ggg", |
||||
tokens: []token{{typ: lineStart}, {typ: number, text: "0x123"}, {typ: element, text: "ggg"}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "12345678", |
||||
tokens: []token{{typ: lineStart}, {typ: number, text: "12345678"}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "123abc", |
||||
tokens: []token{{typ: lineStart}, {typ: number, text: "123"}, {typ: element, text: "abc"}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "0123abc", |
||||
tokens: []token{{typ: lineStart}, {typ: number, text: "0123"}, {typ: element, text: "abc"}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "00123abc", |
||||
tokens: []token{{typ: lineStart}, {typ: number, text: "00123"}, {typ: element, text: "abc"}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "@foo", |
||||
tokens: []token{{typ: lineStart}, {typ: label, text: "foo"}, {typ: eof}}, |
||||
}, |
||||
{ |
||||
input: "@label123", |
||||
tokens: []token{{typ: lineStart}, {typ: label, text: "label123"}, {typ: eof}}, |
||||
}, |
||||
// Comment after label
|
||||
{ |
||||
input: "@label123 ;; comment", |
||||
tokens: []token{{typ: lineStart}, {typ: label, text: "label123"}, {typ: eof}}, |
||||
}, |
||||
// Comment after instruction
|
||||
{ |
||||
input: "push 3 ;; comment\nadd", |
||||
tokens: []token{{typ: lineStart}, {typ: element, text: "push"}, {typ: number, text: "3"}, {typ: lineEnd, text: "\n"}, {typ: lineStart, lineno: 1}, {typ: element, lineno: 1, text: "add"}, {typ: eof, lineno: 1}}, |
||||
}, |
||||
} |
||||
|
||||
for _, test := range tests { |
||||
tokens := lexAll(test.input) |
||||
if !reflect.DeepEqual(tokens, test.tokens) { |
||||
t.Errorf("input %q\ngot: %+v\nwant: %+v", test.input, tokens, test.tokens) |
||||
} |
||||
} |
||||
} |
@ -1,275 +0,0 @@ |
||||
// Copyright 2017 The go-ethereum Authors
|
||||
// This file is part of the go-ethereum library.
|
||||
//
|
||||
// The go-ethereum library is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// The go-ethereum library is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public License
|
||||
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package asm |
||||
|
||||
import ( |
||||
"fmt" |
||||
"os" |
||||
"strings" |
||||
"unicode" |
||||
"unicode/utf8" |
||||
) |
||||
|
||||
// stateFn is used through the lifetime of the
|
||||
// lexer to parse the different values at the
|
||||
// current state.
|
||||
type stateFn func(*lexer) stateFn |
||||
|
||||
// token is emitted when the lexer has discovered
|
||||
// a new parsable token. These are delivered over
|
||||
// the tokens channels of the lexer
|
||||
type token struct { |
||||
typ tokenType |
||||
lineno int |
||||
text string |
||||
} |
||||
|
||||
// tokenType identifies the kind of a token emitted by the lexer.
type tokenType int
||||
|
||||
//go:generate go run golang.org/x/tools/cmd/stringer -type tokenType
|
||||
|
||||
const ( |
||||
eof tokenType = iota // end of file
|
||||
lineStart // emitted when a line starts
|
||||
lineEnd // emitted when a line ends
|
||||
invalidStatement // any invalid statement
|
||||
element // any element during element parsing
|
||||
label // label is emitted when a label is found
|
||||
labelDef // label definition is emitted when a new label is found
|
||||
number // number is emitted when a number is found
|
||||
stringValue // stringValue is emitted when a string has been found
|
||||
) |
||||
|
||||
// Character sets accepted by the lexer.
const (
	decimalNumbers = "1234567890"                                           // decimal digits
	hexNumbers     = decimalNumbers + "aAbBcCdDeEfF"                        // hexadecimal digits
	alpha          = "abcdefghijklmnopqrstuwvxyzABCDEFGHIJKLMNOPQRSTUWVXYZ" // ASCII letters, lower and upper case
)
||||
|
||||
// lexer is the basic construct for parsing
|
||||
// source code and turning them in to tokens.
|
||||
// Tokens are interpreted by the compiler.
|
||||
type lexer struct { |
||||
input string // input contains the source code of the program
|
||||
|
||||
tokens chan token // tokens is used to deliver tokens to the listener
|
||||
state stateFn // the current state function
|
||||
|
||||
lineno int // current line number in the source file
|
||||
start, pos, width int // positions for lexing and returning value
|
||||
|
||||
debug bool // flag for triggering debug output
|
||||
} |
||||
|
||||
// Lex lexes the program by name with the given source. It returns a
|
||||
// channel on which the tokens are delivered.
|
||||
func Lex(source []byte, debug bool) <-chan token { |
||||
ch := make(chan token) |
||||
l := &lexer{ |
||||
input: string(source), |
||||
tokens: ch, |
||||
state: lexLine, |
||||
debug: debug, |
||||
} |
||||
go func() { |
||||
l.emit(lineStart) |
||||
for l.state != nil { |
||||
l.state = l.state(l) |
||||
} |
||||
l.emit(eof) |
||||
close(l.tokens) |
||||
}() |
||||
|
||||
return ch |
||||
} |
||||
|
||||
// next returns the next rune in the program's source.
|
||||
func (l *lexer) next() (rune rune) { |
||||
if l.pos >= len(l.input) { |
||||
l.width = 0 |
||||
return 0 |
||||
} |
||||
rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) |
||||
l.pos += l.width |
||||
return rune |
||||
} |
||||
|
||||
// backup backsup the last parsed element (multi-character)
|
||||
func (l *lexer) backup() { |
||||
l.pos -= l.width |
||||
} |
||||
|
||||
// peek returns the next rune but does not advance the seeker
|
||||
func (l *lexer) peek() rune { |
||||
r := l.next() |
||||
l.backup() |
||||
return r |
||||
} |
||||
|
||||
// ignore advances the seeker and ignores the value
|
||||
func (l *lexer) ignore() { |
||||
l.start = l.pos |
||||
} |
||||
|
||||
// accept checks whether the given input matches the next rune
|
||||
func (l *lexer) accept(valid string) bool { |
||||
if strings.ContainsRune(valid, l.next()) { |
||||
return true |
||||
} |
||||
|
||||
l.backup() |
||||
|
||||
return false |
||||
} |
||||
|
||||
// acceptRun will continue to advance the seeker until valid
|
||||
// can no longer be met.
|
||||
func (l *lexer) acceptRun(valid string) { |
||||
for strings.ContainsRune(valid, l.next()) { |
||||
} |
||||
l.backup() |
||||
} |
||||
|
||||
// acceptRunUntil is the inverse of acceptRun and will continue
|
||||
// to advance the seeker until the rune has been found.
|
||||
func (l *lexer) acceptRunUntil(until rune) bool { |
||||
// Continues running until a rune is found
|
||||
for i := l.next(); !strings.ContainsRune(string(until), i); i = l.next() { |
||||
if i == 0 { |
||||
return false |
||||
} |
||||
} |
||||
|
||||
return true |
||||
} |
||||
|
||||
// blob returns the current value
|
||||
func (l *lexer) blob() string { |
||||
return l.input[l.start:l.pos] |
||||
} |
||||
|
||||
// Emits a new token on to token channel for processing
|
||||
func (l *lexer) emit(t tokenType) { |
||||
token := token{t, l.lineno, l.blob()} |
||||
|
||||
if l.debug { |
||||
fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.lineno, token.typ, token.text) |
||||
} |
||||
|
||||
l.tokens <- token |
||||
l.start = l.pos |
||||
} |
||||
|
||||
// lexLine is state function for lexing lines
|
||||
func lexLine(l *lexer) stateFn { |
||||
for { |
||||
switch r := l.next(); { |
||||
case r == '\n': |
||||
l.emit(lineEnd) |
||||
l.ignore() |
||||
l.lineno++ |
||||
l.emit(lineStart) |
||||
case r == ';' && l.peek() == ';': |
||||
return lexComment |
||||
case isSpace(r): |
||||
l.ignore() |
||||
case isLetter(r) || r == '_': |
||||
return lexElement |
||||
case isNumber(r): |
||||
return lexNumber |
||||
case r == '@': |
||||
l.ignore() |
||||
return lexLabel |
||||
case r == '"': |
||||
return lexInsideString |
||||
default: |
||||
return nil |
||||
} |
||||
} |
||||
} |
||||
|
||||
// lexComment parses the current position until the end
|
||||
// of the line and discards the text.
|
||||
func lexComment(l *lexer) stateFn { |
||||
l.acceptRunUntil('\n') |
||||
l.backup() |
||||
l.ignore() |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
// lexLabel parses the current label, emits and returns
|
||||
// the lex text state function to advance the parsing
|
||||
// process.
|
||||
func lexLabel(l *lexer) stateFn { |
||||
l.acceptRun(alpha + "_" + decimalNumbers) |
||||
|
||||
l.emit(label) |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
// lexInsideString lexes the inside of a string until
|
||||
// the state function finds the closing quote.
|
||||
// It returns the lex text state function.
|
||||
func lexInsideString(l *lexer) stateFn { |
||||
if l.acceptRunUntil('"') { |
||||
l.emit(stringValue) |
||||
} |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
func lexNumber(l *lexer) stateFn { |
||||
acceptance := decimalNumbers |
||||
if l.accept("xX") { |
||||
acceptance = hexNumbers |
||||
} |
||||
l.acceptRun(acceptance) |
||||
|
||||
l.emit(number) |
||||
|
||||
return lexLine |
||||
} |
||||
|
||||
func lexElement(l *lexer) stateFn { |
||||
l.acceptRun(alpha + "_" + decimalNumbers) |
||||
|
||||
if l.peek() == ':' { |
||||
l.emit(labelDef) |
||||
|
||||
l.accept(":") |
||||
l.ignore() |
||||
} else { |
||||
l.emit(element) |
||||
} |
||||
return lexLine |
||||
} |
||||
|
||||
// isLetter reports whether t is a letter according to unicode.IsLetter.
func isLetter(t rune) bool {
	letter := unicode.IsLetter(t)
	return letter
}
||||
|
||||
// isSpace reports whether t is white space according to unicode.IsSpace.
func isSpace(t rune) bool {
	space := unicode.IsSpace(t)
	return space
}
||||
|
||||
// isNumber reports whether t is a numeric rune according to unicode.IsNumber.
func isNumber(t rune) bool {
	numeric := unicode.IsNumber(t)
	return numeric
}
@ -1,31 +0,0 @@ |
||||
// Code generated by "stringer -type tokenType"; DO NOT EDIT.
|
||||
|
||||
package asm |
||||
|
||||
import "strconv" |
||||
|
||||
func _() { |
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{} |
||||
_ = x[eof-0] |
||||
_ = x[lineStart-1] |
||||
_ = x[lineEnd-2] |
||||
_ = x[invalidStatement-3] |
||||
_ = x[element-4] |
||||
_ = x[label-5] |
||||
_ = x[labelDef-6] |
||||
_ = x[number-7] |
||||
_ = x[stringValue-8] |
||||
} |
||||
|
||||
const _tokenType_name = "eoflineStartlineEndinvalidStatementelementlabellabelDefnumberstringValue" |
||||
|
||||
var _tokenType_index = [...]uint8{0, 3, 12, 19, 35, 42, 47, 55, 61, 72} |
||||
|
||||
func (i tokenType) String() string { |
||||
if i < 0 || i >= tokenType(len(_tokenType_index)-1) { |
||||
return "tokenType(" + strconv.FormatInt(int64(i), 10) + ")" |
||||
} |
||||
return _tokenType_name[_tokenType_index[i]:_tokenType_index[i+1]] |
||||
} |
Loading…
Reference in new issue