diff --git a/nimbus/common/evmforks.nim b/nimbus/common/evmforks.nim index 2885daaa35..f804247d88 100644 --- a/nimbus/common/evmforks.nim +++ b/nimbus/common/evmforks.nim @@ -28,3 +28,7 @@ const FkParis* = EVMC_PARIS FkShanghai* = EVMC_SHANGHAI FkCancun* = EVMC_CANCUN + + + # Meta forks related to specific EIP + FkEOF* = FkCancun diff --git a/nimbus/common/hardforks.nim b/nimbus/common/hardforks.nim index c81e560ead..38dddd82b2 100644 --- a/nimbus/common/hardforks.nim +++ b/nimbus/common/hardforks.nim @@ -50,10 +50,14 @@ type Shanghai Cancun -const lastPurelyBlockNumberBasedFork* = GrayGlacier -# MergeFork is special because of TTD. -const firstTimeBasedFork* = Shanghai +const + lastPurelyBlockNumberBasedFork* = GrayGlacier + + # MergeFork is special because of TTD. + firstTimeBasedFork* = Shanghai + # Meta Fork + EOFFork* = Cancun type CliqueOptions* = object diff --git a/nimbus/db/accounts_cache.nim b/nimbus/db/accounts_cache.nim index 221cd559d0..5219aadca7 100644 --- a/nimbus/db/accounts_cache.nim +++ b/nimbus/db/accounts_cache.nim @@ -11,9 +11,8 @@ import std/[tables, hashes, sets], eth/[common, rlp], + ../constants, ../utils/[utils, eof], ../../stateless/multi_keys, - ../constants, - ../utils/utils, ./access_list as ac_access_list, "."/[core_db, distinct_tries, storage_types, transient_storage] @@ -376,6 +375,18 @@ proc getNonce*(ac: AccountsCache, address: EthAddress): AccountNonce {.inline.} if acc.isNil: emptyAcc.nonce else: acc.account.nonce +proc loadCode(acc: RefAccount, ac: AccountsCache) = + if CodeLoaded in acc.flags or CodeChanged in acc.flags: + return + + when defined(geth): + let data = ac.kvt.get(acc.account.codeHash.data) + else: + let data = ac.kvt.get(contractHashKey(acc.account.codeHash).toOpenArray) + + acc.code = data + acc.flags.incl CodeLoaded + proc getCode*(ac: AccountsCache, address: EthAddress): seq[byte] = let acc = ac.getAccount(address, false) if acc.isNil: @@ -394,7 +405,18 @@ proc getCode*(ac: AccountsCache, address: 
EthAddress): seq[byte] = result = acc.code proc getCodeSize*(ac: AccountsCache, address: EthAddress): int {.inline.} = - ac.getCode(address).len + let acc = ac.getAccount(address, false) + if acc.isNil: + return + acc.loadCode(ac) + acc.code.len + +proc hasEOFCode*(ac: AccountsCache, address: EthAddress): bool = + let acc = ac.getAccount(address, false) + if acc.isNil: + return + acc.loadCode(ac) + eof.hasEOFMagic(acc.code) proc getCommittedStorage*(ac: AccountsCache, address: EthAddress, slot: UInt256): UInt256 {.inline.} = let acc = ac.getAccount(address, false) @@ -744,6 +766,7 @@ proc getStorage*(db: ReadOnlyStateDB, address: EthAddress, slot: UInt256): UInt2 proc getNonce*(db: ReadOnlyStateDB, address: EthAddress): AccountNonce {.borrow.} proc getCode*(db: ReadOnlyStateDB, address: EthAddress): seq[byte] {.borrow.} proc getCodeSize*(db: ReadOnlyStateDB, address: EthAddress): int {.borrow.} +proc hasEOFCode*(ac: ReadOnlyStateDB, address: EthAddress): bool {.borrow.} proc hasCodeOrNonce*(db: ReadOnlyStateDB, address: EthAddress): bool {.borrow.} proc accountExists*(db: ReadOnlyStateDB, address: EthAddress): bool {.borrow.} proc isDeadAccount*(db: ReadOnlyStateDB, address: EthAddress): bool {.borrow.} diff --git a/nimbus/evm/analysis.nim b/nimbus/evm/analysis.nim new file mode 100644 index 0000000000..c2beca3087 --- /dev/null +++ b/nimbus/evm/analysis.nim @@ -0,0 +1,167 @@ +# Nimbus +# Copyright (c) 2023 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. 
+ +import + interpreter/op_codes + +const + set2BitsMask = uint16(0b11) + set3BitsMask = uint16(0b111) + set4BitsMask = uint16(0b1111) + set5BitsMask = uint16(0b1_1111) + set6BitsMask = uint16(0b11_1111) + set7BitsMask = uint16(0b111_1111) + +# bitvec is a bit vector which maps bytes in a program. +# An unset bit means the byte is an opcode, a set bit means +# it's data (i.e. argument of PUSHxx). +type + Bitvec* = seq[byte] + +proc set1(bits: var Bitvec, pos: int) = + let x = bits[pos div 8] + bits[pos div 8] = x or byte(1 shl (pos mod 8)) + +proc setN(bits: var Bitvec, flag: uint16, pos: int) = + let z = pos div 8 + let a = flag shl (pos mod 8) + let x = bits[z] + bits[z] = x or byte(a) + let b = byte(a shr 8) + if b != 0: + bits[z+1] = b + +proc set8(bits: var Bitvec, pos: int) = + let z = pos div 8 + let a = byte(0xFF shl (pos mod 8)) + bits[z] = bits[z] or a + bits[z+1] = not a + +proc set16(bits: var Bitvec, pos: int) = + let z = pos div 8 + let a = byte(0xFF shl (pos mod 8)) + bits[z] = bits[z] or a + bits[z+1] = 0xFF + bits[z+2] = not a + +# codeSegment checks if the position is in a code segment. +proc codeSegment*(bits: Bitvec, pos: int): bool = + ((bits[pos div 8] shr (pos mod 8)) and 1) == 0 + +# codeBitmapInternal is the internal implementation of codeBitmap. +# It exists for the purpose of being able to run benchmark tests +# without dynamic allocations affecting the results. 
+proc codeBitmapInternal(bits: var Bitvec; code: openArray[byte]) = + var pc = 0 + while pc < code.len: + let op = Op(code[pc]) + inc pc + + if op < PUSH1 or op > PUSH32: + continue + + var numbits = op.int - PUSH1.int + 1 + if numbits >= 8: + while numbits >= 16: + bits.set16(pc) + pc += 16 + numbits -= 16 + + while numbits >= 8: + bits.set8(pc) + pc += 8 + numbits -= 8 + + case numbits + of 1: bits.set1(pc) + of 2: bits.setN(set2BitsMask, pc) + of 3: bits.setN(set3BitsMask, pc) + of 4: bits.setN(set4BitsMask, pc) + of 5: bits.setN(set5BitsMask, pc) + of 6: bits.setN(set6BitsMask, pc) + of 7: bits.setN(set7BitsMask, pc) + else: discard + pc += numbits + +# codeBitmap collects data locations in code. +proc codeBitmap*(code: openArray[byte]): Bitvec = + # The bitmap is 4 bytes longer than necessary, in case the code + # ends with a PUSH32, the algorithm will push zeroes onto the + # bitvector outside the bounds of the actual code. + let len = (code.len div 8)+1+4 + result = newSeq[byte](len) + result.codeBitmapInternal(code) + +# eofCodeBitmapInternal is the internal implementation of eofCodeBitmap for EOF +# code validation. +proc eofCodeBitmapInternal(bits: var Bitvec; code: openArray[byte]) = + var pc = 0 + while pc < code.len: + let op = Op(code[pc]) + inc pc + + # RJUMP, RJUMPI and CALLF always have a 2 byte operand. + if op == RJUMP or op == RJUMPI or op == CALLF: + bits.setN(set2BitsMask, pc) + pc += 2 + continue + + var numbits = 0 + if op >= PUSH1 and op <= PUSH32: + numbits = op.int - PUSH1.int + 1 + elif op == RJUMPV: + # RJUMPV is unique as it has a variable sized operand. + # The total size is determined by the count byte which + # immediately follows RJUMPV. Truncation will be caught + # in other validation steps -- for now, just return a + # valid bitmap for as much of the code as is + # available. + if pc >= code.len: + # Count missing, no more bits to mark. + return + numbits = code[pc].int*2 + 1 + if pc+numbits > code.len: + # Jump table is truncated, mark as many bits + # as possible.
+ numbits = code.len - pc + else: + # Opcode carries no immediate operand, nothing to mark. + continue + + if numbits >= 8: + while numbits >= 16: + bits.set16(pc) + pc += 16 + numbits -= 16 + + while numbits >= 8: + bits.set8(pc) + pc += 8 + numbits -= 8 + + case numbits + of 1: bits.set1(pc) + of 2: bits.setN(set2BitsMask, pc) + of 3: bits.setN(set3BitsMask, pc) + of 4: bits.setN(set4BitsMask, pc) + of 5: bits.setN(set5BitsMask, pc) + of 6: bits.setN(set6BitsMask, pc) + of 7: bits.setN(set7BitsMask, pc) + else: discard + pc += numbits + +# eofCodeBitmap collects data locations in code. +proc eofCodeBitmap*(code: openArray[byte]): Bitvec = + # The bitmap is 4 bytes longer than necessary, in case the code + # ends with a PUSH32, the algorithm will push zeroes onto the + # bitvector outside the bounds of the actual code. + let len = (code.len div 8)+1+4 + result = newSeq[byte](len) + result.eofCodeBitmapInternal(code) diff --git a/nimbus/evm/code_stream.nim b/nimbus/evm/code_stream.nim index 7dd66bb8c2..cd0bcbd9d9 100644 --- a/nimbus/evm/code_stream.nim +++ b/nimbus/evm/code_stream.nim @@ -6,31 +6,56 @@ # at your option. This file may not be copied, modified, or distributed except according to those terms.
import - chronicles, strformat, strutils, sequtils, parseutils, sets, macros, + std/[strformat, strutils, sequtils, parseutils, sets], + chronicles, eth/common, + stew/[results, endians2], + stew/ptrops, + ../utils/eof, ./interpreter/op_codes logScope: topics = "vm code_stream" type + CodeView = ptr UncheckedArray[byte] + CodeStream* = ref object - bytes*: seq[byte] + # pre EOF byte code + legacyCode*: seq[byte] + + # view into legacyCode or + # into one of EOF code section + codeView: CodeView + + # length of legacy code or + # one of EOF code section + codeLen: int + depthProcessed: int invalidPositions: HashSet[int] pc*: int cached: seq[(int, Op, string)] + # EOF container + container*: Container + + # EOF code section index + section: int + proc `$`*(b: byte): string = $(b.int) proc newCodeStream*(codeBytes: seq[byte]): CodeStream = new(result) - shallowCopy(result.bytes, codeBytes) + shallowCopy(result.legacyCode, codeBytes) result.pc = 0 result.invalidPositions = initHashSet[int]() result.depthProcessed = 0 result.cached = @[] + result.codeLen = result.legacyCode.len + if result.codeLen > 0: + result.codeView = cast[CodeView](addr result.legacyCode[0]) proc newCodeStream*(codeBytes: string): CodeStream = newCodeStream(codeBytes.mapIt(it.byte)) @@ -47,29 +72,62 @@ proc newCodeStreamFromUnescaped*(code: string): CodeStream = proc read*(c: var CodeStream, size: int): seq[byte] = # TODO: use openArray[bytes] - if c.pc + size - 1 < c.bytes.len: - result = c.bytes[c.pc .. c.pc + size - 1] + if c.pc + size - 1 < c.codeLen: + result = @(makeOpenArray(addr c.codeView[c.pc], byte, size)) c.pc += size else: result = @[] - c.pc = c.bytes.len + c.pc = c.codeLen proc readVmWord*(c: var CodeStream, n: int): UInt256 = ## Reads `n` bytes from the code stream and pads ## the remaining bytes with zeros. 
- let result_bytes = cast[ptr array[32, byte]](addr result) + let resultBytes = cast[ptr array[32, byte]](addr result) - let last = min(c.pc + n, c.bytes.len) + let last = min(c.pc + n, c.codeLen) let toWrite = last - c.pc - for i in 0 ..< toWrite : result_bytes[i] = c.bytes[last - i - 1] + for i in 0 ..< toWrite : resultBytes[i] = c.codeView[last - i - 1] c.pc = last +proc readInt16*(c: var CodeStream): int = + let x = uint16.fromBytesBE(makeOpenArray(addr c.codeView[c.pc], byte, 2)) + result = cast[int16](x).int + c.pc += 2 + +proc readByte*(c: var CodeStream): byte = + result = c.codeView[c.pc] + inc c.pc + proc len*(c: CodeStream): int = - len(c.bytes) + if c.container.code.len > 0: + c.container.size + else: + c.legacyCode.len + +proc setSection*(c: CodeStream, sec: int) = + if sec < c.container.code.len: + c.codeLen = c.container.code[sec].len + if c.codeLen > 0: + c.codeView = cast[CodeView](addr c.container.code[sec][0]) + c.section = sec + +proc parseEOF*(c: CodeStream): Result[void, EOFV1Error] = + result = decode(c.container, c.legacyCode) + if result.isOk: + c.setSection(0) + +func hasEOFCode*(c: CodeStream): bool = + hasEOFMagic(c.legacyCode) + +func getType*(c: CodeStream, idx: int): FunctionMetaData = + c.container.types[idx] + +func section*(c: CodeStream): int = + c.section proc next*(c: var CodeStream): Op = - if c.pc != c.bytes.len: - result = Op(c.bytes[c.pc]) + if c.pc != c.codeLen: + result = Op(c.codeView[c.pc]) inc c.pc else: result = Stop @@ -81,11 +139,11 @@ iterator items*(c: var CodeStream): Op = nextOpcode = c.next() proc `[]`*(c: CodeStream, offset: int): Op = - Op(c.bytes[offset]) + Op(c.codeView[offset]) proc peek*(c: var CodeStream): Op = - if c.pc < c.bytes.len: - result = Op(c.bytes[c.pc]) + if c.pc < c.codeLen: + result = Op(c.codeView[c.pc]) else: result = Stop @@ -103,7 +161,7 @@ when false: cs.pc = anchorPc proc isValidOpcode*(c: CodeStream, position: int): bool = - if position >= len(c): + if position >= c.codeLen: return 
false if position in c.invalidPositions: return false @@ -133,7 +191,12 @@ proc decompile*(original: var CodeStream): seq[(int, Op, string)] = if original.cached.len > 0: return original.cached result = @[] - var c = newCodeStream(original.bytes) + var c = newCodeStream(original.legacyCode) + if c.hasEOFCode: + let res = c.parseEOF + if res.isErr: + return + while true: var op = c.next if op >= Push1 and op <= Push32: @@ -157,4 +220,4 @@ proc hasSStore*(c: var CodeStream): bool = result = opcodes.anyIt(it[1] == Sstore) proc atEnd*(c: CodeStream): bool = - result = c.pc >= c.bytes.len + result = c.pc >= c.codeLen diff --git a/nimbus/evm/computation.nim b/nimbus/evm/computation.nim index 2a6d0f1f08..484d3f7e1d 100644 --- a/nimbus/evm/computation.nim +++ b/nimbus/evm/computation.nim @@ -11,10 +11,10 @@ import ".."/[db/accounts_cache, constants], "."/[code_stream, memory, message, stack, state], - "."/[types], + "."/[types, validate], ./interpreter/[gas_meter, gas_costs, op_codes], ../common/[common, evmforks], - ../utils/utils, + ../utils/[utils, eof], chronicles, chronos, eth/[keys], sets @@ -45,6 +45,7 @@ when defined(evmc_enabled): const evmc_enabled* = defined(evmc_enabled) + ErrLegacyCode* = "invalid code: EOF contract must not deploy legacy code" # ------------------------------------------------------------------------------ # Helpers @@ -225,7 +226,8 @@ proc newComputation*(vmState: BaseVMState, sysCall: bool, message: Message, result.msg = message result.memory = Memory() result.stack = newStack() - result.returnStack = @[] + # disable EIP-2315 + # result.returnStack = @[] result.gasMeter.init(message.gas) result.sysCall = sysCall @@ -237,6 +239,11 @@ proc newComputation*(vmState: BaseVMState, sysCall: bool, message: Message, result.code = newCodeStream( vmState.readOnlyStateDB.getCode(message.codeAddress)) + # EIP-4750 + result.returnStack = @[ + ReturnContext(section: 0, pc: 0, stackHeight: 0) + ] + proc newComputation*(vmState: BaseVMState, sysCall: bool, 
message: Message, code: seq[byte]): Computation = new result @@ -244,11 +251,17 @@ proc newComputation*(vmState: BaseVMState, sysCall: bool, result.msg = message result.memory = Memory() result.stack = newStack() - result.returnStack = @[] + # disable EIP-2315 + # result.returnStack = @[] result.gasMeter.init(message.gas) result.code = newCodeStream(code) result.sysCall = sysCall + # EIP-4750 + result.returnStack = @[ + ReturnContext(section: 0, pc: 0, stackHeight: 0) + ] + template gasCosts*(c: Computation): untyped = c.vmState.gasCosts @@ -313,12 +326,30 @@ proc writeContract*(c: Computation) if len == 0: return - # EIP-3541 constraint (https://eips.ethereum.org/EIPS/eip-3541). - if fork >= FkLondon and c.output[0] == 0xEF.byte: - withExtra trace, "New contract code starts with 0xEF byte, not allowed by EIP-3541" - c.setError(EVMC_CONTRACT_VALIDATION_FAILURE, true) + # Reject legacy contract deployment from EOF. + if c.initCodeEOF and not hasEOFMagic(c.output): + c.setError(ErrLegacyCode, true) return + # EIP-3541 constraint (https://eips.ethereum.org/EIPS/eip-3541). + if hasEOFByte(c.output): + if fork >= FkEOF: + var con: Container + let res = con.decode(c.output) + if res.isErr: + c.setError("EOF retcode parse error: " & res.error.toString, true) + return + + let vres = con.validateCode() + if vres.isErr: + c.setError("EOF retcode validate error: " & vres.error.toString, true) + return + + elif fork >= FkLondon: + withExtra trace, "New contract code starts with 0xEF byte, not allowed by EIP-3541" + c.setError(EVMC_CONTRACT_VALIDATION_FAILURE, true) + return + # EIP-170 constraint (https://eips.ethereum.org/EIPS/eip-3541). 
if fork >= FkSpurious and len > EIP170_MAX_CODE_SIZE: withExtra trace, "New contract code exceeds EIP-170 limit", diff --git a/nimbus/evm/interpreter/gas_costs.nim b/nimbus/evm/interpreter/gas_costs.nim index 332784e8cb..07ff95e0b5 100644 --- a/nimbus/evm/interpreter/gas_costs.nim +++ b/nimbus/evm/interpreter/gas_costs.nim @@ -20,6 +20,7 @@ type GasZero, # Nothing paid for operations of the set Wzero. GasBase, # Amount of gas to pay for operations of the set Wbase. GasVeryLow, # Amount of gas to pay for operations of the set Wverylow. + GasMidLow, # Introduced in Shanghai EIP4200 GasLow, # Amount of gas to pay for operations of the set Wlow. GasMid, # Amount of gas to pay for operations of the set Wmid. GasHigh, # Amount of gas to pay for operations of the set Whigh. @@ -721,6 +722,15 @@ template gasCosts(fork: EVMFork, prefix, ResultGasCostsName: untyped) = Log3: memExpansion `prefix gasLog3`, Log4: memExpansion `prefix gasLog4`, + # b0s: Function Operations + CallF: fixed GasLow, + RetF: fixed GasVeryLow, + + # e0s: Static jumps + RJump: fixed GasBase, + RJumpI: fixed GasMidLow, + RJumpV: fixed GasMidLow, + # f0s: System operations Create: complex `prefix gasCreate`, Call: complex `prefix gasCall`, @@ -741,6 +751,7 @@ const GasZero: 0'i64, GasBase: 2, GasVeryLow: 3, + GasMidLow: 4, # Introduced in Shanghai (EIP4200) GasLow: 5, GasMid: 8, GasHigh: 10, diff --git a/nimbus/evm/interpreter/op_codes.nim b/nimbus/evm/interpreter/op_codes.nim index 13a447d3b0..010c1d815e 100644 --- a/nimbus/evm/interpreter/op_codes.nim +++ b/nimbus/evm/interpreter/op_codes.nim @@ -128,6 +128,7 @@ type JumpDest = 0x5b, ## Mark a valid destination for jumps. This ## operation has no effect on machine state during ## execution. + Tload = 0x5c, ## Load word from transient storage. Tstore = 0x5d, ## Save word to transient storage. @@ -175,16 +176,25 @@ type Log4 = 0xa4, ## Append log record with four topics. 
Nop0xA5, Nop0xA6, Nop0xA7, Nop0xA8, Nop0xA9, Nop0xAA, - Nop0xAB, Nop0xAC, Nop0xAD, Nop0xAE, Nop0xAF, Nop0xB0, - Nop0xB1, Nop0xB2, Nop0xB3, Nop0xB4, Nop0xB5, Nop0xB6, + Nop0xAB, Nop0xAC, Nop0xAD, Nop0xAE, Nop0xAF, + + CallF = 0xb0, ## call a function (EIP4750) + RetF = 0xb1, ## return from a function (EIP4750) + + Nop0xB2, Nop0xB3, Nop0xB4, Nop0xB5, Nop0xB6, Nop0xB7, Nop0xB8, Nop0xB9, Nop0xBA, Nop0xBB, Nop0xBC, Nop0xBD, Nop0xBE, Nop0xBF, Nop0xC0, Nop0xC1, Nop0xC2, Nop0xC3, Nop0xC4, Nop0xC5, Nop0xC6, Nop0xC7, Nop0xC8, Nop0xC9, Nop0xCA, Nop0xCB, Nop0xCC, Nop0xCD, Nop0xCE, Nop0xCF, Nop0xD0, Nop0xD1, Nop0xD2, Nop0xD3, Nop0xD4, Nop0xD5, Nop0xD6, Nop0xD7, Nop0xD8, Nop0xD9, Nop0xDA, - Nop0xDB, Nop0xDC, Nop0xDD, Nop0xDE, Nop0xDF, Nop0xE0, - Nop0xE1, Nop0xE2, Nop0xE3, Nop0xE4, Nop0xE5, Nop0xE6, + Nop0xDB, Nop0xDC, Nop0xDD, Nop0xDE, Nop0xDF, + + Rjump = 0xe0, ## Relative jump (EIP4200) + RJumpI = 0xe1, ## Conditional relative jump (EIP4200) + RJumpV = 0xe2, ## Relative jump via jump table (EIP4200) + + Nop0xE3, Nop0xE4, Nop0xE5, Nop0xE6, Nop0xE7, Nop0xE8, Nop0xE9, Nop0xEA, Nop0xEB, Nop0xEC, Nop0xED, Nop0xEE, Nop0xEF, ## .. diff --git a/nimbus/evm/interpreter/op_dispatcher.nim b/nimbus/evm/interpreter/op_dispatcher.nim index b84bba7842..44a33150c2 100644 --- a/nimbus/evm/interpreter/op_dispatcher.nim +++ b/nimbus/evm/interpreter/op_dispatcher.nim @@ -105,6 +105,14 @@ proc toCaseStmt(forkArg, opArg, k: NimNode): NimNode = quote do: `forkCaseSubExpr` break + of RetF: + quote do: + `forkCaseSubExpr` + # EIP-4750: If returning from top frame, exit cleanly. + let c = `k`.cpt + if c.fork >= FkEOF and + c.returnStack.len == 0: + break else: # FIXME-manyOpcodesNowRequireContinuations # We used to have another clause in this case statement for various @@ -150,6 +158,11 @@ template genLowMemDispatcher*(fork: EVMFork; op: Op; k: Vm2Ctx) = case c.instr of Return, Revert, SelfDestruct: break + of RetF: + # EIP-4750: If returning from top frame, exit cleanly. 
+ if fork >= FkEOF and + k.cpt.returnStack.len == 0: + break else: # FIXME-manyOpcodesNowRequireContinuations if not k.cpt.continuation.isNil: diff --git a/nimbus/evm/interpreter/op_handlers.nim b/nimbus/evm/interpreter/op_handlers.nim index 0bc0dbef24..7e4900f584 100644 --- a/nimbus/evm/interpreter/op_handlers.nim +++ b/nimbus/evm/interpreter/op_handlers.nim @@ -24,7 +24,7 @@ import ./op_handlers/[oph_defs, oph_arithmetic, oph_hash, oph_envinfo, oph_blockdata, oph_memory, oph_push, oph_dup, oph_swap, oph_log, - oph_create, oph_call, oph_sysops] + oph_create, oph_call, oph_sysops, oph_function] const allHandlersList = @[ @@ -40,7 +40,8 @@ const (vm2OpExecLog, "Log"), (vm2OpExecCreate, "Create"), (vm2OpExecCall, "Call"), - (vm2OpExecSysOp, "SysOp")] + (vm2OpExecSysOp, "SysOp"), + (vm2OpExecFunction, "Function")] # ------------------------------------------------------------------------------ # Helper @@ -74,13 +75,6 @@ proc mkOpTable(selected: EVMFork): array[Op,Vm2OpExec] {.compileTime.} = # Public functions # ------------------------------------------------------------------------------ -#const -# vm2OpHandlers* = block: -# var rc: array[Fork, array[Op, Vm2OpExec]] -# for w in Fork: -# rc[w] = w.mkOpTable -# rc - type vmOpHandlersRec* = tuple name: string ## Name (or ID) of op handler diff --git a/nimbus/evm/interpreter/op_handlers/oph_call.nim b/nimbus/evm/interpreter/op_handlers/oph_call.nim index 8b8b744ab8..359041a692 100644 --- a/nimbus/evm/interpreter/op_handlers/oph_call.nim +++ b/nimbus/evm/interpreter/op_handlers/oph_call.nim @@ -519,7 +519,7 @@ const post: vm2OpIgnore)), (opCode: CallCode, ## 0xf2, Message-Call with alternative code - forks: Vm2OpAllForks, + forks: Vm2OpAllForks - Vm2OpEOFAndLater, name: "callCode", info: "Message-call into this account with alternative account's code", exec: (prep: vm2OpIgnore, diff --git a/nimbus/evm/interpreter/op_handlers/oph_defs.nim b/nimbus/evm/interpreter/op_handlers/oph_defs.nim index 789ec8127b..acb6c6668a 
100644 --- a/nimbus/evm/interpreter/op_handlers/oph_defs.nim +++ b/nimbus/evm/interpreter/op_handlers/oph_defs.nim @@ -89,6 +89,8 @@ const Vm2OpCancunAndLater* = Vm2OpShanghaiAndLater - {FkShanghai} + Vm2OpEOFAndLater* = Vm2OpCancunAndLater + # ------------------------------------------------------------------------------ # End # ------------------------------------------------------------------------------ diff --git a/nimbus/evm/interpreter/op_handlers/oph_envinfo.nim b/nimbus/evm/interpreter/op_handlers/oph_envinfo.nim index d8ab1b9b69..6921536cd3 100644 --- a/nimbus/evm/interpreter/op_handlers/oph_envinfo.nim +++ b/nimbus/evm/interpreter/op_handlers/oph_envinfo.nim @@ -145,7 +145,7 @@ const cpt.gasCosts[CodeCopy].m_handler(cpt.memory.len, memPos, len), reason = "CodeCopy fee") - cpt.memory.writePadded(cpt.code.bytes, memPos, copyPos, len) + cpt.memory.writePadded(cpt.code.legacyCode, memPos, copyPos, len) gasPriceOp: Vm2OpFn = proc (k: var Vm2Ctx) = diff --git a/nimbus/evm/interpreter/op_handlers/oph_function.nim b/nimbus/evm/interpreter/op_handlers/oph_function.nim new file mode 100644 index 0000000000..de5097cc7f --- /dev/null +++ b/nimbus/evm/interpreter/op_handlers/oph_function.nim @@ -0,0 +1,78 @@ +# Nimbus +# Copyright (c) 2018 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or distributed except +# according to those terms. 
+ +## EVM Opcode Handlers: Function Operations +## ====================================== +## + +import + ../../../errors, + ../../code_stream, + ../../stack, + ../../types, + ../op_codes, + ./oph_defs + +# ------------------------------------------------------------------------------ +# Private, op handlers implementation +# ------------------------------------------------------------------------------ + +const + callfOp: Vm2OpFn = proc (k: var Vm2Ctx) = + ## 0xb0, Call the code section selected by the 16-bit immediate (EIP4750). + let + idx = k.cpt.code.readInt16() + typ = k.cpt.code.getType(idx) + + if k.cpt.stack.len + typ.maxStackHeight.int >= 1024: + raise newException( + StackDepthError, "CallF stack overflow") + + k.cpt.returnStack.add ReturnContext( + section : k.cpt.code.section, + pc : k.cpt.code.pc, + stackHeight: k.cpt.stack.len - typ.input.int + ) + + k.cpt.code.setSection(idx) + k.cpt.code.pc = 0 + + retfOp: Vm2OpFn = proc (k: var Vm2Ctx) = + ## 0xb1, Return to the section and pc saved on the return stack (EIP4750). + let ctx = k.cpt.returnStack.pop() + k.cpt.code.setSection(ctx.section) + k.cpt.code.pc = ctx.pc + +# ------------------------------------------------------------------------------ +# Public, op exec table entries +# ------------------------------------------------------------------------------ + +const + vm2OpExecFunction*: seq[Vm2OpExec] = @[ + + (opCode: CallF, ## 0xb0, Call a function + forks: Vm2OpEOFAndLater, + name: "CallF", + info: "Call a function (EIP-4750)", + exec: (prep: vm2OpIgnore, + run: callfOp, + post: vm2OpIgnore)), + + (opCode: RetF, ## 0xb1, Return from a function + forks: Vm2OpEOFAndLater, + name: "RetF", + info: "Return from a function (EIP-4750)", + exec: (prep: vm2OpIgnore, + run: retfOp, + post: vm2OpIgnore))] + +# ------------------------------------------------------------------------------ +# End +# ------------------------------------------------------------------------------ diff --git a/nimbus/evm/interpreter/op_handlers/oph_memory.nim
b/nimbus/evm/interpreter/op_handlers/oph_memory.nim index 74ee84357c..a67318fed2 100644 --- a/nimbus/evm/interpreter/op_handlers/oph_memory.nim +++ b/nimbus/evm/interpreter/op_handlers/oph_memory.nim @@ -315,6 +315,39 @@ const k.cpt.memory.copy(dstPos, srcPos, len) + rjumpOp: Vm2OpFn = proc (k: var Vm2Ctx) = + ## 0xe0, Relative jump (EIP4200) + let relativeOffset = k.cpt.code.readInt16() + k.cpt.code.pc += relativeOffset + + rjumpiOp: Vm2OpFn = proc (k: var Vm2Ctx) = + ## 0xe1, Conditional relative jump (EIP4200) + let condition = k.cpt.stack.popInt() + if condition.isZero: + # Not branching, just skip over immediate argument. + k.cpt.code.pc += 2 + return + + k.rjumpOp() + + rjumpvOp: Vm2OpFn = proc (k: var Vm2Ctx) = + ## 0xe2, Relative jump via jump table (EIP4200) + let count = k.cpt.code.readByte().int + let savedPC = k.cpt.code.pc + let idx = k.cpt.stack.popInt() + if idx >= count.u256: + # Index out-of-bounds (compared as a full 256-bit word so oversized + # values cannot wrap into range), just skip over immediate argument. + k.cpt.code.pc += count * 2 + return + + # get jump table entry + k.cpt.code.pc += idx.truncate(int) * 2 + let relativeOffset = k.cpt.code.readInt16() + + # move pc past operand(1 + count * 2) and relativeOffset + k.cpt.code.pc = savedPC + count * 2 + relativeOffset + #[ EIP-2315: temporary disabled Reason : not included in berlin hard fork @@ -455,7 +488,7 @@ const post: vm2OpIgnore)), (opCode: Jump, ## 0x56, Jump - forks: Vm2OpAllForks, + forks: Vm2OpAllForks - Vm2OpEOFAndLater, name: "jump", info: "Alter the program counter", exec: (prep: vm2OpIgnore, @@ -463,7 +496,7 @@ const post: vm2OpIgnore)), (opCode: JumpI, ## 0x57, Conditional jump - forks: Vm2OpAllForks, + forks: Vm2OpAllForks - Vm2OpEOFAndLater, name: "jumpI", info: "Conditionally alter the program counter", exec: (prep: vm2OpIgnore, @@ -471,7 +504,7 @@ const post: vm2OpIgnore)), (opCode: Pc, ## 0x58, Program counter prior to instruction - forks: Vm2OpAllForks, + forks: Vm2OpAllForks - Vm2OpEOFAndLater, name: "pc", info: "Get the value of the
program counter prior to the increment "& "corresponding to this instruction", @@ -527,6 +560,30 @@ const info: "Copy memory", exec: (prep: vm2OpIgnore, run: mCopyOp, + post: vm2OpIgnore)), + + (opCode: RJump, ## 0xe0, Relative jump (EIP4200) + forks: Vm2OpEOFAndLater, + name: "RJump", + info: "Alter the program counter by a signed relative offset", + exec: (prep: vm2OpIgnore, + run: rjumpOp, + post: vm2OpIgnore)), + + (opCode: RJumpI, ## 0xe1, Conditional relative jump (EIP4200) + forks: Vm2OpEOFAndLater, + name: "RJumpI", + info: "Conditionally alter the program counter by a signed relative offset", + exec: (prep: vm2OpIgnore, + run: rjumpiOp, + post: vm2OpIgnore)), + + (opCode: RJumpV, ## 0xe2, Relative jump via jump table (EIP4200) + forks: Vm2OpEOFAndLater, + name: "RJumpV", + info: "Relative jump via jump table", + exec: (prep: vm2OpIgnore, + run: rjumpvOp, post: vm2OpIgnore))] #[ diff --git a/nimbus/evm/interpreter/op_handlers/oph_sysops.nim b/nimbus/evm/interpreter/op_handlers/oph_sysops.nim index 9870bc9a4c..f467ef4c76 100644 --- a/nimbus/evm/interpreter/op_handlers/oph_sysops.nim +++ b/nimbus/evm/interpreter/op_handlers/oph_sysops.nim @@ -209,7 +209,7 @@ const post: vm2OpIgnore)), (opCode: SelfDestruct, ## 0xff, EIP2929: self destruct, Berlin and later - forks: Vm2OpBerlinAndLater, + forks: Vm2OpBerlinAndLater - Vm2OpEOFAndLater, name: "selfDestructEIP2929", info: "EIP2929: Halt execution and register account for later deletion", exec: (prep: vm2OpIgnore, diff --git a/nimbus/evm/interpreter_dispatch.nim b/nimbus/evm/interpreter_dispatch.nim index d2f78ef907..c8d0a60632 100644 --- a/nimbus/evm/interpreter_dispatch.nim +++ b/nimbus/evm/interpreter_dispatch.nim @@ -15,12 +15,12 @@ const import std/[macros, strformat], - pkg/[chronicles, chronos, stew/byteutils], ".."/[constants, db/accounts_cache], - "."/[code_stream, computation], + "."/[code_stream, computation, validate], "."/[message, precompiles, state, types], - ./async/operations, - ./interpreter/[op_dispatcher, gas_costs] + ../utils/eof, +
./interpreter/[op_dispatcher, gas_costs], + pkg/[chronicles, chronos, eth/keys, stew/byteutils] {.push raises: [].} @@ -108,13 +108,22 @@ proc selectVM(c: Computation, fork: EVMFork, shouldPrepareTracer: bool) genLowMemDispatcher(fork, c.instr, desc) -proc beforeExecCall(c: Computation) = +proc beforeExecCall(c: Computation): bool {.gcsafe, raises: [ValueError].} = c.snapshot() if c.msg.kind == EVMC_CALL: c.vmState.mutateStateDB: db.subBalance(c.msg.sender, c.msg.value) db.addBalance(c.msg.contractAddress, c.msg.value) + if c.fork >= FkEOF: + if c.code.hasEOFCode: + # Code was already validated, so no other errors should be possible. + # TODO: what should we do if there is really an error? + let res = c.code.parseEOF() + if res.isErr: + c.setError(res.error.toString, false) + return true + proc afterExecCall(c: Computation) = ## Collect all of the accounts that *may* need to be deleted based on EIP161 ## https://github.com/ethereum/EIPs/blob/master/EIPS/eip-161.md @@ -145,6 +154,26 @@ proc beforeExecCreate(c: Computation): bool if c.fork >= FkBerlin: db.accessList(c.msg.contractAddress) + let isCallerEOF = c.vmState.readOnlyStateDB.hasEOFCode(c.msg.sender) + c.initCodeEOF = c.code.hasEOFCode + + if c.fork >= FkEOF: + if isCallerEOF and not c.initcodeEOF: + # Don't allow EOF contract to run legacy initcode. + c.setError(ErrLegacyCode, false) + return true + elif c.initcodeEOF: + # If the initcode is EOF, verify it is well-formed. 
+ let res = c.code.parseEOF() + if res.isErr: + c.setError("EOF initcode parse error: " & res.error.toString, false) + return true + + let vres = c.code.container.validateCode() + if vres.isErr: + c.setError("EOF initcode validation error: " & vres.error.toString, false) + return true + c.snapshot() if c.vmState.readOnlyStateDB().hasCodeOrNonce(c.msg.contractAddress): @@ -206,7 +235,6 @@ proc beforeExec(c: Computation): bool if not c.msg.isCreate: c.beforeExecCall() - false else: c.beforeExecCreate() @@ -337,6 +365,9 @@ else: # to write the async version of the iterative one, but this one is # a bit shorter and feels cleaner, so if it works just as well I'd # rather use this one. --Adam +import + async/operations + proc asyncExecCallOrCreate*(c: Computation): Future[void] {.async.} = defer: c.dispose() diff --git a/nimbus/evm/stack_table.nim b/nimbus/evm/stack_table.nim new file mode 100644 index 0000000000..bf7a56ebf5 --- /dev/null +++ b/nimbus/evm/stack_table.nim @@ -0,0 +1,243 @@ +# Nimbus +# Copyright (c) 2023 Status Research & Development GmbH +# Licensed under either of +# * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or +# http://www.apache.org/licenses/LICENSE-2.0) +# * MIT license ([LICENSE-MIT](LICENSE-MIT) or +# http://opensource.org/licenses/MIT) +# at your option. This file may not be copied, modified, or +# distributed except according to those terms. 
+
+import
+  interpreter/op_codes,
+  ../common/evmforks
+
+type
+  # Stack requirements of a single opcode.
+  StackDesc* = object
+    min*: int      # fewest items that must be on the stack before the op runs
+    max*: int      # largest stack height at which the op may still run
+    enabled*: bool # false (the default) when the opcode is not in the table
+
+  # One StackDesc per opcode.
+  StackTable* = array[Op, StackDesc]
+
+const
+  StackLimit* = 1024
+
+# Highest height before an op popping `pop` and pushing `push` items such
+# that the height after the op does not exceed StackLimit.
+func maxStack(pop, push: int): int {.compileTime.} =
+  StackLimit + pop - push
+
+# Lowest height before an op: it needs as many items as it pops.
+func minStack(pops, push: int): int {.compileTime.} =
+  pops
+
+# SWAP touches `n` items and leaves the height unchanged.
+func minSwapStack(n: int): int {.compileTime.} =
+  minStack(n, n)
+
+func maxSwapStack(n: int): int {.compileTime.} =
+  maxStack(n, n)
+
+# DUP needs `n` items and grows the stack by one.
+func minDupStack(n: int): int {.compileTime.} =
+  minStack(n, n+1)
+
+func maxDupStack(n: int): int {.compileTime.} =
+  maxStack(n, n+1)
+
+# (opcode, descriptor) pair for an op popping `a` and pushing `b` items.
+template sm(op: Op, a, b: int): untyped =
+  (op, StackDesc(
+    min: minStack(a, b),
+    max: maxStack(a, b),
+    enabled: true)
+  )
+
+# Descriptor for an op popping `a` and pushing `b` items.
+template sp(a, b: int): untyped =
+  StackDesc(
+    min: minStack(a, b),
+    max: maxStack(a, b),
+    enabled: true
+  )
+
+# Descriptor for a DUP reading `x` items.
+template sd(x: int): untyped =
+  StackDesc(
+    min: minDupStack(x),
+    max: maxDupStack(x),
+    enabled: true
+  )
+
+# Descriptor for a SWAP touching `x` items.
+template ss(x: int): untyped =
+  StackDesc(
+    min: minSwapStack(x),
+    max: maxSwapStack(x),
+    enabled: true
+  )
+
+const
+  # Opcodes of the first table, as (opcode, pops, pushes).
+  BaseStackTable = [
+    sm(Stop, 0, 0),
+    sm(Add, 2, 1),
+    sm(Mul, 2, 1),
+    sm(Sub, 2, 1),
+    sm(Div, 2, 1),
+    sm(Sdiv, 2, 1),
+    sm(Mod, 2, 1),
+    sm(Smod, 2, 1),
+    sm(Addmod, 3, 1),
+    sm(Mulmod, 3, 1),
+    sm(Exp, 2, 1),
+    sm(SignExtend, 2, 1),
+    sm(Lt, 2, 1),
+    sm(Gt, 2, 1),
+    sm(Slt, 2, 1),
+    sm(Sgt, 2, 1),
+    sm(Eq, 2, 1),
+    sm(IsZero, 1, 1),
+    sm(And, 2, 1),
+    sm(Or, 2, 1),
+    sm(Xor, 2, 1),
+    sm(Not, 1, 1),
+    sm(Byte, 2, 1),
+    sm(Sha3, 2, 1),
+    sm(Address, 0, 1),
+    sm(Balance, 1, 1),
+    sm(Origin, 0, 1),
+    sm(Caller, 0, 1),
+    sm(CallValue, 0, 1),
+    sm(CallDataLoad, 1, 1),
+    sm(CallDataSize, 0, 1),
+    sm(CallDataCopy, 3, 0),
+    sm(CodeSize, 0, 1),
+    sm(CodeCopy, 3, 0),
+    sm(GasPrice, 0, 1),
+    sm(ExtCodeSize, 1, 1),
+    sm(ExtCodeCopy, 4, 0),
+    sm(Blockhash, 1, 1),
+    sm(Coinbase, 0, 1),
+    sm(Timestamp, 0, 1),
+    sm(Number, 0, 1),
+    sm(Difficulty, 0, 1),
+    sm(GasLimit, 0, 1),
+    sm(Pop, 1, 0),
+    sm(Mload, 1, 1),
+    sm(Mstore, 2, 0),
+    sm(Mstore8, 2, 0),
+    sm(Sload, 1, 1),
+    sm(Sstore, 2, 0),
+    sm(Jump, 1, 0),
+    sm(JumpI, 2, 0),
+    sm(Pc, 0, 1),
+    sm(Msize, 0, 1),
+    sm(Gas, 0, 1),
+    sm(JumpDest, 0, 0),
+    sm(Log0, 2, 0),
+    sm(Log1, 3, 0),
+    sm(Log2, 4, 0),
+    sm(Log3, 5, 0),
+    sm(Log4, 6, 0),
+    sm(Create, 3, 1),
+    sm(Call, 7, 1),
+    sm(CallCode, 7, 1),
+    sm(Return, 2, 0),
+    sm(SelfDestruct, 1, 0),
+    sm(Invalid, 0, 0),
+  ]
+
+# Each builder below copies the previous table and adds (or disables)
+# opcodes; entries never assigned stay at the default, i.e. not enabled.
+proc frontierStackTable(): StackTable {.compileTime.} =
+  for x in BaseStackTable:
+    result[x[0]] = x[1]
+
+  for x in Push1..Push32:
+    result[x] = sp(0, 1)
+
+  # Dup1 reads 1 item, Dup16 reads 16.
+  for x in Dup1..Dup16:
+    result[x] = sd(x.int-Dup1.int+1)
+
+  # Swap1 touches 2 items, Swap16 touches 17.
+  for x in Swap1..Swap16:
+    result[x] = ss(x.int-Swap1.int+2)
+
+proc homesteadStackTable(): StackTable {.compileTime.} =
+  result = frontierStackTable()
+  result[DelegateCall] = sp(6, 1)
+
+proc byzantiumStackTable(): StackTable {.compileTime.} =
+  result = homesteadStackTable()
+  result[StaticCall] = sp(6, 1)
+  result[ReturnDataSize] = sp(0, 1)
+  result[ReturnDataCopy] = sp(3, 0)
+  result[Revert] = sp(2, 0)
+
+proc constantinopleStackTable(): StackTable {.compileTime.} =
+  result = byzantiumStackTable()
+  result[Shl] = sp(2, 1)
+  result[Shr] = sp(2, 1)
+  result[Sar] = sp(2, 1)
+  result[ExtCodeHash] = sp(1, 1)
+  result[Create2] = sp(4, 1)
+
+proc istanbulStackTable(): StackTable {.compileTime.} =
+  result = constantinopleStackTable()
+  # new opcodes EIP-1344
+  result[ChainIdOp] = sp(0, 1)
+  # new opcodes EIP-1884
+  result[SelfBalance] = sp(0, 1)
+
+proc londonStackTable(): StackTable {.compileTime.} =
+  result = istanbulStackTable()
+  # new opcodes EIP-3198
+  result[BaseFee] = sp(0, 1)
+
+proc mergeStackTable(): StackTable {.compileTime.} =
+  result = londonStackTable()
+  result[PrevRandao] = sp(0, 1)
+
+proc shanghaiStackTable(): StackTable {.compileTime.} =
+  result = mergeStackTable()
+  # new opcodes EIP-3855
+  result[Push0] = sp(0, 1)
+
+proc cancunStackTable(): StackTable {.compileTime.} =
+  result = shanghaiStackTable()
+  # new opcodes EIP-4844
+  result[BlobHash] = sp(1, 1)
+
+  # new opcodes EIP-1153
+  result[TLoad] = sp(1, 1)
+  result[TStore] = sp(2, 0)
+
+  # new opcodes EIP-5656
+  result[Mcopy] = sp(3, 0)
+
+  # new opcodes EIP-4200
+  result[Rjump] = sp(0, 0)
+  result[RJumpI] = sp(1, 0)
+  result[RJumpV] = sp(1, 0)
+
+  # new opcodes EIP-4750
+  result[CallF] = sp(0, 0)
+  result[RetF] = sp(0, 0)
+
+  # new opcodes EIP-7516
+  # BLOBBASEFEE takes no stack input and pushes a single value.
+  result[BlobBaseFee] = sp(0, 1)
+
+  # disable opcodes EIP-3670
+  result[CallCode] = StackDesc()
+  result[SelfDestruct] = StackDesc()
+
+  # disable opcodes EIP-5450
+  result[Jump] = StackDesc()
+  result[JumpI] = StackDesc()
+  result[Pc] = StackDesc()
+
+const
+  # One table per EVMFork value, in EVMFork order; forks that add no
+  # opcode reuse the table of their predecessor.
+  EVMForksStackTable*: array[EVMFork, StackTable] = [
+    frontierStackTable(),
+    homesteadStackTable(),
+    homesteadStackTable(),
+    homesteadStackTable(),
+    byzantiumStackTable(),
+    constantinopleStackTable(),
+    constantinopleStackTable(),
+    istanbulStackTable(),
+    istanbulStackTable(),
+    londonStackTable(),
+    mergeStackTable(),
+    shanghaiStackTable(),
+    cancunStackTable(),
+  ]
diff --git a/nimbus/evm/types.nim b/nimbus/evm/types.nim
index f9c23202a2..6e90ec99cf 100644
--- a/nimbus/evm/types.nim
+++ b/nimbus/evm/types.nim
@@ -72,13 +72,20 @@ type
     gasCosts* : GasCosts
     asyncFactory* : AsyncOperationFactory
 
+  # EIP-4750
+  ReturnContext* = object
+    section*: int
+    pc*: int
+    stackHeight*: int
+
   Computation* = ref object
     # The execution computation
     vmState*: BaseVMState
     msg*: Message
     memory*: Memory
     stack*: Stack
-    returnStack*: seq[int]
+    # disable EIP-2315
+    # returnStack*: seq[int]
     gasMeter*: GasMeter
     code*: CodeStream
     output*: seq[byte]
@@ -96,6 +103,8 @@ type
     pendingAsyncOperation*: Future[void]
     continuation*: proc() {.gcsafe, raises: [CatchableError].}
     sysCall*: bool
+    initcodeEOF*: bool
+    returnStack*: seq[ReturnContext]
 
   Error* = ref object
     statusCode*: evmc_status_code
diff --git a/nimbus/evm/validate.nim b/nimbus/evm/validate.nim
new file mode 100644
index 0000000000..bc9811e0c4
--- /dev/null
+++
b/nimbus/evm/validate.nim
@@ -0,0 +1,267 @@
+# Nimbus
+# Copyright (c) 2023 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or
+# distributed except according to those terms.
+
+
+# EIP-3670: EOF - Code Validation
+# EIP-5450: EOF - Stack Validation
+
+import
+  std/[tables, strutils],
+  stew/[results, endians2],
+  ../utils/eof,
+  ../common/evmforks,
+  ./interpreter/op_codes,
+  ./analysis,
+  ./stack_table
+
+type
+  OpDefined = array[Op, bool]
+
+# Stack table of the fork in which EOF is enabled.
+template EOFStackTable(): untyped =
+  EVMForksStackTable[FkEOF]
+
+# isTerminal reports whether `op` is one of the opcodes accepted as the
+# last instruction of a code section.
+func isTerminal(op: Op): bool =
+  op in {RJUMP, RETF, STOP, RETURN, REVERT, INVALID}
+
+# parseUint16 reads the big-endian unsigned 16-bit value at code[pos..pos+1].
+proc parseUint16(code: openArray[byte], pos: int): int =
+  int(fromBytesBE(uint16, code.toOpenArray(pos, pos + 1)))
+
+# parseInt16 reads the same two bytes and reinterprets them as a signed
+# 16-bit value.
+proc parseInt16(code: openArray[byte], pos: int): int =
+  int(cast[int16](fromBytesBE(uint16, code.toOpenArray(pos, pos + 1))))
+
+# checkDest parses a relative offset at code[0:2] and checks if it is a valid jump destination.
+proc checkDest(code: openArray[byte], analysis: Bitvec,
+               imm, src, length: int): Result[void, EOFV1Error] =
+  # imm:    index in `code` of the 2-byte signed relative offset.
+  # src:    position the offset is relative to.
+  # length: exclusive upper bound of a valid destination.
+  # The destination must also be marked as code by `analysis`, i.e. it may
+  # not point into an immediate value.
+  if code.len < imm+2:
+    return err(eofErr(ErrUnexpectedEOF, code.len))
+
+  let offset = parseInt16(code, imm)
+  let dest = src + offset
+  if dest < 0 or dest >= length:
+    return err(eofErr(ErrInvalidJumpDest,
+      imm,
+      "relative offset out-of-bounds: offset $1, dest $2" %
+        [$offset, $dest]))
+
+  if not analysis.codeSegment(dest):
+    return err(eofErr(ErrInvalidJumpDest,
+      imm,
+      "relative offset into immediate value: offset $1, dest $2" %
+        [$offset, $dest]))
+
+  ok()
+
+# stackOverflow builds an ErrStackOverflow for position `pos`; `msg`, when
+# given, is appended to the description.
+proc stackOverflow(pos: int, len: int, limit: int, msg = ""): EOFV1Error =
+  if msg.len == 0:
+    eofErr(ErrStackOverflow, pos, "len: $1, limit: $2" % [$len, $limit])
+  else:
+    eofErr(ErrStackOverflow, pos, "len: $1, limit: $2, $3" % [$len, $limit, msg])
+
+# stackUnderflow builds an ErrStackUnderflow for position `pos`; `msg`, when
+# given, is appended to the description.
+proc stackUnderflow(pos: int, len: int, req: int, msg = ""): EOFV1Error =
+  if msg.len == 0:
+    eofErr(ErrStackUnderflow, pos, "($1 <=> $2)" % [$len, $req])
+  else:
+    eofErr(ErrStackUnderflow, pos, "($1 <=> $2), $3" % [$len, $req, msg])
+
+# validateControlFlow iterates through all possible branches of the provided
+# code value and determines if it is valid per EOF v1.
+# On success it returns the number of distinct code positions visited.
+proc validateControlFlow(code: openArray[byte],
+                         section: int,
+                         metadata: openArray[FunctionMetadata],
+                         st: StackTable): Result[int, EOFV1Error] =
+  var
+    # Stack height recorded for every visited position.
+    heights = initTable[int, int]()
+    # Pending (position, stack height) pairs still to be explored.
+    worklist = @[(0, metadata[section].input.int)]
+    maxStackHeight = metadata[section].input.int
+
+  while worklist.len > 0:
+    var (pos, height) = worklist.pop()
+
+    block outer:
+      while pos < code.len:
+        let op = Op(code[pos])
+
+        # Check if pos has already been visited; if so, the stack heights should be the same.
+        heights.withValue(pos, val) do:
+          let want = val[]
+          if height != want:
+            return err(eofErr(ErrConflictingStack, pos,
+              "have $1, want $2" % [$height, $want]))
+          # Already visited this path and stack height
+          # matches.
+          break
+        heights[pos] = height
+
+        # Validate height for current op and update as needed.
+        if st[op].min > height:
+          return err(stackUnderflow(pos, height, st[op].min))
+
+        if st[op].max < height:
+          return err(stackOverflow(pos, height, st[op].max))
+
+        # StackLimit - max equals pushes minus pops of the op.
+        height += StackLimit - st[op].max
+
+        case op
+        of CALLF:
+          let arg = parseUint16(code, pos+1)
+          if metadata[arg].input.int > height:
+            return err(stackUnderflow(pos, height, metadata[arg].input.int,
+              "CALLF underflow to section " & $arg))
+
+          if metadata[arg].output.int+height > StackLimit:
+            return err(stackOverflow(pos, metadata[arg].output.int+height, StackLimit,
+              "CALLF overflow to section " & $arg))
+
+          height -= metadata[arg].input.int
+          height += metadata[arg].output.int
+          pos += 3
+        of RETF:
+          if int(metadata[section].output) != height:
+            # Report both values; the second placeholder used to repeat
+            # the first one.
+            return err(eofErr(ErrInvalidOutputs, pos,
+              "have $1, want $2" %
+                [$metadata[section].output, $height]))
+          break outer
+        of RJUMP:
+          let arg = parseInt16(code, pos+1)
+          pos += 3 + arg
+        of RJUMPI:
+          # Queue the taken branch, continue with the fall-through.
+          let arg = parseInt16(code, pos+1)
+          worklist.add((pos + 3 + arg, height))
+          pos += 3
+        of RJUMPV:
+          # Queue every table entry, continue with the fall-through.
+          let count = int(code[pos+1])
+          for i in 0 ..< count:
+            let arg = parseInt16(code, pos+2+2*i)
+            worklist.add((pos + 2 + 2*count + arg, height))
+          pos += 2 + 2*count
+        else:
+          if op >= PUSH1 and op <= PUSH32:
+            pos += 1 + op.int-PUSH0.int
+          elif isTerminal(op):
+            break outer
+          else:
+            # Simple op, no operand.
+            pos += 1
+
+        maxStackHeight = max(maxStackHeight, height)
+
+  if maxStackHeight != metadata[section].maxStackHeight.int:
+    return err(eofErr(ErrInvalidMaxStackHeight, 0,
+      "at code section $1, have $2, want $3" %
+        [$section, $metadata[section].maxStackHeight, $maxStackHeight]))
+
+  ok(heights.len)
+
+# validateCode validates the code parameter against the EOF v1 validity requirements.
+proc validateCode(code: openArray[byte],
+                  section: int,
+                  metadata: openArray[FunctionMetadata],
+                  st: StackTable): Result[void, EOFV1Error] =
+  let bitmap = eofCodeBitmap(code)
+  var
+    pc = 0
+    # Number of real instructions (immediates excluded). Compared at the
+    # end with the number of positions the control-flow pass reached, to
+    # detect unreachable instructions.
+    instrCount = 0
+    op: Op
+
+  # Linear pass over every instruction, checking that:
+  # * the opcode is enabled in the given stack table,
+  # * an immediate value, if any, is not truncated,
+  # * every relative jump lands on a valid destination,
+  # * a CALLF refers to an existing code section.
+  while pc < code.len:
+    instrCount += 1
+    op = Op(code[pc])
+    if not st[op].enabled:
+      return err(eofErr(ErrUndefinedInstruction,
+        pc, "opcode=" & $op))
+
+    case op
+    of PUSH1..PUSH32:
+      let immLen = op.int - PUSH0.int
+      if pc + immLen >= code.len:
+        return err(eofErr(ErrTruncatedImmediate,
+          pc, "op=" & $op))
+      pc += immLen
+    of RJUMP, RJUMPI:
+      if pc + 2 >= code.len:
+        return err(eofErr(ErrTruncatedImmediate,
+          pc, "op=" & $op))
+      let chk = checkDest(code, bitmap, pc+1, pc+3, code.len)
+      if chk.isErr:
+        return chk
+      pc += 2
+    of RJUMPV:
+      if pc + 1 >= code.len:
+        return err(eofErr(ErrTruncatedImmediate,
+          pc, "jump table size missing"))
+      let branches = int(code[pc+1])
+      if branches == 0:
+        return err(eofErr(ErrInvalidBranchCount,
+          pc, "must not be 0"))
+      if pc + branches >= code.len:
+        return err(eofErr(ErrTruncatedImmediate,
+          pc, "jump table truncated"))
+      for j in 0 ..< branches:
+        let chk = checkDest(code, bitmap, pc+2+j*2, pc+2*branches+2, code.len)
+        if chk.isErr:
+          return chk
+      pc += 1 + 2*branches
+    of CALLF:
+      if pc + 2 >= code.len:
+        return err(eofErr(ErrTruncatedImmediate,
+          pc, "op=" & $op))
+      let target = parseUint16(code, pc+1)
+      if target >= metadata.len:
+        return err(eofErr(ErrInvalidSectionArgument,
+          pc, "arg $1, last section $2" % [$target, $metadata.len]))
+      pc += 2
+    else:
+      discard
+    # Step over the opcode byte itself.
+    pc += 1
+
+  # A code section may not "fall through": the last instruction seen has
+  # to be a terminal one.
+  if not isTerminal(op):
+    return err(eofErr(ErrInvalidCodeTermination,
+      pc, "ends with op " & $op))
+
+  let flow = validateControlFlow(code, section, metadata, st)
+  if flow.isErr:
+    return err(flow.error)
+
+  if flow.get() != instrCount:
+    # TODO: return actual unreachable position
+    return err(eofErr(ErrUnreachableCode, 0, ""))
+
+  ok()
+
+# Validates one code section against the stack table of the EOF fork.
+proc validateCode*(code: openArray[byte], section: int,
+                   metadata: openArray[FunctionMetadata]): Result[void, EOFV1Error] =
+  validateCode(code, section, metadata, EOFStackTable)
+
+# Validates every code section of the container against the EOF v1 rule
+# set and returns the first error found.
+proc validateCode*(c: Container): Result[void, EOFV1Error] =
+  for idx, body in c.code:
+    let res = validateCode(body, idx, c.types)
+    if res.isErr:
+      return res
+
+  ok()
diff --git a/nimbus/utils/eof.nim b/nimbus/utils/eof.nim
new file mode 100644
index 0000000000..d1f3b6921b
--- /dev/null
+++ b/nimbus/utils/eof.nim
@@ -0,0 +1,348 @@
+# Nimbus
+# Copyright (c) 2023 Status Research & Development GmbH
+# Licensed under either of
+#  * Apache License, version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+#    http://www.apache.org/licenses/LICENSE-2.0)
+#  * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+#    http://opensource.org/licenses/MIT)
+# at your option. This file may not be copied, modified, or
+# distributed except according to those terms.
+
+
+# https://eips.ethereum.org/EIPS/eip-3540
+# EIP-3540: EOF - EVM Object Format v1
+
+import
+  std/strutils,
+  stew/[results, byteutils, endians2]
+
+type
+  Blob = seq[byte]
+
+  # Decoded EOF container.
+  Container* = object
+    types*: seq[FunctionMetaData]
+    code*: seq[Blob]
+    data*: Blob
+    size*: int
+
+  # Type-section entry of one code section.
+  FunctionMetaData* = object
+    input*: uint8
+    output*: uint8
+    maxStackHeight*: uint16
+
+  # (kind, size) pair of a header.
+  Section = object
+    kind: uint8
+    size: uint16
+
+  # Kind plus a list of 16-bit values of a header.
+  SectionList = object
+    kind: uint8
+    list: seq[uint16]
+
+  EOFV1ErrorKind* = enum
+    ErrNoEOFErr
+    ErrUnexpectedEOF = "unexpected End of Data"
+    ErrInvalidMagic = "invalid magic"
+    ErrInvalidVersion = "invalid version"
+    ErrMissingTypeHeader = "missing type header"
+    ErrInvalidTypeSize = "invalid type section size"
+    ErrMissingCodeHeader = "missing code header"
+    ErrInvalidCodeHeader = "invalid code header"
+    ErrInvalidCodeSize = "invalid code size"
+    ErrMissingDataHeader = "missing data header"
+    ErrMissingTerminator = "missing header terminator"
+    ErrTooManyInputs = "invalid type content, too many inputs"
+    ErrTooManyOutputs = "invalid type content, too many outputs"
+    ErrInvalidSection0Type = "invalid section 0 type, input and output should be zero"
+    ErrTooLargeMaxStackHeight = "invalid type content, max stack height exceeds limit"
+    ErrInvalidContainerSize = "invalid container size"
+
+    # validation error
+    ErrUndefinedInstruction = "undefined instruction"
+    ErrTruncatedImmediate = "truncated immediate"
+    ErrInvalidSectionArgument = "invalid section argument"
+    ErrInvalidJumpDest = "invalid jump destination"
+    ErrConflictingStack = "conflicting stack height"
+    ErrInvalidBranchCount = "invalid number of branches in jump table"
+    ErrInvalidOutputs = "invalid number of outputs"
+    ErrInvalidMaxStackHeight = "invalid max stack height"
+    ErrInvalidCodeTermination = "invalid code termination"
+    ErrUnreachableCode = "unreachable code"
+    ErrStackUnderflow = "stack underflow"
+    ErrStackOverflow = "stack overflow"
+
+  # Error kind, the position it refers to and an optional detail message.
+  EOFV1Error* = object
+    kind*: EOFV1ErrorKind
+    pos* : int
+    msg* : string
+
+const
+  offsetTypesKind = 3
+  offsetCodeKind = 6
+
+  kindTypes = 1.uint8
+  kindCode = 2.uint8
+  kindData = 3.uint8
+
+  eofFormatByte = 0xef.byte
+  eof1Version = 1.byte
+  eofMagicLen = 2
+  eofMagic0 = 0xef.byte
+  eofMagic1 = 0x00.byte
+
+  maxInputItems = 127
+  maxOutputItems = 127
+  maxStackHeight = 1023
+
+# toString renders an error as "<kind> at position <pos>", followed by the
+# detail message when there is one.
+proc toString*(p: EOFV1Error): string =
+  if p.msg.len == 0:
+    return "$1 at position $2" % [$p.kind, $p.pos]
+  "$1 at position $2, $3" % [$p.kind, $p.pos, p.msg]
+
+# eofErr builds an error without detail message.
+proc eofErr*(kind: EOFV1ErrorKind, pos: int): EOFV1Error =
+  EOFV1Error(kind: kind, pos: pos)
+
+# eofErr builds an error with detail message.
+proc eofErr*(kind: EOFV1ErrorKind, pos: int, msg: string): EOFV1Error =
+  EOFV1Error(kind: kind, pos: pos, msg: msg)
+
+# hasEOFByte returns true if code starts with 0xEF byte
+func hasEOFByte*(code: openArray[byte]): bool =
+  code.len != 0 and code[0] == eofFormatByte
+
+# hasEOFMagic returns true if code starts with magic defined by EIP-3540
+func hasEOFMagic*(code: openArray[byte]): bool =
+  eofMagicLen <= code.len and
+    eofMagic0 == code[0] and
+    eofMagic1 == code[1]
+
+# isEOFVersion1 returns true if the code's version byte equals eof1Version. It
+# does not verify the EOF magic is valid.
+func isEOFVersion1(code: openArray[byte]): bool =
+  eofMagicLen < code.len and
+    code[2] == eof1Version
+
+# parseSection decodes a (kind, size) pair from an EOF header.
+func parseSection(s: var Section, b: openArray[byte], idx: int): Result[void, EOFV1Error] =
+  # One kind byte followed by a big-endian 16-bit size.
+  if idx+3 > b.len:
+    return err(eofErr(ErrUnexpectedEOF, b.len))
+
+  s = Section(
+    kind: uint8(b[idx]),
+    size: uint16.frombytesBE(toOpenArray(b, idx+1, idx+1+2-1))
+  )
+
+  ok()
+
+# parseList decodes a list of uint16..
+func parseList(s: var SectionList, b: openArray[byte], idx: int): Result[void, EOFV1Error] =
+  # Layout read here: a big-endian 16-bit count at `idx`, followed by
+  # `count` big-endian 16-bit values.
+  if b.len < idx+2:
+    return err(eofErr(ErrUnexpectedEOF, b.len))
+
+  let count = frombytesBE(uint16, toOpenArray(b, idx, idx+2-1)).int
+  if b.len < idx+2+count*2:
+    return err(eofErr(ErrUnexpectedEOF, b.len))
+
+  s.list = newSeq[uint16](count)
+  # NOTE(review): the loop body, the trailing ok() and the signature of
+  # parseSectionList were lost in this copy of the patch and have been
+  # reconstructed from the header layout used by decode - confirm against
+  # the upstream file.
+  for i in 0..<count:
+    s.list[i] = frombytesBE(uint16, toOpenArray(b, idx+2+2*i, idx+2+2*i+2-1))
+
+  ok()
+
+# parseSectionList decodes a kind byte followed by a list of uint16 from an
+# EOF header.
+func parseSectionList(s: var SectionList, b: openArray[byte], idx: int): Result[void, EOFV1Error] =
+  if idx >= b.len:
+    return err(eofErr(ErrUnexpectedEOF, b.len))
+
+  s.kind = b[idx].uint8
+  let res = parseList(s, b, idx+1)
+  if res.isErr:
+    return res
+
+  ok()
+
+# size returns the sum of all entries of the list.
+func size(s: SectionList): int =
+  for x in s.list:
+    result += x.int
+
+# decodes an EOF container.
+proc decode*(c: var Container, b: openArray[byte]): Result[void, EOFV1Error] =
+  if b.len < eofMagicLen:
+    return err(eofErr(ErrUnexpectedEOF, b.len))
+
+  if not b.hasEOFMagic:
+    let z = min(2, b.len)
+    return err(eofErr(ErrInvalidMagic,
+      0, "have 0x$1, want 0xEF00" % [toOpenArray(b, 0, z-1).toHex]))
+
+  if not b.isEOFVersion1:
+    var have = ""
+    if b.len >= 3:
+      have = $(b[2].int)
+    return err(eofErr(ErrInvalidVersion,
+      2, "have $1, want $2" % [have, $(eof1Version.int)]))
+
+  # Parse type section header.
+  var types: Section
+  var res = types.parseSection(b, offsetTypesKind)
+  if res.isErr:
+    return res
+
+  if types.kind != kindTypes:
+    return err(eofErr(ErrMissingTypeHeader,
+      offsetTypesKind,
+      "found section kind $1 instead" % [toHex(types.kind.int, 2)]))
+
+  if types.size < 4 or ((types.size mod 4) != 0):
+    return err(eofErr(ErrInvalidTypeSize,
+      offsetTypesKind+1,
+      "type section size must be divisible by 4: have " & $types.size))
+
+  let typesSize = types.size.int div 4
+  if typesSize > 1024:
+    return err(eofErr(ErrInvalidTypeSize,
+      offsetTypesKind+1,
+      "type section must not exceed 4*1024: have " & $(typesSize*4)))
+
+  # Parse code section header.
+ var code: SectionList + res = code.parseSectionList(b, offsetCodeKind) + if res.isErr: + return res + + if code.kind != kindCode: + return err(eofErr(ErrMissingCodeHeader, + offsetCodeKind, "found section kind $1 instead" % + [toHex(code.kind.int, 2)])) + + if code.list.len != typesSize: + return err(eofErr(ErrInvalidCodeSize, + offsetCodeKind+1, + "mismatch of code sections count and type signatures: types $1, code $2" % + [$typessize, $code.list.len])) + + # Parse data section header. + let offsetDataKind = offsetCodeKind + 2 + 2*code.list.len + 1 + var data: Section + res = data.parseSection(b, offsetDataKind) + if res.isErr: + return res + + if data.kind != kindData: + return err(eofErr(ErrMissingDataHeader, + offsetDataKind, "found section kind $1 instead" % + [toHex(data.kind.int, 2)])) + + # Check for terminator. + let offsetTerminator = offsetDataKind + 3 + if b.len <= offsetTerminator: + return err(eofErr(ErrUnexpectedEOF, b.len)) + + if b[offsetTerminator] != 0: + return err(eofErr(ErrMissingTerminator, + offsetTerminator, + "have " & $(b[offsetTerminator].int))) + + # Verify overall container size. + c.size = offsetTerminator + types.size.int + code.size + data.size.int + 1 + if b.len != c.size: + return err(eofErr(ErrInvalidContainerSize, 0, + "have $1, want $2" % + [$b.len, $c.size])) + + # Parse types section. 
+ var idx = offsetTerminator + 1 + c.types = @[] # for testing purpose + for i in 0 ..< typesSize: + let z = idx+i*4 + let sig = FunctionMetadata( + input: b[z], + output: b[z+1], + maxStackHeight: uint16.fromBytesBE(toOpenArray(b, z+2, z+4-1)) + ) + + if sig.input > maxInputItems: + return err(eofErr(ErrTooManyInputs, idx+i*4, + "for section $1, have $2" % + [$i, $sig.input.int])) + + if sig.output > maxOutputItems: + return err(eofErr(ErrTooManyOutputs, idx+i*4+1, + "for section $1, have $2" % + [$i, $sig.output.int])) + + if sig.maxStackHeight > maxStackHeight: + return err(eofErr(ErrTooLargeMaxStackHeight, idx+i*4+2, + "for section $1, have $2" % + [$i, $sig.maxStackHeight])) + + c.types.add(sig) + + if c.types[0].input != 0 or c.types[0].output != 0: + return err(eofErr(ErrInvalidSection0Type, idx, + "have $1, $2" % + [$c.types[0].input.int, $c.types[0].output.int])) + + # Parse code sections. + idx += types.size.int + c.code = newSeq[Blob](code.list.len) + for i, size in code.list: + if size == 0: + return err(eofErr(ErrInvalidCodeSize, + offsetCodeKind+2+i*2, + "invalid code section $1: size must not be 0" % [$i])) + + c.code[i] = @b[idx..