#!/usr/bin/env python3
"""
validate.py

Validates that mnemonic labels match the numeric values given for opcodes,
register codes, etc; normalizes argument order and verifies that there are no
missing arguments.

>>> from io import StringIO
>>> # doctest: +REPORT_NDIFF
... print(subv.join_all(validate(StringIO('''
... == code 0x80000000
... main:
... # load 0x10010000 (UART0) into t0
... 37/lui 5/rd/t0 10010/imm20
... # store 0x48 (H) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 48/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x65 (e) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 65/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x6c (l) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 6c/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x6c (l) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 6c/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x6f (o) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 6f/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x0a (\\\\n) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 0a/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # jump back up to the top
... 6f/jal 0/rd/x0 main/off21
... '''[1:-1]))))
== code 0x80000000
main:
# load 0x10010000 (UART0) into t0
37/u 5/rd 10010/imm20
# store 0x48 (H) in UART0+0
13/i 6/rd 0/funct3 0/rs 48/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x65 (e) in UART0+0
13/i 6/rd 0/funct3 0/rs 65/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x6c (l) in UART0+0
13/i 6/rd 0/funct3 0/rs 6c/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x6c (l) in UART0+0
13/i 6/rd 0/funct3 0/rs 6c/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x6f (o) in UART0+0
13/i 6/rd 0/funct3 0/rs 6f/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x0a (\\n) in UART0+0
13/i 6/rd 0/funct3 0/rs a/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# jump back up to the top
6f/j 0/rd main/off21
"""
import subv


def pop_part(line, labels, NAME="part"):
    """Remove and return the first part of ``line`` whose label is in ``labels``.

    A part is a tuple ``(value, label[, mnemonic, ...])``.  The returned tuple
    has its label replaced by the canonical one, ``labels[0]``; any trailing
    elements (such as a mnemonic) are kept.

    Raises:
        ValueError: if no part in ``line`` carries one of ``labels``.  ``NAME``
            is only used to word this message.
    """
    for part in line:
        if part[1] in labels:
            # Mutating while iterating is safe here: we return immediately.
            line.remove(part)
            return (part[0], labels[0]) + part[2:]
    raise ValueError("Expected a {} with label {}".format(NAME, "/".join(labels)))


def try_pop_zeroes(line, labels):
    """Pop an optional part that, when present, must have the value zero.

    Returns ``(0, labels[0])`` when the part is absent, otherwise the popped
    part truncated to ``(value, canonical_label)``.

    Raises:
        ValueError: if the part is present with a non-zero value.
    """
    try:
        part = pop_part(line, labels)[:2]
    except ValueError:
        # Absent is fine: the field defaults to zero.
        part = (0, labels[0])
    if part[0] != 0:
        raise ValueError("Expected {} value to be zero".format(subv.format_part(part)))
    return part


def validate_part(line, labels, MAP, NAME="part"):
    """Pop a part and check its numeric value against its mnemonic.

    ``MAP`` maps every accepted mnemonic to the numeric code it stands for.
    The popped part must be a 3-tuple ``(value, label, mnemonic)`` whose
    mnemonic is a key of ``MAP`` and whose value equals ``MAP[mnemonic]``.

    Returns:
        The popped part as ``(value, labels[0], mnemonic)``.

    Raises:
        ValueError: if the part is missing, has no mnemonic, has an unknown
            mnemonic, or its value disagrees with the mnemonic.
    """
    part = pop_part(line, labels, NAME)

    def mnemonic_help():
        # Built only when an error is actually reported; the listing is
        # useless (and wasted work) on the success path.
        return "\nvalid mnemonics are: " + ", ".join(
            subv.format_part((MAP[name], labels[0], name)) for name in MAP
        )

    if len(part) != 3:
        raise ValueError(
            "{} part {} needs a value mnemonic".format(NAME, subv.format_part(part))
            + mnemonic_help()
        )
    value, _, name = part
    if name not in MAP:
        raise ValueError(
            "Unknown {} mnemonic '{}'".format(NAME, name) + mnemonic_help()
        )
    if value != MAP[name]:
        raise ValueError(
            "{} code doesn't match mnemonic (got {}, expected {})".format(
                NAME,
                subv.format_part(part),
                subv.format_part((MAP[name],) + part[1:]),
            )
        )
    return part


def pop_immediate(line, sizes, modes=("imm",)):
    """Pop the first immediate in ``line`` whose bit size is one of ``sizes``.

    A part counts as an immediate when ``subv.parse_immediate`` accepts its
    label; the parsed result is used as a dict with ``"size"`` and ``"mode"``
    keys.  Numeric values have their mode normalized to ``modes[0]``; string
    values (label references, see the ``main/off21`` example in the module
    doctest) keep the mode they were written with.

    Args:
        line: mutable list of parts; the matched part is removed from it.
        sizes: a single accepted size or a list of accepted sizes.
        modes: accepted immediate modes, canonical one first.

    Returns:
        ``(value, formatted_label)``.

    Raises:
        ValueError: if no immediate of an accepted size exists, or the one
            found uses a mode outside ``modes``.
    """
    if isinstance(sizes, int):
        sizes = [sizes]
    for part in line:
        imm = subv.parse_immediate(part[1])
        if imm and imm["size"] in sizes:
            line.remove(part)
            if imm["mode"] not in modes:
                raise ValueError(
                    "Expected immediate {} to use mode {}".format(
                        subv.format_part(part), "/".join(modes)
                    )
                )
            if not isinstance(part[0], str):
                imm["mode"] = modes[0]
            return (part[0], subv.format_immediate(imm))
    raise ValueError(
        "Expected an immediate with size {}".format("/".join(str(s) for s in sizes))
    )


def validate_empty(inputs):
    """Raise ``ValueError`` if any unconsumed parts remain in ``inputs``."""
    if inputs:
        raise ValueError("Extra arguments: {}".format(inputs))


# ABI register names in register-number order, plus the plain x0..x31 names.
REG_NAMES = {
    name: i
    for i, name in enumerate(
        "zero,ra,sp,gp,tp,t0,t1,t2,s0,s1,a0,a1,a2,a3,a4,a5,a6,a7,"
        "s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,t3,t4,t5,t6".split(",")
    )
}
REG_NAMES.update({"x{}".format(i): i for i in range(32)})


def validate_reg(line, labels):
    """pop a register and validate its mnemonic.

    Returns the register as ``(number, labels[0])`` with the mnemonic dropped.

    >>> validate_reg([(5, 'rd', 't0')], ['rd', 'dest'])
    (5, 'rd')
    >>> validate_reg([(5, 'rd', 'x5')], ['rd', 'dest'])
    (5, 'rd')
    >>> validate_reg([(5, 'dest', 'x5')], ['rd', 'dest'])
    (5, 'rd')
    >>> validate_reg([(5, 'rd')], ['rd']) # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    ValueError: register part 5/rd needs a value mnemonic
    valid mnemonics are: 0/rd/zero, 1/rd/ra, 2/rd/sp, ...
    >>> validate_reg([(5, 'rd', 'xxx')], ['rd']) # doctest: +ELLIPSIS
    Traceback (most recent call last):
        ...
    ValueError: Unknown register mnemonic 'xxx'
    valid mnemonics are: 0/rd/zero, 1/rd/ra, 2/rd/sp, ...
    >>> validate_reg([(3, 'rd', 't0')], ['rd'])
    Traceback (most recent call last):
        ...
    ValueError: register code doesn't match mnemonic (got 3/rd/t0, expected 5/rd/t0)
    >>> validate_reg([(5, 'rd')], ['rs'])
    Traceback (most recent call last):
        ...
    ValueError: Expected a register with label rs
    >>> validate_reg([], ['rd', 'dest'])
    Traceback (most recent call last):
        ...
    ValueError: Expected a register with label rd/dest
    """
    return validate_part(line, labels, REG_NAMES, "register")[:2]


# funct3 codes of the integer computational ops.  add/sub and srl/sra share a
# code; they are told apart by the comp-mode (funct7) field.
COMP_OP_NAMES = {
    "add": 0,
    "sub": 0,
    "xor": 4,
    "or": 6,
    "and": 7,
    "sll": 1,
    "slt": 2,
    "sltu": 3,
    "srl": 5,
    "sra": 5,
}

# funct3 width codes for stores: ISA suffix and long-form name for each width.
STORE_WIDTHS = {
    # byte
    "b": 0,
    "byte": 0,
    # halfword
    "h": 1,
    "half": 1,
    # word
    "w": 2,
    "word": 2,
}

# Loads accept every store width plus the zero-extending variants.
LOAD_WIDTHS = STORE_WIDTHS | {
    # unsigned byte
    "bu": 4,
    "ubyte": 4,
    # unsigned halfword
    "hu": 5,
    "uhalf": 5,
}

# funct3 codes of the conditional branches.  Each comparison is accepted as
# its ISA mnemonic, the mnemonic without the leading "b", and a symbol.
BRANCH_NAMES = {
    # equal
    "beq": 0,
    "eq": 0,
    "==": 0,
    # not equal
    "bne": 1,
    "ne": 1,
    "!=": 1,
    # lower than
    "blt": 4,
    "lt": 4,
    "<": 4,
    # greater/equal than
    "bge": 5,
    "ge": 5,
    ">=": 5,
    # unsigned lower than
    "bltu": 6,
    "ltu": 6,
    "<u": 6,
    # unsigned greater/equal than (BGEU is funct3 = 0b111 in RV32I)
    "bgeu": 7,
    "geu": 7,
    ">=u": 7,
}

SYSTEM_OP_NAMES = {
    "priv": 0,  # fence, ecall, ebreak
    "csrrw": 1,  # CSR read/write
    "csrrs": 2,  # CSR read/set,
    "csrrc": 3,  # CSR read/clear,
    "csrrwi": 5,  # CSR read/write immediate
    "csrrsi": 6,  # CSR read/set, immediate
    "csrrci": 7,  # CSR read/clear, immediate
}


def validate_opr(inputs):
    """
    validate integer register-register operations.

    Consumes ``inputs`` (a mutable list of parts, opcode first) and returns
    the R-format fields in the order op, rd, funct3, rs1, rs2, funct7.
    ``sub`` and ``sra`` require comp-mode ``alt``; every other op requires
    ``norm``.

    >>> validate_opr(subv.parse('33/opr 0/mode/norm 0/subop/add 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    [(51, 'r'), (5, 'rd'), (0, 'funct3'), (10, 'rs1'), (11, 'rs2'), (0, 'funct7')]
    >>> validate_opr(subv.parse('33/opr 20/mode/alt 0/subop/sub 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    [(51, 'r'), (5, 'rd'), (0, 'funct3'), (10, 'rs1'), (11, 'rs2'), (32, 'funct7')]
    >>> validate_opr(subv.parse('33/opr 20/mode/alt 5/subop/sra 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    [(51, 'r'), (5, 'rd'), (5, 'funct3'), (10, 'rs1'), (11, 'rs2'), (32, 'funct7')]
    >>> validate_opr(subv.parse('33/opr 20/mode/alt 0/subop/add 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: comp-op 0/funct3/add needs to go with comp-mode 0/funct7/norm
    >>> validate_opr(subv.parse('33/opr 0/mode/norm 0/subop/sub 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: comp-op 0/funct3/sub needs to go with comp-mode 20/funct7/alt
    >>> validate_opr(subv.parse('33/opr 0/mode/norm 5/subop/sra 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: comp-op 5/funct3/sra needs to go with comp-mode 20/funct7/alt
    """
    op = inputs.pop(0)
    dest = validate_reg(inputs, ["rd", "dest"])
    # Two parts labelled plain "rs" are taken in source order: first is rs1.
    rs1 = validate_reg(inputs, ["rs1", "rs", "src1", "src"])
    rs2 = validate_reg(inputs, ["rs2", "rs", "src2", "src"])
    funct3 = validate_part(
        inputs, ["funct3", "funct", "subop"], COMP_OP_NAMES, NAME="comp-op"
    )
    funct7 = validate_part(
        inputs, ["funct7", "mode"], {"norm": 0, "alt": 32}, NAME="comp-mode"
    )

    expect_mode = "alt" if funct3[2] in ["sub", "sra"] else "norm"
    expect_val = 32 if expect_mode == "alt" else 0
    if funct7[2] != expect_mode:
        raise ValueError(
            "comp-op {} needs to go with comp-mode {}".format(
                subv.format_part(funct3),
                subv.format_part((expect_val, "funct7", expect_mode)),
            )
        )

    validate_empty(inputs)
    return [
        (op[0], "r"),
        dest,
        funct3[:2],
        rs1,
        rs2,
        funct7[:2],
    ]


def validate_opi(inputs):
    """
    validate integer register-immediate operations.

    Consumes ``inputs`` (a mutable list of parts, opcode first) and returns
    the I-format fields in the order op, rd, funct3, rs, imm12.

    Shifts (``sll``/``srl``/``sra``) may be written either as a 5-bit shift
    amount plus an explicit comp-mode, or as a single 12-bit immediate whose
    upper bits carry the mode; both forms are folded into one imm12.

    >>> validate_opi(subv.parse('13/opi 5/rd/t0 0/subop/add a/rs/a0 42/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (0, 'funct3'), (10, 'rs'), (66, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 3/subop/sltu 5/rs/t0 8/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (3, 'funct3'), (5, 'rs'), (8, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 0/mode/norm 5/rs/t0 8/imm5')['instr'])
    [(19, 'i'), (5, 'rd'), (1, 'funct3'), (5, 'rs'), (8, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 5/rs/t0 8/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (1, 'funct3'), (5, 'rs'), (8, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 20/mode/alt 5/rs/t0 8/imm5')['instr'])
    [(19, 'i'), (5, 'rd'), (5, 'funct3'), (5, 'rs'), (1032, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 5/rs/t0 408/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (5, 'funct3'), (5, 'rs'), (1032, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 20/mode/alt 5/rs/t0 8/imm5')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: comp-op 1/funct3/sll needs to go with comp-mode 0/funct7/norm
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 5/rs/t0 108/imm12')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: comp-op 1/funct3/sll needs to go with comp-mode 0/funct7/norm
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 0/mode/norm 5/rs/t0 8/imm5')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: comp-op 5/funct3/sra needs to go with comp-mode 20/funct7/alt
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 5/rs/t0 208/imm12')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: comp-op 5/funct3/sra needs to go with comp-mode 20/funct7/alt
    """
    op = inputs.pop(0)
    dest = validate_reg(inputs, ["rd", "dest"])
    # NOTE(review): "sub" is accepted here because it shares COMP_OP_NAMES
    # with validate_opr; confirm whether it should be rejected for opi.
    funct = validate_part(
        inputs, ["funct3", "funct", "subop"], COMP_OP_NAMES, NAME="comp-op"
    )
    src = validate_reg(inputs, ["rs", "rs1"])

    if funct[2] in ["sll", "srl", "sra"]:
        # Only the imm5 lookup is guarded: a missing imm5 selects the imm12
        # form, but a bad comp-mode must surface as its own error instead of
        # being replaced by "Expected an immediate with size 12".
        try:
            shamt = pop_immediate(inputs, 5)
        except ValueError:
            imm = pop_immediate(inputs, 12)
            shamt = ((imm[0] & 0b11111), "imm5")
            mode = (imm[0] >> 5, "funct7")
        else:
            mode = validate_part(
                inputs, ["funct7", "mode"], {"norm": 0, "alt": 32}, NAME="comp-mode"
            )

        expect_mode = "alt" if funct[2] == "sra" else "norm"
        expect_val = 32 if expect_mode == "alt" else 0
        if mode[0] != expect_val:
            raise ValueError(
                "comp-op {} needs to go with comp-mode {}".format(
                    subv.format_part(funct),
                    subv.format_part((expect_val, "funct7", expect_mode)),
                )
            )
        imm = ((mode[0] << 5) | shamt[0], "imm12")
    else:
        imm = pop_immediate(inputs, 12)

    validate_empty(inputs)
    return [
        (op[0], "i"),
        dest,
        funct[:2],
        src,
        imm,
    ]


def validate_jalr(inputs):
    """
    validate jalr operations.

    The funct3/subop field is optional and must be zero when given.  Returns
    the I-format fields in the order op, rd, funct3, rs, imm12.

    >>> validate_jalr(subv.parse('67/jalr 0/rd/x0 0/subop 1/rs/ra 0/off12')['instr'])
    [(103, 'i'), (0, 'rd'), (0, 'funct3'), (1, 'rs'), (0, 'imm12')]
    >>> validate_jalr(subv.parse('67/jalr 0/rd/x0 1/rs/ra 0/off12')['instr'])
    [(103, 'i'), (0, 'rd'), (0, 'funct3'), (1, 'rs'), (0, 'imm12')]
    >>> validate_jalr(subv.parse('67/jalr 0/rd/x0 4/subop 1/rs/ra 0/off12')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: Expected 4/funct3 value to be zero
    """
    op = inputs.pop(0)
    dest = validate_reg(inputs, ["rd", "dest"])
    # try_pop_zeroes already rejects a non-zero funct3, so no re-check needed.
    funct = try_pop_zeroes(inputs, ["funct3", "funct", "subop"])
    src = validate_reg(inputs, ["rs", "rs1", "base"])
    offset = pop_immediate(inputs, 12, ["imm", "off"])

    validate_empty(inputs)
    return [
        (op[0], "i"),
        dest,
        funct[:2],
        src,
        offset,
    ]


def validate_system(inputs):
    """
    validate system operations.

    ``priv`` ops (ecall/ebreak) take a funct12 and optional all-zero rd/rs.
    CSR ops take rd, a CSR specifier, and either a register source or, for
    the ``...i`` variants, a 5-bit immediate.  Returns the I-format fields in
    the order op, rd, funct3, rs (or imm5), imm12.

    >>> validate_system(subv.parse('73/system 0/dest/x0 1/subop/csrrw 5/rs/t0 afe/csr')['instr'])
    [(115, 'i'), (0, 'rd'), (1, 'funct3'), (5, 'rs'), (2814, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/dest/x0 5/subop/csrrwi 7/imm5 afe/csr')['instr'])
    [(115, 'i'), (0, 'rd'), (5, 'funct3'), (7, 'imm5'), (2814, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/subop/priv 0/funct12/ecall')['instr'])
    [(115, 'i'), (0, 'rd'), (0, 'funct3'), (0, 'rs'), (0, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/subop/priv 1/funct12/ebreak')['instr'])
    [(115, 'i'), (0, 'rd'), (0, 'funct3'), (0, 'rs'), (1, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/dest/x0 1/subop/csrrw 7/imm5 afe/csr')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: Expected a register with label rs/rs1/src
    >>> validate_system(subv.parse('73/system 0/dest/x0 5/subop/csrrwi 5/rs/t0 afe/csr')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: Expected an immediate with size 5
    >>> validate_system(subv.parse('73/system 1/dest/x1 0/subop/priv 1/funct12/ebreak')['instr'])
    Traceback (most recent call last):
        ...
    ValueError: Expected 1/rd value to be zero
    """
    op = inputs.pop(0)
    funct = validate_part(
        inputs, ["funct3", "funct", "subop"], SYSTEM_OP_NAMES, "system-op"
    )

    if funct[0] == 0:
        # ecall/ebreak
        imm = validate_part(
            inputs, ["imm12", "funct12"], {"ecall": 0, "ebreak": 1}, "system-priv-op"
        )[:2]
        dest = try_pop_zeroes(inputs, ["rd", "dest"])
        src = try_pop_zeroes(inputs, ["rs", "rs1", "src"])
    else:
        dest = validate_reg(inputs, ["rd", "dest"])
        # Truncate like every other field so that a CSR written with a
        # mnemonic does not leak a third element into the normalized output.
        imm = pop_part(inputs, ["imm12", "csr"], "CSR-specifier")[:2]
        if funct[2][-1] == "i":
            # csrrwi/csrrsi/csrrci: the source is a 5-bit immediate.
            src = pop_immediate(inputs, 5)
        else:
            src = validate_reg(inputs, ["rs", "rs1", "src"])

    validate_empty(inputs)
    return [
        (op[0], "i"),
        dest,
        funct[:2],
        src,
        imm,
    ]


def validate_load(inputs):
    """Validate a load; return the I-format fields op, rd, funct3, rs, imm12.

    Raises:
        ValueError: on a missing, mismatched or extra part.
    """
    op = inputs.pop(0)
    dest = validate_reg(inputs, ["rd", "dest"])
    width = validate_part(inputs, ["funct3", "funct", "width"], LOAD_WIDTHS, "width")
    base = validate_reg(inputs, ["rs", "base"])
    offset = pop_immediate(inputs, 12, ["imm", "off"])

    validate_empty(inputs)
    return [
        (op[0], "i"),
        dest,
        width[:2],
        base,
        offset,
    ]


def validate_store(inputs):
    """Validate a store; return the S-format fields op, funct3, rs1, imm12, rs2.

    When both registers are labelled plain ``rs``, the first one in source
    order is the base address and the second is the value to store.

    Raises:
        ValueError: on a missing, mismatched or extra part.
    """
    op = inputs.pop(0)
    width = validate_part(inputs, ["funct3", "funct", "width"], STORE_WIDTHS, "width")
    base = validate_reg(inputs, ["rs1", "rs", "base"])
    offset = pop_immediate(inputs, 12, ["imm", "off"])
    src = validate_reg(inputs, ["rs2", "rs", "src"])

    validate_empty(inputs)
    return [
        (op[0], "s"),
        width[:2],
        base,
        offset,
        src,
    ]


def validate_branch(inputs):
    """Validate a branch; return the B-format fields op, funct3, rs1, rs2, offset.

    The offset may be given as a 12- or 13-bit immediate.

    Raises:
        ValueError: on a missing, mismatched or extra part.
    """
    op = inputs.pop(0)
    funct = validate_part(
        inputs, ["funct3", "funct", "subop"], BRANCH_NAMES, "branch-op"
    )
    rs1 = validate_reg(inputs, ["rs1", "rs", "src1", "src"])
    rs2 = validate_reg(inputs, ["rs2", "rs", "src2", "src"])
    offset = pop_immediate(inputs, [12, 13], ["imm", "off"])

    validate_empty(inputs)
    return [
        (op[0], "b"),
        funct[:2],
        rs1,
        rs2,
        offset,
    ]


def validate_u(inputs):
    """Validate a U-format instruction (lui/auipc); return op, rd, imm20.

    Raises:
        ValueError: on a missing, mismatched or extra part.
    """
    op = inputs.pop(0)
    rd = validate_reg(inputs, ["rd", "dest"])
    imm = pop_immediate(inputs, 20, ["imm", "off"])

    validate_empty(inputs)
    return [
        (op[0], "u"),
        rd,
        imm,
    ]


def validate_j(inputs):
    """Validate a J-format instruction (jal); return op, rd, offset.

    The offset may be given as a 21- or 20-bit immediate.

    Raises:
        ValueError: on a missing, mismatched or extra part.
    """
    op = inputs.pop(0)
    dest = validate_reg(inputs, ["rd", "dest"])
    offset = pop_immediate(inputs, [21, 20], ["imm", "off"])

    validate_empty(inputs)
    return [
        (op[0], "j"),
        dest,
        offset,
    ]


# op label -> (validator, opcode the label must be paired with)
instr_map = {
    "opr": (validate_opr, 0x33),
    "opi": (validate_opi, 0x13),
    "jalr": (validate_jalr, 0x67),
    "system": (validate_system, 0x73),
    "load": (validate_load, 0x03),
    "store": (validate_store, 0x23),
    "branch": (validate_branch, 0x63),
    "lui": (validate_u, 0x37),
    "auipc": (validate_u, 0x17),
    "jal": (validate_j, 0x6F),
}


@subv.with_parsed_lines
def validate(iter):
    """Validate and normalize every instruction line of the code segment.

    Receives ``(segment, line)`` pairs (presumably produced by the
    ``subv.with_parsed_lines`` decorator from a text stream, as in the module
    doctest).  Instruction lines in the ``code`` segment are checked against
    ``instr_map``, rewritten by the matching validator and re-formatted; all
    other lines are passed through as their raw text.

    Raises:
        ValueError: if the opcode part is not exactly ``value/label``, the
            label is unknown, the opcode disagrees with the label, or the
            instruction's validator rejects its arguments.
    """
    for segment, line in iter:
        if line["type"] == "instr" and segment == "code":
            op = line["instr"][0]
            # Explicit raise rather than assert: asserts vanish under -O and
            # every other validation failure here is a ValueError.
            if len(op) != 2:
                raise ValueError("instruction without op label: {}".format(op))
            (op, label) = op
            if label not in instr_map:
                raise ValueError("unknown op label: {}".format(label))
            (validator, expected) = instr_map[label]
            if op != expected:
                raise ValueError(
                    "opcode {:02x} doesn't match label {} (expected {:02x})".format(
                        op, label, expected
                    )
                )
            # Validators consume their input, so hand them a copy.
            line["instr"] = validator(line["instr"][:])
            yield subv.format(line)
        else:
            yield line["raw"]


if __name__ == "__main__":
    import sys

    for line in validate(sys.stdin):
        print(line)