#!/usr/bin/env python3
""" validate.py
Validates mnemonic labels match the numeric values given for opcodes,
register codes, etc; normalizes argument order and verifies that there are
no missing arguments.
>>> from io import StringIO
>>> # doctest: +REPORT_NDIFF
... print(subv.join_all(validate(StringIO('''
... == code 0x80000000
... main:
... # load 0x10010000 (UART0) into t0
... 37/lui 5/rd/t0 10010/imm20
... # store 0x48 (H) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 48/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x65 (e) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 65/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x6c (l) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 6c/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x6c (l) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 6c/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x6f (o) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 6f/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # store 0x0a (\\\\n) in UART0+0
... 13/opi 6/rd/t1 0/subop/add 0/rs/x0 0a/imm12
... 23/store 2/width/word 5/rs/t0 0/off12 6/rs/t1
... # jump back up to the top
... 6f/jal 0/rd/x0 main/off21
... '''[1:-1]))))
== code 0x80000000
main:
# load 0x10010000 (UART0) into t0
37/u 5/rd 10010/imm20
# store 0x48 (H) in UART0+0
13/i 6/rd 0/funct3 0/rs 48/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x65 (e) in UART0+0
13/i 6/rd 0/funct3 0/rs 65/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x6c (l) in UART0+0
13/i 6/rd 0/funct3 0/rs 6c/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x6c (l) in UART0+0
13/i 6/rd 0/funct3 0/rs 6c/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x6f (o) in UART0+0
13/i 6/rd 0/funct3 0/rs 6f/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# store 0x0a (\\n) in UART0+0
13/i 6/rd 0/funct3 0/rs a/imm12
23/s 2/funct3 5/rs1 0/imm12 6/rs2
# jump back up to the top
6f/j 0/rd main/off21
"""
import subv
def pop_part(line, labels, NAME="part"):
    """Remove and return the first part of ``line`` tagged with one of ``labels``.

    Args:
        line: mutable list of part tuples ``(value, label, *extra)``; the
            matching part is removed from it in place.
        labels: accepted spellings of the label. The first entry is the
            canonical name the returned part is relabelled with.
        NAME: noun used to describe the part in the error message.

    Returns:
        A tuple ``(value, labels[0], *extra)`` built from the matched part.

    Raises:
        ValueError: if no part of ``line`` carries one of ``labels``.
    """
    for index, part in enumerate(line):
        # A part without a label can never match; skip it instead of
        # crashing with IndexError so the caller gets a proper diagnostic
        # (either from here or from the later "Extra arguments" check).
        if len(part) > 1 and part[1] in labels:
            del line[index]
            return (part[0], labels[0]) + part[2:]
    raise ValueError("Expected a {} with label {}".format(NAME, "/".join(labels)))
def try_pop_zeroes(line, labels):
    """Pop an optional part that, when present, must carry the value zero.

    Args:
        line: mutable list of parts; a matching part is removed in place.
        labels: accepted label spellings, canonical name first.

    Returns:
        ``(0, labels[0])`` whether the part was present (with value 0) or
        missing altogether.

    Raises:
        ValueError: if the part is present with a non-zero value.
    """
    try:
        found = pop_part(line, labels)
    except ValueError:
        # The part is optional: a missing one is treated as an implicit zero.
        return (0, labels[0])
    field = found[:2]
    if field[0] == 0:
        return field
    raise ValueError("Expected {} value to be zero".format(subv.format_part(field)))
def validate_part(line, labels, MAP, NAME="part"):
    """Pop a labelled part and check its numeric value against its mnemonic.

    Args:
        line: mutable list of parts; the matching part is removed in place.
        labels: accepted label spellings, canonical name first.
        MAP: dict mapping each valid mnemonic to its expected numeric value.
        NAME: noun used to describe the part in error messages.

    Returns:
        The 3-tuple ``(value, labels[0], mnemonic)``.

    Raises:
        ValueError: if no part has one of ``labels``, if the part has no
            mnemonic, if the mnemonic is unknown, or if the value does not
            match the one ``MAP`` assigns to the mnemonic.
    """
    part = pop_part(line, labels, NAME)

    def mnemonic_help():
        # Only built on the error paths: formatting every MAP entry for each
        # successfully validated part would be wasted work.
        return "\nvalid mnemonics are: " + ", ".join(
            subv.format_part((code, labels[0], mnemonic))
            for mnemonic, code in MAP.items()
        )

    if len(part) != 3:
        raise ValueError(
            "{} part {} needs a value mnemonic".format(NAME, subv.format_part(part))
            + mnemonic_help()
        )
    value, _, name = part
    if name not in MAP:
        raise ValueError(
            "Unknown {} mnemonic '{}'".format(NAME, name) + mnemonic_help()
        )
    expected = MAP[name]
    if value != expected:
        raise ValueError(
            "{} code doesn't match mnemonic (got {}, expected {})".format(
                NAME,
                subv.format_part(part),
                subv.format_part((expected,) + part[1:]),
            )
        )
    return part
def pop_immediate(line, sizes, modes=("imm",)):
    """Pop the first immediate of an accepted size from ``line``.

    Args:
        line: mutable list of parts; the matching part is removed in place
            (also when the mode check below then fails).
        sizes: a single accepted bit size, or a sequence of them.
        modes: accepted immediate modes; the first one is the canonical mode.

    Returns:
        ``(value, label)`` where ``label`` is produced by
        ``subv.format_immediate``. For values that are not strings the mode
        is normalized to ``modes[0]`` first; string values keep the mode
        they were written with.

    Raises:
        ValueError: if no immediate of an accepted size is present, or if
            the one found uses a mode outside ``modes``.
    """
    if isinstance(sizes, int):
        sizes = [sizes]
    for index, part in enumerate(line):
        if len(part) < 2:
            # A part without a label cannot be an immediate; skip it rather
            # than fail with IndexError.
            continue
        imm = subv.parse_immediate(part[1])
        if not imm or imm["size"] not in sizes:
            continue
        del line[index]
        if imm["mode"] not in modes:
            raise ValueError(
                "Expected immediate {} to use mode {}".format(
                    subv.format_part(part), "/".join(modes)
                )
            )
        if not isinstance(part[0], str):
            imm["mode"] = modes[0]
        return (part[0], subv.format_immediate(imm))
    raise ValueError(
        "Expected an immediate with size {}".format("/".join(str(s) for s in sizes))
    )
def validate_empty(inputs):
    """Ensure every argument of an instruction has been consumed.

    Args:
        inputs: the parts left over after all expected ones were popped.

    Raises:
        ValueError: if ``inputs`` still contains any part.
    """
    if not inputs:
        return
    raise ValueError("Extra arguments: {}".format(inputs))
# Register mnemonic -> register number. The named registers come first, in
# numeric order, followed by the plain x0..x31 spellings; this insertion order
# is also the order in which validate_part lists the valid mnemonics.
REG_NAMES = dict(
    (name, number)
    for number, name in enumerate(
        "zero,ra,sp,gp,tp,t0,t1,t2,s0,s1,a0,a1,a2,a3,a4,a5,a6,a7,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,t3,t4,t5,t6".split(
            ","
        )
    )
)
REG_NAMES.update(("x{}".format(number), number) for number in range(32))
def validate_reg(line, labels):
    """Pop a register part, check its mnemonic and drop the mnemonic.

    The first entry of ``labels`` becomes the label of the returned pair.

    >>> validate_reg([(5, 'rd', 't0')], ['rd', 'dest'])
    (5, 'rd')
    >>> validate_reg([(5, 'rd', 'x5')], ['rd', 'dest'])
    (5, 'rd')
    >>> validate_reg([(5, 'dest', 'x5')], ['rd', 'dest'])
    (5, 'rd')
    >>> validate_reg([(5, 'rd')], ['rd']) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: register part 5/rd needs a value mnemonic
    valid mnemonics are: 0/rd/zero, 1/rd/ra, 2/rd/sp, ...
    >>> validate_reg([(5, 'rd', 'xxx')], ['rd']) # doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: Unknown register mnemonic 'xxx'
    valid mnemonics are: 0/rd/zero, 1/rd/ra, 2/rd/sp, ...
    >>> validate_reg([(3, 'rd', 't0')], ['rd'])
    Traceback (most recent call last):
    ...
    ValueError: register code doesn't match mnemonic (got 3/rd/t0, expected 5/rd/t0)
    >>> validate_reg([(5, 'rd')], ['rs'])
    Traceback (most recent call last):
    ...
    ValueError: Expected a register with label rs
    >>> validate_reg([], ['rd', 'dest'])
    Traceback (most recent call last):
    ...
    ValueError: Expected a register with label rd/dest
    """
    # validate_part only ever returns a full (value, label, mnemonic) triple.
    number, label, _mnemonic = validate_part(line, labels, REG_NAMES, NAME="register")
    return (number, label)
# Mnemonic -> funct3 code for the integer computational ops (opr / opi).
# add/sub and srl/sra share a code; validate_opr and validate_opi tell them
# apart through the comp-mode (funct7) value.
COMP_OP_NAMES = {
    mnemonic: funct3
    for funct3, mnemonics in (
        (0, ("add", "sub")),
        (4, ("xor",)),
        (6, ("or",)),
        (7, ("and",)),
        (1, ("sll",)),
        (2, ("slt",)),
        (3, ("sltu",)),
        (5, ("srl", "sra")),
    )
    for mnemonic in mnemonics
}
# Width mnemonic -> funct3 code for stores; each width has a short ISA-style
# name and a long spelled-out alias.
STORE_WIDTHS = {
    name: code
    for code, names in (
        (0, ("b", "byte")),  # byte
        (1, ("h", "half")),  # halfword
        (2, ("w", "word")),  # word
    )
    for name in names
}
# Loads accept every store width plus the unsigned byte/halfword variants.
LOAD_WIDTHS = {
    **STORE_WIDTHS,
    # unsigned byte
    "bu": 4,
    "ubyte": 4,
    # unsigned halfword
    "hu": 5,
    "uhalf": 5,
}
# Branch condition mnemonic -> funct3 code. Every condition can be written
# with its ISA name, the name without the leading "b", or as an operator.
BRANCH_NAMES = {
    alias: code
    for code, aliases in (
        (0, ("beq", "eq", "==")),  # equal
        (1, ("bne", "ne", "!=")),  # not equal
        (4, ("blt", "lt", "<")),  # lower than
        (5, ("bge", "ge", ">=")),  # greater/equal than
        (6, ("bltu", "ltu", "<u")),  # unsigned lower than
        (7, ("bgeu", "geu", ">=u")),  # unsigned greater/equal than
    )
    for alias in aliases
}
# System-op mnemonic -> funct3 code.
SYSTEM_OP_NAMES = {
    mnemonic: funct3
    for funct3, mnemonic in (
        (0, "priv"),  # fence, ecall, ebreak
        (1, "csrrw"),  # CSR read/write
        (2, "csrrs"),  # CSR read/set
        (3, "csrrc"),  # CSR read/clear
        (5, "csrrwi"),  # CSR read/write immediate
        (6, "csrrsi"),  # CSR read/set, immediate
        (7, "csrrci"),  # CSR read/clear, immediate
    )
}
def validate_opr(inputs):
    """
    Validate an integer register-register operation.

    Pops the opcode part, both source registers, the destination, the
    comp-op (funct3) and the comp-mode (funct7) from ``inputs`` and returns
    them in R-type order: op, rd, funct3, rs1, rs2, funct7. ``sub`` and
    ``sra`` require mode ``alt``; every other op requires ``norm``.

    >>> validate_opr(subv.parse('33/opr 0/mode/norm 0/subop/add 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    [(51, 'r'), (5, 'rd'), (0, 'funct3'), (10, 'rs1'), (11, 'rs2'), (0, 'funct7')]
    >>> validate_opr(subv.parse('33/opr 20/mode/alt 0/subop/sub 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    [(51, 'r'), (5, 'rd'), (0, 'funct3'), (10, 'rs1'), (11, 'rs2'), (32, 'funct7')]
    >>> validate_opr(subv.parse('33/opr 20/mode/alt 5/subop/sra 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    [(51, 'r'), (5, 'rd'), (5, 'funct3'), (10, 'rs1'), (11, 'rs2'), (32, 'funct7')]
    >>> validate_opr(subv.parse('33/opr 20/mode/alt 0/subop/add 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: comp-op 0/funct3/add needs to go with comp-mode 0/funct7/norm
    >>> validate_opr(subv.parse('33/opr 0/mode/norm 0/subop/sub 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: comp-op 0/funct3/sub needs to go with comp-mode 20/funct7/alt
    >>> validate_opr(subv.parse('33/opr 0/mode/norm 5/subop/sra 5/rd/t0 a/rs/a0 b/rs/a1')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: comp-op 5/funct3/sra needs to go with comp-mode 20/funct7/alt
    """
    mode_codes = {"norm": 0, "alt": 32}

    head = inputs.pop(0)
    rd = validate_reg(inputs, ["rd", "dest"])
    # Both sources accept the bare "rs"/"src" labels, so the first one found
    # becomes rs1 and the next one rs2.
    first_src = validate_reg(inputs, ["rs1", "rs", "src1", "src"])
    second_src = validate_reg(inputs, ["rs2", "rs", "src2", "src"])
    comp_op = validate_part(
        inputs, ["funct3", "funct", "subop"], COMP_OP_NAMES, NAME="comp-op"
    )
    comp_mode = validate_part(
        inputs, ["funct7", "mode"], mode_codes, NAME="comp-mode"
    )

    if comp_op[2] in ("sub", "sra"):
        wanted_mode = "alt"
    else:
        wanted_mode = "norm"
    if comp_mode[2] != wanted_mode:
        wanted_part = (mode_codes[wanted_mode], "funct7", wanted_mode)
        raise ValueError(
            "comp-op {} needs to go with comp-mode {}".format(
                subv.format_part(comp_op),
                subv.format_part(wanted_part),
            )
        )

    validate_empty(inputs)
    return [(head[0], "r"), rd, comp_op[:2], first_src, second_src, comp_mode[:2]]
def validate_opi(inputs):
    """
    Validate an integer register-immediate operation.

    Returns the parts in I-type order: op, rd, funct3, rs, imm12.

    Shifts (``sll``, ``srl``, ``sra``) can be written either with an imm5
    shift amount plus a comp-mode part, or with one combined imm12 whose
    upper bits hold the mode; both spellings are normalized to the combined
    imm12. ``sra`` requires mode ``alt``, the other shifts ``norm``.

    Raises:
        ValueError: if a part is missing, invalid or left over, or if a
            shift is combined with the wrong comp-mode.

    >>> validate_opi(subv.parse('13/opi 5/rd/t0 0/subop/add a/rs/a0 42/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (0, 'funct3'), (10, 'rs'), (66, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 3/subop/sltu 5/rs/t0 8/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (3, 'funct3'), (5, 'rs'), (8, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 0/mode/norm 5/rs/t0 8/imm5')['instr'])
    [(19, 'i'), (5, 'rd'), (1, 'funct3'), (5, 'rs'), (8, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 5/rs/t0 8/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (1, 'funct3'), (5, 'rs'), (8, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 20/mode/alt 5/rs/t0 8/imm5')['instr'])
    [(19, 'i'), (5, 'rd'), (5, 'funct3'), (5, 'rs'), (1032, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 5/rs/t0 408/imm12')['instr'])
    [(19, 'i'), (5, 'rd'), (5, 'funct3'), (5, 'rs'), (1032, 'imm12')]
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 20/mode/alt 5/rs/t0 8/imm5')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: comp-op 1/funct3/sll needs to go with comp-mode 0/funct7/norm
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 1/subop/sll 5/rs/t0 108/imm12')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: comp-op 1/funct3/sll needs to go with comp-mode 0/funct7/norm
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 0/mode/norm 5/rs/t0 8/imm5')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: comp-op 5/funct3/sra needs to go with comp-mode 20/funct7/alt
    >>> validate_opi(subv.parse('13/opi 5/rd/t0 5/subop/sra 5/rs/t0 208/imm12')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: comp-op 5/funct3/sra needs to go with comp-mode 20/funct7/alt
    """
    op = inputs.pop(0)
    dest = validate_reg(inputs, ["rd", "dest"])
    funct = validate_part(
        inputs, ["funct3", "funct", "subop"], COMP_OP_NAMES, NAME="comp-op"
    )
    src = validate_reg(inputs, ["rs", "rs1"])
    if funct[2] in ("sll", "srl", "sra"):
        try:
            shamt = pop_immediate(inputs, 5)
        except ValueError:
            # No usable imm5: expect the combined form instead, where the low
            # five bits are the shift amount and the bits above are the mode.
            imm = pop_immediate(inputs, 12)
            shamt = (imm[0] & 0b11111, "imm5")
            mode = (imm[0] >> 5, "funct7")
        else:
            # Validated outside the try on purpose: once an imm5 has been
            # consumed, a missing or mismatched comp-mode is a real error and
            # must not be masked by the imm12 fallback.
            mode = validate_part(
                inputs, ["funct7", "mode"], {"norm": 0, "alt": 32}, NAME="comp-mode"
            )
        expect_mode = "alt" if funct[2] == "sra" else "norm"
        expect_val = 32 if expect_mode == "alt" else 0
        if mode[0] != expect_val:
            raise ValueError(
                "comp-op {} needs to go with comp-mode {}".format(
                    subv.format_part(funct),
                    subv.format_part((expect_val, "funct7", expect_mode)),
                )
            )
        imm = ((mode[0] << 5) | shamt[0], "imm12")
    else:
        imm = pop_immediate(inputs, 12)
    validate_empty(inputs)
    return [
        (op[0], "i"),
        dest,
        funct[:2],
        src,
        imm,
    ]
def validate_jalr(inputs):
    """
    Validate a jalr operation.

    Returns the parts in I-type order: op, rd, funct3, rs, imm12. The
    funct3/subop part is optional; when given it must be zero.

    Raises:
        ValueError: if a part is missing, invalid or left over, or if the
            funct3 part is present with a non-zero value.

    >>> validate_jalr(subv.parse('67/jalr 0/rd/x0 0/subop 1/rs/ra 0/off12')['instr'])
    [(103, 'i'), (0, 'rd'), (0, 'funct3'), (1, 'rs'), (0, 'imm12')]
    >>> validate_jalr(subv.parse('67/jalr 0/rd/x0 1/rs/ra 0/off12')['instr'])
    [(103, 'i'), (0, 'rd'), (0, 'funct3'), (1, 'rs'), (0, 'imm12')]
    >>> validate_jalr(subv.parse('67/jalr 0/rd/x0 4/subop 1/rs/ra 0/off12')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: Expected 4/funct3 value to be zero
    """
    op = inputs.pop(0)
    dest = validate_reg(inputs, ["rd", "dest"])
    # try_pop_zeroes either returns the pair (0, "funct3") or raises, so no
    # separate zero check (or truncation) is needed afterwards.
    funct = try_pop_zeroes(inputs, ["funct3", "funct", "subop"])
    src = validate_reg(inputs, ["rs", "rs1", "base"])
    offset = pop_immediate(inputs, 12, ["imm", "off"])
    validate_empty(inputs)
    return [
        (op[0], "i"),
        dest,
        funct,
        src,
        offset,
    ]
def validate_system(inputs):
    """
    Validate a system operation.

    Returns the parts in I-type order: op, rd, funct3, source, imm12.

    * ``priv`` ops take a funct12 (``ecall`` / ``ebreak``); rd and rs are
      optional and must be zero when given.
    * CSR ops take a destination register, a CSR specifier and a source:
      an imm5 when the mnemonic ends in ``i``, otherwise a register.

    >>> validate_system(subv.parse('73/system 0/dest/x0 1/subop/csrrw 5/rs/t0 afe/csr')['instr'])
    [(115, 'i'), (0, 'rd'), (1, 'funct3'), (5, 'rs'), (2814, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/dest/x0 5/subop/csrrwi 7/imm5 afe/csr')['instr'])
    [(115, 'i'), (0, 'rd'), (5, 'funct3'), (7, 'imm5'), (2814, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/subop/priv 0/funct12/ecall')['instr'])
    [(115, 'i'), (0, 'rd'), (0, 'funct3'), (0, 'rs'), (0, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/subop/priv 1/funct12/ebreak')['instr'])
    [(115, 'i'), (0, 'rd'), (0, 'funct3'), (0, 'rs'), (1, 'imm12')]
    >>> validate_system(subv.parse('73/system 0/dest/x0 1/subop/csrrw 7/imm5 afe/csr')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: Expected a register with label rs/rs1/src
    >>> validate_system(subv.parse('73/system 0/dest/x0 5/subop/csrrwi 5/rs/t0 afe/csr')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: Expected an immediate with size 5
    >>> validate_system(subv.parse('73/system 1/dest/x1 0/subop/priv 1/funct12/ebreak')['instr'])
    Traceback (most recent call last):
    ...
    ValueError: Expected 1/rd value to be zero
    """
    head = inputs.pop(0)
    system_op = validate_part(
        inputs, ["funct3", "funct", "subop"], SYSTEM_OP_NAMES, "system-op"
    )
    code, _label, mnemonic = system_op

    if code == 0:
        # ecall/ebreak
        priv_op = validate_part(
            inputs, ["imm12", "funct12"], {"ecall": 0, "ebreak": 1}, "system-priv-op"
        )
        imm = priv_op[:2]
        dest = try_pop_zeroes(inputs, ["rd", "dest"])
        src = try_pop_zeroes(inputs, ["rs", "rs1", "src"])
    else:
        dest = validate_reg(inputs, ["rd", "dest"])
        # NOTE(review): unlike every other field, the CSR specifier is not
        # trimmed to (value, label), so anything written after its label is
        # carried into the result -- confirm this is intended.
        imm = pop_part(inputs, ["imm12", "csr"], "CSR-specifier")
        if mnemonic.endswith("i"):
            src = pop_immediate(inputs, 5)
        else:
            src = validate_reg(inputs, ["rs", "rs1", "src"])

    validate_empty(inputs)
    return [(head[0], "i"), dest, system_op[:2], src, imm]
def validate_load(inputs):
    """Validate a load and return its parts in I-type order.

    Args:
        inputs: list of parts, opcode part first; consumed in place.

    Returns:
        ``[op, rd, funct3 (width), rs (base), imm12 (offset)]``.

    Raises:
        ValueError: if a part is missing, invalid or left over.
    """
    head = inputs.pop(0)
    # The fields are popped in the same order the I format lists them.
    fields = [
        validate_reg(inputs, ["rd", "dest"]),
        validate_part(inputs, ["funct3", "funct", "width"], LOAD_WIDTHS, "width")[:2],
        validate_reg(inputs, ["rs", "base"]),
        pop_immediate(inputs, 12, ["imm", "off"]),
    ]
    validate_empty(inputs)
    return [(head[0], "i"), *fields]
def validate_store(inputs):
    """Validate a store and return its parts in S-type order.

    Args:
        inputs: list of parts, opcode part first; consumed in place.

    Returns:
        ``[op, funct3 (width), rs1 (base), imm12 (offset), rs2 (source)]``.

    Raises:
        ValueError: if a part is missing, invalid or left over.
    """
    head = inputs.pop(0)
    # The base is popped before the source: both accept the bare "rs" label,
    # so the first "rs" found is taken as the base address register.
    fields = [
        validate_part(inputs, ["funct3", "funct", "width"], STORE_WIDTHS, "width")[:2],
        validate_reg(inputs, ["rs1", "rs", "base"]),
        pop_immediate(inputs, 12, ["imm", "off"]),
        validate_reg(inputs, ["rs2", "rs", "src"]),
    ]
    validate_empty(inputs)
    return [(head[0], "s"), *fields]
def validate_branch(inputs):
    """Validate a conditional branch and return its parts in B-type order.

    Args:
        inputs: list of parts, opcode part first; consumed in place.

    Returns:
        ``[op, funct3 (condition), rs1, rs2, offset]``; the offset may be
        given with size 12 or 13.

    Raises:
        ValueError: if a part is missing, invalid or left over.
    """
    head = inputs.pop(0)
    fields = [
        validate_part(
            inputs, ["funct3", "funct", "subop"], BRANCH_NAMES, "branch-op"
        )[:2],
        # Both operands accept the bare "rs"/"src" labels; they are taken in
        # the order written.
        validate_reg(inputs, ["rs1", "rs", "src1", "src"]),
        validate_reg(inputs, ["rs2", "rs", "src2", "src"]),
        pop_immediate(inputs, [12, 13], ["imm", "off"]),
    ]
    validate_empty(inputs)
    return [(head[0], "b"), *fields]
def validate_u(inputs):
    """Validate a U-type instruction (used for both lui and auipc).

    Args:
        inputs: list of parts, opcode part first; consumed in place.

    Returns:
        ``[op, rd, imm20]``.

    Raises:
        ValueError: if a part is missing, invalid or left over.
    """
    head = inputs.pop(0)
    fields = [
        validate_reg(inputs, ["rd", "dest"]),
        pop_immediate(inputs, 20, ["imm", "off"]),
    ]
    validate_empty(inputs)
    return [(head[0], "u"), *fields]
def validate_j(inputs):
    """Validate a J-type instruction (jal).

    Args:
        inputs: list of parts, opcode part first; consumed in place.

    Returns:
        ``[op, rd, offset]``; the offset may be given with size 21 or 20.

    Raises:
        ValueError: if a part is missing, invalid or left over.
    """
    head = inputs.pop(0)
    fields = [
        validate_reg(inputs, ["rd", "dest"]),
        pop_immediate(inputs, [21, 20], ["imm", "off"]),
    ]
    validate_empty(inputs)
    return [(head[0], "j"), *fields]
# Op label -> (validator for that instruction, opcode the label must carry).
instr_map = {
    label: (validator, opcode)
    for label, validator, opcode in (
        ("opr", validate_opr, 0x33),
        ("opi", validate_opi, 0x13),
        ("jalr", validate_jalr, 0x67),
        ("system", validate_system, 0x73),
        ("load", validate_load, 0x03),
        ("store", validate_store, 0x23),
        ("branch", validate_branch, 0x63),
        ("lui", validate_u, 0x37),
        ("auipc", validate_u, 0x17),
        ("jal", validate_j, 0x6F),
    )
}
@subv.with_parsed_lines
def validate(iter):
    """Validate and normalize every instruction line of the code segment.

    Args:
        iter: iterable of ``(segment, line)`` pairs, where ``line`` is a dict
            with at least the keys ``"type"`` and ``"raw"`` and, for
            instructions, ``"instr"`` (a list of part tuples). Presumably
            produced from raw text by ``subv.with_parsed_lines`` -- see that
            decorator for the exact input contract.

    Yields:
        For each instruction in the ``code`` segment, the line re-formatted
        by ``subv.format`` after its parts were validated and normalized;
        for every other line, its ``"raw"`` text unchanged.

    Raises:
        ValueError: if an instruction's first part is not a two-element
            ``(opcode, label)`` pair, if the label is unknown, if the opcode
            does not match the label, or if the instruction's validator
            rejects its arguments.
    """
    for segment, line in iter:
        if line["type"] != "instr" or segment != "code":
            yield line["raw"]
            continue
        op = line["instr"][0]
        # Raised explicitly rather than asserted: asserts are stripped under
        # ``python -O``, and every other diagnostic here is a ValueError.
        if len(op) != 2:
            raise ValueError("instruction without op label: {}".format(op))
        (op, label) = op
        if label not in instr_map:
            raise ValueError("unknown op label: {}".format(label))
        (validator, expected) = instr_map[label]
        if op != expected:
            raise ValueError(
                "opcode {:02x} doesn't match label {} (expected {:02x})".format(
                    op, label, expected
                )
            )
        # The validator consumes its argument list, so hand it a copy.
        line["instr"] = validator(line["instr"][:])
        yield subv.format(line)
if __name__ == "__main__":
    import sys

    # Filter mode: validate the program on stdin and print the normalized
    # lines to stdout one by one.
    for validated_line in validate(sys.stdin):
        print(validated_line)