#!/usr/bin/env python2
"""Two-pass assembler for a small 16-bit instruction set.

Reads an assembly source file, resolves labels to addresses, encodes each
instruction as one 16-bit word and prints all words as a single string of
4-digit upper-case hexadecimal numbers.

Word layout, as produced by the encoders below:

    bits 15..12  opcode
    bits 11..6   first register operand (two-operand forms)
    bits  5..0   second register / immediate operand
    bits 11..0   register or jump target (one-operand forms)

Operands are added into the word without range checks, so an oversized
immediate or address spills into the neighbouring field.

Source syntax: one instruction or one ``label:`` per line; lines whose
first non-blank character is ``;`` are comments; numbers are hexadecimal.

Usage: assembler.py SOURCE_FILE

The code deliberately sticks to constructs that behave the same on
Python 2.7 and Python 3.
"""
import sys

# Bit positions of the opcode and first-operand fields inside a word.
OPCODE_SHIFT = 12
REG_SHIFT = 6

# Address assigned to the first instruction of a program.
# NOTE(review): presumably the load address of the target machine - confirm.
DEFAULT_OFFSET = 31

REGISTERS = {
    'R0': 0, 'R1': 1, 'R2': 2, 'R3': 3, 'R4': 4, 'R5': 5, 'R6': 6,
}

# Mnemonic -> 4-bit opcode.  'set', 'cpy', 'cpt' and 'cpf' are never written
# in source; they are the four encodings of the 'mov' pseudo-instruction.
opcodes = {
    'halt': 0x0,
    'inc': 0x1,
    'jmp': 0x2,
    'jne': 0x3,
    'je': 0x4,
    'add': 0x5,
    'sub': 0x6,
    'xor': 0x7,
    'cmp': 0x8,
    'set': 0x9,
    'cpy': 0xA,
    'cpt': 0xB,
    'cpf': 0xC,
}

_ONE_ARG = ("inc", "je", "jne", "jmp")
_TWO_ARG = ("add", "sub", "xor", "cmp")


def tokenize(i):
    """Split one source line into tokens.

    Returns None for blank lines and comment lines (first non-blank
    character is ';').  Otherwise returns the whitespace-separated fields;
    for a three-field line the separator comma is removed from the first
    operand, so "mov R1, R2" becomes ['mov', 'R1', 'R2'].
    """
    # Strip before testing for ';' so that indented comments are dropped
    # too; a stray comment token would shift every later label address.
    line = i.strip()
    if not line or line.startswith(';'):
        return None
    # split() without a separator collapses runs of spaces and tabs, which
    # would otherwise produce empty fields.
    fields = line.split()
    if len(fields) == 3 and ',' in fields[1]:
        fields[1] = fields[1].split(',')[0]
    return fields


def genaddresses(tokens, offset=DEFAULT_OFFSET):
    """Build the label -> address table (first pass).

    A token list whose first field contains ':' is a label definition; the
    label name is the text before the ':'.  Labels occupy no space, so a
    label's address is `offset` plus the number of non-label token lists
    that precede it.

    tokens -- list of token lists as returned by tokenize()
    offset -- address of the first instruction (default 31)
    """
    table = {}
    labels_seen = 0
    for index, fields in enumerate(tokens):
        if ':' not in fields[0]:
            continue
        name = fields[0].split(':')[0]
        table[name] = offset + index - labels_seen
        labels_seen += 1
    return table


def gencode(tokens, addresses, ops):
    """Encode every instruction in `tokens` (second pass).

    tokens    -- list of token lists as returned by tokenize()
    addresses -- label table as returned by genaddresses()
    ops       -- mapping of mnemonic to opcode number

    Returns the list of encoded words as integers.  Label definitions
    produce no word.
    """
    out = []
    for fields in tokens:
        word = fields[0]
        if word == "halt":
            # Use the caller's table (not the module-level one) and place
            # the opcode in the opcode field like every other instruction.
            out.append(ops[word] << OPCODE_SHIFT)
        elif word in _ONE_ARG:
            out.append(handle1arg(fields, ops, REGISTERS, addresses))
        elif word in _TWO_ARG:
            out.append(handle2arg(fields, ops, REGISTERS))
        elif word == "mov":
            out.append(handlemov(fields, ops, REGISTERS))
        # NOTE(review): any other first field (labels, but also misspelt
        # mnemonics) is skipped silently, while genaddresses() still counts
        # a non-label line as one word - consider rejecting unknown
        # mnemonics.
    return out


def handle1arg(l, ops, regs, ads):
    """Encode a one-operand instruction: inc, je, jne or jmp.

    'inc' takes a register.  The jumps take a target that is either a label
    from `ads` or a hexadecimal address.  A defined label wins over the
    hexadecimal reading, so labels such as 'bad' or 'add' resolve to their
    own address rather than to 0xBAD / 0xADD.

    Raises KeyError for an unknown register, or for a jump target that is
    neither a defined label nor valid hexadecimal.
    """
    op = ops[l[0]] << OPCODE_SHIFT
    if l[0] == 'inc':
        return op + regs[l[1]]
    target = l[1]
    if target in ads:
        return op + ads[target]
    try:
        return op + int(target, 16)
    except ValueError:
        raise KeyError("undefined label or bad address: %r" % (target,))


def handle2arg(l, ops, regs):
    """Encode a two-register instruction: add, sub, xor or cmp."""
    op = ops[l[0]] << OPCODE_SHIFT
    op += regs[l[1]] << REG_SHIFT
    op += regs[l[2]]
    return op


def handlemov(l, ops, regs):
    """Encode 'mov', choosing the real opcode from the operand shapes.

        mov [Rx], Ry   -> 'cpt'  (first operand is bracketed)
        mov Rx, [Ry]   -> 'cpf'  (second operand is bracketed)
        mov Rx, Ry     -> 'cpy'  (both operands contain 'R')
        mov Rx, imm    -> 'set'  (imm is hexadecimal)

    A bracketed operand must be exactly '[Rn]': the register name is taken
    from characters 1..2 of the operand.
    """
    dst, src = l[1], l[2]
    if "[" in dst:
        op = ops['cpt'] << OPCODE_SHIFT
        op += regs[dst[1:3]] << REG_SHIFT
        op += regs[src]
    elif "[" in src:
        op = ops['cpf'] << OPCODE_SHIFT
        op += regs[dst] << REG_SHIFT
        op += regs[src[1:3]]
    elif "R" in dst and "R" in src:
        op = ops['cpy'] << OPCODE_SHIFT
        op += regs[dst] << REG_SHIFT
        op += regs[src]
    else:
        op = ops['set'] << OPCODE_SHIFT
        op += regs[dst] << REG_SHIFT
        op += int(src, 16)
    return op


def formatcode(code):
    """Return the words in `code` as one string of 4-digit upper-case hex
    numbers, zero-padded and with no separators."""
    return ''.join("{:0>4X}".format(word) for word in code)


def main(argv=None):
    """Assemble the file named by argv[1] and print the hex image."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        sys.exit("usage: assembler SOURCE_FILE")

    # The context manager closes the source file even if tokenizing fails.
    with open(argv[1]) as source:
        # A real list (not filter()) because it is walked twice below.
        tokens = [t for t in (tokenize(line) for line in source) if t]

    addresses = genaddresses(tokens)
    code = gencode(tokens, addresses, opcodes)
    # stdout.write instead of print: same output on Python 2 and 3.
    sys.stdout.write(formatcode(code) + "\n")


if __name__ == "__main__":
    main()