77
88TEXT , DATA , BSS = 'text' , 'data' , 'bss'
99
10+ REL , ABS = 0 , 1
11+
12+
class SymbolTable:
    """Symbol table used by the assembler and the opcode encoders.

    Every symbol maps to an entry ``(stype, section, value)``:

    * ``stype == REL``: ``value`` is an offset inside ``section``; its
      absolute address is ``bases[section] + value``.
    * ``stype == ABS``: ``value`` already is the final value and
      ``section`` is not used for resolving it.

    While the current pass is 1, unknown symbols and unknown section
    bases resolve to dummy values instead of raising, so that a first
    pass can run before all symbols and bases are known.
    """

    def __init__(self, symbols, bases):
        """Wrap the *symbols* dict (name -> entry) and the *bases* dict
        (section -> base address). Both dicts are used as given, not copied.
        """
        self._symbols = symbols
        self._bases = bases
        self._pass = None
        # Origin used by resolve_relative(); stays (None, None) until
        # set_from() has been called.
        self._from_section = None
        self._from_offset = None

    def set_pass(self, _pass):
        """Remember the current pass number (pass 1 enables dummy values)."""
        self._pass = _pass

    def set_bases(self, bases):
        """Replace the section -> base address mapping."""
        self._bases = bases

    def set_from(self, from_section, from_offset):
        """Set the location that relative addresses are computed from."""
        self._from_section, self._from_offset = from_section, from_offset

    def get_from(self):
        """Return the ``(section, offset)`` pair stored by set_from()."""
        return self._from_section, self._from_offset

    def set_sym(self, symbol, stype, section, value):
        """Define *symbol* as ``(stype, section, value)``.

        Defining a symbol again with an identical entry is allowed (this
        happens when the same source is processed in a second pass).

        Raises:
            Exception: if *symbol* already exists with a different entry.
        """
        entry = (stype, section, value)
        if symbol in self._symbols and entry != self._symbols[symbol]:
            raise Exception(
                'redefining symbol %s with different value %r -> %r.'
                % (symbol, self._symbols[symbol], entry))
        self._symbols[symbol] = entry

    def has_sym(self, symbol):
        """Return True if *symbol* is defined."""
        return symbol in self._symbols

    def get_sym(self, symbol):
        """Return the entry of *symbol*.

        In pass 1 an unknown symbol yields the dummy ``(REL, TEXT, 0)``;
        in any other pass the KeyError propagates.
        """
        try:
            entry = self._symbols[symbol]
        except KeyError:
            if self._pass == 1:
                entry = (REL, TEXT, 0)  # for a dummy, this is good enough
            else:
                raise
        return entry

    def dump(self):
        """Print every symbol together with its entry."""
        for symbol, entry in self._symbols.items():
            print(symbol, entry)

    def to_abs_addr(self, section, offset):
        """Return ``base of section + offset``.

        In pass 1 an unknown section uses base 0; in any other pass the
        KeyError propagates.
        """
        try:
            base = self._bases[section]
        except KeyError:
            if self._pass == 1:
                base = 0  # for a dummy this is good enough
            else:
                raise
        return base + offset

    def _entry(self, symbol):
        """Return the entry for a symbol name, or *symbol* itself if it
        already is an entry tuple; raise TypeError for anything else."""
        if isinstance(symbol, str):
            return self.get_sym(symbol)
        if isinstance(symbol, tuple):
            return symbol
        raise TypeError

    def resolve_absolute(self, symbol):
        """Return the absolute value of *symbol* (a name or an entry tuple).

        Raises:
            TypeError: if *symbol* is neither str nor tuple, or if its
                symbol type is neither REL nor ABS.
        """
        stype, section, value = self._entry(symbol)
        if stype == REL:
            return self.to_abs_addr(section, value)
        if stype == ABS:
            return value
        raise TypeError(stype)

    def resolve_relative(self, symbol):
        """Return the distance from the set_from() location to *symbol*.

        *symbol* may be a name or an entry tuple. An unknown symbol type
        raises TypeError, the same as resolve_absolute().
        """
        sym_addr = self.resolve_absolute(symbol)
        from_addr = self.to_abs_addr(self._from_section, self._from_offset)
        return sym_addr - from_addr
1091
1192class Assembler :
1293
13- def __init__ (self ):
14- self .symbols = {}
94+ def __init__ (self , symbols = None , bases = None ):
95+ self .symbols = SymbolTable (symbols or {}, bases or {})
96+ opcodes .symbols = self .symbols # XXX dirty hack
97+
98+ def init (self , a_pass ):
99+ self .a_pass = a_pass
100+ self .symbols .set_pass (a_pass )
15101 self .sections = dict (text = [], data = [])
16102 self .offsets = dict (text = 0 , data = 0 , bss = 0 )
17103 self .section = TEXT
@@ -78,10 +164,18 @@ def finalize_sections(self):
78164 if s is not BSS :
79165 self .sections [s ].append (fill )
80166
167+ def compute_bases (self ):
168+ bases = {}
169+ addr = 0
170+ # lay out sections in this order:
171+ for s in [TEXT , DATA , BSS ]: # TODO: more flexibility for custom sections
172+ bases [s ] = addr
173+ addr += self .offsets [s ] // 4 # 32bit word addresses
174+ return bases
175+
81176 def dump (self ):
82177 print ("Symbols:" )
83- for label , section_offset in sorted (self .symbols .items ()):
84- print (label , section_offset )
178+ self .symbols .dump ()
85179 print ("%s section:" % TEXT )
86180 for t in self .sections [TEXT ]:
87181 print ("%08x" % int .from_bytes (t , 'little' ))
@@ -108,19 +202,34 @@ def d_data(self):
108202 def d_bss (self ):
109203 self .section = BSS
110204
205+ def fill (self , section , amount , fill_byte ):
206+ if fill_byte is not None and section is BSS :
207+ raise ValueError ('fill in bss section not allowed' )
208+ if section is TEXT : # TODO: text section should be filled with NOPs
209+ raise ValueError ('fill/skip/align in text section not supported' )
210+ fill = int (fill_byte or 0 ).to_bytes (1 , 'little' ) * amount
211+ self .offsets [section ] += len (fill )
212+ if section is not BSS :
213+ self .sections [section ].append (fill )
214+
111215 def d_skip (self , amount , fill = None ):
112- s = self .section
113216 amount = int (amount )
114- if fill is not None and s is BSS :
115- raise ValueError ('fill not allowed in section %s' % s )
116- if s is BSS :
117- self .append_section (amount )
118- else :
119- fill = int (fill or 0 ).to_bytes (1 , 'little' ) * amount
120- self .append_section (fill )
217+ self .fill (self .section , amount , fill )
121218
122219 d_space = d_skip
123220
221+ def d_align (self , align = 4 , fill = None ):
222+ align = int (align )
223+ offs = self .offsets [self .section ]
224+ mod = offs % align
225+ if mod :
226+ amount = align - mod
227+ self .fill (self .section , amount , fill )
228+
229+ def d_set (self , symbol , expr ):
230+ value = int (expr ) # TODO: support more than just integers
231+ self .symbols .set_sym (symbol , ABS , None , value )
232+
124233 def append_data (self , wordlen , args ):
125234 data = [int (arg ).to_bytes (wordlen , 'little' ) for arg in args ]
126235 self .append_section (b'' .join (data ))
@@ -134,12 +243,11 @@ def d_word(self, *args):
134243 def d_long (self , * args ):
135244 self .append_data (4 , args )
136245
137- def assemble (self , lines ):
246+ def assembler_pass (self , lines ):
138247 for label , opcode , args in self .parse (lines ):
248+ self .symbols .set_from (self .section , self .offsets [self .section ] // 4 )
139249 if label is not None :
140- if label in self .symbols :
141- raise Exception ('label %s is already defined.' % label )
142- self .symbols [label ] = (self .section , self .offsets [self .section ] // 4 )
250+ self .symbols .set_sym (label , REL , * self .symbols .get_from ())
143251 if opcode is not None :
144252 if opcode [0 ] == '.' :
145253 # assembler directive
@@ -159,3 +267,10 @@ def assemble(self, lines):
159267 raise Exception ('Unknown opcode or directive: %s' % opcode )
160268 self .finalize_sections ()
161269
270+ def assemble (self , lines ):
271+ self .init (1 ) # pass 1 is only to get the symbol table right
272+ self .assembler_pass (lines )
273+ self .symbols .set_bases (self .compute_bases ())
274+ self .init (2 ) # now we know all symbols and bases, do the real assembler pass, pass 2
275+ self .assembler_pass (lines )
276+
0 commit comments