#!/usr/bin/perl # The P65 Assembler, v 0.1 # Copyright (c) 2001 Michael Martin # All rights reserved. # # Redistribution and use, with or without modification, are permitted # provided that the following conditions are met: # # - Redistributions of the code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # - The name of Michael Martin may not be used to endorse or promote # products derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. use strict; use integer; my %opcodes = ( adc_imm => 0x69, adc_zp => 0x65, adc_zpx => 0x75, adc_abs => 0x6D, adc_absx => 0x7D, adc_absy => 0x79, adc_indx => 0x61, adc_indy => 0x71, and_imm => 0x29, and_zp => 0x25, and_zpx => 0x35, and_abs => 0x2D, and_absx => 0x3D, and_absy => 0x39, and_indx => 0x21, and_indy => 0x31, asl_imp => 0x0A, asl_zp => 0x06, asl_zpx => 0x16, asl_abs => 0x0E, asl_absx => 0x1E, bcc_rel => 0x90, bcs_rel => 0xB0, beq_rel => 0xF0, bit_zp => 0x24, bit_abs => 0x2C, bmi_rel => 0x30, bne_rel => 0xD0, bpl_rel => 0x10, brk_imp => 0x00, bvc_rel => 0x50, bvs_rel => 0x70, clc_imp => 0x18, cld_imp => 0xD8, cli_imp => 0x58, clv_imp => 0xB8, cmp_imm => 0xC9, cmp_zp => 0xC5, cmp_zpx => 0xD5, cmp_abs => 0xCD, cmp_absx => 0xDD, cmp_absy => 0xD9, cmp_indx => 0xC1, cmp_indy => 0xD1, cpx_imm => 0xE0, cpx_zp => 0xE4, cpx_abs => 0xEC, cpy_imm => 0xC0, cpy_zp => 0xC4, cpy_abs => 0xCC, dec_zp => 0xC6, dec_zpx => 0xD6, dec_abs => 0xCE, dec_absx => 0xDE, dex_imp => 0xCA, dey_imp => 0x88, eor_imm => 0x49, eor_zp => 0x45, eor_zpx => 0x55, eor_abs => 0x4D, eor_absx => 0x5D, eor_absy => 0x59, eor_indx => 0x41, eor_indy => 0x51, inc_zp => 0xE6, inc_zpx => 0xF6, inc_abs => 0xEE, inc_absx => 0xFE, inx_imp => 0xE8, iny_imp => 0xC8, jmp_abs => 0x4C, jmp_ind => 0x6C, jsr_abs => 0x20, lda_imm => 0xA9, lda_zp => 0xA5, lda_zpx => 0xB5, lda_abs => 0xAD, lda_absx => 0xBD, lda_absy => 0xB9, lda_indx => 0xA1, lda_indy => 0xB1, ldx_imm => 0xA2, ldx_zp => 0xA6, ldx_zpy => 0xB6, ldx_abs => 0xAE, ldx_absy => 0xBE, ldy_imm => 0xA0, ldy_zp => 0xA4, ldy_zpx => 0xB4, ldy_abs => 0xAC, ldy_absx => 0xBC, lsr_imp => 0x4A, lsr_zp => 0x46, lsr_zpy => 0x56, lsr_abs => 0x4E, lsr_absy => 0x5E, nop_imp => 0xEA, ora_imm => 0x09, ora_zp => 0x05, ora_zpx => 0x15, ora_abs => 0x0D, ora_absx => 0x1D, ora_absy => 0x19, ora_indx => 0x01, ora_indy => 0x11, pha_imp => 0x48, php_imp => 0x08, pla_imp => 0x68, plp_imp => 0x28, rol_imp => 0x2A, rol_zp => 0x26, rol_zpx => 0x36, rol_abs => 0x2E, rol_absx => 0x3E, ror_imp => 0x6A, ror_zp => 0x66, ror_zpx => 0x76, ror_abs => 0x6E, ror_absx => 0x7E, rti_imp => 0x40, rts_imp => 0x60, sbc_imm => 0xE9, sbc_zp => 0xE5, sbc_zpx => 0xF5, sbc_abs => 0xED, sbc_absx => 0xFD, sbc_absy => 0xF9, sbc_indx => 0xE1, sbc_indy => 0xF1, sec_imp => 0x38, sed_imp => 0xF8, sei_imp => 0x78, sta_zp => 0x85, sta_zpx => 0x95, sta_abs => 0x8D, sta_absx => 0x9D, sta_absy => 0x99, sta_indx => 0x81, sta_indy => 0x91, stx_zp => 0x86, stx_zpy => 0x96, stx_abs => 0x8E, sty_zp => 0x84, sty_zpx => 0x94, sty_abs => 0x8C, tax_imp => 0xAA, tay_imp => 0xA8, tya_imp => 0x98, tsx_imp => 0xBA, txa_imp => 0x8A, txs_imp => 0x9A, tya_imp => 0x98 ); my %labels = ( ); # Labels my @errors = ( ); # Error messages # Data for tracking forward references my @forwardlines = (); # The line of code my @forwardpcs = (); # Program counter location of req my @forwardlinenums = (); # The line number (for error reporting) my @forwardlabels = (); # Labels themselves (for error reporting) my @forwardoffsets = (); # Location in the @code array sub argval { my $arg = shift; my $linenum = shift; my $firstchar = substr($arg, 0, 1); my $rest = substr($arg,1); # print "Parsing $arg, checking possible flag char $firstchar.\n"; # Check '<', '>' prefixes if ($firstchar eq '<') { # print "Desires low byte.\n"; my $result = argval($rest); return -1 if ($result == -1); return $result % 256; } elsif ($firstchar eq '>') { # print "Desires high byte.\n"; my $result = argval($rest); return -1 if ($result == -1 || $result > 0xFFFF); return int($result / 256); # Todo: Check for anonymous label refs here # Check arithmetic suffixes } elsif ($arg =~ /(.*)\s*\+([0-9]+)$/) { my $toadd = int $2; my $base = argval($1); return -1 if ($base == -1); return $base+$toadd; } elsif ($arg =~ /(.*)\s*\-([0-9]+)$/) { my $tosub = int $2; my $base = argval($1); return -1 if ($base == -1); return $base-$tosub; } elsif ($firstchar eq '$') { # print "Reading hex value: "; if ($rest =~ /^[0-9a-f]+$/i) { my $result = hex $rest; # print "$result\n"; return $result; } else { push @errors, "$linenum: Expected a hex value, not '$rest'"; return 0; } } elsif ($firstchar =~ /[1-9]/) { # print "Reading dec value: "; if ($arg =~ /^[0-9]+$/i) { my $result = int $arg; # print "$result\n"; return $result; } else { push @errors, "$linenum: Expected a decimal value, not '$rest'"; return 0; } } elsif ($firstchar eq '0') { # print "Reading oct value: "; if ($arg =~ /^[0-7]+$/i) { my $result = oct $arg; # print "$result\n"; return $result; } else { push @errors, "$linenum: Expected an octal value, not '$rest'"; return 0; } } elsif ($firstchar eq "%") { # print "Reading bin value: "; if ($rest =~ /^[01]+$/) { my $result = 0; my @bits = split //, $rest; for (@bits) { $result *= 2; $result += $_; } # print "$result\n"; return $result; } else { push @errors, "$linenum: Expected a binary value, not '$rest'"; return 0; } } elsif ($firstchar eq "'") { my $char = substr($rest, 0, 1); my $result = ord $char; # print "Reading character value: '$char' => $result\n"; return $result; } else { # print "Label lookup: '$arg' => "; if (exists($labels{$arg})) { my $result = $labels{$arg}; # print "$result\n"; return $result; } else { # print "unknown" return -1; } } } sub parseline { my $line = shift; my $pc = shift; my $offset = shift; my $linenum = shift; my $badlabel = shift; $line =~ s/\;.*$//; # remove comments NOTE: This happens too soon and is a bug! $line =~ s/^\s*//; # remove leading whitespace $line =~ s/\s*$//; # remove trailing whitespace return () if ($line eq ''); # skip blank or comment lines # Is this a pragma line (does it begin with a .)? if ($line =~ /^\.(\S*)\s+(.*)/) { my $pragma = lc($1); my $rest = $2; if ($pragma eq 'raw') { $rest =~ s/\s//g; # Obliterate all whitespace if ($rest =~ /^([0-9a-f][0-9a-f])*$/i) { my @newcode = map hex, $rest =~ /../g; return @newcode; } else { push @errors, "$linenum: Invalid .raw data"; return (); } } elsif ($pragma eq 'ascii') { $rest =~ s/^\s*//; $rest =~ s/\s*$//; $rest = substr($rest, 1); # Clear out the bracket characters chop $rest; my @newcode = map ord, split (//, $rest); return @newcode; } elsif ($pragma eq 'alias') { $rest =~ s/^\s*//; $rest =~ s/\s*$//; my ($newlabel, $newtarget) = split " ", $rest; if (exists $labels{$newlabel}) { push @errors, "$linenum: Duplicate label '$newlabel'"; return (); } my $target = argval $newtarget, $linenum; if ($target == -1) { push @errors, "$linenum: Aliases cannot be forward references"; return (); } $labels{$newlabel} = $target; return (); } elsif ($pragma eq 'address') { $rest =~ s/^\s*//; $rest =~ s/\s*$//; my $target = argval $rest, $linenum; if ($target == -1) { &$badlabel(".address $rest", $pc, $linenum, $rest, $offset); return (0, 0); } if ($target < 0 or $target > 0xFFFF) { push @errors, "$linenum: address constant out of range"; return (); } return ($target % 256, int ($target / 256)); } elsif ($pragma eq 'include') { $rest =~ s/^\s*//; $rest =~ s/\s*$//; print "Including $rest at $pc, file offset $offset"; return parsefile($rest, $pc, $offset); } elsif ($pragma eq 'advance') { $rest =~ s/^\s*//; $rest =~ s/\s*$//; my $target = argval $rest, $linenum; if ($target < $pc) { push @errors, "$linenum: Invalid .advance"; return (); } return (0) x ($target - $pc); } elsif ($pragma eq 'link') { $rest =~ s/^\s*//; $rest =~ s/\s*$//; my ($file, $basespec) = split /\s+/, $rest; my $newbase = argval $basespec, $linenum; if ($newbase < 0) { push @errors, "$linenum: .link cannot take labels"; return (); } print "Linking $file at $newbase, file offset $offset\n"; return parsefile($file, $newbase, $offset); } else { push @errors, "$linenum: Unknown assembler pragma .$pragma"; return (); } } # Is this a label definition? if ($line =~ /^([\w\']+):(.*)/) { my $newlabel = $1; my $restofline = $2; if (exists $labels{$newlabel}) { push @errors, "$linenum: Duplicate label definition '$newlabel'"; } else { print sprintf "Label: $newlabel => 0x%04x\n", $pc; $labels{$newlabel} = $pc; } return parseline ($restofline, $pc, $offset, $linenum, $badlabel); } # Otherwise, it's an instruction. First we extract the opcode and the "stuff". # The stuff contains the information we need to deduce argument and addressing # mode. if ($line =~ /^(\w\w\w)(.*)/) { my $opcode = lc($1); my $stuff = $2; $stuff =~ s/\s//g; # Obliterate whitespace return parseinstr($opcode, $stuff, $pc, $offset, $linenum, $badlabel); } else { # Unknown line type - produce an error, produce no code. push @errors, "$linenum: expected instruction, got '$line'"; return (); } } sub getnexttoken { my $argref = shift; my $token = ""; $token = shift @$argref while ($token eq "" && @$argref != ()); return $token; } sub forwardreference { push @forwardlines, shift; push @forwardpcs, shift; push @forwardlinenums, shift; push @forwardlabels, shift; push @forwardoffsets, shift; } sub labelerror { my ($foo, $bar, $line, $label, $baz) = @_; push @errors, "$line: Bad label '$label'"; } sub parseinstr { my ($opcode, $stuff, $pc, $offset, $linenum, $badlabel) = @_; my @tokens = split /([\(\)\#\,])/, $stuff; my $toklist = \@tokens; my @newcode = ( ); # print "parsing instruction $opcode $stuff\n"; my $token = getnexttoken($toklist); if ($opcode =~ /(bcc|bcs|beq|bmi|bne|bpl|bvc|bvs)/) { # relative mode push @newcode, $opcodes{"${opcode}_rel"}; my $abstarget = argval $token, $linenum; if ($abstarget == -1) { &$badlabel("$opcode $stuff", $pc, $linenum, $token, $offset); push @newcode, 0; return @newcode; } $token = getnexttoken $toklist; if ($token ne "") { push @errors, "$linenum: Branches take a single label as argument"; return (); } my $reltarget = $abstarget - ($pc + 2); if ($reltarget < -128 or $reltarget > 127) { push @errors, "$linenum: Branch out of range"; return (); } push @newcode, ($reltarget < 0) ? 256 + $reltarget : $reltarget; # print "Branch: $opcode $stuff => ", join (' ', @newcode), "\n"; return @newcode; } if ($token eq '') { # Implied mode unless (exists $opcodes{"${opcode}_imp"}) { push @errors, "$linenum: Illegal instruction: $opcode (implied)"; return (); } push @newcode, $opcodes{"${opcode}_imp"}; return @newcode; } if ($token eq '#') { # Immediate mode unless (exists $opcodes{"${opcode}_imm"}) { push @errors, "$linenum: Illegal instruction: $opcode (immediate)"; return (); } push @newcode, $opcodes{"${opcode}_imm"}; my $arg = getnexttoken $toklist; $token = argval $arg, $linenum; if ($token == -1) { &$badlabel("$opcode $stuff", $pc, $linenum, $arg, $offset); push @newcode, 0; return @newcode; } if ($token < 0 or $token > 255) { push @errors, "$linenum: Immediate value out of range"; return (); } push @newcode, $token; $token = getnexttoken $toklist; if ($token ne "") { push @errors, "$linenum: Immediates take only one argument"; return (); } return @newcode; } if ($token eq '(') { # Indirection my $arg = getnexttoken $toklist; my $argbytes = argval $arg, $linenum; if ($argbytes == -1) { &$badlabel("$opcode $stuff", $pc, $linenum, $arg, $offset); return (0, 0, 0xEA); # NOP for 2-address cases } $token = getnexttoken $toklist; my ($mode, $modename); my $ok = 0; if ($token eq ',') { $token = getnexttoken $toklist; if ($token =~ /^x$/i) { $token = getnexttoken $toklist; if ($token eq ')') { $token = getnexttoken $toklist; if ($token eq '') { $mode = 'indx'; $modename = 'Indirect Indexed'; $ok = 1; } } } } elsif ($token eq ')') { $token = getnexttoken $toklist; if ($token eq '') { $mode = 'ind'; $ok = 1; $modename = 'Pure Indirect'; } if ($token eq ',') { $token = getnexttoken $toklist; if ($token =~ /^y$/i) { $token = getnexttoken $toklist; if ($token eq '') { $mode = 'indy'; $ok = 1; $modename = 'Indirect Indexed'; } } } unless ($ok) { push @errors, "$linenum: Illegal indirection"; return (); } } unless (exists $opcodes{"${opcode}_$mode"}) { push @errors, "$linenum: Illegal instruction: $opcode ($modename)"; return (); } push @newcode, $opcodes{"${opcode}_$mode"}; if ($mode eq 'ind') { if ($argbytes < 0 or $argbytes > 0xFFFF) { push @errors, "$linenum: Argument out of range"; return (); } push @newcode, $argbytes % 256; push @newcode, int ($argbytes/256); } else { if ($argbytes < 0 or $argbytes > 0xFF) { push @errors, "$linenum: Argument out of range"; return (); } push @newcode, $argbytes; } return @newcode; } # Otherwise, it's Absolute or Zero page. my $arg = $token; my $argbytes = argval $arg, $linenum; if ($argbytes == -1) { &$badlabel("$opcode $stuff", $pc, $linenum, $arg, $offset); return (0, 0, 0xEA); # NOP for Zero-page cases. Note that forward # references to ZP variables will be slower than # absolute cases. Be warned. } $token = getnexttoken $toklist; my ($modeabs, $modeabsname, $modezp, $modezpname); my $ok = 0; if ($token eq '') { ($modeabs, $modeabsname) = ('abs', 'Absolute'); ($modezp, $modezpname) = ('zp', 'Zero Page'); $ok = 1; } elsif ($token eq ',') { $token = getnexttoken $toklist; if ($token =~ /^x$/i) { ($modeabs, $modeabsname) = ('absx', 'Absolute, X'); ($modezp, $modezpname) = ('zpx', 'Zero Page, X'); $ok = 1; } if ($token =~ /^y$/i) { ($modeabs, $modeabsname) = ('absy', 'Absolute, Y'); ($modezp, $modezpname) = ('zpy', 'Zero Page, Y'); $ok = 1; } $token = getnexttoken $toklist; $ok = 0 if ($token ne ''); } unless ($ok) { push @errors, "$linenum: Expected register index or no index at all"; return (); } if ($argbytes < 0 or $argbytes > 0xFFFF) { push @errors, "$linenum: Argument out of range"; return (); } # print "Instr: $opcode $stuff : Argument = $argbytes, considering $modezp or $modeabs\n"; if ($argbytes < 256 and exists $opcodes{"${opcode}_$modezp"}) { # print " --- Using $modezp\n"; push @newcode, $opcodes{"${opcode}_$modezp"}; push @newcode, $argbytes; return @newcode; } unless (exists $opcodes{"${opcode}_$modeabs"}) { push @errors, "$linenum: Illegal instruction: $opcode ($modeabsname)"; return (); } push @newcode, $opcodes{"${opcode}_$modeabs"}; push @newcode, $argbytes % 256; push @newcode, int ($argbytes/256); return @newcode; } sub parsefile { my $filename = shift; my $pcbase = shift; my $offset = shift; my $lnum = 1; my @code = (); local *INPUT; open INPUT, $filename or die "Cannot open $filename. Dying painful death"; while () { push @code, parseline($_, $pcbase+@code, $offset+@code, "$filename:$lnum", \&forwardreference); $lnum++; } return @code; } # Main routine { my @code = parsefile ($ARGV[0], 0, 0); # Backpatch forward references while (@forwardlines) { my $line = shift @forwardlines; my $pcs = shift @forwardpcs; my $lnum = shift @forwardlinenums; my $label = shift @forwardlabels; my $offset = shift @forwardoffsets; print sprintf("Repairing: %04x $line ($offset)\n", $pcs); my @fixedcode = parseline($line, $pcs, $offset, $lnum, \&labelerror); splice @code, $offset, $#fixedcode+1, @fixedcode; } if (@errors) { foreach (@errors) { print "ERROR: $_\n"; } } else { # my $count = 0; # foreach (@prelude, @code) { # printf "%02x", $_; # $count = ($count+1) % 16; # if ($count == 8) { print '-'; } # elsif ($count == 0) { print "\n"; } # else { print ' '; } # } # print "\n"; my $outfile = $ARGV[1]; $outfile = "c64.out" if ($outfile eq ""); open OUTPUT, ">$outfile" or die "Failed to create $outfile"; binmode OUTPUT; print OUTPUT pack "c*", @code; } }