#!/usr/bin/perl # The P65 Assembler, v 0.3 # Copyright (c) 2001 Michael Martin # All rights reserved. # # Redistribution and use, with or without modification, are permitted # provided that the following conditions are met: # # - Redistributions of the code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # - The name of Michael Martin may not be used to endorse or promote # products derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN # ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. use strict; use integer; my %opcodes = ( adc_imm => 0x69, adc_zp => 0x65, adc_zpx => 0x75, adc_abs => 0x6D, adc_absx => 0x7D, adc_absy => 0x79, adc_indx => 0x61, adc_indy => 0x71, and_imm => 0x29, and_zp => 0x25, and_zpx => 0x35, and_abs => 0x2D, and_absx => 0x3D, and_absy => 0x39, and_indx => 0x21, and_indy => 0x31, asl_imp => 0x0A, asl_zp => 0x06, asl_zpx => 0x16, asl_abs => 0x0E, asl_absx => 0x1E, bcc_rel => 0x90, bcs_rel => 0xB0, beq_rel => 0xF0, bit_zp => 0x24, bit_abs => 0x2C, bmi_rel => 0x30, bne_rel => 0xD0, bpl_rel => 0x10, brk_imp => 0x00, bvc_rel => 0x50, bvs_rel => 0x70, clc_imp => 0x18, cld_imp => 0xD8, cli_imp => 0x58, clv_imp => 0xB8, cmp_imm => 0xC9, cmp_zp => 0xC5, cmp_zpx => 0xD5, cmp_abs => 0xCD, cmp_absx => 0xDD, cmp_absy => 0xD9, cmp_indx => 0xC1, cmp_indy => 0xD1, cpx_imm => 0xE0, cpx_zp => 0xE4, cpx_abs => 0xEC, cpy_imm => 0xC0, cpy_zp => 0xC4, cpy_abs => 0xCC, dec_zp => 0xC6, dec_zpx => 0xD6, dec_abs => 0xCE, dec_absx => 0xDE, dex_imp => 0xCA, dey_imp => 0x88, eor_imm => 0x49, eor_zp => 0x45, eor_zpx => 0x55, eor_abs => 0x4D, eor_absx => 0x5D, eor_absy => 0x59, eor_indx => 0x41, eor_indy => 0x51, inc_zp => 0xE6, inc_zpx => 0xF6, inc_abs => 0xEE, inc_absx => 0xFE, inx_imp => 0xE8, iny_imp => 0xC8, jmp_abs => 0x4C, jmp_ind => 0x6C, jsr_abs => 0x20, lda_imm => 0xA9, lda_zp => 0xA5, lda_zpx => 0xB5, lda_abs => 0xAD, lda_absx => 0xBD, lda_absy => 0xB9, lda_indx => 0xA1, lda_indy => 0xB1, ldx_imm => 0xA2, ldx_zp => 0xA6, ldx_zpy => 0xB6, ldx_abs => 0xAE, ldx_absy => 0xBE, ldy_imm => 0xA0, ldy_zp => 0xA4, ldy_zpx => 0xB4, ldy_abs => 0xAC, ldy_absx => 0xBC, lsr_imp => 0x4A, lsr_zp => 0x46, lsr_zpy => 0x56, lsr_abs => 0x4E, lsr_absy => 0x5E, nop_imp => 0xEA, ora_imm => 0x09, ora_zp => 0x05, ora_zpx => 0x15, ora_abs => 0x0D, ora_absx => 0x1D, ora_absy => 0x19, ora_indx => 0x01, ora_indy => 0x11, pha_imp => 0x48, php_imp => 0x08, pla_imp => 0x68, plp_imp => 0x28, rol_imp => 0x2A, rol_zp => 0x26, rol_zpx => 0x36, rol_abs => 0x2E, rol_absx => 0x3E, ror_imp => 0x6A, ror_zp => 0x66, ror_zpx => 0x76, ror_abs => 0x6E, ror_absx => 0x7E, rti_imp => 0x40, rts_imp => 0x60, sbc_imm => 0xE9, sbc_zp => 0xE5, sbc_zpx => 0xF5, sbc_abs => 0xED, sbc_absx => 0xFD, sbc_absy => 0xF9, sbc_indx => 0xE1, sbc_indy => 0xF1, sec_imp => 0x38, sed_imp => 0xF8, sei_imp => 0x78, sta_zp => 0x85, sta_zpx => 0x95, sta_abs => 0x8D, sta_absx => 0x9D, sta_absy => 0x99, sta_indx => 0x81, sta_indy => 0x91, stx_zp => 0x86, stx_zpy => 0x96, stx_abs => 0x8E, sty_zp => 0x84, sty_zpx => 0x94, sty_abs => 0x8C, tax_imp => 0xAA, tay_imp => 0xA8, tya_imp => 0x98, tsx_imp => 0xBA, txa_imp => 0x8A, txs_imp => 0x9A, tya_imp => 0x98 ); # If $op is a valid opcode, $instrs =~ /-$op-/ my $instrs="-adc-and-asl-bcc-bcs-beq-bit-bmi-bne-bpl-brk-bvc-bvs-clc-cld-cli-clv-cmp-cpx-cpy-dec-dex-dey-eor-inc-inx-iny-jmp-jsr-lda-ldx-ldy-lsr-nop-ora-pha-php-pla-plp-rol-ror-rti-rts-sbc-sec-sed-sei-sta-stx-sty-tax-tay-tsx-txa-txs-tya-"; # Mapping of addressing mode mnemonics to real names my %address_modes = ( imp => "Implied", imm => "Immediate", rel => "Relative", abs => "Absolute", absx => "Absolute, X", absy => "Absolute, Y", zp => "Zero Page", zpx => "Zero Page, X", zpy => "Zero Page, Y", ind => "Indirect", indx => "(Indirect, X)", indy => "(Indirect), Y" ); # Pragma dispatch table my %pragmas = ( raw => \&pragma_raw, ascii => \&pragma_ascii, alias => \&pragma_alias, address => \&pragma_address, include => \&pragma_include, advance => \&pragma_advance, link => \&pragma_link, org => \&pragma_org ); sub tokenize { my $input = shift; my @result = (); my $value = ""; my ($quotemode, $backspacemode) = (0, 0); my @chars = split //, $input; for (@chars) { if ($backspacemode) { $backspacemode = 0; $value .= $_; } elsif ($_ eq "\\") { $backspacemode = 1; } elsif ($quotemode) { if ($_ eq '"') { $quotemode = 0; } else { $value .= $_; } } else { if ($_ =~ /\s/) { if ($value ne "") { last if ($value =~ /^;/); # skip comments push @result, $value; $value = ""; } } elsif ($_ =~ /[\#\,\(\)\:\.]/) { if ($value ne "") { last if ($value =~ /^;/); # skip comments push @result, $value; $value = ""; } push @result, $_; } elsif ($_ eq '"') { if ($value ne "") { last if ($value =~ /^;/); # skip comments push @result, $value; } $value = '"'; $quotemode = 1; } else { $value .= $_; } } } if ($backspacemode) { asmerror("Cannot end a line with a backspace"); } if ($quotemode) { asmerror("Unterminated string constant"); } if (($value ne "") && ($value !~ /^;/)) { push @result, $value; $value = ""; } push @result, "#EOL#"; return @result; } # Parsing data is global! my @line; # Current line my $pc; # Current program counter my $linenum; # Current line number my $currentfile; # Current file name my %labels = ( ); # Label -> PC hash my @IR = ( ); # Intermediate Representation list my $errorcount = 0; sub asmerror { my $err = shift; print "ERROR: $currentfile:$linenum: $err\n"; $errorcount++; } sub argval { my $arg = shift; my $firstchar = substr($arg, 0, 1); my $rest = substr($arg,1); # print "Parsing $arg, checking possible flag char $firstchar.\n"; # Check '<', '>' prefixes if ($firstchar eq '<') { # print "Desires low byte.\n"; my $result = argval($rest); return -1 if ($result == -1); return $result % 256; } elsif ($firstchar eq '>') { # print "Desires high byte.\n"; my $result = argval($rest); return -1 if ($result == -1 || $result > 0xFFFF); return int($result / 256); # Todo: Check for anonymous label refs here # Check arithmetic suffixes } elsif ($arg =~ /(.*)\s*\+([0-9]+)$/) { my $toadd = int $2; my $base = argval($1); return -1 if ($base == -1); return $base+$toadd; } elsif ($arg =~ /(.*)\s*\-([0-9]+)$/) { my $tosub = int $2; my $base = argval($1); return -1 if ($base == -1); return $base-$tosub; } elsif ($firstchar eq '$') { if ($rest =~ /^[0-9a-f]+$/i) { my $result = hex $rest; return $result; } else { asmerror("Expected a hex value, not '$rest'"); return 0; } } elsif ($firstchar =~ /[1-9]/) { # print "Reading dec value: "; if ($arg =~ /^[0-9]+$/i) { my $result = int $arg; # print "$result\n"; return $result; } else { asmerror("Expected a decimal value, not '$rest'"); return 0; } } elsif ($firstchar eq '0') { # print "Reading oct value: "; if ($arg =~ /^[0-7]+$/i) { my $result = oct $arg; # print "$result\n"; return $result; } else { asmerror("Expected an octal value, not '$rest'"); return 0; } } elsif ($firstchar eq "%") { # print "Reading bin value: "; if ($rest =~ /^[01]+$/) { my $result = 0; my @bits = split //, $rest; for (@bits) { $result *= 2; $result += $_; } # print "$result\n"; return $result; } else { asmerror("Expected a binary value, not '$rest'"); return 0; } } elsif ($firstchar eq "'") { my $char = substr($rest, 0, 1); my $result = ord $char; # print "Reading character value: '$char' => $result\n"; return $result; } else { # print "Label lookup: '$arg' => "; if (exists($labels{$arg})) { my $result = $labels{$arg}; # print "$result\n"; return $result; } else { # print "unknown" return -1; } } } sub islabel { my $tok = shift; return ($tok !~ /^[\$%0-9<>\#\,\(\)\:\.\"]/); } sub expect { my $actual = shift @line; for (@_) { if (lc($actual) eq lc) { return $actual; } } my @expected = ( ); for (@_) { if ($_ eq "#EOL#") { push @expected, "end of line"; } else { push @expected, $_; }} my $expected = join '", "', @expected; asmerror "Expected \"$expected\""; } sub expect_one_byte { my $tok = shift @line; if ($tok =~ /[\#\,\(\)\:\.\"]/) { asmerror "Expected argument"; return (0, 0, ""); } my $val = argval $tok; if ($val > 0xff) { asmerror "Argument out of range (1-byte)"; return (0, 0, ""); } return ($val < 0) ? (1, 0xff, $tok) : (0, $val, ""); } sub expect_two_bytes { my $tok = shift @line; if ($tok =~ /[\#\,\(\)\:\.\"]/) { asmerror "Expected argument"; return (0, 0, ""); } my $val = argval $tok; if ($val > 0xffff) { asmerror "Argument out of range (2-byte)"; return (0, 0, ""); } return ($val < 0) ? (1, 0xffff, $tok) : (0, $val, ""); } sub expect_string { my $tok = shift @line; if ($tok !~ /[^\"]/) { asmerror "Expected string"; return ""; } return substr($tok,1); } sub expect_label { my $tok = shift @line; if (!islabel($tok)) { asmerror "Expected label"; return "#badlabel#"; } return $tok; } sub add_label { my ($newlabel, $pc) = @_; if (exists $labels{$newlabel}) { asmerror "Duplicate label '$newlabel'"; } else { $labels{$newlabel} = $pc; } } sub parse_line { if (@line > 1) { if ($line[1] eq ":") { my $newlabel = expect_label(); expect ":"; add_label($newlabel, $pc); parse_line(); return; } } if (@line > 0) { if ($line[0] eq ".") { parse_pragma(); } elsif ($line[0] eq "#EOL#") { return; } else { parse_instr(); } return; } } sub parse_pragma { expect("."); my $pragma = shift @line; if (exists $pragmas{$pragma}) { &{$pragmas{$pragma}}(@line); } else { asmerror "Unknown pragma .$pragma"; } } sub pragma_raw { pop @line; # Scrap the eol my $rest = join '', @line; if ($rest =~ /^([0-9a-f][0-9a-f])*$/i) { my @data= map hex, $rest =~ /../g; push @IR, [$linenum, $currentfile, "DATA", @data]; $pc += @data; } else { asmerror "Invalid .raw data"; } } sub pragma_ascii { my $str = expect_string(); expect("#EOL#"); my @data = map ord, split (//, $str); push @IR, [$linenum, $currentfile, "DATA", @data]; $pc += @data; } sub pragma_address { my ($forward, $addr, $label) = expect_two_bytes(); expect("#EOL#"); if ($forward) { push @IR, [$linenum, $currentfile, "FORWARD-DATA", $label]; } else { push @IR, [$linenum, $currentfile, "DATA", $addr % 256, int($addr / 256)]; } $pc += 2; } sub pragma_advance { my ($forward, $target, $undef) = expect_two_bytes(); expect("#EOL#"); if ($forward || ($target < $pc)) { asmerror "Invalid .advance"; } else { push @IR, [$linenum, $currentfile, "BLOCK", $target - $pc, 0]; } $pc = $target; } sub pragma_alias { my $newlabel = expect_label(); if (islabel($newlabel)) { my ($forward, $target, $undef) = expect_two_bytes(); if ($forward) { asmerror "Aliases cannot be forward references"; } else { add_label($newlabel, $target); } } } sub pragma_include { pop @line; # remove #EOL# my $rest = join '', @line; parsefile($rest); } sub pragma_link { pop @line; # remove #EOL# if (@line > 1) { my $tok = pop @line; my $file = join '', @line; @line = ($tok); my ($forward, $target, $undef) = expect_two_bytes(); if ($forward) { asmerror "Cannot load to a forward-referenced address"; } else { $pc = $target; parsefile($file); } } else { asmerror ".link lacking filename or address"; } } sub pragma_org { my ($forward, $target, $undef) = expect_two_bytes(); if ($forward) { asmerror "Cannot set PC to a forward-referenced address"; } else { $pc = $target; } } sub parse_instr { my $opcode = lc(shift @line); my ($forward, $size, $mode, $arg, $label); my @sizes; my @modes; if ($instrs !~ /-$opcode-/) { asmerror "Illegal opcode '$opcode'"; $opcode = "nop"; @modes = ("imp"); @sizes = (1); $arg = 0; } elsif ($line[0] eq "#") { expect("#"); @modes = ("imm"); @sizes = (2); ($forward, $arg, $label) = expect_one_byte(); expect("#EOL#"); } elsif ($line[0] eq "(") { # Some indirect mode. expect("("); if (@line > 1 && $line[1] eq ",") { @modes = ("indx"); @sizes = (2); ($forward, $arg, $label) = expect_one_byte(); expect(","); expect("x"); expect(")"); expect("#EOL#"); } elsif (@line > 2 && $line[2] eq "#EOL#") { @modes = ("ind"); @sizes = (3); ($forward, $arg, $label) = expect_two_bytes(); expect(")"); expect("#EOL#"); } else { @modes = ("indy"); @sizes = (2); ($forward, $arg, $label) = expect_one_byte(); expect(")"); expect(","); expect("y"); expect("#EOL#"); } } elsif ($line[0] eq "#EOL#") { @modes = ("imp"); @sizes = (1); $arg = 0; expect("#EOL#"); } else { # Zero page or absolute (possibly indexed) or relative. ($forward, $arg, $label) = expect_two_bytes(); my $tok = expect("#EOL#", ","); if ($tok eq ",") { $tok = expect("x", "y"); if ($tok eq "x") { @modes = ($arg < 256) ? ("zpx", "absx") : ("absx"); @sizes = ($arg < 256) ? (2, 3) : (3); } else { @modes = ($arg < 256) ? ("zpy", "absy") : ("absy"); @sizes = ($arg < 256) ? (2, 3) : (3); } expect("#EOL#"); } else { @modes = ($arg < 256) ? ("zp", "abs", "rel") : ("abs", "rel"); @sizes = ($arg < 256) ? (2, 3, 2) : (3, 2); } } for (@modes) { $mode = $_; $size = shift @sizes; last if (exists $opcodes{"${opcode}_${mode}"}); } if ($forward) { push @IR, [ $linenum, $currentfile, "FORWARD-INSTR", $pc, $opcode, $mode, $label ]; } else { push @IR, [ $linenum, $currentfile, "INSTR", $pc, $opcode, $mode, $arg ]; } $pc += $size; } sub parsefile { my $filename = shift; local *INPUT; my $oldfilename = $currentfile; my $oldlinenum = $linenum; $currentfile = $filename; $linenum = 0; open INPUT, $filename or die "Cannot open $filename. Dying painful death"; while () { $linenum++; @line = tokenize($_); parse_line; } $linenum = $oldlinenum; $currentfile = $oldfilename; } # Dispatch table for tranforming the IR into code. my %backend = ( "INSTR" => \&ir_instr, "DATA" => \&ir_data, "BLOCK" => \&ir_block, "FORWARD-INSTR" => \&ir_forwardinstr, "FORWARD-DATA" => \&ir_forwarddata ); sub ir_instr { my ($pc, $op, $mode, $arg) = @_; my $opcode; my $modestr = $address_modes{$mode}; # print(" $currentfile:$linenum ${op}_$mode $arg\n"); if (exists $opcodes{"${op}_$mode"}) { $opcode = $opcodes{"${op}_$mode"}; if ($mode eq "rel") { my $reltarget = $arg - ($pc + 2); if ($reltarget < -128 or $reltarget > 127) { asmerror "Branch out of range"; return (0, 0); } return ($opcode, ($reltarget < 0) ? 256 + $reltarget : $reltarget); } elsif ($mode eq "imp") { return ($opcode); } elsif ($mode =~ /zp|zpx|zpy|indx|indy|imm/) { if ($arg < 0 || $arg > 255) { asmerror "$modestr argument out of range"; return (0, 0); } return ($opcode, $arg); } else { if ($arg < 0 || $arg > 0xffff) { asmerror "$modestr argument out of range"; return (0, 0); } return ($opcode, $arg%256, int($arg/256)); } } else { asmerror("$op does not have addressing mode: $modestr"); return (); } } sub ir_data { # print(" $currentfile:$linenum (@_)\n"); return @_; } sub ir_forwardinstr { my ($pc, $opcode, $mode, $unk) = @_; @line = ($unk); my ($forward, $data, $lbl) = expect_two_bytes(); if ($forward) { asmerror("Unresolved label '$lbl'"); } return ir_instr($pc, $opcode, $mode, $data); } sub ir_forwarddata { @line = @_; my ($forward, $data, $lbl) = expect_two_bytes(); if ($forward) { asmerror("Unresolved label '$lbl'"); } return ir_data($data % 256, int($data/256)); } sub ir_block { my ($number, $type) = @_; # print ("(insert $number '${type}'s here...)\n"); return ($type) x $number; } # Main routine. $pc = $0; parsefile($ARGV[0]); my @code = ( ); for (@IR) { my @inst = @$_; $linenum = shift @inst; $currentfile = shift @inst; my $ir_type = shift @inst; if (exists $backend{$ir_type}) { push @code, &{$backend{$ir_type}}(@inst); } else { asmerror "Unknown IR type $ir_type"; } } if($errorcount == 0) { my $outfile = $ARGV[1]; $outfile = "p65.out" if ($outfile eq ""); open OUTPUT, ">$outfile" or die "Failed to create $outfile"; binmode OUTPUT; print OUTPUT pack "c*", @code; } else { print "$errorcount errors"; }