brainf*ck compiler for 32 bit linux.

Linux specific questions.
Post Reply
yetifoot
Posts: 1710
Joined: Sep 11, 2005 7:08
Location: England
Contact:

brainf*ck compiler for 32 bit linux.

Post by yetifoot »

Here's a fun little program. It takes as its input a brainf*ck program, and outputs a 32 bit linux static binary. Who needs such luxuries as an assembler or linker when you can output RAW x86 MACHINE CODE and your own ELF header?

Instructions.

Save the files in the code blocks as bfc.bas and hello.b

Do these instruction at a console.

fbc bfc.bas
./bfc < hello.b > hello
chmod +x hello
./hello

bfc.bas

Code: Select all

#include once "crt.bi"

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' WELCOME to a horrendous piece of filth!  This program generates static binaries
' suitable for running on 32 bit linux.  It uses lots of horrible hax and trix
' to try and keep the output code small, and sometimes not so slow either.
' You can use this program like this:
' ./bfc < hello.b > hello
' chmod +x hello
' ./hello

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' The ELF header for linux binaries.  We don't need the section header one here
' as it's all one section.
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

#define EI_NIDENT 16

type Elf32_Half as ushort
type Elf32_Word as uinteger
type Elf32_Off  as uinteger
type Elf32_Addr as uinteger

type Elf32_Ehdr field=1
        as ubyte           e_ident(0 to EI_NIDENT - 1)
        as Elf32_Half      e_type
        as Elf32_Half      e_machine
        as Elf32_Word      e_version
        as Elf32_Addr      e_entry
        as Elf32_Off       e_phoff
        as Elf32_Off       e_shoff
        as Elf32_Word      e_flags
        as Elf32_Half      e_ehsize
        as Elf32_Half      e_phentsize
        as Elf32_Half      e_phnum
        as Elf32_Half      e_shentsize
        as Elf32_Half      e_shnum
        as Elf32_Half      e_shtrndx
end type

type Elf32_Phdr field=1
        as Elf32_Word      p_type
        as Elf32_Word      p_offset
        as Elf32_Word      p_vaddr
        as Elf32_Word      p_paddr
        as Elf32_Word      p_filesz
        as Elf32_Word      p_memsz
        as Elf32_Word      p_flags
        as Elf32_Word      p_align
end type

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Functions that deal with reading the program from the stdin
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

dim shared as string getchar_buffer ' a buffer will be kept of the input to allow
                                    ' peeking for things like [-] in order to optimize

' get_bf_char reads from stdin until it finds a legit bf instruction char or EOF, then
' returns it
function get_bf_char() as integer

	dim as integer c

	c = getchar()

	if c = EOF_ then return c

	select case chr(c)
		case "+", "-", ">", "<", ".", ",", "[", "]"
			return c
		case else
			return get_bf_char() ' was a none bf instruction character
			                     ' so try again.
	end select

end function

' Try and keep the buffer full up to 1024 bytes
sub fill_buffer()

	dim as integer c

	while len(getchar_buffer) < 1024
		c = get_bf_char()
		if c = EOF_ then exit while
		getchar_buffer += chr(c)
	wend

end sub

' The function that the actual parser will use to read an instruction
function readinst() as integer

	fill_buffer()

	if len(getchar_buffer) = 0 then
		return EOF_
	else
		function = getchar_buffer[0]
		getchar_buffer = mid(getchar_buffer, 2)
	end if

end function

' Peek in the buffer, for looking ahead
function peekinst(byval idx as integer) as integer

	fill_buffer()

	if idx >= len(getchar_buffer) then
		return EOF_
	else
		function = getchar_buffer[idx]
	end if

end function

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''

dim shared as string code, rtlib ' code will contain the x86 code, rtlib will contain the
                                 ' x86 code to implement getchar/putchar
dim shared as integer c ' c is the current char/instruction

sub parse()

	' loop over the bf code
	while c <> EOF_
		dim as integer cnt
		dim as integer op = c

		' look for repeating instructions, in order to optimze
		select case chr(c)
			case "+", "-", ">", "<"
				while c = op
					cnt += 1
					c = readinst()
				wend
		end select

		' create the x86 code!
		select case chr(op)
			case "+"
				if cnt = 1 then
					code += !"\xFE\x01" ' inc byte [ecx]
				else
					code += !"\x80\x01" & chr(cnt and 255) ' add byte [ecx], cnt
				end if
				continue while
			case "-"
				if cnt = 1 then
					code += !"\xFE\x09" ' dec byte [ecx]
				else
					code += !"\x80\x29" & chr(cnt and 255) ' sub byte [ecx], cnt
				end if
				continue while
			case ">"
				if cnt < 6 then
					for i as integer = 0 to cnt - 1
						code += !"\x41" ' inc ecx
					next i
				else
					code += !"\x81\xC1" & mki(cnt) ' add ecx, cnt
				end if
				continue while
			case "<"
				if cnt < 6 then
					for i as integer = 0 to cnt - 1
						code += !"\x49" ' dec ecx
					next i
				else
					code += !"\x81\xE9" & mki(cnt) ' sub ecx, cnt
				end if
				continue while
			case "."
				code += !"\xFF\xD7" ' call edi (write)
			case ","
				code += !"\xFF\xD6" ' call esi (read)
			case "["
				if (peekinst(0) = asc("-")) and (peekinst(1) = asc("]")) then
					' [-] in bf code effectively zeroes a cell, so optimize that
					code += !"\xC6\x01" & chr(0) ' mov byte [ecx],0x0
					c = readinst() ' dump -
					c = readinst() ' dump ]
				else
					c = readinst() ' dump [

					dim as integer addr1 = len(code)
					code += !"\x80\x39" & chr(&H00)     ' cmp byte [ecx],0x0
					code += !"\x0F\x84\xFF\xFF\xFF\xFF" ' jz dword 0xFFFFFFFF
					dim as integer addr1a = len(code)   ' store patch address

					parse() ' recurse! to deal with the inside of the loop
	
					dim as integer addr2 = len(code)
					code += !"\x80\x39" & chr(&H00)          ' cmp byte [ecx],0x0
					dim as integer temp = addr1 - addr2
					if (temp >= -132) and (temp <= 123) then
						code += !"\x75" & chr(temp+4)      ' jnz 0xFF
					else
						code += !"\x0F\x85" & mki(temp)  ' jnz dword 0xFFFFFFFF
					end if
					dim as integer addr2a = len(code)

					*cast(uinteger ptr, @code[addr1+5]) = addr2a - addr1a ' patch the first jz
				end if
			case "]"
				exit sub ' unrecurse!
		end select
		c = readinst()
	wend

end sub

' The runtime library... putchar/getchar implemented as syscalls
rtlib += !"\x31\xC0"         ' xor eax,eax
rtlib += !"\x0C\x04"         ' or al,0x4 (sys_write)
rtlib += !"\x31\xDB"         ' xor ebx,ebx
rtlib += !"\x43"             ' inc ebx (fd=stdout)
rtlib += !"\xCD\x80"         ' int 0x80
rtlib += !"\xC3"             ' ret
rtlib += !"\x31\xC0"         ' xor eax,eax
rtlib += !"\x0C\x03"         ' or al,0x3 (sys_read)
rtlib += !"\x31\xDB"         ' xor ebx,ebx (fd=stdin)
rtlib += !"\xCD\x80"         ' int 0x80
rtlib += !"\xC3"             ' ret

' These 3 lines will use a trick to find the real address of the runtime library
' and store the address of the putchar function in edi
code += !"\xE8" & mki(0)                ' call dword 0x5
code += !"\x5F"                         ' pop edi
code += !"\x83\xEF" & chr(5+len(rtlib)) ' sub edi,byte +0x5

' and store the getchar function address in esi
code += !"\x89\xFE"                     ' mov esi,edi
code += !"\x83\xC6\x0A"                 ' add esi,byte +0xa

' allocate 30000 bytes on the stack, and put a pointer to it in ecx.  ecx will
' then be used by all code as the data pointer for the program.  This is
' convenient because the syscalls read/write take their pointer in ecx too.
code += !"\x81\xEC" & mki(30000) ' sub esp,0x7530
' Perhaps at this point some code should be created that will zero that memory..
' but at least on my machine the stack appears to be pre-zeroed.  This is likely
' a security feature, so if the memory is not zero on your box, then maybe your
' distro is not very secure!  Anyway, this whole program is a hack, so I don't
' mind relying on something like this.
code += !"\x89\xE1"              ' mov ecx,esp
code += !"\xBA" & mki(&H01)      ' mov edx,0x1 (count=1) this is used for the syscalls

c = readinst() ' prime the pump!

' Now parse and generate the actual program code
parse()

' And finalise the code with a syscall to make a clean exit.
code += !"\x31\xC0"     ' xor eax,eax
code += !"\x40"         ' inc eax (sys_exit)
code += !"\x31\xDB"     ' xor ebx,ebx
code += !"\xCD\x80"     ' int 0x80

' Now we build the ELF headers
dim as Elf32_Ehdr eh
dim as Elf32_Phdr ph
dim as uinteger origin = &H08048000 ' where the program will be loaded

eh.e_ident(0) = &H7F
eh.e_ident(1) = asc("E")
eh.e_ident(2) = asc("L")
eh.e_ident(3) = asc("F")
eh.e_ident(4) = 1
eh.e_ident(5) = 1
eh.e_ident(6) = 1

eh.e_type = 2
eh.e_machine = 3
eh.e_version = 1
eh.e_entry = origin + sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) + len(rtlib)
eh.e_phoff = sizeof(Elf32_Ehdr)
eh.e_ehsize = sizeof(Elf32_Ehdr)
eh.e_phentsize = sizeof(Elf32_Phdr)
eh.e_phnum = 1

ph.p_type = 1
ph.p_vaddr = origin
ph.p_paddr = origin
ph.p_filesz = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) + len(code) + len(rtlib)
ph.p_memsz = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) + len(code) + len(rtlib)
ph.p_flags = 5
ph.p_align = &H1000

' transfer those headers to strings so they can be PRINT'ed

dim as string eh_s = space(sizeof(Elf32_Ehdr))
dim as string ph_s = space(sizeof(Elf32_Phdr))

*cast(Elf32_Ehdr ptr, strptr(eh_s)) = eh
*cast(Elf32_Phdr ptr, strptr(ph_s)) = ph

' Now send the whole thing to stdout! (You should do ./fbc < hello.b > hello or similar)
print eh_s; ph_s; rtlib; code;

' FIN
hello.b ripped from the wikipedia page on brainf*ck

Code: Select all

+++++ +++++             initialize counter (cell #0) to 10
[                       use loop to set the next four cells to 70/100/30/10
    > +++++ ++              add  7 to cell #1
    > +++++ +++++           add 10 to cell #2 
    > +++                   add  3 to cell #3
    > +                     add  1 to cell #4
    <<<< -                  decrement counter (cell #0)
]                   
> ++ .                  print 'H'
> + .                   print 'e'
+++++ ++ .              print 'l'
.                       print 'l'
+++ .                   print 'o'
> ++ .                  print ' '
<< +++++ +++++ +++++ .  print 'W'
> .                     print 'o'
+++ .                   print 'r'
----- - .               print 'l'
----- --- .             print 'd'
> + .                   print '!'
> .                     print '\n'
wolfman775
Posts: 104
Joined: Apr 30, 2009 15:20
Location: Dumbarton, Scotland

Post by wolfman775 »

Interesting language. I couldn't stop loling at this from wikipedia tho:
it is 1.5 times more memory hungry than the previous example, requiring 3 bytes of memory
D.J.Peters
Posts: 8586
Joined: May 28, 2005 3:28
Contact:

Post by D.J.Peters »

wolfman775 wrote:Interesting language. I couldn't stop loling at this from wikipedia tho:
it is 1.5 times more memory hungry than the previous example, requiring 3 bytes of memory
:lol:
cha0s
Site Admin
Posts: 5319
Joined: May 27, 2005 6:42
Location: USA
Contact:

Post by cha0s »

You're sick, yetifoot! Sick!
rugxulo
Posts: 219
Joined: Jun 30, 2006 5:31
Location: Usono (aka, USA)
Contact:

Post by rugxulo »

Nice job! Awesome stuff.

BTW, I hope this doesn't rain on your parade (and no, I didn't write it). ;-)
cha0s
Site Admin
Posts: 5319
Joined: May 27, 2005 6:42
Location: USA
Contact:

Post by cha0s »

rugxulo wrote:Nice job! Awesome stuff.

BTW, I hope this doesn't rain on your parade (and no, I didn't write it). ;-)
... Now that truly lives up to the name.
Post Reply