Fast filled polygons in BASIC (all video modes + clipping)

D.J.Peters · Post by **D.J.Peters** » Feb 02, 2015 2:27

Fast filled convex polygons in BASIC (all video modes + clipping)

Joshy

type vector2d
  as integer x,y
end type

sub FilledPolygon(TargetPtr as any ptr, _ ' 0 = screen otherwise image ptr
                  p()       as vector2d , _ ' the screen coords (x,y)
                  n         as integer  , _ ' how many coords in array
                  red       as ubyte    , _ ' color
                  green     as ubyte    , _
                  blue      as ubyte)

  static as integer palflag=0
  dim as integer TargetWidth=any,TargetHeight=any,TargetBytes=any,TargetPitch=any
  dim as integer f=any,t=any,b=any,l=any,r=any
  dim as integer lc=any,nlc=any,rc=any,nrc=any
  dim as integer d1=any,s1=any,d2=any,s2 =any,cl=any,cr=any
  dim as ubyte  c8 =any
  dim as ushort c16=any
  dim as ulong  c24=any
  dim as any ptr row=any

  n-=1
  if n<2 then exit sub

  if TargetPtr=0 then
    TargetPtr=screenptr() ' first pixel top left on screen
    if TargetPtr=0 then exit sub
    ScreenInfo    _
    TargetWidth , _
    TargetHeight,, _
    TargetBytes , _
    TargetPitch
  else
    ImageInfo     _
    TargetPtr   , _
    TargetWidth , _
    TargetHeight, _
    TargetBytes , _
    TargetPitch , _
    TargetPtr
  end if

  select case as const TargetBytes
  case 1
    if palflag=0 then ' define a RGB palette only once
      dim as integer r8,g8,b8
      for i as uinteger= 0 to 255
        r8=(((i shr 5) and &H07) * 255) / 7
        g8=(((i shr 2) and &H07) * 255) / 7
        b8=(((i shr 0) and &H03) * 255) / 3
        palette i,r8,g8,b8
      next
     palflag=1
    end if
    #define RGB8(_r,_g,_b) ( (_r and &HE0) or ((_g and &HE0) shr 3) or ((_b and &HC0) shr 6) )
    c8=rgb8(red,green,blue)
    #undef RGB8
  case 2
    palflag=0
    #define RGB16(_r,_g,_b) (((_r shr 3) shl 11) or ((_g shr 2) shl 5) or (_b shr 3))
    c16=rgb16(red,green,blue)
    #undef RGB16
  case 4
    palflag=0
    c24=rgb(red,green,blue)
  end select

  ' top bottom left right (clipping)
  #define mr 1000000
  t= mr: b=-mr :  l= mr : r=-mr
  #undef mr
  for nc as integer=0 to n
    with p(nc)
      if .y<t then t=.y:f=nc ' top
      if .y>b then b=.y      ' bottom
      if .x<l then l=.x      ' left
      if .x>r then r=.x      ' right
    end with
  next
  ' clip
  if l>=TargetWidth  then exit sub  ' left is outside
  if r<1             then exit sub  ' right is outside
  if t>=TargetHeight then exit sub  ' top is outside
  if b<0             then exit sub  ' bottom is outside
  if (r-l)<1         then exit sub  ' 0 pixels width
  if b>=TargetHeight then b=TargetHeight-1 ' clip bottom
  if (b-t)<1         then exit sub  ' 0 pixels height
  ' left and next left counter
  lc=f:nlc=lc-1:if nlc<0 then nlc=n
  ' right and next right counter
  rc=f:nrc=rc+1:if nrc>n then nrc=0
 
  if p(nlc).x>p(nrc).x then exit sub
  
  row=TargetPtr+t*TargetPitch
  #define SHIFTS 8 ' fixed point format
  
  ' from top to bottom
  while t<b
    if t=p(lc).y then
      while p(lc).y=p(nlc).y
        lc=nlc:nlc-=1:if nlc<0 then nlc=n
      wend
      d1=p(lc).x shl SHIFTS
      s1=((p(nlc).x-p(lc).x) shl SHIFTS)/(p(nlc).y-p(lc).y)
      lc = nlc
    end if
    if t=p(rc).y then
      while p(rc).y=p(nrc).y
        rc=nrc:nrc+=1:if nrc>n then nrc=0
      wend
      d2=p(rc).x shl SHIFTS
      s2=((p(nrc).x-p(rc).x) shl SHIFTS)/(p(nrc).y-p(rc).y)
      rc=nrc
    end if
    if t>-1 then
      l=d1 shr SHIFTS ' most left  pixel
      r=d2 shr SHIFTS ' most right pixel
      if l>r then swap l,r
      if l<TargetWidth andalso r>-1 then
        if l<0 then l=0
        if r>=TargetWidth then r=TargetWidth-1
        select case as const TargetBytes
        case 1
          var s=cptr(ubyte ptr,row)+l
          var e=cptr(ubyte ptr,row)+r
          while s<e : *s=c8  : s+=1:wend
          *e=c8
        case 2
          var s=cptr(ushort ptr,row)+l
          var e=cptr(ushort ptr,row)+r
          while s<e : *s=c16 : s+=1:wend
          *e=c16
        case 4
          var s=cptr(ulong ptr,row)+l
          var e=cptr(ulong ptr,row)+r
          while s<e : *s=c24 : s+=1:wend
          *e=c24
        end select
      end if
    end if
    t+=1
    d1+=s1
    d2+=s2
    row+=TargetPitch
  wend
  #undef SHIFTS
end sub

'
' main
'
const n=11
dim as vector2d t(2),q(3),p(n)
dim as integer fps,frames,mx,my,ox,oy
dim as single  w1,w2,w3
dim as double  t1,t2



screenres 640,480,8,2
screenset 1,0

t1=timer
while inkey=""
  if getmouse(ox,oy)=0 then
    mx=ox:my=oy
  end if
  for i as integer=0 to 2
    with t(i) ' triangle
      .x=mx+cos(w1+i*6.28/3)*50-100
      .y=my+sin(w1+i*6.28/3)*50
    end with
  next
  for i as integer=0 to 3
    with q(i) ' quad
      .x=mx+cos(w2+i*6.28/4)*50
      .y=my+sin(w2+i*6.28/4)*50
    end with
  next
  for i as integer=0 to n
    with p(i) ' polygon
      .x=mx+cos(w3+i*6.28/n)*50+100
      .y=my+sin(w3+i*6.28/n)*50
    end with
  next
  w1+=0.0001
  w2+=0.0002
  w3+=0.0003
  cls
  FilledPolygon 0,t(),3, 255, 32, 32 ' triangle
  FilledPolygon 0,q(),4,  32, 32,255 ' quad
  FilledPolygon 0,p(),n,  32,255, 32 ' polygon
  draw string (0,0),"move the mouse ... fps: " & fps,255
  flip
  frames+=1
  if frames mod 1000=0 then
    t2=timer
    fps=1000/(t2-t1): t1=t2
  end if
  ' sleep 10 ' disabled only for fps measurement
wend

BasicCoder2 · Post by **BasicCoder2** » Feb 02, 2015 3:35

I don't know what algorithm you are using but decided to see what kinds of polygons it could handle.

Code: Select all

'
' main
'
'read points
dim as integer n
restore shape1      'restore to whatever shape in data statements required
read n
dim as vector2d p(n)
for i as integer = 0 to n-1
    read p(i).x
    read p(i).y
    p(i).x = p(i).x + 200
    p(i).y = p(i).y + 200
next i


screenres 640,480,32 ' you can use 8,15,16,24 or 32 bits
  
  'polygone targetPtr, coord, number, r, g, b
  
  screenlock
    cls
    polygone 0,p(),n,  100,255, 100
    'overlay outline
    for i as integer = 0 to  n-2
        line (p(i).x,p(i).y)-(p(i+1).x,p(i+1).y),rgb(255,255,255)
    next i
    line (p(0).x,p(0).y)-(p(n-1).x,p(n-1).y),rgb(255,255,255)
    
  screenunlock
  
sleep

shape1:        'complex polygon bow tie
data 4   'number of points
data -40,-40
data 40,40
data 40,-40
data -40,40

shape2:
data 4    'number of points
data -40,-40    'square
data  40,-40
data  40, 40
data -40, 40

shape3:
data 8
data -40,-40
data  40,-40
data  40, 40
data -40, 40
data -40, 20
data  20, 20
data  20,-20
data -40,-20

D.J.Peters · Post by **D.J.Peters** » Feb 02, 2015 4:24

BasicCoder2 your shape1 isn't a convex or concave polygon looks more as a polyline. (I don't know the right word in english)

My algo is primary for fast filled convex polygons.

You can describe any filled shape with a set of filled triangles.

For filled polylines you need an algo with rules for the right winding order.

Back to my algo GCC don't optimize it and you can use RGB with 8 and 16-bit also.

The tricky part are i scan the borders with fixed point math not slow float or double.

Joshy

BasicCoder2 · Post by **BasicCoder2** » Feb 02, 2015 5:04

D.J.Peters,
BasicCoder2 your shape1 isn't a convex or concave polygon looks more as a polyline. (I don't know the right word in english)

A complex (self-intersecting) polygon.

dafhi · Post by **dafhi** » Feb 06, 2015 17:40

First i must congratulate DJP on this fine concept. I've reduced the number of parameters in Polygone(), and added some Dims to compensate. Everything following the Dims is unchanged.

Anyway I found a bug.

Code: Select all

type vector2d
  as integer x,y
end type

sub Polygone(TargetPtr as any ptr, _ ' 0 = screen otherwise image ptr
             p()       as vector2d , _ ' the coords (x,y)
             _col      as uinteger)

  dim as ubyte    red = _col shr 16
  dim as ubyte    green = _col shr 8
  dim as ubyte    blue = _col
  static as integer palflag=0
  dim as integer  TargetWidth=any,TargetHeight=any,TargetBytes=any,TargetPitch=any
  dim as integer  n = ubound(p)-lbound(p)+1
  dim as integer  f  =any,t =any,b=any,l =any,r=any
  dim as integer  lc =any,nlc=any,rc=any,nrc=any
  dim as integer  d1 =any,s1 =any,d2=any,s2 =any,cl=any,cr=any
  dim as ubyte    c8=any ' 8 bit color
  dim as ushort   c16=any ' 16 bit color
  dim as uinteger c24=any ' 24 bit color
  dim as any ptr  row=any

  n-=1:if n<2 then exit sub

  if TargetPtr=0 then
    screeninfo    _
    TargetWidth , _
    TargetHeight, _
    TargetBytes ,,_
    TargetPitch
    TargetBytes shr=3 ' bits to bytes
    TargetPtr=screenptr() ' first pixel top left
  else
    TargetBytes  = cptr(uinteger ptr,TargetPtr)[1]
    TargetWidth  = cptr(uinteger ptr,TargetPtr)[2]
    TargetHeight = cptr(uinteger ptr,TargetPtr)[3]
    TargetPitch  = cptr(uinteger ptr,TargetPtr)[4]
    TargetPtr    += 32 ' first pixel after the header
  end if

  select case as const TargetBytes
  case 1
    if palflag=0 then
      dim as integer r8,g8,b8
      for i as uinteger= 0 to 255
        r8=(((i shr 5) and &H07) * 255) / 7
        g8=(((i shr 2) and &H07) * 255) / 7
        b8=(((i shr 0) and &H03) * 255) / 3
        palette i,r8,g8,b8
      next
     palflag=1
    end if
    #define RGB8(_r,_g,_b) ( (_r and &HE0) or ((_g and &HE0) shr 3) or ((_b and &HC0) shr 6) )
    c8=rgb8(red,green,blue)
    #undef RGB8
  case 2
    palflag=0
    #define RGB16(_r,_g,_b) (((_r shr 3) shl 11) or ((_g shr 2) shl 5) or (_b shr 3))
    c16=rgb16(red,green,blue)
    #undef RGB16
  case 4
    palflag=0
    c24=rgb(red,green,blue)
  end select

  ' top bottom left right (clipping)
  #define mr 1000000
  t= mr: b=-mr :  l= mr : r=-mr
  #undef mr
  for nc as integer=0 to n
    with p(nc)
      if .y<t then t=.y:f=nc ' top
      if .y>b then b=.y      ' bottom
      if .x<l then l=.x      ' left
      if .x>r then r=.x      ' right
    end with
  next
  ' clip
  if l>=TargetWidth  then exit sub  ' left is outside
  if r<1             then exit sub  ' right is outside
  if t>=TargetHeight then exit sub  ' top is outside
  if b<0             then exit sub  ' bottom is outside
  if (r-l)<1         then exit sub  ' 0 pixels width
  if b>=TargetHeight then b=TargetHeight-1 ' clip bottom
  if (b-t)<1         then exit sub  ' 0 pixels height
  ' left and next left counter
  lc=f:nlc=lc-1:if nlc<0 then nlc=n
  ' right and next right counter
  rc=f:nrc=rc+1:if nrc>n then nrc=0
 
  if p(nlc).x>p(nrc).x then exit sub
  row=TargetPtr+t*TargetPitch
  #define SHIFTS   8 ' 24:8 fixed point format
  ' from top to bottom
  while t<b
    if t=p(lc).y then
      while p(lc).y=p(nlc).y
        lc=nlc:nlc-=1:if nlc<0 then nlc=n
      wend
      d1=p(lc).x shl SHIFTS
      s1=((p(nlc).x-p(lc).x) shl SHIFTS)/(p(nlc).y-p(lc).y)
      lc = nlc
    end if
    if t=p(rc).y then
      while p(rc).y=p(nrc).y
        rc=nrc:nrc+=1:if nrc>n then nrc=0
      wend
      d2=p(rc).x shl SHIFTS
      s2=((p(nrc).x-p(rc).x) shl SHIFTS)/(p(nrc).y-p(rc).y)
      rc=nrc
    end if
    if t>-1 then
      l=d1 shr SHIFTS ' most left  pixel
      r=d2 shr SHIFTS ' most right pixel
      if l>r then swap l,r
      if l<TargetWidth andalso r>-1 then
        if l<0 then l=0
        if r>=TargetWidth then r=TargetWidth-1
        select case as const TargetBytes
        case 4
          var s=cptr(uinteger ptr,row)+l
          var e=cptr(uinteger ptr,row)+r
          while s<e : *s=c24 : s+=1:wend
          *e=c8
        case 2
          var s=cptr(ushort ptr,row)+l
          var e=cptr(ushort ptr,row)+r
          while s<e : *s=c16 : s+=1:wend
          *e=c16
        case 1
          var s=cptr(ubyte ptr,row)+l
          var e=cptr(ubyte ptr,row)+r
          while s<e : *s=c8 : s+=1:wend
          *e=c8
        end select
      end if
    end if
    t+=1:d1+=s1:d2+=s2:row+=TargetPitch
  wend
  #undef SHIFTS
end sub


'
' main
'
#define SCRX 600
#define SCRY 400

type dbl_v2d
  as double   x, y
end type

Type poly
  As uinteger n, col
  as double   ang, iang, x,y
  as dbl_v2d  _v(Any)
  Declare Sub newpoint(x As double=0, y As double=0)
  Declare Sub calculate
 Private:
  As Double   px,py, pa
End Type
Sub poly.newpoint(x as double, y as double)
  redim preserve _v(n)
  _v(n).x = x: _v(n).y = y: n += 1
End Sub
Sub poly.calculate
  if n < 1 then exit sub
  
  dim as double dx=x-px, dy=y-py
  if dx <> 0 or dy <> 0 then
    for p as dbl_v2d ptr = @_v(0) to @_v(n-1)
      p->x += dx:  p->y += dy
    next: px=x: py=y
  end if
  
  if pa <> ang then
    dim as double da=ang-pa
    dim as double sina = sin(da), cosa = cos(da)
    for p as dbl_v2d ptr = @_v(0) to @_v(n-1)
      dx=p->x-x: dy=p->y-y
      dim as double tx=x+cosa*dx, ty=y+cosa*dy
      p->x=tx-sina*dy: p->y=ty+sina*dx
    next: pa=ang
  end if
  
End Sub


Sub NewPolys(pol() As poly)
  Const pi      = 4*Atn(1)
  Const TwoPi   = 8*Atn(1)
  Const border  = 50
  const fieldx  = (SCRX-1) - 2*border
  const fieldy  = (SCRY-1) - 2*border
  For p As poly ptr = @pol(LBound(pol)) To @pol(UBound(pol))
    p->n=0: redim p->_v(0)
    dim as integer  points_ubound = 2'+rnd*19
    dim as single   q=-1
    for i as integer = 0 to points_ubound
      dim as double a = Rnd*TwoPi, r = Rnd*150
      q += .9
      r = q * 50
      p->newpoint r*cos(a), r*sin(a)
    next
    p->x=350'border+Rnd*fieldx
    p->y=100'border+Rnd*fieldy
    p->col = RGB(Rnd*255,Rnd*255,Rnd*255)
    p->iang = (Rnd+0.5) * pi / 200
  Next
End Sub


Dim As string             kstr = InKey
Dim As integer            asc_key, BOOL_NEW = 1

Dim As poly               pol(0)

screenres SCRX,SCRY,32,2
screenset 0,1

Do
  
    kstr = InKey:  asc_key = Asc(kstr)
    
    If BOOL_NEW Then NewPolys pol()
    BOOL_NEW = asc_key <> 0
    
    cls
    For p As poly ptr = @pol(LBound(pol)) To @pol(UBound(pol))
      p->calculate
      dim as vector2d v(p->n-1)
      for i as integer = 0 to p->n - 1
        v(i).x = p->_v(i).x
        v(i).y = p->_v(i).y
      next
      Polygone 0, v(), p->col
    Next
    flip
    
    For p As poly ptr = @pol(LBound(pol)) To @pol(UBound(pol))
      p->ang += p->iang
    next
    
    sleep 15
loop until multikey(1)

Dr_D · Post by **Dr_D** » Feb 12, 2015 3:33

Not sure about the bug... I didn't study the code enough to see how it all works. It was smokin' fast though. ;)

dafhi · Post by **dafhi** » Feb 13, 2015 20:47

microsoft onedrive - this is what i see

D.J.Peters · Post by **D.J.Peters** » Feb 14, 2015 3:42

dafhi wrote:Anyway I found a bug.

but not in my code.

Joshy

dafhi · Post by **dafhi** » Feb 14, 2015 4:48

your original Polygone() and video

Code: Select all

type vector2d
  as integer x,y
end type

sub Polygone(TargetPtr as any ptr, _ ' 0 = screen otherwise image ptr
             p()       as vector2d , _ ' the coords (x,y)
             n         as integer  , _ ' how many coords in array
             red       as ubyte    , _ ' color
             green     as ubyte    , _
             blue      as ubyte)  

  static as integer palflag=0
  dim as integer TargetWidth=any,TargetHeight=any,TargetBytes=any,TargetPitch=any
  dim as integer f  =any,t =any,b=any,l =any,r=any
  dim as integer lc =any,nlc=any,rc=any,nrc=any
  dim as integer d1 =any,s1 =any,d2=any,s2 =any,cl=any,cr=any
  dim as ubyte c8=any ' 8 bit color
  dim as ushort c16=any ' 16 bit color
  dim as uinteger c24=any ' 24 bit color
  dim as any ptr row=any

  n-=1:if n<2 then exit sub

  if TargetPtr=0 then
    screeninfo    _
    TargetWidth , _
    TargetHeight, _
    TargetBytes ,,_
    TargetPitch
    TargetBytes shr=3 ' bits to bytes
    TargetPtr=screenptr() ' first pixel top left
  else
    TargetBytes  = cptr(uinteger ptr,TargetPtr)[1]
    TargetWidth  = cptr(uinteger ptr,TargetPtr)[2]
    TargetHeight = cptr(uinteger ptr,TargetPtr)[3]
    TargetPitch  = cptr(uinteger ptr,TargetPtr)[4]
    TargetPtr    += 32 ' first pixel after the header
  end if

  select case as const TargetBytes
  case 1
    if palflag=0 then
      dim as integer r8,g8,b8
      for i as uinteger= 0 to 255
        r8=(((i shr 5) and &H07) * 255) / 7
        g8=(((i shr 2) and &H07) * 255) / 7
        b8=(((i shr 0) and &H03) * 255) / 3
        palette i,r8,g8,b8
      next
     palflag=1
    end if
    #define RGB8(_r,_g,_b) ( (_r and &HE0) or ((_g and &HE0) shr 3) or ((_b and &HC0) shr 6) )
    c8=rgb8(red,green,blue)
    #undef RGB8
  case 2
    palflag=0
    #define RGB16(_r,_g,_b) (((_r shr 3) shl 11) or ((_g shr 2) shl 5) or (_b shr 3))
    c16=rgb16(red,green,blue)
    #undef RGB16
  case 4
    palflag=0
    c24=rgb(red,green,blue)
  end select

  ' top bottom left right (clipping)
  #define mr 1000000
  t= mr: b=-mr :  l= mr : r=-mr
  #undef mr
  for nc as integer=0 to n
    with p(nc)
      if .y<t then t=.y:f=nc ' top
      if .y>b then b=.y      ' bottom
      if .x<l then l=.x      ' left
      if .x>r then r=.x      ' right
    end with
  next
  ' clip
  if l>=TargetWidth  then exit sub  ' left is outside
  if r<1             then exit sub  ' right is outside
  if t>=TargetHeight then exit sub  ' top is outside
  if b<0             then exit sub  ' bottom is outside
  if (r-l)<1         then exit sub  ' 0 pixels width
  if b>=TargetHeight then b=TargetHeight-1 ' clip bottom
  if (b-t)<1         then exit sub  ' 0 pixels height
  ' left and next left counter
  lc=f:nlc=lc-1:if nlc<0 then nlc=n
  ' right and next right counter
  rc=f:nrc=rc+1:if nrc>n then nrc=0
 
  if p(nlc).x>p(nrc).x then exit sub
  row=TargetPtr+t*TargetPitch
  #define SHIFTS   8 ' 24:8 fixed point format
  ' from top to bottom
  while t<b
    if t=p(lc).y then
      while p(lc).y=p(nlc).y
        lc=nlc:nlc-=1:if nlc<0 then nlc=n
      wend
      d1=p(lc).x shl SHIFTS
      s1=((p(nlc).x-p(lc).x) shl SHIFTS)/(p(nlc).y-p(lc).y)
      lc = nlc
    end if
    if t=p(rc).y then
      while p(rc).y=p(nrc).y
        rc=nrc:nrc+=1:if nrc>n then nrc=0
      wend
      d2=p(rc).x shl SHIFTS
      s2=((p(nrc).x-p(rc).x) shl SHIFTS)/(p(nrc).y-p(rc).y)
      rc=nrc
    end if
    if t>-1 then
      l=d1 shr SHIFTS ' most left  pixel
      r=d2 shr SHIFTS ' most right pixel
      if l>r then swap l,r
      if l<TargetWidth andalso r>-1 then
        if l<0 then l=0
        if r>=TargetWidth then r=TargetWidth-1
        select case as const TargetBytes
        case 4
          var s=cptr(uinteger ptr,row)+l
          var e=cptr(uinteger ptr,row)+r
          while s<e : *s=c24 : s+=1:wend
          *e=c8
        case 2
          var s=cptr(ushort ptr,row)+l
          var e=cptr(ushort ptr,row)+r
          while s<e : *s=c16 : s+=1:wend
          *e=c16
        case 1
          var s=cptr(ubyte ptr,row)+l
          var e=cptr(ubyte ptr,row)+r
          while s<e : *s=c8 : s+=1:wend
          *e=c8
        end select
      end if
    end if
    t+=1:d1+=s1:d2+=s2:row+=TargetPitch
  wend
  #undef SHIFTS
end sub

sub drawTriangleAHS(dest as integer ptr = 0,_
                    x0 as double, y0 as double,_
                    x1 as double, y1 as double,_
                    x2 as double, y2 as double,_
                    col as integer)
                   
    #define ALLIGNMENT 16
    #define PACKED_DATA_SIZE 108
    #define FIX_BITS 4
    #define FIX_BITS_VALUE (1 shl FIX_BITS)
    #define MIN_BLOCK_X 8
    #define MIN_BLOCK_y 8
    #define MAX_BLOCK_X 64
    #define MAX_BLOCK_Y 32
    #define _0_   0
    #define _1_   4
    #define _2_   8
   
    #define _hconst0_ 0
    #define _hconst1_ 16
    #define _hconst2_ 32
    #define _dx0_ 48
    #define _dx1_ 64
    #define _dx2_ 80
    #define _dy0_ 96
    #define _dy1_ 112
    #define _dy2_ 128
    #define _cxy_ 144
    #define _pblockCY_ 160
    #define _pblockDX_ 176
    #define _pblockDY_ 192
    #define _cyx_ 208
    #define _pblockQFD0_ 224
    #define _pblockQFD1_ 240
    #define _pblockQFD2_ 256
    #define _pblockQSD0_ 272
    #define _pblockQSD1_ 288
    #define _pblockQSD2_ 304
    #define _pblockQSB0_ 320
    #define _pblockQSB1_ 336
    #define _pblockQSB2_ 352
    #define _pblockQAB0_ 368
    #define _pblockQAB1_ 384
    #define _pblockQAB2_ 400
    #define _bcol_ 416
   
    #define GET_SIZE_X(x) 1 shl (((x) and &h03) + 3)
    #define GET_SIZE_Y(x) 1 shl (((x) shr &h02) + 3)

    #macro EXPAND(x, y)
        x[y + 1] = x[y]
        x[y + 2] = x[y]
        x[y + 3] = x[y]
    #endmacro
   
    #define STACK_SIZE 16
   
    #macro PUSH_BLOCK()
        stackPtr += 1
        stack(stackPtr, 0) = scx
        stack(stackPtr, 1) = scy
        stack(stackPtr, 2) = splitMask
    #endmacro
   
    #macro POP_BLOCK()
        scx = stack(stackPtr, 0)
        scy = stack(stackPtr, 1)
        splitMask = stack(stackPtr, 2)
        stackPtr -= 1
    #endmacro
   
    #macro SPLIT_BLOCK_H()
        splitMask = splitY(splitMask)
        PUSH_BLOCK()
        scy += curBlockH shr 1
        PUSH_BLOCK()
    #endmacro
   
    #macro SPLIT_BLOCK_V()
        splitMask = splitX(splitMask)
        PUSH_BLOCK()
        scx += curBlockW shr 1
        PUSH_BLOCK()
    #endmacro
   
    #macro FAST_STEP_INIT()
        asm
            mov     eax,        [startLoc]
            mov     ebx,        [alligned_base]
            mov     ecx,        [stride]
            movdqa  xmm0,       _bcol_[ebx]  
        end asm
    #endmacro
   
    #macro FAST_STEP_64_LINE()
        asm
            movdqa  [eax],      xmm0
            movdqa  16[eax],    xmm0
            movdqa  32[eax],    xmm0
            movdqa  48[eax],    xmm0
            movdqa  64[eax],    xmm0
            movdqa  80[eax],    xmm0
            movdqa  96[eax],    xmm0
            movdqa  112[eax],   xmm0
            movdqa  128[eax],   xmm0
            movdqa  144[eax],   xmm0
            movdqa  160[eax],   xmm0
            movdqa  176[eax],   xmm0
            movdqa  192[eax],   xmm0
            movdqa  208[eax],   xmm0
            movdqa  224[eax],   xmm0
            movdqa  240[eax],   xmm0            
            add     eax,        ecx
        end asm
    #endmacro
    #macro FAST_STEP_32_LINE()
        asm
            movdqa  [eax],      xmm0
            movdqa  16[eax],    xmm0
            movdqa  32[eax],    xmm0
            movdqa  48[eax],    xmm0
            movdqa  64[eax],    xmm0
            movdqa  80[eax],    xmm0
            movdqa  96[eax],    xmm0
            movdqa  112[eax],   xmm0
            add     eax,        ecx
        end asm
    #endmacro
    #macro FAST_STEP_16_LINE()
        asm
            movdqa  [eax],      xmm0
            movdqa  16[eax],    xmm0
            movdqa  32[eax],    xmm0
            movdqa  48[eax],    xmm0
            add     eax,        ecx
        end asm
    #endmacro
    #macro FAST_STEP_8_LINE()
        asm
            movdqa  [eax],      xmm0
            movdqa  16[eax],    xmm0
            add     eax,        ecx
        end asm
    #endmacro
    #macro FAST_STEP_64()
        FAST_STEP_64_LINE()
        FAST_STEP_64_LINE()
        FAST_STEP_64_LINE()
        FAST_STEP_64_LINE()
        FAST_STEP_64_LINE()
        FAST_STEP_64_LINE()
        FAST_STEP_64_LINE()
        FAST_STEP_64_LINE()
    #endmacro
    #macro FAST_STEP_32()
        FAST_STEP_32_LINE()
        FAST_STEP_32_LINE()
        FAST_STEP_32_LINE()
        FAST_STEP_32_LINE()
        FAST_STEP_32_LINE()
        FAST_STEP_32_LINE()
        FAST_STEP_32_LINE()
        FAST_STEP_32_LINE()
    #endmacro
    #macro FAST_STEP_16()
        FAST_STEP_16_LINE()
        FAST_STEP_16_LINE()
        FAST_STEP_16_LINE()
        FAST_STEP_16_LINE()
        FAST_STEP_16_LINE()
        FAST_STEP_16_LINE()
        FAST_STEP_16_LINE()
        FAST_STEP_16_LINE()
    #endmacro
    #macro FAST_STEP_8()
        FAST_STEP_8_LINE()
        FAST_STEP_8_LINE()
        FAST_STEP_8_LINE()
        FAST_STEP_8_LINE()
        FAST_STEP_8_LINE()
        FAST_STEP_8_LINE()
        FAST_STEP_8_LINE()
        FAST_STEP_8_LINE()
    #endmacro
   
    dim as integer px0, py0
    dim as integer dst_w, dst_h
    dim as integer sdst_w, sdst_h
    dim as integer px1, py1
    dim as integer px2, py2
    dim as integer tl_x, tl_y
    dim as integer otl_x, otl_y
    dim as integer br_x, br_y
    dim as integer sotl_x, sotl_y
    dim as integer sbr_x, sbr_y
    dim as integer x, y
    dim as integer scx, scy
    dim as integer stackPtr
    dim as integer fdx(0 to 2)
    dim as integer fdy(0 to 2)
    dim as integer stack(0 to STACK_SIZE-1, 0 to 2)
    dim as integer slowStride
    dim as integer stride
    dim as integer curBlockW, curBlockH
    dim as integer splitMask
    dim as integer crnr(0 to 2)
    dim as integer crnrMask
    dim as integer splitX(0 to 11) = { -1,   0,   1,   2,_
                                       -1,   4,   5,   6,_
                                       -1,   8,   9,  10}
    dim as integer splitY(0 to 11) = { -1,  -1,  -1,  -1,_
                                        0,   1,   2,   3,_
                                        4,   5,   6,   7}

    dim as integer ptr dstPxls
    dim as integer ptr startLoc
   
    dim as integer ptr hconst    
    dim as integer ptr dx        
    dim as integer ptr dy        
    dim as integer ptr cxy    
    dim as integer ptr cyx
   
    dim as integer ptr pblockCY  
    dim as integer ptr pblockDX  
    dim as integer ptr pblockDY  
    dim as integer ptr pblockQFD
    dim as integer ptr pblockQSD
    dim as integer ptr pblockQAB
    dim as integer ptr pblockQSB

    dim as integer ptr bcol
   
    dim as integer ptr alligned_
    dim as integer ptr alligned_base
   
    if dest <> 0 then
        imageinfo dest, dst_w, dst_h, , ,dstPxls
    else
        screeninfo dst_w, dst_h
        dstPxls = screenptr
    end if
    sdst_w = dst_w shl FIX_BITS
    sdst_h = dst_h shl FIX_BITS

    px0 = cint(x0 * FIX_BITS_VALUE)
    py0 = cint(y0 * FIX_BITS_VALUE)
    px1 = cint(x1 * FIX_BITS_VALUE)
    py1 = cint(y1 * FIX_BITS_VALUE)
    px2 = cint(x2 * FIX_BITS_VALUE)
    py2 = cint(y2 * FIX_BITS_VALUE)

    alligned_ = allocate((sizeof(integer) * PACKED_DATA_SIZE) + (sizeof(byte) * (ALLIGNMENT - 1)))
    alligned_base = cast(integer ptr, (cast(integer, alligned_) + ALLIGNMENT) _
                    and (not (ALLIGNMENT - 1)))
   
    hconst     = @alligned_base[0]
    dx         = @alligned_base[12]
    dy         = @alligned_base[24]
    cxy        = @alligned_base[36]
    pblockCY   = @alligned_base[40]
    pblockDX   = @alligned_base[44]
    pblockDY   = @alligned_base[48]
    cyx        = @alligned_base[52]
    pblockQFD  = @alligned_base[56]
    pblockQSD  = @alligned_base[68]
    pblockQSB  = @alligned_base[80]
    pblockQAB  = @alligned_base[92]
    bcol       = @alligned_base[104]
   
    bcol[_0_] = col
    EXPAND(bcol, _0_)

    dx[_0_] = px0 - px1
    dy[_0_] = py0 - py1

    dx[_1_] = px1 - px2
    dy[_1_] = py1 - py2

    if dx[_0_] * dy[_1_] < dx[_1_] * dy[_0_] then
        swap px0, px1
        swap py0, py1
       
        dx[_0_] = -dx[_0_]
        dy[_0_] = -dy[_0_]
       
        dx[_1_] = px1 - px2
        dy[_1_] = py1 - py2
    end if

    dx[_2_] = px2 - px0
    dy[_2_] = py2 - py0
   
    fdx(0) = dx[_0_] shl FIX_BITS
    fdy(0) = dy[_0_] shl FIX_BITS
    fdx(1) = dx[_1_] shl FIX_BITS
    fdy(1) = dy[_1_] shl FIX_BITS
    fdx(2) = dx[_2_] shl FIX_BITS
    fdy(2) = dy[_2_] shl FIX_BITS
   
    pblockQSD[_0_ + 0] = 0
    pblockQSD[_0_ + 1] = pblockQSD[_0_ + 0] + fdy(0)
    pblockQSD[_0_ + 2] = pblockQSD[_0_ + 1] + fdy(0)
    pblockQSD[_0_ + 3] = pblockQSD[_0_ + 2] + fdy(0)
   
    pblockQSD[_1_ + 0] = 0
    pblockQSD[_1_ + 1] = pblockQSD[_1_ + 0] + fdy(1)
    pblockQSD[_1_ + 2] = pblockQSD[_1_ + 1] + fdy(1)
    pblockQSD[_1_ + 3] = pblockQSD[_1_ + 2] + fdy(1)  

    pblockQSD[_2_ + 0] = 0
    pblockQSD[_2_ + 1] = pblockQSD[_2_ + 0] + fdy(2)
    pblockQSD[_2_ + 2] = pblockQSD[_2_ + 1] + fdy(2)
    pblockQSD[_2_ + 3] = pblockQSD[_2_ + 2] + fdy(2)
   
    pblockQSB[_0_] = fdy(0) shl 2
    pblockQSB[_1_] = fdy(1) shl 2
    pblockQSB[_2_] = fdy(2) shl 2
   
    EXPAND(pblockQSB, _0_)
    EXPAND(pblockQSB, _1_)
    EXPAND(pblockQSB, _2_)
   
    pblockQAB[_0_] = fdx(0)
    pblockQAB[_1_] = fdx(1)
    pblockQAB[_2_] = fdx(2)
   
    EXPAND(pblockQAB, _0_)
    EXPAND(pblockQAB, _1_)
    EXPAND(pblockQAB, _2_)    
   
    pblockDX[_0_ + 0] = dx[_0_]
    pblockDX[_0_ + 1] = dx[_1_]
    pblockDX[_0_ + 2] = dx[_2_]
   
    pblockDY[_0_ + 0] = dy[_0_]
    pblockDY[_0_ + 1] = dy[_1_]
    pblockDY[_0_ + 2] = dy[_2_]  
   
    if px0 < px1 then
        if px0 < px2 then
            tl_x = px0
            if px1 < px2 then
                br_x = px2
            else
                br_x = px1
            end if
        else
            tl_x = px2
            br_x = px1
        end if
    else
        if px0 > px2 then
            br_x = px0
            if px2 > px1 then
                tl_x = px1
            else
                tl_x = px2
            end if
        else
            br_x = px2
            tl_x = px1
        end if
    end if
       
    if py0 < py1 then
        if py0 < py2 then
            tl_y = py0
            if py1 < py2 then
                br_y = py2
            else
                br_y = py1
            end if
        else
            tl_y = py2
            br_y = py1
        end if
    else
        if py0 > py2 then
            br_y = py0
            if py2 > py1 then
                tl_y = py1
            else
                tl_y = py2
            end if
        else
            br_y = py2
            tl_y = py1
        end if
    end if  
   
    if (br_x < 0) orElse (br_y < 0) then exit sub
    if (tl_x >= sdst_w) orElse (tl_y >= sdst_h) then exit sub
   
    stackPtr = -1
   
    if tl_x < 0 then tl_x = 0
    if tl_y < 0 then tl_y = 0
    if br_x >= sdst_w then br_x = sdst_w
    if br_y >= sdst_h then br_y = sdst_h
   
    tl_x = (tl_x + (FIX_BITS_VALUE-1)) shr FIX_BITS
    tl_y = (tl_y + (FIX_BITS_VALUE-1)) shr FIX_BITS
    br_x = (br_x + (FIX_BITS_VALUE-1)) shr FIX_BITS
    br_y = (br_y + (FIX_BITS_VALUE-1)) shr FIX_BITS
   
    otl_x = tl_x
    otl_y = tl_y
    tl_x and= not(MAX_BLOCK_X - 1)
    tl_y and= not(MAX_BLOCK_Y - 1)
   
    sotl_x = otl_x shl FIX_BITS
    sotl_y = otl_y shl FIX_BITS
    sbr_x = br_x shl FIX_BITS
    sbr_y = br_y shl FIX_BITS
   
    hconst[_0_] = dy[_0_]*px0 - dx[_0_]*py0 + _
                  iif((dy[_0_] < 0) orElse ((dy[_0_] = 0) andAlso (dx[_0_] > 0)), 1, 0)
                 
    hconst[_1_] = dy[_1_]*px1 - dx[_1_]*py1 + _
                  iif((dy[_1_] < 0) orElse ((dy[_1_] = 0) andAlso (dx[_1_] > 0)), 1, 0)
                 
    hconst[_2_] = dy[_2_]*px2 - dx[_2_]*py2 + _
                  iif((dy[_2_] < 0) orElse ((dy[_2_] = 0) andAlso (dx[_2_] > 0)), 1, 0)    
       
    pblockCY[_0_ + 0] = hconst[_0_]
    pblockCY[_0_ + 1] = hconst[_1_]
    pblockCY[_0_ + 2] = hconst[_2_]
   
    slowStride = (dst_w - 4) shl 2
   
    EXPAND(hconst, _0_)
    EXPAND(hconst, _1_)
    EXPAND(hconst, _2_)
   
    EXPAND(dx, _0_)
    EXPAND(dx, _1_)
    EXPAND(dx, _2_)
   
    EXPAND(dy, _0_)
    EXPAND(dy, _1_)
    EXPAND(dy, _2_)
       
    crnr(0) = 0
    crnr(1) = 0
    crnr(2) = 0
   
    for y = tl_y to br_y step MAX_BLOCK_Y
        for x = tl_x to br_x step MAX_BLOCK_X
           
            splitMask = 11
            scx = x
            scy = y
            PUSH_BLOCK()
           
            while(stackPtr > -1)
                POP_BLOCK()
                curBlockW = GET_SIZE_X(splitMask)
                curBlockH = GET_SIZE_Y(splitMask)      
                               
                cxy[_0_ + 0] = scx shl FIX_BITS
                cxy[_0_ + 1] = scy shl FIX_BITS
                cxy[_0_ + 2] = (scx + curBlockW - 1) shl FIX_BITS
                cxy[_0_ + 3] = (scy + curBlockH - 1) shl FIX_BITS

                if (cxy[_0_ + 0] < sbr_x) andAlso _
                   (cxy[_0_ + 2] >= sotl_x) andAlso _
                   (cxy[_0_ + 1] < sbr_y) andAlso _
                   (cxy[_0_ + 3] >= sotl_y) then

                    asm
                        mov         eax,        [alligned_base]
                        pxor        xmm4,       xmm4
                       
                        movdqa      xmm0,       _cxy_[eax]
                        movdqa      xmm1,       xmm0
                        pshufd      xmm6,       xmm0,               0xF5  
                        pshufd      xmm7,       xmm1,               0x88    
                        movdqa      xmm0,       xmm6
                        movdqa      xmm1,       xmm7

                        movdqa      xmm2,       _dx0_[eax]
                        movdqa      xmm3,       _dy0_[eax]
                       
                        pmulld      xmm0,       xmm2
                        pmulld      xmm1,       xmm3
                        psubd       xmm0,       xmm1
                        movdqa      xmm2,       _hconst0_[eax]
                        paddd       xmm0,       xmm2
                       
                        pcmpgtd     xmm0,       xmm4
                        packssdw    xmm0,       xmm0
                        packsswb    xmm0,       xmm0
                        movd        ebx,        xmm0
                        not         ebx
                        mov         0[crnr],    ebx
                       
                        movdqa      xmm0,       xmm6
                        movdqa      xmm1,       xmm7
                       
                        movdqa      xmm2,       _dx1_[eax]
                        movdqa      xmm3,       _dy1_[eax]
                       
                        pmulld      xmm0,       xmm2
                        pmulld      xmm1,       xmm3
                        psubd       xmm0,       xmm1
                        movdqa      xmm2,       _hconst1_[eax]
                        paddd       xmm0,       xmm2
                       
                        pcmpgtd     xmm0,       xmm4
                        packssdw    xmm0,       xmm0
                        packsswb    xmm0,       xmm0
                        movd        ebx,        xmm0
                        not         ebx
                        mov         4[crnr],    ebx
                       
                        movdqa      xmm0,       xmm6
                        movdqa      xmm1,       xmm7
                       
                        movdqa      xmm2,       _dx2_[eax]
                        movdqa      xmm3,       _dy2_[eax]
                       
                        pmulld      xmm0,       xmm2
                        pmulld      xmm1,       xmm3
                        psubd       xmm0,       xmm1
                        movdqa      xmm2,       _hconst2_[eax]
                        paddd       xmm0,       xmm2
                       
                        pcmpgtd     xmm0,       xmm4
                        packssdw    xmm0,       xmm0
                        packsswb    xmm0,       xmm0
                        movd        ebx,        xmm0
                        not         ebx
                        mov         8[crnr],    ebx

                    end asm
                   
                    if (crnr(0) <> 0) andAlso (crnr(1) <> 0) andAlso (crnr(2) <> 0) then
                        if (crnr(0) = &hFFFFFFFF) andAlso (crnr(1) = &hFFFFFFFF) andAlso (crnr(2) = &hFFFFFFFF) then
                           
                            startLoc = @dstPxls[(scy * dst_w) + scx]
                            stride = dst_w shl 2
                            select case as const splitMask
                            case 0
                                FAST_STEP_INIT()
                                FAST_STEP_8()
                            case 1
                                FAST_STEP_INIT()
                                FAST_STEP_16()
                            case 2
                                FAST_STEP_INIT()
                                FAST_STEP_32()
                            case 3
                                FAST_STEP_INIT()
                                FAST_STEP_64()
                            case 4
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        2
                                    dtAHS_block4_loop:
                                end asm
                                FAST_STEP_8()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block4_loop
                                end asm
                            case 5
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        2
                                    dtAHS_block5_loop:
                                end asm
                                FAST_STEP_16()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block5_loop
                                end asm
                            case 6
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        2
                                    dtAHS_block6_loop:
                                end asm
                                FAST_STEP_32()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block6_loop
                                end asm
                            case 7
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        2
                                    dtAHS_block7_loop:
                                end asm
                                FAST_STEP_64()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block7_loop
                                end asm
                            case 8
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        4
                                    dtAHS_block8_loop:
                                end asm
                                FAST_STEP_8()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block8_loop
                                end asm
                            case 9
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        4
                                    dtAHS_block9_loop:
                                end asm
                                FAST_STEP_16()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block9_loop
                                end asm
                            case 10
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        4
                                    dtAHS_block10_loop:
                                end asm
                                FAST_STEP_32()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block10_loop
                                end asm
                            case 11
                                FAST_STEP_INIT()
                                asm
                                        mov     edx,        4
                                    dtAHS_block11_loop:
                                end asm
                                FAST_STEP_64()
                                asm
                                        dec     edx
                                        jnz     dtAHS_block11_loop
                                end asm
                            end select
                        else
                            if splitX(splitMask) = -1 then
                                if splitY(splitMask) = -1 then
                                    cyx[_0_ + 0] =  cxy[_0_ + 1]
                                    EXPAND(cxy, _0_)
                                    EXPAND(cyx, _0_)
                                   
                                    asm
                                       
                                            mov         ecx,                [alligned_base]
                                            pxor        xmm7,               xmm7
                                            mov         ebx,                0xffffffff
                                            movd        xmm6,               ebx
                                            pshufd      xmm6,               xmm6,               0x00
                                           
                                            movdqa      xmm0,               _pblockDX_[ecx]
                                            movdqa      xmm1,               _cyx_[ecx]
                                            pmulld      xmm0,               xmm1
                                           
                                            movdqa      xmm2,               _pblockDY_[ecx]
                                            movdqa      xmm3,               _cxy_[ecx]
                                            pmulld      xmm2,               xmm3
                                           
                                            psubd       xmm0,               xmm2
                                           
                                            movdqa      xmm1,               _pblockCY_[ecx]
                                            paddd       xmm1,               xmm0
                                           
                                            pshufd      xmm3,               xmm1,               0x00
                                            psubd       xmm3,               _pblockQSD0_[ecx]            
                                            pshufd      xmm4,               xmm1,               0x55
                                            psubd       xmm4,               _pblockQSD1_[ecx]
                                            pshufd      xmm5,               xmm1,               0xAA
                                            psubd       xmm5,               _pblockQSD2_[ecx]
                                            movdqa      _pblockQFD0_[ecx],  xmm3
                                            movdqa      _pblockQFD1_[ecx],  xmm4
                                            movdqa      _pblockQFD2_[ecx],  xmm5
                                       
                                            mov         eax,                [dst_w]
                                            mov         ebx,                [scy]
                                            mul         ebx
                                            add         eax,                [scx]
                                            shl         eax,                2
                                            add         eax,                [dstPxls]
                                            mov         edx,                8
                                           
                                        dtAHS_slow_rows:
                                                                                       
                                            movdqa      xmm0,               xmm3
                                            movdqa      xmm1,               xmm4
                                            movdqa      xmm2,               xmm5
                                            pcmpgtd     xmm0,               xmm7
                                            pcmpgtd     xmm1,               xmm7
                                            pcmpgtd     xmm2,               xmm7
                                            por         xmm0,               xmm1
                                            por         xmm0,               xmm2
                                            movdqa      xmm1,               xmm0
                                            pxor        xmm1,               xmm6
                                            movdqa      xmm2,               [eax]
                                            pand        xmm0,               xmm2    
                                            pand        xmm1,               _bcol_[ecx]
                                            por         xmm0,               xmm1
                                            movdqa      [eax],              xmm0
                                            psubd       xmm3,               _pblockQSB0_[ecx]
                                            psubd       xmm4,               _pblockQSB1_[ecx]
                                            psubd       xmm5,               _pblockQSB2_[ecx]
                                            add         eax,                16
                                                                                       
                                            movdqa      xmm0,               xmm3
                                            movdqa      xmm1,               xmm4
                                            movdqa      xmm2,               xmm5
                                            pcmpgtd     xmm0,               xmm7
                                            pcmpgtd     xmm1,               xmm7
                                            pcmpgtd     xmm2,               xmm7
                                            por         xmm0,               xmm1
                                            por         xmm0,               xmm2
                                            movdqa      xmm1,               xmm0
                                            pxor        xmm1,               xmm6
                                            movdqa      xmm2,               [eax]
                                            pand        xmm0,               xmm2
                                            pand        xmm1,               _bcol_[ecx]
                                            por         xmm0,               xmm1
                                            movdqa      [eax],              xmm0
                                            add         eax,                [slowStride]
                                           
                                            movdqa      xmm0,               _pblockQFD0_[ecx]
                                            paddd       xmm0,               _pblockQAB0_[ecx]
                                            movdqa      xmm3,               xmm0
                                            movdqa      _pblockQFD0_[ecx],  xmm0
                                           
                                            movdqa      xmm0,               _pblockQFD1_[ecx]
                                            paddd       xmm0,               _pblockQAB1_[ecx]
                                            movdqa      xmm4,               xmm0
                                            movdqa      _pblockQFD1_[ecx],  xmm0
                                           
                                            movdqa      xmm0,               _pblockQFD2_[ecx]
                                            paddd       xmm0,               _pblockQAB2_[ecx]
                                            movdqa      xmm5,               xmm0
                                            movdqa      _pblockQFD2_[ecx],  xmm0
                                           
                                            dec         edx
                                            jnz         dtAHS_slow_rows
                                           
                                    end asm

                                else
                                    SPLIT_BLOCK_H()
                                end if
                            elseif splitY(splitMask) = -1 then
                                SPLIT_BLOCK_V()
                            else
                                crnrMask = crnr(0) and crnr(1) and crnr(2)
                                crnrMask = ((crnrMask shr 21) and &h08) or ((crnrMask shr 14) and &h04) or _
                                           ((crnrMask shr  7) and &h02) or (crnrMask and &h01)
                                if crnrMask = 3 orElse crnrMask = 12 then
                                    SPLIT_BLOCK_H()
                                elseif crnrMask = 5 orElse crnrMask = 10 then
                                    SPLIT_BLOCK_V()
                                else
                                    SPLIT_BLOCK_H()
                                end if
                            end if
                        end if
                    end if
                end if
            wend
        next x
    next y
   
    deallocate(alligned_)
end sub


'
' main
'
#define SCRX 600
#define SCRY 400

type dbl_v2d
  as double   x, y
end type

Type poly
  As uinteger n, col
  as double   ang, iang, x,y
  as dbl_v2d  _v(Any)
  Declare Sub newpoint(x As double=0, y As double=0)
  Declare Sub calculate
 Private:
  As Double   px,py, pa
End Type
Sub poly.newpoint(x as double, y as double)
  redim preserve _v(n)
  _v(n).x = x: _v(n).y = y: n += 1
End Sub
Sub poly.calculate
  if n < 1 then exit sub
  
  dim as double dx=x-px, dy=y-py
  if dx <> 0 or dy <> 0 then
    for p as dbl_v2d ptr = @_v(0) to @_v(n-1)
      p->x += dx:  p->y += dy
    next: px=x: py=y
  end if
  
  if pa <> ang then
    dim as double da=ang-pa
    dim as double sina = sin(da), cosa = cos(da)
    for p as dbl_v2d ptr = @_v(0) to @_v(n-1)
      dx=p->x-x: dy=p->y-y
      dim as double tx=x+cosa*dx, ty=y+cosa*dy
      p->x=tx-sina*dy: p->y=ty+sina*dx
    next: pa=ang
  end if
  
End Sub


Sub NewPolys(pol() As poly)
  Const pi      = 4*Atn(1)
  Const TwoPi   = 8*Atn(1)
  Const border  = 50
  const fieldx  = (SCRX-1) - 2*border
  const fieldy  = (SCRY-1) - 2*border
  For p As poly ptr = @pol(LBound(pol)) To @pol(UBound(pol))
    p->n=0: redim p->_v(0)
    dim as integer  points_ubound = 2'+rnd*19
    for i as integer = 0 to points_ubound
      dim as double a = Rnd*TwoPi, r = Rnd*50
      p->newpoint r*cos(a), r*sin(a)
    next
    p->x=border+Rnd*fieldx
    p->y=border+Rnd*fieldy
    p->col = RGB(Rnd*255,Rnd*255,Rnd*255)
    p->iang = (Rnd+0.5) * pi / 120
  Next
End Sub

dim as single             anim_fps = 260, anim_f, ianim = 1 / anim_fps
dim as single             phys_fps = 55, phys_f, iphys = 1 / phys_fps
  
dim as double             t = Timer, t_newtri = t
Dim As single             message_fps = 1/2, t_message_fps=t, message_fps_frame
dim as double             tDemoExit = t+50

Dim As string             kstr = InKey, str_message_fps
Dim As integer            asc_key, BOOL_NEW = 1

Dim As poly               pol(17)

screenres SCRX,SCRY,32,2
screenset 0,1

Do
  
    kstr = InKey:  asc_key = Asc(kstr)
    
    Dim As double tp = t:  t = Timer
    Dim As double dt = t - tp
    
    If BOOL_NEW Then NewPolys pol():  t_newtri = t+7
    BOOL_NEW = asc_key <> 0 'Or t >= t_newtri
    
    if anim_f <= 0 then
      cls
      For p As poly ptr = @pol(LBound(pol)) To @pol(UBound(pol))
        p->calculate
        dim as vector2d v(p->n-1)
        for i as integer = 0 to p->n - 1
          v(i).x = p->_v(i).x
          v(i).y = p->_v(i).y
        next
        
'        drawTriangleAHS ,p->_v(0).x, p->_v(0).y, p->_v(1).x, p->_v(1).y, p->_v(2).x, p->_v(2).y, p->col
        polygone 0, v(), p->n, p->col shr 16, p->col shr 8, p->col
        
      Next
      message_fps_frame += 1
      If t >= t_message_fps Then
        Dim As single actual_fps = message_fps_frame * message_fps
        str_message_fps = Str((actual_fps * 1000)/1000)
        t_message_fps += 1/message_fps:  message_fps_frame=0
      End if
      Locate 1,1:  Print "fps:  "; str_message_fps
      anim_f += ianim
      flip
    end If
    
    anim_f -= dt
    phys_f += dt
    
    while phys_f > 0
      For p As poly ptr = @pol(LBound(pol)) To @pol(UBound(pol))
        p->ang += p->iang
      next
      phys_f -= iphys
    Wend
    
    sleep 15
loop until multikey(1)

dodicat · Post by **dodicat** » Feb 15, 2015 1:20

Thought I would try out some polygons the old fashioned way (paint).
Lighting from front.

Code: Select all

 
Randomize 1
Type V3
    As Integer x,y,z
    As Uinteger col
    #define map(a,b,x,c,d) ((d)-(c))*((x)-(a))/((b)-(a))+(c)
    #define Intrange(f,l) Int(Rnd*((l+1)-(f))+(f))
End Type
Type Point As V3
Function drawline(x As Integer,y As Integer,angle As Double,length As Double) As Point
    angle=angle*.0174532925199433  '=4*atn(1)/180
    var x2=x+length*Cos(angle)
    var y2=y-length*Sin(angle)
    Return Type<Point>(x2,y2)
End Function

Type float 'FLOATS FOR TRIG
    As Single sx,sy,sz
    As Single cx,cy,cz
End Type

Type poly
    As V3 pts(Any)
    As float ang
    As Uinteger col
    Declare Constructor 
    Declare Sub Draw
End Type


Constructor poly
Redim pts(0 To IntRange(3,12))
pts(0)=Type<V3>(IntRange(50,750),IntRange(50,550),0)     'centroid
col=Rgb(50+Rnd*205,50+Rnd*205,50+Rnd*205)
Dim As Integer ctr
For n As Integer=0 To 350 Step (360/Ubound(pts))
    ctr+=1
    pts(ctr)=drawline(pts(0).x,pts(0).y,n,100)
Next n
Redim Preserve pts(0 To Ubound(pts)+1)
pts(Ubound(pts))=Type<V3>(pts(0).x,pts(0).y,100)         'normal to shape
ang=Type<float>(2*(Rnd*.03-Rnd*.03),1.5*(Rnd*.03-Rnd*.03),Rnd*.03-Rnd*.03)
End Constructor

Sub poly.draw
    var u=Cptr(Ubyte Ptr,@col)
    var mp=map(0,600,Abs(pts(Ubound(pts)).z),.25,1)      'shader
    var col2=Rgb(u[2]*mp,u[1]*mp,u[0]*mp)
    For n As Integer=1 To Ubound(pts)-2
        Line (pts(n).x,pts(n).y)-(pts(n+1).x,pts(n+1).y),col2
    Next n
    Line -(pts(1).x,pts(1).y),col2
    Paint (pts(0).x,pts(0).y),col2,col2
End Sub


Function RotatePoint(c As Point,p As Point,a As float,scale As float=Type<float>(1,1,1)) As Point
    Dim As Single dx=p.x-c.x,dy=p.y-c.y,dz=p.z-c.z
    Return Type<Point>((scale.sx)*((a.cy*a.cz)*dx+(-a.cx*a.sz+a.sx*a.sy*a.cz)*dy+(a.sx*a.sz+a.cx*a.sy*a.cz)*dz)+c.x,_
    (scale.sy)*((a.cy*a.sz)*dx+(a.cx*a.cz+a.sx*a.sy*a.sz)*dy+(-a.sx*a.cz+a.cx*a.sy*a.sz)*dz)+c.y,_
    (scale.sz)*((-a.sy)*dx+(a.sx*a.cy)*dy+(a.cx*a.cy)*dz)+c.z,p.col)
End Function 

Function perspective(p As Point,eyepoint As Point) As Point
    Dim As Single   w=1+(p.z/eyepoint.z)
    Return Type<Point>((p.x-eyepoint.x)/w+eyepoint.x,(p.y-eyepoint.y)/w+eyepoint.y,(p.z-eyepoint.z)/w+eyepoint.z,p.col)
End Function

Sub spinpoly(p As poly,a As float,rot As poly) 
    rot=p
    For n As Integer=1 To Ubound(p.pts)
        rot.pts(n)=RotatePoint(p.pts(0),p.pts(n),a,Type<float>(1,1,1))
        rot.pts(n)=perspective(rot.pts(n),Type<V3>(400,300,1500))
    Next n
End Sub

Function Regulate(Byval MyFps As Integer,Byref fps As Integer) As Integer
    Static As Double timervalue,lastsleeptime,t3,frames
    Var t=Timer
    frames+=1
    If (t-t3)>=1 Then t3=t:fps=frames:frames=0
    Var sleeptime=lastsleeptime+((1/myfps)-T+timervalue)*1000
    If sleeptime<1 Then sleeptime=1
    lastsleeptime=sleeptime
    timervalue=T
    Return sleeptime
End Function

'=======================================================
Dim As poly p(1 To 20),tmp
Screen 19,32,2
Screenset 1,0
Dim As Single pi2=8*Atn(1)
Dim As Single ax(1 To 20),ay(1 To 20),az(1 To 20)
Dim As Integer fps

Do
    Cls
    Draw String(20,20), "Framerate " & fps
    For n As Integer=1 To 20
        ax(n)+=p(n).ang.sx:If Abs(ax(n))>=pi2 Then ax(n)=0
        ay(n)+=p(n).ang.sy:If Abs(ay(n))>=pi2 Then ay(n)=0
        az(n)+=p(n).ang.sz:If Abs(az(n))>=pi2 Then az(n)=0
        var f=Type<float>(Sin(ax(n)),Sin(ay(n)),Sin(az(n)), _
                          Cos(ax(n)),Cos(ay(n)),Cos(az(n)))
        spinpoly(p(n),f,tmp)
        tmp.draw
    Next n
    Flip
    Sleep regulate(64,fps)
Loop Until Len(Inkey)
Sleep

badidea · Post by **badidea** » Dec 18, 2016 20:25

There seems to be a bug in this code, see following example:

Code: Select all

type vector2d
  as integer x,y
end type

sub Polygon(TargetPtr as any ptr, _ ' 0 = screen otherwise image ptr
            p()       as vector2d , _ ' the screen coords (x,y)
            n         as integer  , _ ' how many coords in array
            red       as ubyte    , _ ' color
            green     as ubyte    , _
            blue      as ubyte)

  static as integer palflag=0
  dim as integer TargetWidth=any,TargetHeight=any,TargetBytes=any,TargetPitch=any
  dim as integer f=any,t=any,b=any,l=any,r=any
  dim as integer lc=any,nlc=any,rc=any,nrc=any
  dim as integer d1=any,s1=any,d2=any,s2 =any,cl=any,cr=any
  dim as ubyte  c8 =any
  dim as ushort c16=any
  dim as ulong  c24=any
  dim as any ptr row=any

  n-=1
  if n<2 then exit sub

  if TargetPtr=0 then
    TargetPtr=screenptr() ' first pixel top left on screen
    if TargetPtr=0 then exit sub
    ScreenInfo    _
    TargetWidth , _
    TargetHeight, _
    TargetBytes ,,_
    TargetPitch
    TargetBytes shr=3 ' bits to bytes
   
  else
    ImageInfo     _
    TargetPtr   , _
    TargetWidth , _
    TargetHeight, _
    TargetBytes , _
    TargetPitch , _
    TargetPtr         ' first pixel after image header
  end if

  select case as const TargetBytes
  case 1
    if palflag=0 then ' define a RGB palette only once
      dim as integer r8,g8,b8
      for i as uinteger= 0 to 255
        r8=(((i shr 5) and &H07) * 255) / 7
        g8=(((i shr 2) and &H07) * 255) / 7
        b8=(((i shr 0) and &H03) * 255) / 3
        palette i,r8,g8,b8
      next
     palflag=1
    end if
    #define RGB8(_r,_g,_b) ( (_r and &HE0) or ((_g and &HE0) shr 3) or ((_b and &HC0) shr 6) )
    c8=rgb8(red,green,blue)
    #undef RGB8
  case 2
    palflag=0
    #define RGB16(_r,_g,_b) (((_r shr 3) shl 11) or ((_g shr 2) shl 5) or (_b shr 3))
    c16=rgb16(red,green,blue)
    #undef RGB16
  case 4
    palflag=0
    c24=rgb(red,green,blue)
  end select

  ' top bottom left right (clipping)
  #define mr 1000000
  t= mr: b=-mr :  l= mr : r=-mr
  #undef mr
  for nc as integer=0 to n
    with p(nc)
      if .y<t then t=.y:f=nc ' top
      if .y>b then b=.y      ' bottom
      if .x<l then l=.x      ' left
      if .x>r then r=.x      ' right
    end with
  next
  ' clip
  if l>=TargetWidth  then exit sub  ' left is outside
  if r<1             then exit sub  ' right is outside
  if t>=TargetHeight then exit sub  ' top is outside
  if b<0             then exit sub  ' bottom is outside
  if (r-l)<1         then exit sub  ' 0 pixels width
  if b>=TargetHeight then b=TargetHeight-1 ' clip bottom
  if (b-t)<1         then exit sub  ' 0 pixels height
  
  ' left and next left counter
  lc=f: nlc=lc-1: if nlc<0 then nlc=n
  ' right and next right counter
  rc=f: nrc=rc+1: if nrc>n then nrc=0
 
  if p(nlc).x > p(nrc).x then exit sub 'What happens here???
  
  row=TargetPtr+t*TargetPitch
  #define SHIFTS 8 ' fixed point format
  ' from top to bottom
  while t<b
    if t=p(lc).y then
      while p(lc).y=p(nlc).y
        lc=nlc:nlc-=1:if nlc<0 then nlc=n
      wend
      d1=p(lc).x shl SHIFTS
      s1=((p(nlc).x-p(lc).x) shl SHIFTS)/(p(nlc).y-p(lc).y)
      lc = nlc
    end if
    if t=p(rc).y then
      while p(rc).y=p(nrc).y
        rc=nrc:nrc+=1:if nrc>n then nrc=0
      wend
      d2=p(rc).x shl SHIFTS
      s2=((p(nrc).x-p(rc).x) shl SHIFTS)/(p(nrc).y-p(rc).y)
      rc=nrc
    end if
    if t>-1 then
      l=d1 shr SHIFTS ' most left  pixel
      r=d2 shr SHIFTS ' most right pixel
      if l>r then swap l,r
      if l<TargetWidth andalso r>-1 then
        if l<0 then l=0
        if r>=TargetWidth then r=TargetWidth-1
        select case as const TargetBytes
        case 1
          var s=cptr(ubyte ptr,row)+l
          var e=cptr(ubyte ptr,row)+r
          while s<e : *s=c8  : s+=1:wend
          *e=c8
        case 2
          var s=cptr(ushort ptr,row)+l
          var e=cptr(ushort ptr,row)+r
          while s<e : *s=c16 : s+=1:wend
          *e=c16
        case 4
          var s=cptr(ulong ptr,row)+l
          var e=cptr(ulong ptr,row)+r
          while s<e : *s=c24 : s+=1:wend
          *e=c24
        end select
      end if
    end if
    t+=1
    d1+=s1
    d2+=s2
    row+=TargetPitch
  wend
  #undef SHIFTS
end sub

'
' main
'
const n=11
dim as vector2d t1(2)
dim as vector2d t2(2)
dim as integer i,j

screenres 640,480,8

t1(0).x = 200: t1(0).y = 213 
t1(1).x = 209: t1(1).y = 144 
t1(2).x = 279: t1(2).y = 133

Polygon 0,t1(),3, 255, 32, 32 ' triangle
for i = 0 to 2
	draw string (t1(i).x, t1(i).y), str(i)
	j = (i + 1) mod 3
	line(t1(i).x, t1(i).y)-(t1(j).x, t1(j).y), &h00670934 
next

t2(0).x = 409: t2(0).y = 144 
t2(1).x = 400: t2(1).y = 213 
t2(2).x = 479: t2(2).y = 133 

Polygon 0,t2(),3, 255, 32, 32 ' triangle
for i = 0 to 2
	draw string (t2(i).x, t2(i).y), str(i)
	j = (i + 1) mod 3
	line(t2(i).x, t2(i).y)-(t2(j).x, t2(j).y), &h00670934 
next
sleep

This line (96) makes the routine exit:

Code: Select all

  if p(nlc).x > p(nrc).x then exit sub 'What happens here???

Edit: Is it a check if the triangle/polygon is upside-down (points clockwise instead of counter-clockwise)?

Fast filled polygons in BASIC (all video modes + clipping)

Fast filled polygons in BASIC (all video modes + clipping)

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clippin

Re: Fast filled polygons in BASIC (all video modes + clipping)