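Something along those lines, yeah. But rather than using an extra array and looping over one dimension, you can keep the data in the original array and fill it with a sort of "binary memcpy": set the first few elements directly, then repeatedly copy the already-filled part behind itself, doubling the filled length each time: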
Code: Select all
#include "crt.bi"
' fillArray(p, sz, v)
'   Sets the first sz elements starting at pointer p to the value v.
'
'   p  : typed pointer to the first element
'   sz : number of elements to fill; nothing is written when sz <= 0
'   v  : fill value; sizeof(v) must equal sizeof(*p)
'
'   On a null pointer or a size mismatch a message is printed and the
'   program ends (after waiting for a key press).
'
'   Method: the first 32 elements are assigned one by one, then the filled
'   prefix is memcpy'd behind itself, doubling it on every pass; a final
'   memcpy covers the remainder.
'
'   Every argument is copied into an fa_-prefixed local exactly once, so
'   the caller may pass literals, expressions, or variables named like the
'   macro's working variables (m, i, j, ...) safely.
#macro fillArray(p,sz,v)
scope
    dim as typeof(p) fa_dst = (p)
    if (fa_dst = 0) then
        print "Null pointer.": sleep: end
    end if
    if (sizeof(v) <> sizeof(*fa_dst)) then
        print "Types don't match.": sleep: end
    end if
    dim as integer fa_cnt = (sz)
    dim as typeof(*fa_dst) fa_val = (v)
    ' number of elements written directly before switching to memcpy
    dim as integer fa_seed = 32
    dim as integer fa_head = iif(fa_cnt > fa_seed, fa_seed, fa_cnt)
    for fa_k as integer = 0 to fa_head - 1
        fa_dst[fa_k] = fa_val
    next fa_k
    if (fa_cnt > fa_seed) then
        ' fa_done = number of elements already holding the value
        dim as integer fa_done = fa_seed
        ' "fa_done <= fa_cnt - fa_done" means one more full doubling still
        ' fits; written this way nothing is ever multiplied past fa_cnt,
        ' so the counter cannot overflow
        while (fa_done <= fa_cnt - fa_done)
            memcpy(fa_dst + fa_done, fa_dst, sizeof(*fa_dst) * fa_done)
            fa_done += fa_done
        wend
        ' remainder is shorter than the filled prefix, so source and
        ' destination do not overlap
        memcpy(fa_dst + fa_done, fa_dst, sizeof(*fa_dst) * (fa_cnt - fa_done))
    end if
end scope
#endmacro
' Benchmark driver: fill a sz_i x sz_i x sz_i Single array through a flat
' pointer and time one fillArray call.
dim as integer sz_i = 600
redim as single array(sz_i-1,sz_i-1,sz_i-1)
dim as single ptr p = @array(0,0,0)
' integer product instead of sz_i^3, which goes through floating point
dim as uinteger sz = sz_i * sz_i * sz_i
dim as single v = 99.5
' typed zero: fillArray compares sizeof(value) with sizeof(*p), and the
' size of a bare literal 0 is the compiler's default integer width, not
' necessarily the 4 bytes of a Single
dim as single zero = 0
dim as double t1,t2
' initialize s.t. the memory is reserved
fillArray(p,sz,zero)
print "array size = ";str(int(sizeof(*p)*sz/2^20+0.5));"MiB"
print "value to set: ";str(v)
t1 = timer
fillArray(p,sz,v)
t2 = timer
print str(t2-t1);" seconds"
' spot check of one randomly chosen element
print "value test: ";p[int(rnd*sz)]
sleep
On my machine this takes about half the time. The key seems to be the binary-memcpy step, since the first loop in your code doesn't really add to the time taken.
Note that I used single as the element type here in order to compare it with the 32-bit asm version. This is indeed a case that I don't see how to solve with the "rep stos?" instructions in 32-bit mode. So either upgrading to 64-bit is necessary, or one has to fall back on slower solutions, like the one based on memcpy.
Edit: I've put the following together; it may or may not work correctly in all cases ;-)
Code: Select all
#include "crt.bi"
' fillArrayL(p, sz, v)
'   memcpy-based fallback that sets the first sz elements starting at
'   pointer p to the value v. Prints a notice every time it is used.
'
'   p  : typed pointer to the first element
'   sz : number of elements to fill; nothing is written when sz <= 0
'   v  : fill value; sizeof(v) must equal sizeof(*p)
'
'   On a null pointer or a size mismatch a message is printed and the
'   program ends (after waiting for a key press).
'
'   Method: the first 32 elements are assigned one by one, then the filled
'   prefix is memcpy'd behind itself, doubling it on every pass; a final
'   memcpy covers the remainder.
'
'   Every argument is copied into a fal_-prefixed local exactly once, so
'   literals, expressions, and arbitrarily named caller variables are safe.
#macro fillArrayL(p,sz,v)
scope
    print "Falling back onto a memcpy solution."
    dim as typeof(p) fal_dst = (p)
    if (fal_dst = 0) then
        print "Null pointer.": sleep: end
    end if
    if (sizeof(v) <> sizeof(*fal_dst)) then
        print "Types don't match.": sleep: end
    end if
    dim as integer fal_cnt = (sz)
    dim as typeof(*fal_dst) fal_val = (v)
    ' number of elements written directly before switching to memcpy
    dim as integer fal_seed = 32
    dim as integer fal_head = iif(fal_cnt > fal_seed, fal_seed, fal_cnt)
    for fal_k as integer = 0 to fal_head - 1
        fal_dst[fal_k] = fal_val
    next fal_k
    if (fal_cnt > fal_seed) then
        ' fal_done = number of elements already holding the value
        dim as integer fal_done = fal_seed
        ' loop while one more full doubling still fits; phrased as a
        ' subtraction so the counter never exceeds fal_cnt and cannot
        ' overflow
        while (fal_done <= fal_cnt - fal_done)
            memcpy(fal_dst + fal_done, fal_dst, sizeof(*fal_dst) * fal_done)
            fal_done += fal_done
        wend
        ' remainder is shorter than the filled prefix, so source and
        ' destination do not overlap
        memcpy(fal_dst + fal_done, fal_dst, sizeof(*fal_dst) * (fal_cnt - fal_done))
    end if
end scope
#endmacro
' fillArray(p, sz, v)
'   Sets the first sz elements starting at pointer p to the value v, using
'   the x86 "rep stos" string-store instructions where the element size
'   allows it and the memcpy-based fillArrayL macro otherwise.
'
'   p  : typed pointer to the first element
'   sz : number of elements to fill; nothing is written when sz <= 0
'   v  : fill value; sizeof(v) must equal sizeof(*p). The value is first
'        converted to the element type of p, so the asm path and the
'        fillArrayL path store the same thing.
'
'   On a null pointer or a size mismatch a message is printed and the
'   program ends (after waiting for a key press).
'
'   Element sizes handled by rep stos: 1, 2, 4 bytes, plus 8 bytes when
'   __FB_64BIT__ is defined. Everything else goes through fillArrayL.
'
'   Arguments are copied into fa_-prefixed locals before any asm runs, so
'   the asm only ever names real variables of known size (a literal or an
'   expression passed as v or sz would otherwise be pasted into the asm
'   text as a memory operand).
'
'   Destination, count and value are loaded inside the same asm block that
'   executes rep stos. Register contents are not relied upon across
'   separate asm statements or across the compiler-generated select case.
'   NOTE(review): this assumes the compiler saves/treats rdi/edi, rcx/ecx
'   and rax/eax as clobbered by an asm block - confirm for the backend
'   (-gen gas / -gen gcc) in use.
#macro fillArray(p,sz,v)
scope
    dim as typeof(p) fa_dst = (p)
    if (fa_dst = 0) then
        print "Null pointer.": sleep: end
    end if
    if (sizeof(v) <> sizeof(*fa_dst)) then
        print "Types don't match.": sleep: end
    end if
    dim as integer fa_cnt = (sz)
    dim as typeof(*fa_dst) fa_val = (v)
    ' a negative count would turn into a huge repeat count for rep stos
    if (fa_cnt > 0) then
#ifdef __FB_64BIT__
        ' pointers and Integer are 8 bytes here
        select case sizeof(*fa_dst)
        case 1
            asm
                cld                             ' store upwards
                mov rdi, fa_dst                 ' rdi = destination
                mov rcx, qword ptr fa_cnt       ' rcx = element count
                mov al, byte ptr fa_val
                rep stosb
            end asm
        case 2
            asm
                cld
                mov rdi, fa_dst
                mov rcx, qword ptr fa_cnt
                mov ax, word ptr fa_val
                rep stosw
            end asm
        case 4
            asm
                cld
                mov rdi, fa_dst
                mov rcx, qword ptr fa_cnt
                mov eax, dword ptr fa_val
                rep stosd
            end asm
        case 8
            asm
                cld
                mov rdi, fa_dst
                mov rcx, qword ptr fa_cnt
                mov rax, qword ptr fa_val
                rep stosq
            end asm
        case else
            fillArrayL(fa_dst,fa_cnt,fa_val)
        end select
#else
        ' pointers and Integer are 4 bytes here; there is no 8-byte
        ' string store, so 8-byte elements use the fallback
        select case sizeof(*fa_dst)
        case 1
            asm
                cld                             ' store upwards
                mov edi, [fa_dst]               ' edi = destination
                mov ecx, dword ptr [fa_cnt]     ' ecx = element count
                mov al, byte ptr [fa_val]
                rep stosb
            end asm
        case 2
            asm
                cld
                mov edi, [fa_dst]
                mov ecx, dword ptr [fa_cnt]
                mov ax, word ptr [fa_val]
                rep stosw
            end asm
        case 4
            asm
                cld
                mov edi, [fa_dst]
                mov ecx, dword ptr [fa_cnt]
                mov eax, dword ptr [fa_val]
                rep stosd
            end asm
        case else
            fillArrayL(fa_dst,fa_cnt,fa_val)
        end select
#endif
    end if
end scope
#endmacro
' Benchmark driver: times four ways of writing every element of a
' sz_i x sz_i x sz_i Double array (clear, fillArray, clear again, and a
' plain triple loop) and prints one randomly chosen element after each.
dim as integer sz_i = 470
dim as integer sz = sz_i * sz_i * sz_i
redim as double array(sz_i-1,sz_i-1,sz_i-1)
dim as double ptr p = @array(0,0,0)
dim as double v = -132.94
if (p = 0) then
    print "Error: Null pointer.": sleep: end
end if
dim as double t1,t2

' Prints the section separator and title, then starts the stopwatch.
#macro beginRun(title)
print "--------------------"
print title
t1 = timer
#endmacro

' Stops the stopwatch, prints the elapsed time and a spot-check element.
#macro endRun(elems)
t2 = timer
print str(t2-t1);" seconds"
print "value test: ";p[int(rnd*elems)]
#endmacro

print "array size = ";str(int(sizeof(*p)*sz/2^20+0.5));"MiB"
print "value to set: ";str(v)

beginRun("initialization (clear)")
clear(*p,0,sz*sizeof(*p))
endRun(sz)

beginRun("ASM")
fillArray(p,sz,v)
endRun(sz)

beginRun("clearing")
clear(*p,0,sz*sizeof(*p))
endRun(sz)

beginRun("loop")
for a as integer = 0 to sz_i-1
    for b as integer = 0 to sz_i-1
        for c as integer = 0 to sz_i-1
            array(a,b,c) = v
        next c
    next b
next a
endRun(sz)

sleep