Squares
Re: Squares
@Albert.
Until you get an algorithm working correctly, and find a real use for it, speed is not important.
To access strings Mid(,,) will be slow; string[index] will be faster for single bytes; pointers will be fastest for 1 to 8 bytes.
When you convert 4 random bits to bcd = digits 0 to 9, what do you do with the hexadecimal values A to F? What you do will introduce a statistical distribution error so the bcd will not be random.
Until you get an algorithm working correctly, and find a real use for it, speed is not important.
To access strings Mid(,,) will be slow; string[index] will be faster for single bytes; pointers will be fastest for 1 to 8 bytes.
When you convert 4 random bits to bcd = digits 0 to 9, what do you do with the hexadecimal values A to F? What you do will introduce a statistical distribution error so the bcd will not be random.
Re: Squares
@Richard
Hi, Richard!! , i haven't posted in a few days..
Question:
If i have a file that's 100,000 KB , and i compress 16 bits to 13 bits, whats the file output size?
Would it be 100,000 \ ( 16 \ 13 ) ?
Can you give me a formula?
Hi, Richard!! , i haven't posted in a few days..
Question:
If i have a file that's 100,000 KB , and i compress 16 bits to 13 bits, whats the file output size?
Would it be 100,000 \ ( 16 \ 13 ) ?
Can you give me a formula?
Re: Squares
i think this will work
13/16=0.8125
100.000Kb * 0.8125=81.250Kb
oh yeah you calculate it right :D
13/16=0.8125
100.000Kb * 0.8125=81.250Kb
oh yeah you calculate it right :D
Data Compressor
@Dodicat
I got a data compressor working... It can't decompress yet , it just compresses so far..
For different file types , you need to adjust the "size" var ; size = 256 works for *.exe files.
And you can compress the output more , with a normal zip program..
See the Compress() function to set the size var.
Thanks for the load file code.. I incorporated it into the program..
I got a data compressor working... It can't decompress yet , it just compresses so far..
For different file types , you need to adjust the "size" var ; size = 256 works for *.exe files.
And you can compress the output more , with a normal zip program..
See the Compress() function to set the size var.
Thanks for the load file code.. I incorporated it into the program..
Code: Select all
#define WIN_INCLUDEALL
#Include "windows.bi"
#Include "File.bi"
declare sub getfilename()
declare sub compress()
declare sub decompress()
declare function MultiBase_Number( byval Number as ulongint , byval Number_Base as ulongint ) as string
dim shared as string file , extension , file_data , bytes , file_name
dim as ubyte value1
Dim As MSG msg
Dim shared As HWND hWnd
screen 19
getfilename()
if FileExists(file) then
for a as ulongint = len(file)-1 to 0 step -1
bytes=chr(file[a])
if bytes = "\" then file_name = mid(file,a+2):exit for
next
print file_name
'Dodicat LoadFile
file_data = ""
var f = freefile
Open file For Binary Access Read As #f
file_data = String(Lof(f), 0)
Get #f, , file_data
Close #f
if extension = ".BDC" then
print
print "DECOMPRESSING:" , len(file_data)
decompress()
else
print
print "COMPRESSING:" , len(file_data)
'extension=".BDC"
compress()
end if
end if
'===============================================================================
'===============================================================================
'END Program
'===============================================================================
'===============================================================================
END
'===============================================================================
'==============================================================================
'===============================================================================
'===============================================================================
sub compress()
dim as double time1 , time2
time1 = timer
dim as longint size = 256
print "Turning file into Hexadecimal"
dim as string binari = ""
dim as string bin_in
dim as string b1,b2
for a as longint = 1 to len(file_data) step 2
b1 = right("00000000" + bin(asc(mid(file_data,a+0,1))),8)
b2 = right("00000000" + bin(asc(mid(file_data,a+1,1))),8)
bin_in = hex(valulng("&B"+b1+b2))
binari+=bin_in
next
print "Done turning file into Hexadecimal"
print "Begin compressing"
redim as string dict(0) : dict(0) = left(binari,size)
dim as string val_in
dim as ubyte toggle
dim as string sub_out = ""
dim as string ascii
for a as longint = 1 to len(binari) step size
val_in = mid(binari,a,size)
toggle = 0
for b as longint = 0 to ubound(dict)
if val_in = dict(b) then
ascii = right(string(size*8,"0") + bin(b),size*8)
sub_out+=ascii
toggle = 1
exit for
end if
next
if toggle = 0 then
redim preserve dict(ubound(dict)+1) : dict(ubound(dict)) = val_in
ascii = right(string(size*8,"0") + bin(ubound(dict)),size*8)
sub_out+=ascii
end if
next
print "Done compresing"
print
dim as longint bits = len(bin(ubound(dict)))
print "Output Bits = " ; bits
print
print "Building putput"
dim as string mid_out=""
for a as longint = 1 to len(sub_out) step size*8
ascii = mid(sub_out,a,size*8)
ascii = right(ascii,bits)
mid_out+= ascii
next
dim as string outputs=""
for a as longint = 1 to len(mid_out) step 8
ascii = mid(mid_out,a,8)
outputs+=chr(val("&B"+ascii))
next
dim as string dict_out = str(size) + "_" + str(bits) + "_"
for a as longint = 0 to ubound(dict)
ascii = dict(a)
for b as longint = 1 to len(ascii) step 2
dict_out+= chr(val("&H"+mid(ascii,b,2)))
next
next
print "Done building output"
print
time2 = timer
print "Dictionary Elements = " ; ubound(dict) , "Dictionary Size = " ; len(dict_out)
print
print "Output Size = " ; len(outputs) + len(dict_out)
print
print "Time = " ; time2 - time1
print
print file_name ; " " ; len(file_data)
print
'sleep
'sleep
open file_name+".ZZZ" for output as #1
print #1, dict_out + "!~END~!"
print #1,outputs
close #1
sleep
end sub
'===============================================================================
'===============================================================================
'Decompress
'===============================================================================
'===============================================================================
sub decompress()
'not written yet
end sub
'===============================================================================
'===============================================================================
'Get File Name
'===============================================================================
'===============================================================================
sub getfilename()
dim ofn as OPENFILENAME
dim filename as zstring * MAX_PATH+1
with ofn
.lStructSize = sizeof( OPENFILENAME )
.hwndOwner = hWnd
.hInstance = GetModuleHandle( NULL )
.lpstrFilter = strptr( !"All Files, (*.*)\0*.*\0\0" )
.lpstrCustomFilter = NULL
.nMaxCustFilter = 0
.nFilterIndex = 1
.lpstrFile = @filename
.nMaxFile = sizeof( filename )
.lpstrFileTitle = NULL
.nMaxFileTitle = 0
.lpstrInitialDir = NULL
'.lpstrTitle = @"File Open Test"
.lpstrTitle = @"File to Compress/Decompress"
.Flags = OFN_EXPLORER 'or OFN_FILEMUSTEXIST or OFN_PATHMUSTEXIST
.nFileOffset = 0
.nFileExtension = 0
.lpstrDefExt = NULL
.lCustData = 0
.lpfnHook = NULL
.lpTemplateName = NULL
end with
if( GetOpenFileName( @ofn ) = FALSE ) then
file = ""
return
else
file = filename
extension = right$(filename,4)
end if
end sub
Last edited by albert on Mar 07, 2019 3:03, edited 6 times in total.
Re: Squares
I like your confidence. But how do you know it is not just a hash function?Albert wrote:I got a data compressor working... It can't decompress yet , it just compresses so far..
Re: Squares
@Richard
My compressor:
It converts the file to hex and then steps through it by a size =??
it then checks the dictionary to see if its already in there.
If it's in the dictionary , then it records the binary value of the dict(element)
if its not in the dictionary , then it redims the dictionary+1 , and adds the value to the dictionary , then it records the binary value of the dict(element)
After the compression routine, when you know the ubound of the dictionary , var "bits"
it then trims the leading 0's off the recorded value according to the "bits" var , and then converts the binary recorded value into chrs(0 to 255).
It then turns the hex dictionary(elements) into chrs(0 to 255).
Then it prints the result , to a file with a .ZZZ extension.
You can load a *.ZZZ file , and it will compress it a little more , up to a point..
You can load a *.ZIP file and it will knock off a few hundred bytes.
You can ZIP a *.ZZZ file an it will knock off several kilobytes.
( Now i have to work on the de-compressor. )
My compressor:
It converts the file to hex and then steps through it by a size =??
it then checks the dictionary to see if its already in there.
If it's in the dictionary , then it records the binary value of the dict(element)
if its not in the dictionary , then it redims the dictionary+1 , and adds the value to the dictionary , then it records the binary value of the dict(element)
After the compression routine, when you know the ubound of the dictionary , var "bits"
it then trims the leading 0's off the recorded value according to the "bits" var , and then converts the binary recorded value into chrs(0 to 255).
It then turns the hex dictionary(elements) into chrs(0 to 255).
Then it prints the result , to a file with a .ZZZ extension.
You can load a *.ZZZ file , and it will compress it a little more , up to a point..
You can load a *.ZIP file and it will knock off a few hundred bytes.
You can ZIP a *.ZZZ file an it will knock off several kilobytes.
( Now i have to work on the de-compressor. )
Last edited by albert on Mar 06, 2019 21:14, edited 2 times in total.
Re: Squares
I Edited the Compressor above... And corrected some errors with the dict( elements ).
Now it starts the routine with the dict( 0 ) value preset.
Now it starts the routine with the dict( 0 ) value preset.
Re: Squares
To decompress it:
I think i need to turn the dict_out , from ascii , back into hex.
Then turn the outputs , back into binary.
Then create an output where the binary outputs ( dict( element pointers) ) turn back into hex dict values.
Then turn the output hex , back into ascii chars; for the final decompressed output.
The problem is ; it takes so long to compress.
I have to figure out how to speed it up.
A mega byte file , takes several minutes to compress..
I think i need to turn the dict_out , from ascii , back into hex.
Then turn the outputs , back into binary.
Then create an output where the binary outputs ( dict( element pointers) ) turn back into hex dict values.
Then turn the output hex , back into ascii chars; for the final decompressed output.
The problem is ; it takes so long to compress.
I have to figure out how to speed it up.
A mega byte file , takes several minutes to compress..
Re: Squares
@Richard
Here's an explanation of my old data compressor idea, i came up with back in 2005.
I still can't fathom how to rebuild the data from the outputs....
Here's an explanation of my old data compressor idea, i came up with back in 2005.
I still can't fathom how to rebuild the data from the outputs....
Code: Select all
data bit grid..
1 0 1 > 2
1 1 0 > 2
0 1 1 > 2
v v v
2 2 2
0 2 3 0 1 bottom left & up diag
1 1 2 1 1 top left across & down diag
total = 2+2+2 + 2+2+2 + 0+2+3+0+1 + 1+1+2+1+1
bits = 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 = 32 bits
grid = 9 bits in , 32 bits out.
When the grid gets up above a certain level , like 256 x 256 bits = 65,536 bits.
Then the outputs are less than the number of bits in the grid.
Then it compresses..
a 256 x 256 grid = 256 bytes + 256 bytes + 511 bytes + 511 bytes = 1,534 bytes , compared to the grids 8,192 bytes.
The problem i`m having is fathoming how to address and utilize the diagonals? and jimmy the bits around till it equals out..
Re: Squares
@Albert.
Data compression is trivial. You and many others have imaginative ideas for data compression. It is reversing the compression to regenerate the original data that is difficult. You should not post compression code unless you can reliably regenerate the data.
Try writing some code that looks for the longest repeated string in a file. Count those repeats.
Then look for the next longest repeating string, ignoring those you identified earlier.
Continue the process until you have a dictionary of all repeated strings.
The best data compressors take a long time to compress data because they are looking for patterns in the data that can be described very simply and so recorded in a smaller space.
Data compression is trivial. You and many others have imaginative ideas for data compression. It is reversing the compression to regenerate the original data that is difficult. You should not post compression code unless you can reliably regenerate the data.
Try writing some code that looks for the longest repeated string in a file. Count those repeats.
Then look for the next longest repeating string, ignoring those you identified earlier.
Continue the process until you have a dictionary of all repeated strings.
The best data compressors take a long time to compress data because they are looking for patterns in the data that can be described very simply and so recorded in a smaller space.
Odometer
I wrote a Tri-nary odometer.. values = 0 , 1 , 2
You can adjust it to any base..
You can adjust it to any base..
Code: Select all
screen 19
dim as longint size = 8 'set size of odometer here'
dim as ubyte bases = 3 'set base here 2 to 256
dim as string num = string(size,chr(0))
dim as longint count=-1
do
count+=1
num[size-1]+=1
if num[size-1] = bases then
for a as longint = size-1 to 0 step -1
if num[a] = bases then num[a] = 0 : num[a-1]+=1
next
end if
print num , count
sleep 10
if num = string(size,chr(bases-1)) then exit do
loop until inkey = chr(27)
'===============================================================================
'===============================================================================
print
print num
print"Done" , count ; " elements" , "bits = " ; len(bin(count))
sleep
end
Re: Squares
@Richard
Is there a formula to figure out how many bits are set to 1 in a binary number??
Without looping through the number.
I need to determine how many bits are set in a number , and what all numbers , have that number of bits set.
Is there a formula to figure out how many bits are set to 1 in a binary number??
Without looping through the number.
I need to determine how many bits are set in a number , and what all numbers , have that number of bits set.
Re: Squares
It is called the Hamming weight (https://en.wikipedia.org/wiki/Hamming_weight), apparently some processors have a instruction for that e.g. POPCNT.albert wrote:Is there a formula to figure out how many bits are set to 1 in a binary number??
Re: Squares
While you wait for a formula Albert, you could use this:
Code: Select all
Function TALLY(SomeString As String,PartString As String) As Long
Dim As Long LenP=Len(PartString),count
Dim As Long position=Instr(SomeString,PartString)
If position=0 Then Return 0
While position>0
count+=1
position=Instr(position+LenP,SomeString,PartString)
Wend
Return count
End Function
for n as long=1 to 1000
dim as string i=bin(n)
if tally(i,"1")=5 then
print i;tab(30);n
end if
next n
sleep
Re: Squares
Given k set bits, the smallest number with k bits set is (2^k)-1, the biggest number with k bits set is decided by the size of your representation of a binary number.
It requires factorials to count the number of different permutations of k set bits in an n place binary number.
Count = n! / ( k! * ( n – k)! ); but that probably does not help you.
To generate all numbers with k bits set requires shuffling all k bits from the RHS to the LHS using a permutation program. Shuffling more than 20 bits can get very slow and inefficient.
The algorithm to count bits will depend on how you represent the binary number.
As advised by @badidea, popcnt is the assembly code instruction for intel PCs.
A fast itterative equivalent for integer variables is;
It requires factorials to count the number of different permutations of k set bits in an n place binary number.
Count = n! / ( k! * ( n – k)! ); but that probably does not help you.
To generate all numbers with k bits set requires shuffling all k bits from the RHS to the LHS using a permutation program. Shuffling more than 20 bits can get very slow and inefficient.
The algorithm to count bits will depend on how you represent the binary number.
As advised by @badidea, popcnt is the assembly code instruction for intel PCs.
A fast itterative equivalent for integer variables is;
Code: Select all
Function pop_cnt( Byval x As Ulongint ) As Integer
Dim As Integer count = 0
Do While x
If x And 1 Then count += 1
If x And 2 Then count += 1
If x And 4 Then count += 1
If x And 8 Then count += 1
x = x Shr 4
Loop
Return count
End Function
For i As Integer = 0 To 20
Print i, pop_cnt( i )
Next i
Sleep