I need to store a very long list of numbers, each of them quite long, which would produce a very big file (in case you are curious, these are the first random values obtained for each Randomize seed). I therefore want the numbers compacted, but the list must also stay readable as it is. Plain file compression is not an option, so I am counting on a change of number representation instead.
* For instance 2^22 = 4194304 ---> readability perfect, and 3 digits saved.
On the other hand: 126 = 2^1+2^2+2^3+...+2^6. This is rather long, and we are losing space(!) However, we could choose a mixed approach where we use the base-n representation when it gives an advantage and stick to the original notation otherwise, so that we are sure either to save space or to lose nothing.
The second point we can work on is the number notation. Everyone knows about hexadecimal, for instance. It can also help save space, since *15_dec = F_hex*, which means that one digit is enough where decimal may need two over a given range of numbers (0-15 here). To take advantage of this I propose using a wider notation base, such as the base 62 that I use in the code I am currently writing for this purpose - see below. Why base 62? Just as a showcase, because each value from 0 to 61 can then be written as a single character: a digit, a lowercase letter or an uppercase letter... (After all, consider also that decimal is itself a compaction of binary.)
* For instance, 77 777 in decimal becomes KET in base-62 notation. A great saving of space again.
Combining the two advantages, I am expecting some gain... But I would like to know whether anyone here has heard of a better idea for such a task.
Otherwise, what would be the most compact choice of base in general, either for the representation as powers or for the notation? I'll try to formulate it better:
- There are many possible bases for writing N = sum( a*base^b )
- There are many possible bases for number notation: octal, decimal, hexadecimal, base 62
==> Is there a best choice among all the possibilities?
==> Is there a best choice with regard to performance (so that the representation is computed fast)?
Attached here is a first attempt at playing with these representations. Some clean-up is required, but it should work as expected for demo purposes.
Code: Select all
' .---.
' | 0 |
' .---.
' 64-bit integer limit constants.
'
' NOTE(review): 2^64 is one more than the largest value a ULongInt can hold,
' so U_MAXULONGINT cannot actually contain the value its initializer asks for.
' The declaration is kept untouched only so that existing references still
' compile; do not rely on its value.
Const As ULongInt U_MAXULONGINT = 2^64
' 2^63: one more than the largest LongInt (exactly representable as a Double,
' so the floating-point "^" operator is harmless here).
Const As ULongInt U_MAXLONGINT = 2^63
' Largest ULongInt (2^64 - 1), written as an exact integer literal instead of
' being derived from the unrepresentable 2^64.
Const As ULongInt _MAXULONGINT = &hFFFFFFFFFFFFFFFFull
' Largest LongInt (2^63 - 1). The original derived it from U_MAXULONGINT
' (2^64), which is the wrong constant and out of range for a LongInt.
Const As LongInt _MAXLONGINT = &h7FFFFFFFFFFFFFFFll
' .---.
' | 1 |
' .---.
' BASEnCOMPACT holds one unsigned 64-bit number together with a base and
' exposes several textual views of that number:
'   - GETTEXT        : "<base>^<exponent>[+<rest>]"
'   - REPRESENTATION : the per-exponent factors of the number in that base
'   - GETFACTOR      : one cached factor (filled by REPRESENTATION/GreaterExp)
Type BASEnCOMPACT
    ' --- construction -------------------------------------------------------
    Declare Constructor(ByVal BASE_ As UByte = 2, NUMBER_ As ULongInt = 0)

    ' --- base and number accessors -------------------------------------------
    Declare Property BSE() As UByte ''*
    Declare Property BSE(As UByte) ''*
    Declare Property NUMBER() As ULongInt
    Declare Property NUMBER(As ULongInt)

    ' --- textual views ---------------------------------------------------------
    Declare Property GETTEXT() As String
    Declare Property REPRESENTATION() As String
    Declare Property GETFACTOR(ByVal Index As UByte) As UByte

    ' --- decomposition helpers -------------------------------------------------
    Declare Function GreaterExp() As LongInt
    Declare Function BaseExp(ByRef Exp_ As LongInt, _
                             ByVal Num_ As ULongInt=1) As ULongInt
    Declare Function BaseRep(Rep_() As UByte, _
                             ByVal Num_ As ULongInt=1) As ULongInt

  Private:
    As UByte _BASE = 2                 ' base used for the decomposition
    As ULongInt _NUMBER = 0            ' the stored number
    As UByte _REPRESENTATION(63)       ' factor per exponent, slots 0..63
    As LongInt _EXPONENT = 0           ' exponent written by BaseExp (-1 for number 0)
    As ULongInt _ADDVALUE = 0          ' number minus base^exponent
    As String _TEXT = "2E0+A"          ' cached text; overwritten by constructor/NUMBER setter
    As LongInt Ptr _EXPONENTLIST       ' not referenced by any method shown here
    As ULongInt Ptr _FACTORLIST        ' not referenced by any method shown here
End Type 'BASEnCOMPACT
' Builds an instance for NUMBER_ decomposed against base BASE_.
'
' BASE_   : decomposition base. Values below 2 cannot be used by the divide
'           loops (0 divides by zero, 1 never shrinks the number), so they
'           fall back to base 2.
' NUMBER_ : number to store. 0 is stored with exponent -1, rest 0, text "0".
'
' For any other number the exponent is obtained from BaseExp and the rest
' (number - base^exponent) is computed with integer arithmetic only: the "^"
' operator works in Double and silently drops low bits above 2^53.
Constructor BASEnCOMPACT(ByVal BASE_ As UByte = 2, ByVal NUMBER_ As ULongInt = 0)
    If BASE_ < 2 Then
        This._BASE = 2
    Else
        This._BASE = BASE_
    End If
    This._NUMBER = NUMBER_

    For slot As Integer = 0 To 63
        This._REPRESENTATION(slot) = 0
    Next slot

    If NUMBER_ = 0 Then
        This._EXPONENT = -1
        This._ADDVALUE = 0
        This._TEXT = "0"
    Else
        This.BaseExp(This._EXPONENT, NUMBER_)
        ' base^exponent never exceeds NUMBER_, so this product cannot overflow.
        Dim As ULongInt power = 1
        For k As LongInt = 1 To This._EXPONENT
            power *= This._BASE
        Next k
        This._ADDVALUE = NUMBER_ - power
        This._TEXT = This.GETTEXT
    End If
End Constructor 'BASEnCOMPACT(UBYTE[=2],ULONGINT[=0])
' Read accessor: the base currently used for the decomposition.
Property BASEnCOMPACT.BSE() As UByte
    Property = This._BASE
End Property 'Get UBYTE:=BASEnCOMPACT.BSE()
' Write accessor: changes the base and recomputes the cached exponent, rest
' and text by re-assigning the stored number to the NUMBER property.
'
' setBASE : new base. Values below 2 are ignored and the current base is
'           kept, because the recomputation divides repeatedly by the base
'           (0 would divide by zero, 1 would never terminate).
Property BASEnCOMPACT.BSE(ByVal setBASE As UByte)
    If setBASE < 2 Then Exit Property
    This._BASE = setBASE
    This.NUMBER = This._NUMBER
End Property 'Set BASEnCOMPACT.BSE()
' Read accessor: the number currently stored in the instance.
Property BASEnCOMPACT.NUMBER() As ULongInt
    Property = This._NUMBER
End Property 'Get ULONGINT:=BASEnCOMPACT.NUMBER()
' Write accessor: stores setNUMBER and refreshes the cached exponent, rest
' and text for the current base.
'
' setNUMBER : new number. 0 is stored with exponent -1, rest 0 and text "0".
'
' The rest (number - base^exponent) is computed with integer arithmetic only:
' the "^" operator works in Double and drops low bits above 2^53, which gave
' a wrong rest for large 64-bit values.
Property BASEnCOMPACT.NUMBER(ByVal setNUMBER As ULongInt)
    This._NUMBER = setNUMBER

    If setNUMBER = 0 Then
        This._EXPONENT = -1
        This._ADDVALUE = 0
        This._TEXT = "0"
        Exit Property
    End If

    This.BaseExp(This._EXPONENT, setNUMBER)
    ' base^exponent never exceeds setNUMBER, so this product cannot overflow.
    Dim As ULongInt power = 1
    For k As LongInt = 1 To This._EXPONENT
        power *= This._BASE
    Next k
    This._ADDVALUE = setNUMBER - power
    This._TEXT = This.GETTEXT
End Property 'Set ULONGINT:=BASEnCOMPACT.NUMBER()
' Returns the "<base>^<exponent>[+<rest>]" text for the stored number.
'
' The "+<rest>" part is omitted when the rest is 0. The number 0 has no such
' form (it is stored with exponent -1), so it is returned as "0" - the same
' text the constructor and the NUMBER setter cache for it - instead of the
' misleading "<base>^-1".
Property BASEnCOMPACT.GETTEXT() As String
    If This._NUMBER = 0 Then Return "0"

    Dim As String S = Str(This._BASE) & "^" & Str(This._EXPONENT)
    If This._ADDVALUE <> 0 Then
        S &= "+" & Str(This._ADDVALUE)
    End If
    Return S
End Property 'Get STRING:=BASEnCOMPACT.GETTEXT()
' Recomputes the factor table for the stored number and renders it as
'   b<base><+<factor>*^<exponent>+ ... >
' listing only the exponents whose factor is non-zero, lowest exponent first.
' Every listed term is followed by "+", including the last one.
Property BASEnCOMPACT.REPRESENTATION() As String
    Dim As String rendered = "b" & This._BASE
    rendered &= "<+"

    ' Start from an empty table, then let BaseRep fill it.
    Dim As Integer slot
    For slot = 0 To 63
        This._REPRESENTATION(slot) = 0
    Next slot
    This.BaseRep(This._REPRESENTATION(), This._NUMBER)

    For slot = 0 To 63
        If This._REPRESENTATION(slot) = 0 Then Continue For
        rendered &= This._REPRESENTATION(slot) & "*" & "^" & slot & "+"
    Next slot

    Return rendered & ">"
End Property 'Get BASEnCOMPACT.REPRESENTATION()
' Returns the cached factor stored for exponent Index.
'
' The cache is only refreshed by REPRESENTATION and GreaterExp, so one of
' them must have run for the current number/base before reading factors.
'
' Index : exponent slot. The table only has slots 0..63 while a UByte can
'         go up to 255; out-of-range slots yield 0 instead of reading past
'         the end of the array.
Property BASEnCOMPACT.GETFACTOR(ByVal Index As Ubyte) As UByte
    If Index > 63 Then Return 0
    Return This._REPRESENTATION(Index)
End Property '
' Refreshes the factor table through BaseRep and returns the highest exponent
' slot holding a non-zero factor. Returns 0 when no slot above 0 is used.
Function BASEnCOMPACT.GreaterExp() As LongInt
    This.BaseRep(This._REPRESENTATION(), This._NUMBER)

    ' Scan downwards; slot 0 is the fallback answer and needs no test.
    For slot As Integer = 63 To 1 Step -1
        If This._REPRESENTATION(slot) Then Return slot
    Next slot
    Return 0
End Function 'LONGINT:=BASEnCOMPACT.GreaterExp()
' Finds the largest exponent e such that base^e <= Num_.
'
' Exp_ : (out) receives that exponent; 0 when Num_ is smaller than the base.
' Num_ : number to analyse.
' Returns the rest Num_ - base^Exp_.
'
' Fixes over the previous version:
'   - the rest is taken from the ORIGINAL number; it used to be computed from
'     the value left after the divisions (the leading digit), which is wrong;
'   - the power is accumulated as an integer, because the "^" operator works
'     in Double and loses low bits above 2^53;
'   - Num_ = 0 returns 0 instead of underflowing, and a base below 2 returns
'     0 with Exp_ = 0 instead of dividing by zero / looping forever.
Function BASEnCOMPACT.BaseExp(ByRef Exp_ As LongInt, _
                              ByVal Num_ As ULongInt=1) As ULongInt
    Exp_ = 0
    If This._BASE < 2 OrElse Num_ = 0 Then Return 0

    Dim As ULongInt shrinking = Num_
    Dim As ULongInt power = 1          ' base^Exp_, always <= Num_
    While shrinking >= This._BASE
        Exp_ += 1
        power *= This._BASE
        shrinking \= This._BASE
    Wend 'shrinking>=This._BASE

    Return Num_ - power
End Function 'ULONGINT:=BASEnCOMPACT.BaseExp(REF_UINTEGER,ULONGINT[=1])
' Writes the digits of Num_ in the current base into Rep_: Rep_(e) receives
' the factor of base^e, so Num_ = sum(Rep_(e) * base^e).
'
' Rep_ : (out) factor table; slots 0..63 are cleared first, as before.
' Num_ : number to decompose.
' Always returns 0 (the return value carries no information).
'
' Fixes over the previous version:
'   - a number smaller than the base never entered the loop, leaving every
'     slot at 0 (e.g. 5 in base 62 came out as digit 0);
'   - the remainder was computed with the Double "^" operator, losing low
'     bits above 2^53;
'   - each digit was built by repeated "+1" with a full rescan; plain
'     divide/modulo extraction gives the same digits in one pass;
'   - a base below 2 now leaves the table cleared instead of dividing by
'     zero / looping forever.
Function BASEnCOMPACT.BaseRep(Rep_() As UByte, _
                              ByVal Num_ As ULongInt=1) As ULongInt
    For slot As Integer = 0 To 63
        Rep_(slot) = 0
    Next slot

    If This._BASE < 2 Then Return 0

    Dim As ULongInt remaining = Num_
    Dim As Integer position = 0
    ' A 64-bit value needs at most 64 digits in base 2, fewer in larger bases.
    While remaining > 0 AndAlso position <= 63
        Rep_(position) = remaining Mod This._BASE
        remaining \= This._BASE
        position += 1
    Wend

    Return 0
End Function 'ULONGINT:=BASEnCOMPACT.BaseRep(REF_UBYTE(),ULONGINT[=1])
' .---.
' | 2 |
' .---.
' Renders the number held by Cpn as a base-62 string, most significant
' character first.
'
' Cpn is received by value, so switching its base to 62 here does not touch
' the caller's object.
'
' Symbol table (factor -> character):
'    0..9  -> "0".."9"
'   10..23 -> "A".."N"
'   24     -> "@"
'   25..35 -> "P".."Z"
'   36..61 -> "a".."z"
' NOTE(review): 24 maps to "@" rather than "O" - presumably to avoid
' confusion with the digit "0"; confirm this is intended.
' A factor above 61 contributes no character.
Function Base62ToNotation(ByVal Cpn As BASEnCOMPACT) As String
    Const SYMBOLS = "0123456789ABCDEFGHIJKLMN@PQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

    Dim As UByte callerBase = Cpn.BSE
    If callerBase <> 62 Then Cpn.BSE = 62

    Dim As String encoded = ""
    Dim As Integer slot = Cpn.GreaterExp()
    While slot >= 0
        ' Mid is 1-based and yields "" past the end of SYMBOLS.
        encoded &= Mid(SYMBOLS, Cpn.GETFACTOR(slot) + 1, 1)
        slot -= 1
    Wend

    Cpn.BSE = callerBase
    Return encoded
End Function 'STRING:=Base62ToNotation(STRING)
' .---.
' | 3 |
' .---.
' Maps a seed to the first value of the random sequence it starts: reseeds
' the generator with Seed, draws one Rnd() value and scales it by 1e+17.
' The scaled Double is converted to ULongInt by the function result.
Function SeedMapping(byval Seed as ULongInt) As ULongInt
    Randomize Seed
    Dim As Double firstDraw = Rnd()
    Function = firstDraw * 1e+17
End Function 'ULONGINT:=SeedMapping(ULONGINT)
' .---.
' | 4 |
' .---.
'------------------------------------------------------------
' DEMO
'------------------------------------------------------------
' Interactive demo: shows the three representations of the current number,
' then asks for a new one. Typing "q" (any case) ends the loop; the typed
' text is converted and assigned to the number before the quit test, exactly
' as the loop is ordered.
Dim As String NUM
Dim As BASEnCOMPACT Cpn = BASEnCOMPACT(2, 77777)

Do
    Cls
    Locate 1,1

    ' Header line and its underline.
    Print "DECIMAL NUMBER " ; Cpn.NUMBER; " REPRESENTATION BASE " ; Cpn.BSE
    Print "--------------"; " -------------------"
    Print

    ' View 1: base^exponent+rest
    Print "REPRESENTATION 1 " ; Cpn.GETTEXT
    Print "----------------"
    Print

    ' View 2: factor table
    Print "REPRESENTATION 2"
    Print "----------------"
    Print Cpn.REPRESENTATION
    Print

    ' View 3: base-62 notation
    Print "REPRESENTATION 3 (BASE 62)"
    Print "--------------------------"
    Print Base62ToNotation(Cpn)

    Locate 24, 10
    Input "New decimal number OR [Q] to quit :"; NUM
    Cpn.NUMBER = CUlngInt(NUM)
Loop Until LCase (NUM) = "q"
'------------------------------------------------------------