Hi folks,
I update and bugfixed the prime program (as I mentioned in last post) - but something went wrong with attaching the file. So I repost it here.
rgds
cobold
include c:\masm32\include\masm32rt.inc
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
DumpArray PROTO :DWORD ; count and/or show/store the prime numbers found
.data?
pMem dd ? ; pointer to array of numbers[0..cByt]
cByt dd ? ; find primes up to this value
primesfound dd ? ; number of primes between [0..cByt]
ms_init dd ? ; time needed to initialize array
ms_sieve dd ? ; time needed for sieving
ms_count dd ? ; .....for counting and/or saving primes
.data
startmsg db 13,10,"Sieve of Eratosthenes - find primes up to: ",0
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««
.code
start:
print addr startmsg
mov cByt,uval(input()) ; user enters max_number (unsigned, because
; there are no negative primes! :-)
; thanks to jj2007
.if cByt==0 ; if user enters nothing or NULL
mov cByt,500000 ; we take 500000 as default
.endif
add cByt,1 ; array starts with [0], so add 1 to cByt
; otherwise, we search only up to cByt - 1
invoke GlobalAlloc,GMEM_FIXED,cByt ; allocate cByt bytes of memory
mov pMem,eax ; store array-address to pMem
.if eax==NULL
print "Not enough memory!"
exit
.endif
; -------------------------------------------------------------------------
print "Looking for primes from 1 to "
dec cByt
print ustr$(cByt), " - please wait...",13,10,13,10
inc cByt
;---------------------------------------------------------------
; Start timing: initialize Array
;---------------------------------------------------------------
invoke GetTickCount
mov ms_init,eax
@@:
invoke GetTickCount ; wait until next tick
.if ms_init==eax ; (timing is more reliable, because
jmp @B ; we start _exactly_ at the beginning
.endif ; of a new tick)
mov ms_init,eax
;---------------------------------------------------------------
; initialize Array
;---------------------------------------------------------------
mov edx,0
mov esi,pMem
align 4
.repeat
mov byte ptr[esi+edx],0
add edx,1
.until edx==cByt
invoke GetTickCount
sub eax,ms_init
mov ms_init,eax
print ustr$(ms_init)," ms to initialize array",13,10
;---------------------------------------------------------------
; Start timing: sieving
;---------------------------------------------------------------
invoke GetTickCount
mov ms_sieve,eax
@@:
invoke GetTickCount
.if ms_sieve==eax
jmp @B
.endif
mov ms_sieve,eax
;---------------------------------------------------------------
; sieve primes from 0 .. cByt+1
;---------------------------------------------------------------
mov ebx,2 ; ebx = i
mov eax,2
mul eax ; eax = i*i
align 4
.while eax < cByt ; while i*i < N
.if byte ptr[esi+ebx]==0 ; if not deleted[i] then
mov edx,eax ; ebx = j
align 4
.while edx < cByt ; for j = i*i to N
mov byte ptr[esi+edx],1 ; deleted[j] = true
add edx,ebx
.endw
.endif
inc ebx ; i = i+1
mov eax,ebx
mul eax ; eax = i*i
.endw
;---------------------------------------------------------------
invoke GetTickCount
sub eax,ms_sieve
mov ms_sieve,eax
print ustr$(ms_sieve)," ms to sieve the primes",13,10
invoke GetTickCount
mov ms_count,eax
invoke DumpArray,esi ; process bytearray, inc primesfound for every
; number that is not deleted
invoke GlobalFree,pMem
.if eax!=NULL
push eax
print chr$(13,10)
pop eax
print hex$(eax)
print " - ERROR GlobalFree"
.endif
exit
;---------------------------------------------------------------
align 4
DumpArray proc pArr:DWORD
LOCAL hFile:DWORD
LOCAL rlen :DWORD
mov esi,pArr
mov ebx,2
mov primesfound,0
; Uncomment next block, if you want to store the primes to "primes.txt"
; -------------------------------------------------------------------------
;
; invoke CreateFile,chr$("primes.txt"),
; GENERIC_READ or GENERIC_WRITE,
; NULL,
; NULL,
; CREATE_ALWAYS,
; FILE_ATTRIBUTE_NORMAL,
; NULL
; mov hFile,eax
;
; -------------------------------------------------------------------------
.data
szPrim db 10 dup(?),0
.code
align 4
.while ebx < cByt
.if byte ptr[esi+ebx]==0
;--------------------------------------------------------
; uncomment following 2 lines to save result in "primes.txt"
;
;invoke dwtoa,ebx,addr szPrim ; convert prime to ASCII
;invoke WriteFile,hFile,ADDR szPrim,10,ADDR rlen,NULL
;
;invoke WriteFile,hFile,ebx,4,ADDR rlen,NULL ; write 'raw'
;
; or
;
;print ustr$(ebx),9
;
; for output in console
;--------------------------------------------------------
inc primesfound
.endif
add ebx,1
.endw
invoke GetTickCount
sub eax,ms_count
[attachment deleted by admin]
Primes are fun. :wink
I seem to recall some mathematical "tricks" for determining if large numbers are evenly divisible, without actually carrying out the division. Perhaps this can be a fast way of determining if some numbers are prime? Here is a list of these "tricks" up to 47: http://www.egge.net/~savory/maths1.htm
For the purpose of finding primes, all numbers with even last digits could be skipped, as well as numbers ending in 5, since those always contain factors.
For the astute mathematician, take a look at: http://www.cut-the-knot.org/blue/divisibility.shtml
Hi Mark,
thanks for the interesting link.
There's a trick to test if a number is odd/even without carrying out a division:
If you take a look at the binary representation of numbers, you'll see that bit 0 of EVERY even number = 0, therefore for every odd number bit0=1
f.ex.
;dec bin
;2 0010
;3 0011
;4 0100
;5 0101
;So instead of
xor edx,edx
mov ebx,2
mov eax,number
div ebx
.if edx == 0
; number is even
.endif
; you could:
test,number,1
.if ZERO?
; Number is even
.endif
This is a lot faster.
In my tests the extra code required to eliminate the even numbers slowed the generator down for anything less than about 10000 primes.
EDIT: removed evidence of my stupidity :green
I just tweaked this to only use TEST so its a single line test if the number is odd or not.
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
include \masm32\include\masm32rt.inc
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
comment * -----------------------------------------------------
Build this template with
"CONSOLE ASSEMBLE AND LINK"
----------------------------------------------------- *
.code
start:
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
call main
inkey
exit
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
main proc
push ebx
push esi
push edi
xor ebx, ebx
mov esi, 16
@@:
test ebx, 00000000000000000000000000000001b
jz nxt
print str$(ebx)," odd",13,10
nxt:
add ebx, 1
sub esi, 1
jnz @B
pop edi
pop esi
pop ebx
ret
main endp
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
end start
Hi,
Mark Jones' idea to skip all even numbers and Micheal's proposal:
.if (ebx & 1) && (byte ptr[esi+ebx]==0)
inspired me to optimize the sieve even further.
Well, I made several changes and now the algo performes approx. 40% - 41% faster.
Here are the results from my machine:
Algo Numbers Time-Init Time-sieve Time-count Time-total primes loopcount
sieb1 100.000.000 310 16.163 341 16.814 5.761.455 242.570.204
siebneu 100.000.000 70 9.684 280 10.034 5.761.455 96.285.414
What have I done?
To speed up initializing the array, I preset offset 0 to 3 with
mov dword ptr[esi+0],000000101h
then initialize the rest of the array DWORD by DWORD, instead of BYTE by BYTE; and also preset all even number with 1:
push cByt
shr cByt,2
add cByt,1
mov edx,4
align 4
.repeat
mov dword ptr[esi+edx],000010001h ;preset all even numbers = 1
add edx,4
.until edx >= cByt
pop cByt
RESULT: Initialization is 4,4 times faster. (70ms vs. 310 ms for 100M)
-----
The variable for the outer loop (eax == i*i) and ebx can now be set to 3, so we skip flagging 2,4,6,8 and so on, because we've done this already in init.
mov cnt_loop,0
mov ebx,3 ; ebx = i we start with 3
mov eax,3
mul eax ; eax = i*i
And in the loop itself
.while eax < cByt ; while i*i < N
;pushad
;print chr$(13,10)
;print ustr$(ebx),"=ebx",9
;print ustr$(cnt_loop),"=cnt_loop",13,10
;popad
.if byte ptr[esi+ebx]==0 ; if not deleted[i] then
mov edx,eax ; ebx = j
align 4
.while edx < cByt ; for j = i*i to N
[color=Blue] .if (edx & 1) ; if j mod 2 = 0[/color] inc cnt_loop
;pushad
;print ustr$(edx),"=edx streichen "
;print ustr$(cnt_loop),13,10
;popad
mov byte ptr[esi+edx],1 ; deleted[j] = true
.endif
add edx,ebx
.endw
.endif
[color=Blue]add ebx,2 ; now becomes 5,7, and so on, don't need 2,4,6[/color]
mov eax,ebx
mul eax ; eax = i*i
.endw
RESULT: reduces loop-count from 242.570.204 to 96.285.414 ---> 60% reduction
-----
When counting/printing:
we manually print "2", set primesfound to 1, index (ebx) to 3, then we process only odd numbers in the loop (add ebx,2).
My conclusion: The idea was simple, effect is quite considerable.
My idea broke the generator, I just failed to see it. The other method I tried was to pre-process the flags array using memfill to set all of the even elements to 1, but for some reason in my code this had no affect on the number of inner loops.
Michael,
I also had some trouble with getting the even numbers out of the loop, but finally I got it running.
(attached 'siebnew.zip' = siebneu.asm & exe)
[attachment deleted by admin]
I once created a prime tester to skip multiples of 3 also. Pseudo-code follows...
read x
if x == 2 or x == 3
print "Prime"
exit
else if x mod 2 == 0 or x mod 3 == 0
print "Composite"
exit
i = 5
k = 0
while i <= sqrt(x)
if x mod i == 0
print "Composite"
exit
i = i + 2 + k
k = k xor 2
print "Prime"
Well, okay - but the code you posted is kind of optimized "brute force" (ist uses div and mod) - and I suppose it doesn't perform as fast as a sieve.
btw. do you mind posting your source?
Hi Cobold
Only odd numbers enter the inner loop, so if you only test every second of them, the even numbers are sorted out automatically.
new inner loop:
.while edx < cByt ; for j = i*i to N
;pushad
;print ustr$(edx),"=edx streichen "
;print ustr$(cnt_loop),13,10
;popad
mov byte ptr[esi+edx],1 ; deleted[j] = true
add edx,ebx
add edx,ebx ;skip the even numbers, no test for evenness required
.endw
Yup! Good idea - save a few milliseconds!
Thanks!
Hi Cobold:
When I try to write file primes.txt we get a problem at about 65539.
I think : is 3Ah Ascii 9 plus 1. Or is something to do with a overflow problem?
65521 65537 : :65539 : =65543 : :65551 = C65557
at about 69233 it sorts it self out.
Regards herge
Hello Herge,
honestly - I cannot reproduce the failure you mentioned. I removed the comments and recompiled siebneu.asm.
For 10.000.000 it produces 'primes.txt' with the following output:
(Anyway - I repost both the source, exe and primes.txt(with primes up to 70000 otherwise attachement very large - give it a try and let me know if it works on your machine!)
C:\Asm>siebneu
Sieve of Eratosthenes - find primes up to: 10000000
Looking for primes from 1 to 10000000 - please wait...
10 ms to initialize array
841 ms to sieve the primes
661 ms to count the primes
1512 ms total time.
Number of primes found between 1 and 10000000 = 664579
8925144 loop-counter
and
12.08.2008 21:51 6.645.780 primes.txt (664579*10 - 10[2 is not written to file, so it starts with 3, containing 664578 numbers, with 10 bytes each)
when i look to the file in notepad or type primes.txt everthing looks fine.
invoke CreateFile,chr$("primes.txt"),GENERIC_READ or GENERIC_WRITE,
NULL,NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL
mov hFile,eax
...
...
;and in loop of DumpArray - have you defined szPrim as db 10 dup(?),0 ???
; this is the ASCII-buffer for dwtoa, which converts the number to a string, maybe your buffer is too small ?? don' know! I have chosen 10 bytes because thats sufficient for upto MAXDW 4294967296 (10 bytes in ASCII)
invoke dwtoa,ebx,addr szPrim ; convert prime to ASCII
invoke WriteFile,hFile,ADDR szPrim,10,ADDR rlen,NULL
;
;invoke WriteFile,hFile,ebx,4,ADDR rlen,NULL ; write 'raw'
[attachment deleted by admin]
Hi cobold:
I must have screwed up the CR and LF logic.
Regards herge
[attachment deleted by admin]
Hello,
invoke crt__ultoa,ebx,addr szPrim , 10 ; convert prime to ASCII
invoke WriteFile,hFile,ADDR szPrim,10,ADDR rlen,NULL
;
invoke WriteFile,hFile,ebx,4,ADDR rlen,NULL ; write 'raw' <-------- COMMENT OUT THIS LINE, it saves the primes in binary,
so either use the first WriteFile to save them in ASCII - OR - second WriteFile to save them in binary, but not BOTH ;-)
rgds
cobold
Hello cobold:
Thanks!
Regards herge