News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Updated Eratothenes (sieb1new.zip)

Started by cobold, July 31, 2008, 07:20:14 PM

Previous topic - Next topic

cobold

Hi folks,

I update and bugfixed the prime program (as I mentioned in last post) - but something went wrong with attaching the file. So I repost it here.

rgds
cobold

include     c:\masm32\include\masm32rt.inc
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««

DumpArray PROTO :DWORD  ; count and/or show/store the prime numbers found

.data?

    pMem        dd  ?   ; pointer to array of numbers[0..cByt]
    cByt        dd  ?   ; find primes up to this value
    primesfound dd  ?   ; number of primes between [0..cByt]
   
    ms_init     dd  ?   ; time needed to initialize array
    ms_sieve    dd  ?   ; time needed for sieving
    ms_count    dd  ?   ; .....for counting and/or saving primes
   
.data
    startmsg    db  13,10,"Sieve of Eratosthenes - find primes up to: ",0
; «««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««««

.code
start:
    print addr startmsg
    mov cByt,uval(input())  ; user enters max_number (unsigned, because
                            ; there are no negative primes!  :-)
                            ; thanks to jj2007
                             
    .if cByt==0             ; if user enters nothing or NULL
        mov cByt,500000     ; we take 500000 as default
    .endif
    add cByt,1  ; array starts with [0], so add 1 to cByt
                ; otherwise, we search only up to cByt - 1
       
    invoke GlobalAlloc,GMEM_FIXED,cByt  ; allocate cByt bytes of memory
    mov pMem,eax                        ; store array-address to pMem
    .if eax==NULL
        print "Not enough memory!"
        exit
    .endif
; -------------------------------------------------------------------------
    print "Looking for primes from 1 to "
    dec cByt
    print ustr$(cByt), " - please wait...",13,10,13,10
    inc cByt
;---------------------------------------------------------------
; Start timing: initialize Array
;---------------------------------------------------------------
    invoke GetTickCount
    mov ms_init,eax
@@:
    invoke GetTickCount     ; wait until next tick
    .if ms_init==eax        ; (timing is more reliable, because
        jmp @B              ; we start _exactly_ at the beginning
    .endif                  ; of a new tick)
    mov ms_init,eax
;---------------------------------------------------------------
; initialize Array
;---------------------------------------------------------------
    mov edx,0
    mov esi,pMem
align 4
    .repeat
        mov byte ptr[esi+edx],0
        add edx,1
    .until edx==cByt
    invoke GetTickCount
    sub eax,ms_init
    mov ms_init,eax
    print ustr$(ms_init)," ms to initialize array",13,10
;---------------------------------------------------------------
; Start timing: sieving
;---------------------------------------------------------------
    invoke GetTickCount
    mov ms_sieve,eax
@@:
    invoke GetTickCount
    .if ms_sieve==eax
        jmp @B
    .endif
    mov ms_sieve,eax
;---------------------------------------------------------------
; sieve primes from 0 .. cByt+1
;---------------------------------------------------------------
    mov ebx,2                               ; ebx = i
    mov eax,2
    mul eax                                 ; eax = i*i
align 4
    .while eax < cByt                       ; while i*i < N
        .if byte ptr[esi+ebx]==0            ;   if not deleted[i] then
            mov edx,eax                     ;     ebx = j
            align 4
            .while edx < cByt               ;     for j = i*i to N
                mov byte ptr[esi+edx],1     ;        deleted[j] = true
                add edx,ebx
            .endw
        .endif
        inc ebx                             ; i = i+1
        mov eax,ebx
        mul eax                             ; eax = i*i
    .endw
;---------------------------------------------------------------
    invoke GetTickCount
    sub eax,ms_sieve
    mov ms_sieve,eax
    print ustr$(ms_sieve)," ms to sieve the primes",13,10
   
    invoke GetTickCount
    mov ms_count,eax
   
    invoke DumpArray,esi    ; process bytearray, inc primesfound for every
                            ; number that is not deleted
   
    invoke GlobalFree,pMem
    .if eax!=NULL
        push eax
        print chr$(13,10)
        pop eax
        print hex$(eax)
        print " - ERROR GlobalFree"
    .endif
    exit
;---------------------------------------------------------------
align 4
DumpArray proc pArr:DWORD

    LOCAL hFile:DWORD
    LOCAL rlen :DWORD
       
    mov esi,pArr
    mov ebx,2
    mov primesfound,0

; Uncomment next block, if you want to store the primes to "primes.txt"
; -------------------------------------------------------------------------
;
;    invoke CreateFile,chr$("primes.txt"),
;                      GENERIC_READ or GENERIC_WRITE,
;                      NULL,
;                      NULL,
;                      CREATE_ALWAYS,
;                      FILE_ATTRIBUTE_NORMAL,
;                      NULL
;    mov hFile,eax
;   
; -------------------------------------------------------------------------

.data
    szPrim  db 10 dup(?),0
.code   
align 4
    .while ebx < cByt
        .if byte ptr[esi+ebx]==0
            ;--------------------------------------------------------
            ; uncomment following 2 lines to save result in "primes.txt"
            ;
            ;invoke dwtoa,ebx,addr szPrim   ; convert prime to ASCII
            ;invoke WriteFile,hFile,ADDR szPrim,10,ADDR rlen,NULL
            ;
            ;invoke WriteFile,hFile,ebx,4,ADDR rlen,NULL    ; write 'raw'
            ;
            ; or
            ;
            ;print ustr$(ebx),9
            ;
            ; for output in console           
            ;--------------------------------------------------------
            inc primesfound
        .endif
        add ebx,1
    .endw
    invoke GetTickCount
    sub eax,ms_count

[attachment deleted by admin]

Mark Jones

Primes are fun. :wink

I seem to recall some mathematical "tricks" for determining if large numbers are evenly divisible, without actually carrying out the division. Perhaps this can be a fast way of determining if some numbers are prime? Here is a list of these "tricks" up to 47:  http://www.egge.net/~savory/maths1.htm

For the purpose of finding primes, all numbers with even last digits could be skipped, as well as numbers ending in 5, since those always contain factors.

For the astute mathematician, take a look at:  http://www.cut-the-knot.org/blue/divisibility.shtml
"To deny our impulses... foolish; to revel in them, chaos." MCJ 2003.08

cobold

Hi Mark,

thanks for the interesting link.

There's a trick to test if a number is odd/even without carrying out a division:

If you take a look at the binary representation of numbers, you'll see that bit 0 of EVERY even number = 0, therefore for every odd number bit0=1

f.ex.

;dec bin
;2   0010
;3   0011
;4   0100
;5   0101

;So instead of

xor edx,edx
mov ebx,2
mov eax,number
div ebx
.if edx == 0
   ; number is even
.endif

; you could:

test,number,1
.if ZERO?
   ; Number is even
.endif


This is a lot faster.

MichaelW

#3
In my tests the extra code required to eliminate the even numbers slowed the generator down for anything less than about 10000 primes.

EDIT: removed evidence of my stupidity :green




eschew obfuscation

hutch--

I just tweaked this to only use TEST so its a single line test if the number is odd or not.


; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
    include \masm32\include\masm32rt.inc
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

comment * -----------------------------------------------------
                        Build this  template with
                       "CONSOLE ASSEMBLE AND LINK"
        ----------------------------------------------------- *

    .code

start:
   
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

    call main
    inkey
    exit

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

main proc

    push ebx
    push esi
    push edi

    xor ebx, ebx
    mov esi, 16

  @@:
    test ebx, 00000000000000000000000000000001b
    jz nxt
    print str$(ebx)," odd",13,10
  nxt:
    add ebx, 1
    sub esi, 1
    jnz @B

    pop edi
    pop esi
    pop ebx

    ret

main endp

; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤

end start

Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

cobold

Hi,

Mark Jones' idea to skip all even numbers and Micheal's proposal:
         .if (ebx & 1) && (byte ptr[esi+ebx]==0)
inspired me to optimize the sieve even further.

Well, I made several changes and now the algo performes approx. 40% - 41% faster.
Here are the results from my machine:
Algo   Numbers   Time-Init   Time-sieve   Time-count   Time-total   primes   loopcount
sieb1   100.000.000   310   16.163   341   16.814   5.761.455   242.570.204
siebneu   100.000.000   70   9.684   280   10.034   5.761.455   96.285.414

What have I done?
To speed up initializing the array, I preset offset 0 to 3 with
mov dword ptr[esi+0],000000101h
then initialize the rest of the array DWORD by DWORD, instead of BYTE by BYTE; and also preset all even number with 1:
   push cByt
   shr cByt,2
   add cByt,1
   mov edx,4
align 4
   .repeat
       mov dword ptr[esi+edx],000010001h  ;preset all even numbers = 1
       add edx,4
   .until edx >= cByt
   pop cByt

RESULT: Initialization is 4,4 times faster. (70ms vs. 310 ms for 100M)
-----
The variable for the outer loop (eax == i*i) and ebx can now be set to 3, so we skip flagging 2,4,6,8 and so on, because we've done this already in init.
   mov cnt_loop,0
   mov ebx,3                               ; ebx = i we start with 3
   mov eax,3
   mul eax                                 ; eax = i*i

And in the loop itself
   .while eax < cByt                       ; while i*i < N
       ;pushad
       ;print chr$(13,10)
       ;print ustr$(ebx),"=ebx",9
       ;print ustr$(cnt_loop),"=cnt_loop",13,10
       ;popad
       .if byte ptr[esi+ebx]==0            ; if not deleted[i] then
           mov edx,eax                     ;    ebx = j
           align 4
           .while edx < cByt               ; for j = i*i to N
[color=Blue]                .if (edx & 1)               ;   if j mod 2 = 0[/color]                    inc cnt_loop
                   ;pushad
                   ;print ustr$(edx),"=edx streichen "
                   ;print ustr$(cnt_loop),13,10
                   ;popad
                   mov byte ptr[esi+edx],1 ;     deleted[j] = true
               .endif
               add edx,ebx
           .endw
       .endif
       [color=Blue]add ebx,2 ; now becomes 5,7, and so on, don't need 2,4,6[/color]
       mov eax,ebx
       mul eax                             ; eax = i*i
   .endw

RESULT: reduces loop-count from 242.570.204 to 96.285.414  ---> 60% reduction
-----
When counting/printing:
we manually print "2", set primesfound to 1, index (ebx) to 3, then we process only odd numbers in the loop (add ebx,2).
My conclusion: The idea was simple, effect is quite considerable.


MichaelW

My idea broke the generator, I just failed to see it. The other method I tried was to pre-process the flags array using memfill to set all of the even elements to 1, but for some reason in my code this had no affect on the number of inner loops.
eschew obfuscation

cobold

Michael,

I also had some trouble with getting the even numbers out of the loop, but finally I got it running.
(attached 'siebnew.zip' = siebneu.asm & exe)

[attachment deleted by admin]

xmetal

I once created a prime tester to skip multiples of 3 also. Pseudo-code follows...


    read x
   
    if x == 2 or x == 3
        print "Prime"
        exit
    else if x mod 2 == 0 or x mod 3 == 0
        print "Composite"
        exit
   
    i = 5
    k = 0
   
    while i <= sqrt(x)
        if x mod i == 0
            print "Composite"
            exit
        i = i + 2 + k
        k = k xor 2
   
    print "Prime"

cobold

Well, okay - but the code you posted is kind of optimized "brute force" (ist uses div and mod) - and I suppose it doesn't perform as fast as a sieve.

btw. do you mind posting your source?



PerO

Hi Cobold

Only odd numbers enter the inner loop, so if you only test every second of them, the even numbers are sorted out automatically.

new inner loop:

         .while edx < cByt               ; for j = i*i to N
               ;pushad
               ;print ustr$(edx),"=edx streichen "
               ;print ustr$(cnt_loop),13,10
               ;popad
               mov byte ptr[esi+edx],1 ;     deleted[j] = true
               add edx,ebx
               add edx,ebx        ;skip the even numbers, no test for evenness required
           .endw


cobold

Yup! Good idea - save a few milliseconds!

Thanks!

herge


Hi Cobold:

When I try to write file primes.txt we get a problem at about 65539.
I think : is 3Ah Ascii 9 plus 1. Or is something to do with a overflow problem?


65521     65537      : :65539      : =65543      : :65551      = C65557


at about  69233  it sorts it self out.

Regards herge

// Herge born  Brussels, Belgium May 22, 1907
// Died March 3, 1983
// Cartoonist of Tintin and Snowy

cobold

Hello Herge,

honestly - I cannot reproduce the failure you mentioned. I removed the comments and recompiled siebneu.asm.
For 10.000.000 it produces 'primes.txt' with the following output:
(Anyway - I repost both the source, exe and primes.txt(with primes up to 70000 otherwise attachement very large - give it a try and let me know if it works on your machine!)
C:\Asm>siebneu

Sieve of Eratosthenes - find primes up to: 10000000

Looking for primes from 1 to 10000000 - please wait...

10 ms to initialize array
841 ms to sieve the primes
661 ms to count the primes

1512 ms total time.

  Number of primes found between 1 and 10000000 = 664579
8925144 loop-counter
and

12.08.2008  21:51         6.645.780 primes.txt (664579*10 - 10[2 is not written to file, so it starts with 3, containing 664578 numbers, with 10 bytes each)

when i look to the file in notepad or type primes.txt everthing looks fine.

    invoke CreateFile,chr$("primes.txt"),GENERIC_READ or GENERIC_WRITE,
                      NULL,NULL,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL
    mov hFile,eax
...
...
;and in loop of DumpArray - have you defined szPrim as db 10 dup(?),0  ???
; this is the ASCII-buffer for dwtoa, which converts the number to a string, maybe your buffer is too small ?? don' know! I have chosen 10 bytes because thats sufficient for upto MAXDW 4294967296 (10 bytes in ASCII)

            invoke dwtoa,ebx,addr szPrim   ; convert prime to ASCII
            invoke WriteFile,hFile,ADDR szPrim,10,ADDR rlen,NULL
            ;
            ;invoke WriteFile,hFile,ebx,4,ADDR rlen,NULL    ; write 'raw'


[attachment deleted by admin]

herge

Hi cobold:

I must have screwed up the CR and LF logic.

Regards herge

[attachment deleted by admin]
// Herge born  Brussels, Belgium May 22, 1907
// Died March 3, 1983
// Cartoonist of Tintin and Snowy