huge table access

Started by porphyry5, December 08, 2009, 04:20:43 PM


TNick

Quote from: porphyry5 on December 12, 2009, 04:45:32 PM
... avoid Virtual, ...
Wait... what?

.486
.MODEL FLAT, STDCALL
OPTION CASEMAP:NONE

        INCLUDE         windows.inc
        INCLUDE         kernel32.inc
        INCLUDELIB      kernel32.lib
        INCLUDE         user32.inc
        INCLUDELIB      user32.lib
       
        u64     STRUCT
                l       DWORD           ?
                h       DWORD           ?
        u64     ENDS


.DATA
        hFile                   DWORD           INVALID_HANDLE_VALUE
        pFContent               DWORD           0
        ReadEcho                DWORD           0
        F_Size                  u64             <>
        strFileToRead           BYTE            "TstVirtual.asm",0
       
        strErrCapt              BYTE            "Virtual Test has a problem. Please don't tell $M about it!!!",0
        strERR_CantOpen         BYTE            "File could not be opened",0
        strERR_Size             BYTE            "GetFileSizeEx failed on selected file",0
        strERR_Sane             BYTE            "File too large to process",0
        strERR_Virt             BYTE            "The system was unable to provide memory for this operation",0
        strERR_Read             BYTE            "Could not read selected file",0


.CODE

AppEntryPoint:



        ; open the source file for reading
        INVOKE  CreateFile,     ADDR strFileToRead,GENERIC_READ,FILE_SHARE_READ,NULL,OPEN_EXISTING,0,NULL
        cmp     eax,            INVALID_HANDLE_VALUE
        mov     hFile,          eax
        je      ERR_CantOpen
        ; get the 64-bit file size
        INVOKE  GetFileSizeEx,  eax,ADDR F_Size
        test    eax,            eax
        jz      ERR_Size
        ; sanity check: reject files of 1 MB or more (arbitrary limit for this test)
        cmp     F_Size.h,       0
        jne     ERR_Sane
        cmp     F_Size.l,       0FFFFFh
        ja      ERR_Sane
        ; reserve and commit enough pages to hold the whole file
        INVOKE  VirtualAlloc,   NULL,F_Size.l,MEM_COMMIT or MEM_RESERVE,PAGE_READWRITE
        test    eax,            eax
        mov     pFContent,      eax
        jz      ERR_Virt
        ; read the entire file into the buffer (eax still holds the VirtualAlloc pointer)
        INVOKE  ReadFile,       hFile,eax,F_Size.l,ADDR ReadEcho,NULL
        test    eax,            eax
        jz      ERR_Read
       
        ; do what you need to do with the file content
        mov     edi,            pFContent
        mov     ecx,            F_Size.l
        xor     eax,            eax             ; al = 0, the byte to scan for
        repne   scasb                           ; example: scan the whole buffer for a zero byte

        jmp     CleanExit

       
ERR_CantOpen:
        INVOKE  MessageBox,     NULL,ADDR strERR_CantOpen,ADDR strErrCapt,MB_OK or MB_ICONERROR
        jmp     CleanExit
       
ERR_Size:
        INVOKE  MessageBox,     NULL,ADDR strERR_Size,ADDR strErrCapt,MB_OK or MB_ICONERROR
        jmp     CleanExit

ERR_Sane:
        INVOKE  MessageBox,     NULL,ADDR strERR_Sane,ADDR strErrCapt,MB_OK or MB_ICONERROR
        jmp     CleanExit

ERR_Virt:
        INVOKE  MessageBox,     NULL,ADDR strERR_Virt,ADDR strErrCapt,MB_OK or MB_ICONERROR
        jmp     CleanExit

ERR_Read:
        INVOKE  MessageBox,     NULL,ADDR strERR_Read,ADDR strErrCapt,MB_OK or MB_ICONERROR
        ;jmp    CleanExit

       
CleanExit:
        ; release only what was actually obtained
        cmp     hFile,          INVALID_HANDLE_VALUE
        je      @F
                INVOKE  CloseHandle,    hFile
        @@:
        cmp     pFContent,      0
        je      @F
                INVOKE  VirtualFree,    pFContent,0,MEM_RELEASE
        @@:
       
        INVOKE  ExitProcess,    NULL


END AppEntryPoint




I would say heap functions are great for small chunks of memory. The downside with Virtual is that the bytes after F_Size.l up to the end of the last page may simply go unused. However, when you process large files and keep the heap that came with the process for day-to-day needs, you may find it useful to use Virtual...
Do not avoid Virtual!!!  :bdg
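
For the small-chunk case, the heap flavour would look something like this (just a sketch; pBuf, BUF_SIZE and ERR_Heap are names I am making up here, they are not in the listing above):

        ; minimal heap sketch - pBuf, BUF_SIZE and ERR_Heap are assumed names
        INVOKE  GetProcessHeap
        mov     ebx,            eax                     ; handle of the default process heap
        INVOKE  HeapAlloc,      ebx,HEAP_ZERO_MEMORY,BUF_SIZE
        test    eax,            eax
        jz      ERR_Heap                                ; HeapAlloc returns NULL on failure
        mov     pBuf,           eax
        ; ... use the buffer ...
        INVOKE  HeapFree,       ebx,0,pBuf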

Nick

dedndave

Quote
I would say heap functions are great for small chunks of memory.

i use them for large chunks, as well - you failed to mention a downside in that

TNick

Hello, Dave!

A heap function needs to check whether that much memory already exists in what the heap owns and, if it does not, it will call a Virtual function to add some more. It then gets whole pages that it has to manage (handle the extra bytes at the end that the user does not need, write headers for those pages or, maybe, add a pointer to the allocated range to an array).

Using Virtual you may skip this overhead.
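
If the growth overhead is the worry, one way to soften it (again, only a sketch of the idea; hMyHeap is an assumed name) is a private heap created with a generous initial size, so later HeapAlloc calls rarely have to go back to the Virtual functions:

        ; sketch: a private heap with a large initial commit
        INVOKE  HeapCreate,     0,100000h,0             ; 1 MB committed up front, growable (max size = 0)
        mov     hMyHeap,        eax
        INVOKE  HeapAlloc,      hMyHeap,0,4000h         ; served from the pre-committed region
        ; ...
        INVOKE  HeapDestroy,    hMyHeap                 ; releases everything in one call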

Nick

PS I have never done a study of how the Windows Heap works. My thoughts are based on how I would implement a heap. $M people may be far smarter and may have found a way to avoid this overhead for large blocks, in which case I retract everything I said and go back to my cold. :red

dedndave

yah - if your request is under, let's say ~ 2 Gb, HeapAlloc shouldn't bark
if you poke around in the Laboratory sub-forum, i am sure you can find some timing results

TNick

Yes... or we can go here (click it fast, before it becomes a dead link :) ).
And since we're there, the entire "Memory Management" category looks interesting, so I'm going back to read some more.

Nick

PS If you're too late and the link is broken, try this.

jj2007

Quote from: dedndave on December 12, 2009, 07:11:29 PM
if you poke around in the Laboratory sub-forum, i am sure you can find some timing results

Here are some cycle counts from an old proggie of mine. VirtualAlloc is really slow for small allocations - the overhead is a lot higher.

HeapAlloc, 00010000h bytes, 103 per kByte
HeapAlloc, 00001000h bytes, 846 per kByte
HeapAlloc, 00000100h bytes, 2228 per kByte

VirtualAlloc, 00010000h bytes, 51 per kByte
VirtualAlloc, 00001000h bytes, 3102 per kByte
VirtualAlloc, 00000100h bytes, 86028 per kByte
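
The general shape of such a test is something like the sketch below - this is not the program that produced the numbers above, just an illustration (hHeap, tStart and pMem are assumed DWORD variables, rdtsc needs at least .586, and there is no cpuid serialization, so take any result as a ballpark figure):

        ; rough timing sketch, NOT the program behind the numbers above
        INVOKE  GetProcessHeap
        mov     hHeap,          eax
        rdtsc
        mov     tStart,         eax                     ; low dword is enough for short intervals
        INVOKE  HeapAlloc,      hHeap,0,10000h
        mov     pMem,           eax
        INVOKE  HeapFree,       hHeap,0,pMem
        rdtsc
        sub     eax,            tStart                  ; cycles for one alloc+free of 10000h bytes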

TNick

Hello, jj!
Do you mind sharing this test program? These are some interesting results right there...

Nick

redskull

Those timings are somewhat misleading, which I say cautiously, having not seen the code.  There's no such thing as a VirtualAlloc call for x100 bytes; on an x86 system, you allocate memory in integer multiples of 4k pages, no discussion.  So to say 'x cycles per kByte' really doesn't make any sense.  A VirtualAlloc for x100 bytes will be (nearly) the same as a VirtualAlloc for x1000, but x1001 will actually be a call for x2000.

Also, to say that Heap functions are faster is self-evident; most of the time when you call a Heap function (for a small value), no more memory is actually allocated.  Also, the Heap will try to "defragment" those smaller chunks to free up room.  It's like saying allocating memory is slower than NOT allocating memory: most of the time, for small values, a call to HeapAlloc is unnecessary; your process already "owns" that memory and can read and write to it without needing to call the function.  A quick look at the Olly memory map will show this: any time a new page is allocated, it will be highlighted in red.  If nothing changes after a call to the heap functions, then you flat wasted your time with the call: your process could have stored data there already.

Also, @dedndave, you can't ever allocate more than 2Gb with *any* of the functions; there's simply not enough address space to go round.  There's no real "downside" to using the Heap functions for large values; there's just no real upside to doing it either.  If you *know* you need at least another page allocated, why waste the time having Windows check to make sure?
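
To make the page granularity concrete, here is a sketch of how a request rounds up (sysinfo is an assumed SYSTEM_INFO variable, and the trick relies on the page size being a power of two, which it is on x86):

        ; sketch: round a request up to a whole number of pages
        INVOKE  GetSystemInfo,  ADDR sysinfo
        mov     ecx,            sysinfo.dwPageSize      ; 1000h on x86
        mov     eax,            101h                    ; example: a 101h-byte request
        add     eax,            ecx
        dec     eax
        neg     ecx
        and     eax,            ecx                     ; eax = 1000h; a 1001h-byte request would give 2000h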

-r
Strange women, lying in ponds, distributing swords, is no basis for a system of government

jj2007

Quote from: TNick on December 12, 2009, 08:41:39 PM
Hello, jj!
Do you mind sharing this test program? These are some interesting results right there...
Nick,
I didn't attach it because it's old and very messy. But you may use bits and pieces to roll your own. Here it is. No warranty whatsoever, all kinds of disclaimers apply :wink

@redskull: What you write is entirely correct. My timings just confirm common sense: use HeapAlloc for small to medium allocations, switch to VirtualAlloc for big ones. Be cautious with the statement "most of the time, for small values, a call to HeapAlloc is unnecessary; your process already "owns" that memory" - once in a while, that strategy will fail and lead to very unpredictable behaviour...

redskull

I can't say for certain, but from what I remember the heap starts out 3 pages long and takes up about the first 2-1/2 for its data structures; while you could certainly read and write there, it would probably corrupt the heap and throw an exception.  And while that's certainly not a strategy to use for normal programming, the point is that if your process has a VAD for a committed page with R/W access, that's your memory to do with what you please.

I just did a quick experiment: for heap allocations up to ~x650, no new memory was allocated, and the pointer returned was in that "last half a page".  For greater allocations, it expanded those 3 pages to 4, then 5, etc.  For a full 1GB allocation, it presumably couldn't find the address space down low (~14000) and stuck it up high (~410000).  I *am* interested in why the VirtualAlloc for x100 took so long in the test...
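
For anyone who wants to repeat that experiment without Olly, one sketch of the idea is to ask VirtualQuery about the pointer HeapAlloc hands back (mbi and pMem are assumed names):

        ; sketch: inspect the region a heap pointer lives in
        ; (mbi is an assumed MEMORY_BASIC_INFORMATION variable, pMem a HeapAlloc result)
        INVOKE  VirtualQuery,   pMem,ADDR mbi,SIZEOF MEMORY_BASIC_INFORMATION
        ; mbi.BaseAddress, mbi.RegionSize and mbi.State (MEM_COMMIT) now show whether
        ; the allocation landed in pages the heap had already committed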

-r
Strange women, lying in ponds, distributing swords, is no basis for a system of government

hutch--

Graham,

VirtualAlloc() does have its place, and this is why Windows has a range of memory allocation strategies. VirtualAlloc() apparently handles discontinuous physical blocks and presents them to the caller as a linear address range, which allocation functions that grab a single block of linear space don't do. It depends very much on your application, but in your own case, with tables loaded for speed of operation, a single linear block of memory will give you what you are after.

Apart from people quoting out-of-date Microsoft data without keeping up on what the functions actually do (Microsoft have been changing their mind regularly for the last 30 years), most memory allocation functions reduce down through kernel32.dll to ntdll.dll to ntoskrnl.exe and hal.dll, and at the bottom, memory is memory: effectively, pick the packaging that best suits your purpose. I suggested using GlobalAlloc(), but ONLY with the GMEM_FIXED flag, because it's simpler to use, nothing is faster, and it can allocate as much memory as your computer has available. The other styles of GlobalAlloc() allocation are either for legacy special-purpose tasks like the clipboard or leftovers from 16-bit Windows where the distinctions no longer exist.
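
As a sketch of that GMEM_FIXED style (tblSize, pTable and ERR_Alloc are assumed names, not anything from your code):

        ; sketch: GlobalAlloc with GMEM_FIXED - the returned handle IS the pointer
        INVOKE  GlobalAlloc,    GMEM_FIXED,tblSize
        test    eax,            eax
        jz      ERR_Alloc                               ; NULL on failure
        mov     pTable,         eax                     ; use eax directly, no GlobalLock needed
        ; ... build the tables at pTable ...
        INVOKE  GlobalFree,     pTable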

I am sorry you have been fed such a confusing range of alternatives, as we run the Campus to avoid that, but the diversity of views here is probably useful to you in understanding some of the different strategies available for allocating memory in Windows.
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

porphyry5

Aha, so there are Virtual defenders, after all.

Hutch, thank you for the information about Global.  I do know that VirtualAlloc works; I used it alone at first and the table got built, but it didn't stay in place.  That may be due to the amount of time that elapsed before I discovered it.  I was so disbelieving of assembly's speed that I installed tests throughout the table-build process, literally counting and checking every byte each time it was moved.  I was single-stepping through these tests with WinDbg, which took about 20 minutes.  Everything checked out until the last test, a scan of the completed table to see if there were still embedded zeros in it from the initialization.  And there were, but when I viewed the area, it was not the mixture of zeros and text I expected but something else entirely; I think executable code had been written there.  Hence my attempt to use VirtualLock.

Now my question: given assembly's speed, I anticipate the completed program will run for about 7 seconds on average.  Is it reasonable to expect a memory area to remain intact for that long, or is that completely unpredictable?

dedndave

nah - lol
something else is putting data there - or, at least, you are not putting things in there like you think you are
i wondered what you wanted VirtualLock for - should be no need for that
you own that memory

hutch--

Graham,

If your machine is very close to its limit of physical memory, you run the risk of the OS trying to page memory out to make more room for a later loaded program, but it's usually the case with GlobalAlloc() as fixed memory that it does not normally get paged out, and if it's allocated when the machine is low on memory, the original allocation fails in the first place.

Now it's simple enough to test whether you get what you allocate, but the next trick is to make sure your app only writes to memory addresses that your app owns, otherwise it will go bang as soon as it tries to read or write outside that address range. This is roughly bounds-checking stuff, but once you are satisfied that it's safe in its reads and writes, and as long as the machine has enough memory left over not to affect normal operations, you should be able to concentrate on tweaking the overall design to try and get it a bit faster.
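
A rough version of that bounds check, purely as an illustration (pBase and bufSize are assumed names for the allocation's pointer and byte count):

        ; sketch: refuse a write that falls outside the buffer we own
        mov     eax,            edi                     ; edi = intended target address
        sub     eax,            pBase                   ; offset of the target within the buffer
        cmp     eax,            bufSize
        jae     ERR_OutOfRange                          ; past the end (an address below the base wraps and is caught too)
        mov     [edi],          cl                      ; safe to write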

Now another factor: if your memory usage is subject to reallocation to increase its size, almost always the address will change, so if this is the situation, always calculate from the initial memory pointer and use offsets from that pointer for your targets. If you are ending up with rubbish at addresses that should hold your own content, you are getting leakage somewhere, and it's probably from within your own application.
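
And the offsets-from-the-base idea, sketched with GlobalReAlloc (pBase, newSize, recOffset and ERR_Alloc are assumed names; the GMEM_MOVEABLE flag in the call lets a fixed block be moved when it has to grow):

        ; sketch: grow the block, accept that the base may move,
        ; and rebuild absolute addresses from stored offsets
        INVOKE  GlobalReAlloc,  pBase,newSize,GMEM_MOVEABLE
        test    eax,            eax
        jz      ERR_Alloc
        mov     pBase,          eax                     ; new base, possibly different from the old one
        mov     esi,            pBase
        add     esi,            recOffset               ; esi -> the same record as before, via its offset
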
Download site for MASM32      New MASM Forum
https://masm32.com          https://masm32.com/board/index.php

porphyry5

oh shit, I am so embarrassed now.  I abjectly apologize for taking you all on a wild goose chase.

Instead of

mov al,0         ; look for any byte in compacted tables still set to binary zero
lea edi,alltbls      ; start addr of compacted tables, ecx is preset to their total size (wrong: lea loads the address of the pointer variable itself)
repne scasb      ; scan all of compacted tables for binary zero

I should have done

mov al,0         ; look for any byte in compacted tables still set to binary zero
mov edi,alltbls      ; start addr of compacted tables (the pointer stored in alltbls), ecx is preset to their total size
repne scasb      ; scan all of compacted tables for binary zero

The scan works fine, everything is where it should be, even 30 minutes later.  Again, I am totally sorry for creating this mare's-nest.   :'(