Hi
I am using some extended-ASCII symbols as delimiters when I write a text file, so that it can be tokenised later.
txt File
®Name Surname°12300009999°E- MAIL°COUNTRY
-------------------------------------------------------------------------
Later, getting them back for the listview is easy with the code below:
; ---------------------------------------------------------------------------
; Load DataFileName into memory and add one list-view row to hList per line.
;
; Record layout, one record per line:
;     <174>field1<176>field2<176>field3<176>field4 CR LF
;   174 ('®' in Windows-1252) must be the first byte of every record; the
;       first line that does not start with it ends the load.
;   176 ('°') separates fields; CR (13) ends the last field of a record.
;
; Registers: esi = read position, edi = write position in the current scratch
;            buffer. Both are relied on to survive the API calls (Win32
;            stdcall functions preserve ebx/esi/edi).
;
; NOTE(review): textOku must hold the longest line + 1 byte and text the
;   longest field + 1 byte; both are declared outside this fragment and no
;   length check is made here - confirm their sizes.
; NOTE(review): lvi.imask / lvi.pszText are presumably initialised elsewhere
;   so that pszText points at text - confirm.
; ---------------------------------------------------------------------------
    invoke CreateFile,ADDR DataFileName,GENERIC_READ,NULL,NULL,OPEN_ALWAYS,FILE_ATTRIBUTE_NORMAL,NULL
    mov oFile,eax
    cmp eax,INVALID_HANDLE_VALUE
    je loadDone                         ; nothing was opened, nothing to release

    invoke GetFileSize,oFile,NULL       ; high dword is not needed
    cmp eax,-1                          ; INVALID_FILE_SIZE
    je closeFile
    mov okFileSize,eax

    ; Allocate one byte more than the file: with GMEM_ZEROINIT that extra byte
    ; is the NUL terminator lstrlen and the scan loops below depend on.
    inc eax
    invoke GlobalAlloc,GMEM_MOVEABLE or GMEM_ZEROINIT,eax
    mov oMemory,eax
    test eax,eax
    jz closeFile
    invoke GlobalLock,oMemory
    mov okuMemory,eax
    invoke ReadFile,oFile,okuMemory,okFileSize,ADDR SizeOKU,NULL

    mov Aydin,0                         ; Aydin = index of the row being built
    mov world,0                         ; world = fields consumed in this row
    invoke lstrlen,okuMemory
    or eax,eax
    jz close                            ; empty file (or failed read): no rows

;====================================================================
    mov esi,okuMemory
starting:
    mov edi,offset textOku              ; edi collects the current line

;========================= CHECK FOR A NEW RECORD ===================
lineCheck:
    mov al,[esi]
    inc esi
    cmp al,174                          ; record-start marker?
    jne close                           ; no (this includes the final NUL): done

;========================= COPY ONE LINE ============================
strLine:
    mov al,[esi]
    test al,al
    jz token                            ; end of data: last line had no LF;
                                        ; esi stays on the NUL so the next
                                        ; lineCheck terminates the load
    inc esi
    cmp al,10                           ; LF ends the line
    je token
    mov [edi],al
    inc edi
    jmp strLine

;========================= SPLIT LINE INTO FIELDS ===================
token:
    mov byte ptr [edi],13               ; always terminate the copied line with
                                        ; CR, so LF-only lines and leftovers of
                                        ; a longer previous line cannot be read
    push esi                            ; remember where the next line starts
    mov esi,offset textOku
tokenbas:
    mov edi,offset text                 ; edi collects the current field
tokenise:
    mov al,[esi]
    inc esi
    cmp al,176                          ; field separator?
    je operation
    cmp al,13                           ; end of line?
    je operation
    mov [edi],al
    inc edi
    jmp tokenise

operation:
    mov byte ptr [edi],0                ; NUL-terminate the field in text
    inc world
    .if world==1
        ; first field: create the row at index Aydin
        mov eax,Aydin
        mov lvi.iItem,eax
        mov lvi.iSubItem,0
        invoke SendMessage,hList,LVM_INSERTITEM,0,addr lvi
        mov lvi.iItem,eax               ; index the control actually assigned
    .else
        ; further fields: fill the next column of the same row
        inc lvi.iSubItem
        invoke SendMessage,hList,LVM_SETITEM,0,addr lvi
    .endif

    ; The row is complete after four fields or when the field ended at CR
    ; (a short line), whichever comes first.
    cmp byte ptr [esi-1],13
    je rowDone
    cmp world,4
    jb tokenbas
rowDone:
    inc Aydin
    mov world,0
    pop esi                             ; back to the file image, next line
    jmp starting

close:
    invoke GlobalUnlock,oMemory         ; GlobalUnlock takes the handle, not the pointer
    invoke GlobalFree,oMemory
closeFile:
    invoke CloseHandle,oFile
loadDone:
I don't know if this is a good way, or whether there is a simpler way of reading a file line by line
and sending the tokens to a listview.
Each line is terminated by the "magic" byte pair 0x0D,0x0A (CR, LF), so you can determine where each line ends.
If you want to make it faster, remember the offset at which each line ends and build a table from those offsets.
Be careful of Unix/C strings.. they only end with one of the "0Dh" or "0Ah".. forgot which :green2
I understand
thanks
Quote from: vanjast on February 27, 2012, 09:01:08 PM
Be careful of Unix/C strings.. they only end with one of the "0Dh" or "0Ah".. forgot which :green2
It's 0Ah, linefeeds. The snippet below translates Windows.inc to Unix format, for your testing.
; Demo: write a linefeed-only (Unix-style) copy of Windows.inc, read that copy
; back in, and print every block that starts on a line containing both "RECT"
; and "STRUCT", up to and including the line containing "ENDS".
include \masm32\MasmBasic\MasmBasic.inc ; download (http://www.masm32.com/board/index.php?topic=12460)
Init
Open "O", #1, "\Masm32\include\Linux.inc" ; won't work under Linux, just a demo for creating a Unix LF-only file
Recall "\Masm32\include\Windows.inc", L$()
; NOTE(review): the loop bound below implies Recall leaves the number of lines read in eax - confirm in the MasmBasic documentation
For_ n=0 To eax-1
Print #1, L$(n), Lf$ ; write all strings to Linux.inc
Next
Close #1
Recall "\Masm32\include\Linux.inc", L$(), unix ; tell Recall that it's a Unix/Linux file, i.e. linefeed-only
For_ ebx=0 To eax-1
.if Instr_(L$(ebx), "RECT")
.if Instr_(L$(ebx), "STRUCT") ; show some results
mov ecx, ebx ; ecx walks forward from the matching line; ebx keeps the outer loop position
.Repeat
Print Str$(ecx), Tb$, L$(ecx), CrLf$ ; line number, tab, line text
inc ecx
.Until Instr_(L$(ecx-1), "ENDS", 1) ; stop after printing the line that contains "ENDS"
Print ; NOTE(review): presumably emits an empty line between blocks - confirm
.endif
.endif
Next
Inkey "ok"
Exit
end start ; NOTE(review): no start label is visible here; presumably the Init macro defines it - confirm
:bg
Just to add to the confusion,
DOS/Windows is usually ascii 13,10
Unix is usually ascii 10 only.
MAC is usually ascii 13 only.
Richedit 2 and 3 use ascii 13 only.
Some very old text formats use 13,13,10 so that the ancient printers can get back to start for the ascii 10.
The best technique I have found is to count the occurrences of each character and evaluate the results:
count(13) == count(10) => DOS/Windows.
count(10) != 0 && count(13) == 0 => Unix.
count(13) != 0 && count(10) == 0 => MAC.
count(13) != count(10) && count(13) != 0 && count(10) != 0 => probably 13,13,10.
Quote from: hutch-- on February 28, 2012, 11:36:57 PM
Richedit 2 and 3 use ascii 13 only.
Yes, a real nuisance. But exporting is relatively easy: GT_USECRLF (http://msdn.microsoft.com/en-us/library/bb787913%28VS.85%29.aspx) When copying text, translate each CR into a CR/LF.
don't forget the guys that sometimes use 10, 13 by mistake - i have seen this several times :P
I think I used like that b4
ASCII 10 + Name Surname°12300009999°E- MAIL°COUNTRY + ASCII 13
then First Line was empty in listview so I decided to use symbols
like that
Quote®Name Surname°12300009999°E- MAIL°COUNTRY
+ 13,10
DOS/Windows = 13,10
Linux/Unix = 10
Mac (old) = 13
Mac OS = 10
There are other crazy schemes, like 13,13 for a 'soft' line-break (for word-wrapping) but this shouldn't be saved to the file anyway; as for 10,13 - you can't guard against arbitrary mistakes, so don't even try (there will always be a better idiot.)
FWIW, ASCII also defines RECORD-SEPARATOR (30) and UNIT-SEPARATOR (31), so you might use these in your file and avoid any newline confusions.
QuoteSurname -US- Name -US- 12300009999 -US- Email -US- Country -RS-
Surname -US- Name -US- 12300009999 -US- Email -US- Country -RS-
Surname -US- Name -US- 12300009999 -US- Email -US- Country -RS-
...
Quote from: Tedd on February 29, 2012, 11:37:19 PM
FWIW, ASCII also defines RECORD-SEPARATOR (30) and UNIT-SEPARATOR (31), so you might use these in your file and avoid any newline confusions.
QuoteSurname -US- Name -US- 12300009999 -US- Email -US- Country -RS-
Surname -US- Name -US- 12300009999 -US- Email -US- Country -RS-
Surname -US- Name -US- 12300009999 -US- Email -US- Country -RS-
...
Interesting, Tedd. Although the whole world uses TAB instead...
Yes its sensible
if I use it for a listview
for example there will be 4 columns again in my new project
if I understand true
i need to record it like that
QuoteSurname Name -US- 12300009999 -US- Email -US- Country -RS-
the problem is .... how can i find new line ?
may be i need to add 10 end of line not RS or i need to use something for starting line
I was checking first character to find new line b4 ...but if user click space for 1st character
and I code cmp al,0; for 1st byte
je exit
it is a kind of error for program bcoz i cant get other lines anymore
Quote from: jj2007 on March 01, 2012, 06:16:01 AM
Interesting, Tedd. Although the whole world uses TAB instead...
Everyone does what microsoft does, but that doesn't make it right :P
This way you don't have any newline problems, or worries about escaping 'special' characters in your strings.
Quote from: Force on March 01, 2012, 09:40:13 AM
Yes its sensible
if I use it for a listview
for example there will be 4 columns again in my new project
if I understand true
i need to record it like that
QuoteSurname Name -US- 12300009999 -US- Email -US- Country -RS-
the problem is .... how can i find new line ?
may be i need to add 10 end of line not RS or i need to use something for starting line
I was checking first character to find new line b4 ...but if user click space for 1st character
and I code cmp al,0; for 1st byte
je exit
it is a kind of error for program bcoz i cant get other lines anymore
The file starts with the first line, and first line ends at RS. The next line starts immediately after, there are no newline characters - it's a data file, not a text file (but most of data happens to be text.)
Only your program reads and writes the file, not the user, so you control its content, and you can filter any user input so that it conforms to the required formatting.
Tedd
when i write this code ;;;;; invoke ReadFile,oFile,okMemory,okFileSize,ADDR SizeOKU,NULL
i move data from file to okMemory already
i get line and part it
mov esi,okMemory ; the file data is in this buffer
lea edi,textBuf
;GET LINE
strLine:
mov al,[esi]
inc esi
cmp al,10;get data till new line
je token
mov[edi],al
inc edi
jmp strLine
token: ; Part Tokens
......
.....
.....
inserting to listview
and turning back again for next line
lineCheck: ; check first character
mov al,[esi]
inc esi
cmp al,174;ASCII for start line
jne close
then my way is wrong
There's no need to delimit both the start and end of a line - choose one or the other.
It's more common to indicate the end of a line, as you know the file starts at the beginning of the first line.
So the first line is all characters from the beginning of the file to the first newline character.
The second line is all characters from the next character to the next newline character.
And so on..
You just read one line at a time, stopping at each newline character. Process the line. Then continue for the next line.
For the end of the file, you can either add a 0, or if you want to keep everything in text, add a blank line (another newline, with no 'data') and exit when you try to process a line of zero length.
ok ok Thanks Tedd :U
I understand what you mean now and ur recommend will make my code shorter
No, I won't add a '0' at the end of the file... doing mov ecx,filesize is a better way,
so I can control the loop with dec ecx,
but I don't know yet whether using the file size is a good idea or not.
i'd be more interested in line length :P
get the length of each line, either 13 or 10 terminates a line
add the length of the previous line to the start pointer - if that value exceeds the end of file, truncate
if you come across a line that has 0 length, you have a 13,10 or similar
add 1 and try again
Yes Dave
its another good idea
i will try all of them in next project hope i will find the best way
yah - if it is practical, make an array with 2 dwords for each line - the pointer and the length
then, when you are processing lines, you work from the array
you want to see what is in line 45 ? - index into the array and bang, you have the pointer and the length
this keeps you from having to move a bunch of strings around or editing the EOL chars
Depends what you have to do with the text. Here is a test piece with 2 versions of the same algo, one with and the other without the line counter. It does nothing more than create an array of pointers to the start of every line of text. The purpose of this technique is to be able to find the start of lines when you want to process text line by line but be able to scan either direction at a byte level.
IF 0 ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
Build this template with "CONSOLE ASSEMBLE AND LINK"
ENDIF ; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
include \masm32\include\masm32rt.inc
ttok PROTO psrc:DWORD,parr:DWORD
ttok2 PROTO psrc:DWORD,parr:DWORD
.code
start:
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
; Program entry point (named by "end start" at the bottom of the file).
call main ; run the benchmark; plain call because main has no PROTO ahead of this point
inkey ; NOTE(review): presumably the masm32rt.inc macro that waits for a key press - confirm
exit ; NOTE(review): presumably the masm32rt.inc macro that terminates the process - confirm
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
; ----------------------------------------------------------------------------------
; main - benchmark driver
;
; Loads "vistaxp.h" into memory and times ttok and ttok2 on it, printing the
; elapsed GetTickCount milliseconds for each call. REPEAT 8 ... ENDM is an
; assemble-time repeat, so the timing sequence below is emitted eight times.
;
; Each ttok/ttok2 call hands back a newly allocated pointer array in parr; it is
; released as soon as its timing has been printed, so no array is left allocated
; when the next call overwrites parr.
; ----------------------------------------------------------------------------------
main proc

    LOCAL pMem  :DWORD                  ; source text returned by InputFile
    LOCAL parr  :DWORD                  ; receives the pointer array from ttok / ttok2
    LOCAL lcnt  :DWORD                  ; line count returned by the last call

    mov pMem, InputFile("vistaxp.h")

  REPEAT 8
    ; -------------------------------------
    ; pMem is the source to tokenise
    ; ADDR parr is a variable that receives
    ; the address of the array of pointers
    ; -------------------------------------
    invoke GetTickCount
    push eax                            ; start time
    invoke ttok,pMem,ADDR parr
    mov lcnt, eax
    invoke GetTickCount
    pop ecx
    sub eax, ecx                        ; eax = elapsed ms
    print ustr$(eax)," ms",13,10
    free parr                           ; release this run's array (outside the timed region)

    ; ----------------------------

    invoke GetTickCount
    push eax                            ; start time
    invoke ttok2,pMem,ADDR parr
    mov lcnt, eax
    invoke GetTickCount
    pop ecx
    sub eax, ecx                        ; eax = elapsed ms
    print ustr$(eax)," ms",13,10
    free parr                           ; release this run's array (outside the timed region)
  ENDM

    free pMem

    ret

main endp
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
ttok proc psrc:DWORD,parr:DWORD
; ----------------------------------------------------------------------------------
; scan source text and create an array of pointers to the beginning of each line of text
;
; psrc = the zero terminated text to tokenise
; parr = the address of a variable that has the array memory address written to it
;
; return values
; 1. EAX = the number of ASCII 10 (LF) characters in psrc
; 2. an array of pointers written to parr. It holds EAX + 1 entries: entry 0 is
;    psrc itself and each following entry points to the byte after one LF, so
;    the last entry points at whatever follows the final LF (the terminating
;    zero when the text ends with LF).
;
; if the array cannot be allocated, 0 is written to parr and EAX is 0
;
; parr should be deallocated with GlobalFree() or the "free" macro within the scope
; that the variable "parr" was allocated in
;
; uses EAX, ECX and EDX only
; ----------------------------------------------------------------------------------
    LOCAL hMem :DWORD
    LOCAL lcnt :DWORD

    mov edx, psrc
    sub edx, 1
    xor eax, eax
  ; -------------------
  ; count ASCII 10 (LF)
  ; -------------------
    jmp lbl0
  pre0:
    add eax, 1
  lbl0:
    add edx, 1
    movzx ecx, BYTE PTR [edx]
    cmp ecx, 10
    je pre0
    test ecx, ecx
    jnz lbl0

    mov lcnt, eax                       ; store the line count
    lea eax, [eax*4+4]                  ; (lcnt + 1) pointers: one for the 1st line
                                        ; plus one for every LF found
    mov hMem, alloc(eax)                ; allocate (lcnt + 1) * 4 bytes
    mov edx, hMem                       ; store address in EDX
    .if edx == 0                        ; allocation failed
      mov lcnt, 0
      jmp zero                          ; report 0 lines and a null array
    .endif

    mov eax, psrc
    mov [edx], eax                      ; write pointer to 1st line
    add edx, 4
  ; ------------------------------------------------------
  ; loop through text writing a pointer to each line start
  ; ------------------------------------------------------
    sub eax, 1
  mainloop:
    add eax, 1
  backin:
    movzx ecx, BYTE PTR [eax]
    test ecx, ecx
    jz zero
    cmp ecx, 10                         ; test for LF
    jne mainloop
  wrtptr:
    add eax, 1                          ; inc to next char past LF
    mov [edx], eax                      ; write line start to array member
    add edx, 4
    jmp backin                          ; re-test the new char without skipping it
  zero:
    mov ecx, parr                       ; load passed handle address into ECX
    mov edx, hMem
    mov [ecx], edx                      ; store local array handle at address of passed handle
    mov eax, lcnt                       ; return the line count
    ret
ttok endp
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
align 16
ttok2 proc psrc:DWORD,parr:DWORD
; ----------------------------------------------------------------------------------
; scan source text and create an array of pointers to the beginning of each line of text
;
; single pass version: no counting pass is made, a fixed size array of
; TTOK2_ARRAY_BYTES is allocated instead and scanning stops early if it fills up
;
; psrc = the zero terminated text to tokenise
; parr = the address of a variable that has the array memory address written to it
;
; return values
; 1. EAX = the number of array entries written minus 1, which is the number of
;    ASCII 10 (LF) characters seen unless the array filled up first
; 2. an array of pointers written to parr: entry 0 is psrc itself and each
;    following entry points to the byte after one LF
;
; if the array cannot be allocated, 0 is written to parr and EAX is 0
;
; parr should be deallocated with GlobalFree() or the "free" macro within the scope
; that the variable "parr" was allocated in
;
; uses EAX, ECX and EDX only
; ----------------------------------------------------------------------------------
    LOCAL hMem :DWORD
    LOCAL lcnt :DWORD
    LOCAL pEnd :DWORD                   ; first address past the pointer array

    TTOK2_ARRAY_BYTES equ 1024*1024*8   ; room for 2M line pointers

    mov hMem, alloc(TTOK2_ARRAY_BYTES)
    mov edx, hMem                       ; store address in EDX
    .if edx == 0                        ; allocation failed
      mov lcnt, 0
      jmp zero                          ; report 0 lines and a null array
    .endif
    lea ecx, [edx+TTOK2_ARRAY_BYTES]
    mov pEnd, ecx

    mov eax, psrc
    mov [edx], eax                      ; write pointer to 1st line
    add edx, 4
  ; ------------------------------------------------------
  ; loop through text writing a pointer to each line start
  ; ------------------------------------------------------
    sub eax, 1
  mainloop:
    add eax, 1
  backin:
    movzx ecx, BYTE PTR [eax]
    test ecx, ecx
    jz done
    cmp ecx, 10                         ; test for LF
    jne mainloop
  wrtptr:
    add eax, 1                          ; inc to next char past LF
    cmp edx, pEnd
    jae done                            ; array is full, stop rather than overrun it
    mov [edx], eax                      ; write line start to array member
    add edx, 4
    jmp backin                          ; re-test the new char: it may be another LF
                                        ; or the terminating zero
  done:
    sub edx, hMem                       ; bytes written to the array
    shr edx, 2                          ; = number of entries
    sub edx, 1                          ; entry 0 is the 1st line, the rest are one per LF
    mov lcnt, edx
  zero:
    mov ecx, parr                       ; load passed handle address into ECX
    mov edx, hMem
    mov [ecx], edx                      ; store local array handle at address of passed handle
    mov eax, lcnt                       ; return the line count
    ret
ttok2 endp
; ¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤¤
end start
Hutch
I cant Assemble n link ur code
in fact I noticed that I cant assemble any code including masm32rt.inc
I got this problem 1st time I dont know Whats wrong I m thinking now What I changed in my pc :red
I mean There is no error but it dsnt work
Something in your installation must be broken, just re-install it again and make sure its all there when you finish.
re-installing but
still same problem :red
the masm32 folder must be in the root
C:\masm32 will work
C:\Programming\masm32 will not work
make sure the files you are trying to build are on the same drive as masm32
it's a good idea to close all other programs when installing
I re-installed a few times i got same problem
after uninstalling a C compiler which i installed yesterday
I installed masm32 again .It works without problem now :eek
Thanks Hutch
sure ur code will help me
btw I wanna ask you a question
when I play with codes i made a simple project
"sending message to editbox of another program and pushing button of it then message is in messagebox"
I used Findwindow,Findwindowex Method
I wanna post it to forum but i m not sure if its illegal for this forum or not
we all use FindWindow from time to time :P
as long as you are not reversing someone elses software and it isn't virus related, Hutch doesn't usually mind
...oh - and post no pix of nakie ladies
....unless they are blonde
......and not Kylie Minogue - lol
lol Dave :bdg