News:

MASM32 SDK Description, downloads and other helpful links
MASM32.com New Forum Link
masmforum WebSite

Comma Separated File (CSV) Component Extraction

Started by Draakie, February 10, 2009, 05:03:26 PM

Previous topic - Next topic

Draakie

Quote
;-----------------------------------------------------------------------------------------------
; StripCSV - copy the next double-quoted field from a file into a zero-terminated buffer.
;
; Assembler : MASM (32-bit, Intel syntax), stdcall.
;
; The file is read one byte at a time with ReadFile. Everything before the opening double
; quotation mark (") is discarded, commas (,) inside the field are dropped, and the closing
; quotation mark is replaced by the zero terminator.
;
; In   : CSVFile   = valid file handle, opened for synchronous reading
;        CSVBuffer = address of the destination buffer
;        CSVLength = size of the destination buffer in bytes, terminator included
; Out  : EAX = 1 (TRUE)  a complete field was captured and zero terminated
;        EAX = 0 (FALSE) end of file, read error, or the field does not fit in CSVLength
;                        bytes (the buffer is not guaranteed to be terminated in this case)
;        ESI = number of characters stored in the buffer (terminator not counted)
; Uses : the global DWORD ReadWrite receives the byte count of each ReadFile call
; Clobb: ESI (result), ECX, EDX, flags. EBX and EDI are preserved.
;-----------------------------------------------------------------------------------------------
StripCSV proc uses ebx edi CSVFile:DWORD,CSVBuffer:DWORD,CSVLength:DWORD
        mov     ebx, CSVBuffer              ; ebx -> next byte to fill
        xor     esi, esi                    ; esi = characters stored so far
        xor     edi, edi                    ; edi = 0 until the opening quote has been seen

S1:     cmp     esi, CSVLength              ; room left for one more byte? (sizes are unsigned)
        jae     S4                          ; no - field longer than the buffer
        invoke  ReadFile, CSVFile, ebx, 1, ADDR ReadWrite, NULL
        test    eax, eax                    ; 0 = read error
        jz      S4
        cmp     dword ptr ReadWrite, 0      ; success with 0 bytes read = end of file
        je      S4

        cmp     byte ptr [ebx], '"'
        je      S2
        test    edi, edi                    ; still in front of the opening quote?
        jz      S1                          ; yes - discard (line breaks, separators, ...)
        cmp     byte ptr [ebx], ','         ; commas inside the field are dropped
        je      S1
        add     ebx, 1                      ; keep the character
        add     esi, 1
        jmp     S1

S2:     mov     byte ptr [ebx], 0           ; the quote itself is never stored
        test    edi, edi
        jnz     S3                          ; second quote - field complete
        mov     edi, 1                      ; first quote - start capturing
        jmp     S1

S3:     mov     eax, 1                      ; TRUE: field captured
        ret

S4:     xor     eax, eax                    ; FALSE: EOF, error, or field too long
        ret
StripCSV endp
EXAMPLE :

   invoke StripCSV,FileHandle, addr MRType, 4  ; Read the next quoted field into MRType; 4 = expected buffer size,
                                                                             ; i.e. 3 ASCII characters plus 1 byte for the zero terminator
   test eax, eax                               ; StripCSV reports FALSE (0) in EAX on end of file / invalid input
   jz RMS1                                     ; nothing captured - branch to RMS1 (label defined by the caller)

Hi PPL,

See if you can rework this to serve a more general purpose (multi-line input, for example) - in other words, a more
generic implementation and, obviously, a faster call rate.

[EDIT] Strangely enough these mundane code segments are awe-inspiring when done correctly - which I obviously didn't.... :eek
Makes me feel baby-ish in the extreme.........

Thanks
Draakie
Does this code make me look bloated ? (wink)

vanjast

I used this many moons ago to pull stuff out of a CSV file and place it into a ListView object.
It extracts the headers, and then the data items.
There are also different versions of CSV EOL indicators - I think I accounted for those here
:8)


;==================================================================================================
; ExtractListHDR - parse the first (header) line of the CSV data and add one ListView column
;                  per comma-separated name.
;
; Assembler : MASM (32-bit, Intel syntax), stdcall.
;
; In   (globals) : ptrCSVMemBuff = address of the first unread byte of the CSV data
;                  iFSizeCount   = number of unread bytes
;                  hListview, hWnd, lvCol, TempBuff, cMaxTempBuff, dText5, dTitle
; Out            : EAX = TRUE  header parsed; ptrCSVMemBuff points behind the header line
;                  EAX = 0     header name too long or data ended before an end-of-line;
;                              a message box is shown and ptrCSVMemBuff is left unchanged
; Out  (globals) : iNumColumns = number of columns inserted
;                  bSingleDot  = TRUE when the header line ended with LF
;                  bDoubleDot  = TRUE when the header line ended with CR (optionally CR LF)
;                  bEndOfLine, iTBCount, iFSizeCount are updated while parsing
;
; Spaces are skipped. Every other byte is passed through AlphaNum and the low byte of its
; return value is stored when the return value is non-zero.
; NOTE(review): this presumes AlphaNum returns the accepted character - confirm.
; NOTE(review): TempBuff is cleared as 8 DWORDs, so it is assumed to be at least 32 bytes and
;               cMaxTempBuff is assumed not to exceed that - confirm against the declarations.
;==================================================================================================
ExtractListHDR proc USES esi edi ecx

Local iColWidth :DWORD

        xor     eax, eax
        mov     bEndOfLine, eax             ; EOL = FALSE
        mov     iNumColumns, eax            ; no columns yet
        mov     bDoubleDot, eax             ; line-ending type not known yet
        mov     bSingleDot, eax

        mov     esi, ptrCSVMemBuff          ; esi -> next unread source byte

;------------- START A NEW HEADER NAME ---------
HDRZeroTBuff:
        mov     edi, OFFSET TempBuff
        mov     ecx, 8                      ; 8 DWORDs = 32 bytes
        xor     eax, eax
        rep     stosd                       ; zero TempBuff (DF is clear under the Win32 ABI)

        mov     edi, OFFSET TempBuff        ; edi -> next free byte of the name
        mov     iTBCount, cMaxTempBuff      ; bytes still available in TempBuff
        mov     iColWidth, 0                ; characters in this name

;------------- MAIN HEADER LOOP ----------------
HDR_Loop:
        cmp     iTBCount, 0
        je      HDR_ERROR                   ; name longer than cMaxTempBuff

        cmp     iFSizeCount, 0
        je      HDR_ERROR                   ; data ended before the header line did

        movzx   eax, byte ptr [esi]         ; zero-extend so AlphaNum gets a clean DWORD
        inc     esi
        dec     iFSizeCount

;------------- 0X0A END-OF-LINE ----------------
        cmp     al, 0ah
        jne     HDR_CheckCR
        mov     bEndOfLine, TRUE
        mov     bSingleDot, TRUE            ; file uses LF line endings
        jmp     HDR_DoColumnAdd

;------------- 0X0D END-OF-LINE ----------------
HDR_CheckCR:
        cmp     al, 0dh
        jne     HDR_CheckComma
        mov     bEndOfLine, TRUE
        mov     bDoubleDot, TRUE            ; file uses CR (LF) line endings
        cmp     iFSizeCount, 0              ; swallow the LF of a CR LF pair, but only if
        je      HDR_DoColumnAdd             ; there is a next byte and it really is LF
        cmp     byte ptr [esi], 0ah
        jne     HDR_DoColumnAdd
        inc     esi
        dec     iFSizeCount
        jmp     HDR_DoColumnAdd

;------------- "," ITEM SEPARATOR --------------
HDR_CheckComma:
        cmp     al, ","
        je      HDR_DoColumnAdd

;------------- SPACES ARE IGNORED --------------
        cmp     al, 020h
        je      HDR_Loop

        invoke  AlphaNum, eax               ; check char in set range
        .if eax != 0
            mov     [edi], al               ; store AlphaNum's result in TempBuff
            inc     edi
            dec     iTBCount
            inc     iColWidth
        .endif
        jmp     HDR_Loop

;------------- INSERT LISTVIEW COLUMN ----------
HDR_DoColumnAdd:
        mov     lvCol.imask, LVCF_TEXT or LVCF_WIDTH
        mov     lvCol.fmt, LVCFMT_LEFT
        mov     lvCol.pszText, OFFSET TempBuff
        mov     eax, iColWidth
        imul    eax, 10                     ; 10 width units per character of the name
        mov     lvCol.lx, eax
        mov     eax, iNumColumns            ; new column goes at the end
        invoke  SendMessage, hListview, LVM_INSERTCOLUMN, eax, ADDR lvCol

        inc     iNumColumns
        cmp     bEndOfLine, TRUE
        jne     HDRZeroTBuff                ; more names follow on this line

;------------- BYE BYE -------------------------
        mov     ptrCSVMemBuff, esi          ; data rows start here
        mov     eax, TRUE
        ret

;------------- ERROR IN READING FILE -----------
HDR_ERROR:
        invoke  MessageBox, hWnd, ADDR dText5, ADDR dTitle, MB_OK
        xor     eax, eax
        ret
ExtractListHDR endp
;==================================================================================================
; ExtractListResults - parse the data rows of the CSV data and insert them into the ListView,
;                      one item per line and one sub-item per comma-separated field.
;
; Assembler : MASM (32-bit, Intel syntax), stdcall.
;
; In   (globals) : ptrCSVMemBuff = address of the first unread byte of the CSV data
;                  iFSizeCount   = number of unread bytes
;                  iNumColumns   = number of ListView columns
;                  bSingleDot / bDoubleDot = line-ending type (LF / CR or CR LF); one of them
;                                  must be TRUE or the first end-of-line is an error
;                  hListview, hWnd, lvItm, TempBuff, cMaxTempBuff, dText2, dText4, dTitle
; Out            : EAX = TRUE  all data consumed. A final line without an end-of-line is
;                              inserted as well. A CR inside an LF-type file also ends
;                              parsing with TRUE.
;                  EAX = 0     field longer than cMaxTempBuff, unknown line-ending type, or
;                              more fields on a line than columns; message box(es) are shown
; Out  (globals) : iRowCount = number of rows inserted; bEndOfLine, bEndOfFile, iSubItemCount,
;                  iTBCount and iFSizeCount are updated while parsing
;
; Spaces are skipped. Every other byte is passed through AlphaNum and the low byte of its
; return value is stored when the return value is non-zero.
; NOTE(review): this presumes AlphaNum returns the accepted character - confirm.
; NOTE(review): TempBuff is cleared as 8 DWORDs, so it is assumed to be at least 32 bytes and
;               cMaxTempBuff is assumed not to exceed that - confirm against the declarations.
;==================================================================================================
ExtractListResults proc USES esi edi ecx

        xor     eax, eax                    ; reset all state
        mov     bEndOfLine, eax
        mov     bEndOfFile, eax
        mov     iSubItemCount, eax
        mov     iRowCount, eax
        mov     esi, ptrCSVMemBuff          ; esi -> next unread source byte

;------------- START A NEW FIELD ----------------
RSLTZeroTBuff:
        mov     edi, OFFSET TempBuff
        mov     ecx, 8                      ; 8 DWORDs = 32 bytes
        xor     eax, eax
        rep     stosd                       ; zero TempBuff (DF is clear under the Win32 ABI)
        mov     iTBCount, cMaxTempBuff      ; bytes still available in TempBuff
        mov     edi, OFFSET TempBuff        ; edi -> next free byte of the field

RSLT_Loop:
        cmp     iTBCount, 0
        je      DBG_001                     ; field longer than cMaxTempBuff

        cmp     iFSizeCount, 0
        jne     RSLT_GetByte

;------------- END OF DATA ----------------------
        cmp     edi, OFFSET TempBuff        ; characters collected for an unfinished field?
        jne     RSLT_FlushLast
        cmp     iSubItemCount, 0            ; or fields already inserted on this line?
        je      RSLT_Exit                   ; neither - data ended on a line boundary
RSLT_FlushLast:
        mov     bEndOfLine, TRUE            ; last line has no end-of-line: insert what
        mov     bEndOfFile, TRUE            ; was collected, then leave
        jmp     RSLT_InsertSubItem

RSLT_GetByte:
        movzx   eax, byte ptr [esi]         ; zero-extend so AlphaNum gets a clean DWORD
        inc     esi
        dec     iFSizeCount

;------------- 0X0A END-OF-LINE ----------------
        cmp     al, 0ah
        jne     RSLT_CheckCR
        mov     bEndOfLine, TRUE
        cmp     bSingleDot, TRUE            ; LF ends a line for either known file type
        je      RSLT_InsertSubItem
        cmp     bDoubleDot, TRUE
        je      RSLT_InsertSubItem
        jmp     RSLT_ERROR                  ; line-ending type was never determined

;------------- 0X0D END-OF-LINE ----------------
RSLT_CheckCR:
        cmp     al, 0dh
        jne     RSLT_CheckComma
        mov     bEndOfLine, TRUE
        cmp     bSingleDot, TRUE
        jne     RSLT_CRPair
        mov     bEndOfFile, TRUE            ; CR in an LF-type file: treat as end of data
        jmp     RSLT_InsertSubItem
RSLT_CRPair:
        cmp     bDoubleDot, TRUE
        jne     RSLT_ERROR                  ; line-ending type was never determined
        cmp     iFSizeCount, 0              ; swallow the LF of a CR LF pair, but only if
        je      RSLT_InsertSubItem          ; there is a next byte and it really is LF
        cmp     byte ptr [esi], 0ah
        jne     RSLT_InsertSubItem
        inc     esi
        dec     iFSizeCount
        jmp     RSLT_InsertSubItem

;------------- "," ITEM SEPARATOR --------------
RSLT_CheckComma:
        cmp     al, ","
        je      RSLT_InsertSubItem

;------------- SPACES ARE IGNORED --------------
        cmp     al, 020h
        je      RSLT_Loop

        invoke  AlphaNum, eax               ; check char in set range
        .if eax != 0
            mov     [edi], al               ; store AlphaNum's result in TempBuff
            inc     edi
            dec     iTBCount
        .endif
        jmp     RSLT_Loop

;------------- INSERT LISTVIEW SUBITEM ----------
RSLT_InsertSubItem:
        ;--------- first field of a line: create the ListView row ---------
        .if iSubItemCount == 0
            mov     lvItm.imask, LVIF_TEXT
            mov     eax, iRowCount
            mov     lvItm.iItem, eax
            mov     lvItm.lParam, eax
            mov     lvItm.iSubItem, 0
            mov     lvItm.pszText, 0        ; text is supplied by LVM_SETITEM below
            mov     lvItm.iImage, 0
            INVOKE  SendMessage, hListview, LVM_INSERTITEM, NULL, ADDR lvItm
        .endif

        ;--------- set the text of the current sub-item ---------
        mov     lvItm.imask, LVIF_TEXT
        mov     eax, iRowCount
        mov     lvItm.iItem, eax
        mov     lvItm.lParam, eax
        mov     eax, iSubItemCount
        mov     lvItm.iSubItem, eax
        mov     lvItm.pszText, OFFSET TempBuff
        mov     lvItm.iImage, 0
        INVOKE  SendMessage, hListview, LVM_SETITEM, NULL, ADDR lvItm

        inc     iSubItemCount
        .if bEndOfLine == TRUE              ; line finished: next field starts a new row
            mov     bEndOfLine, FALSE
            mov     iSubItemCount, 0
            inc     iRowCount
        .else
            mov     eax, iSubItemCount
            cmp     eax, iNumColumns        ; counts are unsigned
            jae     RSLT_ERROR              ; a further field would exceed the columns
        .endif

        cmp     bEndOfFile, TRUE
        je      RSLT_Exit
        jmp     RSLTZeroTBuff               ; next field

;------------- FIELD TOO LONG -------------------
DBG_001:
        invoke  MessageBox, hWnd, ADDR dText2, ADDR dTitle, MB_OK
        ; continues into the common error exit
;------------------------------------------------
RSLT_ERROR:
        invoke  MessageBox, hWnd, ADDR dText4, ADDR dTitle, MB_OK
        xor     eax, eax
        ret

RSLT_Exit:
        mov     eax, TRUE
        ret
ExtractListResults endp