;
; CLEAN --- a utility to filter text files.
;
; This program removes all control codes except
; for line feeds, carriage returns, and form
; feeds, strips off the high bit of all characters,
; and expands tabs. It can be used to make a WordStar
; file acceptable for other screen or line editors,
; and vice versa.
;
; The source uses MASM-style syntax (SEGMENT / ASSUME /
; PROC directives).
;
|
|
|
|
|
|
cr equ 0dh ;ASCII carriage return
|
|
lf equ 0ah ;ASCII line feed
|
|
ff equ 0ch ;ASCII form feed
|
|
eof equ 01ah ;End of file marker
|
|
tab equ 09h ;ASCII tab character
|
|
|
|
command equ 80h ;buffer for command tail
|
|
|
|
blksize equ 1024 ;blocking/deblocking size
|
|
|
|
|
|
cseg segment para public 'CODE'
|
|
|
|
assume cs:cseg,ds:data,es:data,ss:stack
|
|
|
|
|
|
clean proc far ;entry point from PC-DOS
|
|
|
|
push ds ;save DS:0000 for final
|
|
xor ax,ax ;return to PC-DOS
|
|
push ax
|
|
mov ax,data ;make our data segment
|
|
mov es,ax ;addressable via ES register
|
|
call infile ;get path and file spec.
|
|
;for input file
|
|
mov ax,es ;set DS=ES for remainder
|
|
mov ds,ax ;of program
|
|
jnc clean1 ;jump, got acceptable name
|
|
mov dx,offset msg4 ;missing or illegal filespec,
|
|
jmp clean9 ;print error message and exit.
|
|
|
|
clean1: call outfile ;set up output file name
|
|
call open_input ;now try to open input file
|
|
jnc clean2 ;jump,opened input ok
|
|
mov dx,offset msg1 ;open of input file failed,
|
|
jmp clean9 ;print error msg and exit.
|
|
|
|
clean2:
|
|
call open_output ;try to open output file.
|
|
jnc clean25 ;jump,opened ok
|
|
mov dx,offset msg2 ;open of output file failed,
|
|
jmp clean9 ;print error message and exit.
|
|
|
|
clean25: ;set up buffers
|
|
call init_buffs
|
|
call sign_on ;print ident and file names
|
|
|
|
;files successfully opened,
|
|
clean3: ;now filter the file.
|
|
call get_char ;read 1 character from input.
|
|
and al,07fh ;strip off the high bit
|
|
cmp al,20h ;is it a control code?
|
|
jae clean4 ;no,write it to new file
|
|
;yes it is control code,
|
|
cmp al,eof ;is it end of file marker?
|
|
je clean6 ;yes,jump to close files.
|
|
cmp al,tab ;is it a tab command?
|
|
jz clean5 ;yes,jump to special processing.
|
|
cmp al,cr ;if control code other than
|
|
je clean35 ;tab or end-of-file mark, throw
|
|
cmp al,ff ;it away unless it is a
|
|
je clean35 ;form feed, carriage return,
|
|
cmp al,lf ;or line feed.
|
|
jne clean3
|
|
clean35: ;If it is one of those three,
|
|
mov column,0 ;incidentally initialize
|
|
jmp clean45 ;column count for tab processor.
|
|
|
|
clean4: ;count alphanumeric chars. sent.
|
|
inc column
|
|
|
|
clean45: ;write this character to
|
|
call put_char ;output file,
|
|
jnc clean3 ;if CY not set, write was
|
|
;ok so go get next char.
|
|
clean47:
|
|
call close_input ;if CY set, disk is full
|
|
call close_output ;so close files and exit
|
|
mov dx,offset msg5 ;with error message.
|
|
jmp clean9
|
|
|
|
clean5: ;process tab character
|
|
mov ax,column ;let DX:AX=column count
|
|
cwd
|
|
mov cx,8 ;divide it by eight...
|
|
idiv cx
|
|
sub cx,dx ;remainder is in DX.
|
|
add column,cx ;update column pointer.
|
|
clean55: ;8 minus the remainder
|
|
push cx ;gives us the number of
|
|
mov al,20h ;spaces to send out to
|
|
call put_char ;move to the next tab position
|
|
pop cx ;restore space count
|
|
jc clean47 ;jump if disk is full
|
|
loop clean55
|
|
jmp short clean3 ;get next character
|
|
|
|
clean6: ;end of file detected,
|
|
call put_char ;write end-of-file marker,
|
|
jc clean47 ;jump if disk was full
|
|
call flush_buffs ;write remaining data to disk
|
|
jc clean47 ;if CY set,disk was full
|
|
;otherwise file was written ok
|
|
call close_input ;close input and output
|
|
call close_output ;files.
|
|
mov dx,offset msg3 ;addr of success message,
|
|
|
|
clean9: ;print message and return
|
|
mov ah,9 ;control to PC-DOS
|
|
int 21h
|
|
ret
|
|
|
|
clean endp
|
|
|
|
|
|
infile proc near ;process name of input file
|
|
;DS:SI <- addr command line
|
|
mov si,offset command
|
|
;ES:DI <- addr filespec buffer
|
|
mov di,offset input_name
|
|
cld
|
|
lodsb ;any command line present?
|
|
or al,al ;return error status if not.
|
|
jz infile4
|
|
infile1: ;scan over leading blanks
|
|
lodsb ;to file name
|
|
cmp al,cr ;if we hit carriage return
|
|
jz infile4 ;filename is missing.
|
|
cmp al,20h ;is this a blank?
|
|
jz infile1 ;if so keep scanning.
|
|
|
|
infile2: ;found first char of name,
|
|
stosb ;move last char. to output
|
|
;file name buffer.
|
|
lodsb ;check next character, found
|
|
cmp al,cr ;carriage return yet?
|
|
je infile3 ;yes,exit with success code
|
|
cmp al,20h ;is this a blank?
|
|
jne infile2 ;if not keep moving chars.
|
|
|
|
infile3: ;exit with carry =0
|
|
clc ;for success flag
|
|
ret
|
|
|
|
infile4: ;exit with carry =1
|
|
stc ;for error flag
|
|
ret
|
|
infile endp
|
|
|
|
outfile proc near ;set up path and file
|
|
cld ;name for output file.
|
|
mov cx,64 ;length to move
|
|
mov si,offset input_name ;source addr
|
|
mov di,offset output_name ;dest addr
|
|
rep movsb ;transfer the string
|
|
mov di,offset output_name
|
|
outfile1: ;scan string looking for
|
|
mov al,[di] ;"." marking start of extension
|
|
or al,al ;or zero byte marking name end.
|
|
jz outfile2 ;if either is found,jump.
|
|
cmp al,'.'
|
|
je outfile2 ;bump string pointer, loop
|
|
inc di ;if neither '.' or zero found.
|
|
jmp outfile1
|
|
outfile2: ;found zero or '.',force the
|
|
;extension of the output file
|
|
;to '.CLN'
|
|
mov si,offset outfile_ext
|
|
mov cx,5
|
|
rep movsb
|
|
ret ;back to caller
|
|
outfile endp
|
|
|
|
open_input proc near ;open input file
|
|
;DS:DX=addr filename
|
|
mov dx,offset input_name
|
|
mov al,0 ;AL=0 for read only
|
|
mov ah,3dh ;function 3dh=open
|
|
int 21h ;handle returned in AX,
|
|
mov input_handle,ax ;save it for later.
|
|
ret ;CY is set if error
|
|
open_input endp
|
|
|
|
open_output proc near ;open output file
|
|
;DS:DX=addr filename
|
|
mov dx,offset output_name
|
|
mov al,1 ;AL=1 for write only
|
|
mov ah,3ch ;function 3ch=MAKE or
|
|
int 21h ;truncate existing file
|
|
;handle returned in AX
|
|
mov output_handle,ax;save it for later.
|
|
ret ;return CY=true if error
|
|
open_output endp
|
|
|
|
close_input proc near ;close input file
|
|
mov bx,input_handle ;BX=handle
|
|
mov ah,3eh
|
|
int 21h
|
|
ret
|
|
close_input endp
|
|
|
|
close_output proc near ;close output file
|
|
mov bx,output_handle;BX=handle
|
|
mov ah,3eh
|
|
int 21h
|
|
ret
|
|
close_output endp
|
|
|
|
get_char proc near ;get one character from input buffer
|
|
mov bx,input_ptr
|
|
cmp bx,blksize
|
|
jne get_char1
|
|
call read_block
|
|
mov bx,0
|
|
get_char1:
|
|
mov al,[input_buffer+bx]
|
|
inc bx
|
|
mov input_ptr,bx
|
|
ret
|
|
get_char endp
|
|
|
|
put_char proc near ;put one character into output buffer
|
|
mov bx,output_ptr
|
|
mov [output_buffer+bx],al
|
|
inc bx
|
|
mov output_ptr,bx
|
|
cmp bx,blksize ;buffer full yet?
|
|
jne put_char1 ;no,jump
|
|
call write_block ;yes,write the block
|
|
ret ;return CY as status code
|
|
put_char1:
|
|
clc ;return CY clear for OK status
|
|
ret
|
|
put_char endp
|
|
|
|
read_block proc near
|
|
mov bx,input_handle ;read first block of input
|
|
mov cx,blksize
|
|
mov dx,offset input_buffer
|
|
mov ah,3fh
|
|
int 21h
|
|
jnc read_block1 ;jump if no error status
|
|
mov ax,0 ;simulate a zero length read if error
|
|
read_block1:
|
|
cmp ax,blksize ;was full buffer read in?
|
|
je read_block2 ;yes,jump
|
|
mov bx,ax ;no, store End-of-File mark
|
|
mov byte ptr [input_buffer+bx],eof
|
|
read_block2:
|
|
xor ax,ax ;initialize input buffer pointer
|
|
mov input_ptr,ax
|
|
ret
|
|
read_block endp
|
|
|
|
write_block proc near ;write blocked output (blksize bytes)
|
|
mov dx,offset output_buffer
|
|
mov cx,blksize
|
|
mov bx,output_handle
|
|
mov ah,40h
|
|
int 21h
|
|
xor bx,bx ;initialize pointer to blocking buffer
|
|
mov output_ptr,bx
|
|
cmp ax,blksize ;was correct length written?
|
|
jne write_block1 ;no,disk must be full
|
|
clc ;yes,return CY=0 indicating all OK
|
|
ret
|
|
write_block1: ;disk is full, return CY =1
|
|
stc ;as error code
|
|
ret
|
|
write_block endp
|
|
|
|
init_buffs proc near
|
|
call read_block ;read 1st block of input
|
|
xor ax,ax ;initialize pointer to output
|
|
mov output_ptr,ax ;output blocking buffer
|
|
ret
|
|
init_buffs endp
|
|
|
|
flush_buffs proc near ;write any data in output buffer to disk
|
|
mov cx,output_ptr
|
|
or cx,cx
|
|
jz flush_buffs1 ;jump,buffer is empty
|
|
mov bx,output_handle
|
|
mov dx,offset output_buffer
|
|
mov ah,40h
|
|
int 21h
|
|
cmp ax,output_ptr ;was write successful?
|
|
jnz flush_buffs2 ;no,jump
|
|
flush_buffs1:
|
|
clc ;yes,return CY=0 for
|
|
ret ;success flag
|
|
flush_buffs2: ;disk was full so write failed,
|
|
stc ;return CY=1 as error flag
|
|
ret
|
|
flush_buffs endp
|
|
|
|
sign_on proc near ;print sign-on message
|
|
mov dx,offset msg6 ;title...
|
|
mov ah,9
|
|
int 21h
|
|
mov dx,offset msg7 ;input file:
|
|
mov ah,9
|
|
int 21h
|
|
mov dx,offset input_name
|
|
call pasciiz
|
|
mov dx,offset msg8 ;output file:
|
|
mov ah,9
|
|
int 21h
|
|
mov dx,offset output_name
|
|
call pasciiz
|
|
mov dx,offset msg9
|
|
mov ah,9
|
|
int 21h
|
|
ret
|
|
sign_on endp
|
|
|
|
pasciiz proc near ;call DX=offset of ASCIIZ string
|
|
mov bx,dx ;which will be printed on standard output
|
|
pasciiz1:
|
|
mov dl,[bx]
|
|
or dl,dl
|
|
jz pasciiz9
|
|
cmp dl,'A'
|
|
jb pasciiz2
|
|
cmp dl,'Z'
|
|
ja pasciiz2
|
|
or dl,20h
|
|
pasciiz2:
|
|
mov ah,2
|
|
int 21h
|
|
inc bx
|
|
jmp pasciiz1
|
|
pasciiz9:
|
|
ret
|
|
pasciiz endp
|
|
|
|
cseg ends
|
|
|
|
|
|
data segment para public 'DATA'
|
|
|
|
input_name db 64 dup (0) ;buffer for input filespec
|
|
output_name db 64 dup (0) ;buffer for output filespec
|
|
|
|
input_handle dw 0 ;token returned by PCDOS
|
|
output_handle dw 0 ;token returned by PCDOS
|
|
|
|
input_ptr dw 0 ;pointer to input blocking buffer
|
|
output_ptr dw 0 ;pointer to output blocking buffer
|
|
|
|
outfile_ext db '.CLN',0 ;extension for filtered file
|
|
|
|
column dw 0 ;column count for tab processing
|
|
|
|
msg1 db cr,lf
|
|
db 'Cannot find input file.'
|
|
db cr,lf,'$'
|
|
|
|
msg2 db cr,lf
|
|
db 'Failed to open output file.'
|
|
db cr,lf,'$'
|
|
|
|
msg3 db cr,lf
|
|
db 'File processing completed'
|
|
db cr,lf,'$'
|
|
|
|
msg4 db cr,lf
|
|
db 'Missing file name.'
|
|
db cr,lf,'$'
|
|
|
|
msg5 db cr,lf
|
|
db 'Disk is full.'
|
|
db cr,lf,'$'
|
|
|
|
msg6 db cr,lf
|
|
db 'Clean Word Processing File'
|
|
db cr,lf
|
|
db 'WWW.'
|
|
db cr,lf,'$'
|
|
|
|
msg7 db cr,lf,'Input file: $'
|
|
|
|
msg8 db cr,lf,'Output file: $'
|
|
|
|
msg9 db cr,lf,'$'
|
|
|
|
|
|
input_buffer db blksize dup (?) ;buffer for deblocking of data
|
|
;from input file
|
|
|
|
output_buffer db blksize dup (?) ;buffer for blocking of data
|
|
;sent to output file
|
|
|
|
data ends
|
|
|
|
|
|
stack segment para stack 'STACK'
|
|
db 64 dup (?)
|
|
stack ends
|
|
|
|
end clean |