You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

412 lines
9.6 KiB
NASM

;
; CLEAN --- a utility to filter text files.
; This program removes all control codes except
; for line feeds, carriage returns, and form
; feeds, strips off the high bit of all characters,
; and expands tabs. Can be used to make a Wordstar
; file acceptable for other screen or line editors,
; and vice versa.
;
cr equ 0dh ;ASCII carriage return
lf equ 0ah ;ASCII line feed
ff equ 0ch ;ASCII form feed
eof equ 01ah ;End of file marker
tab equ 09h ;ASCII tab character
command equ 80h ;buffer for command tail
blksize equ 1024 ;blocking/deblocking size
cseg segment para public 'CODE'
assume cs:cseg,ds:data,es:data,ss:stack
clean proc far ;entry point from PC-DOS
push ds ;save DS:0000 for final
xor ax,ax ;return to PC-DOS
push ax
mov ax,data ;make our data segment
mov es,ax ;addressable via ES register
call infile ;get path and file spec.
;for input file
mov ax,es ;set DS=ES for remainder
mov ds,ax ;of program
jnc clean1 ;jump, got acceptable name
mov dx,offset msg4 ;missing or illegal filespec,
jmp clean9 ;print error message and exit.
clean1: call outfile ;set up output file name
call open_input ;now try to open input file
jnc clean2 ;jump,opened input ok
mov dx,offset msg1 ;open of input file failed,
jmp clean9 ;print error msg and exit.
clean2:
call open_output ;try to open output file.
jnc clean25 ;jump,opened ok
mov dx,offset msg2 ;open of output file failed,
jmp clean9 ;print error message and exit.
clean25: ;set up buffers
call init_buffs
call sign_on ;print ident and file names
;files successfully opened,
clean3: ;now filter the file.
call get_char ;read 1 character from input.
and al,07fh ;strip off the high bit
cmp al,20h ;is it a control code?
jae clean4 ;no,write it to new file
;yes it is control code,
cmp al,eof ;is it end of file marker?
je clean6 ;yes,jump to close files.
cmp al,tab ;is it a tab command?
jz clean5 ;yes,jump to special processing.
cmp al,cr ;if control code other than
je clean35 ;tab or end-of-file mark, throw
cmp al,ff ;it away unless it is a
je clean35 ;form feed, carriage return,
cmp al,lf ;or line feed.
jne clean3
clean35: ;If it is one of those three,
mov column,0 ;incidentally initialize
jmp clean45 ;column count for tab processor.
clean4: ;count alphanumeric chars. sent.
inc column
clean45: ;write this character to
call put_char ;output file,
jnc clean3 ;if CY not set, write was
;ok so go get next char.
clean47:
call close_input ;if CY set, disk is full
call close_output ;so close files and exit
mov dx,offset msg5 ;with error message.
jmp clean9
clean5: ;process tab character
mov ax,column ;let DX:AX=column count
cwd
mov cx,8 ;divide it by eight...
idiv cx
sub cx,dx ;remainder is in DX.
add column,cx ;update column pointer.
clean55: ;8 minus the remainder
push cx ;gives us the number of
mov al,20h ;spaces to send out to
call put_char ;move to the next tab position
pop cx ;restore space count
jc clean47 ;jump if disk is full
loop clean55
jmp short clean3 ;get next character
clean6: ;end of file detected,
call put_char ;write end-of-file marker,
jc clean47 ;jump if disk was full
call flush_buffs ;write remaining data to disk
jc clean47 ;if CY set,disk was full
;otherwise file was written ok
call close_input ;close input and output
call close_output ;files.
mov dx,offset msg3 ;addr of success message,
clean9: ;print message and return
mov ah,9 ;control to PC-DOS
int 21h
ret
clean endp
infile proc near ;process name of input file
;DS:SI <- addr command line
mov si,offset command
;ES:DI <- addr filespec buffer
mov di,offset input_name
cld
lodsb ;any command line present?
or al,al ;return error status if not.
jz infile4
infile1: ;scan over leading blanks
lodsb ;to file name
cmp al,cr ;if we hit carriage return
jz infile4 ;filename is missing.
cmp al,20h ;is this a blank?
jz infile1 ;if so keep scanning.
infile2: ;found first char of name,
stosb ;move last char. to output
;file name buffer.
lodsb ;check next character, found
cmp al,cr ;carriage return yet?
je infile3 ;yes,exit with success code
cmp al,20h ;is this a blank?
jne infile2 ;if not keep moving chars.
infile3: ;exit with carry =0
clc ;for success flag
ret
infile4: ;exit with carry =1
stc ;for error flag
ret
infile endp
outfile proc near ;set up path and file
cld ;name for output file.
mov cx,64 ;length to move
mov si,offset input_name ;source addr
mov di,offset output_name ;dest addr
rep movsb ;transfer the string
mov di,offset output_name
outfile1: ;scan string looking for
mov al,[di] ;"." marking start of extension
or al,al ;or zero byte marking name end.
jz outfile2 ;if either is found,jump.
cmp al,'.'
je outfile2 ;bump string pointer, loop
inc di ;if neither '.' or zero found.
jmp outfile1
outfile2: ;found zero or '.',force the
;extension of the output file
;to '.CLN'
mov si,offset outfile_ext
mov cx,5
rep movsb
ret ;back to caller
outfile endp
open_input proc near ;open input file
;DS:DX=addr filename
mov dx,offset input_name
mov al,0 ;AL=0 for read only
mov ah,3dh ;function 3dh=open
int 21h ;handle returned in AX,
mov input_handle,ax ;save it for later.
ret ;CY is set if error
open_input endp
open_output proc near ;open output file
;DS:DX=addr filename
mov dx,offset output_name
mov al,1 ;AL=1 for write only
mov ah,3ch ;function 3ch=MAKE or
int 21h ;truncate existing file
;handle returned in AX
mov output_handle,ax;save it for later.
ret ;return CY=true if error
open_output endp
close_input proc near ;close input file
mov bx,input_handle ;BX=handle
mov ah,3eh
int 21h
ret
close_input endp
close_output proc near ;close output file
mov bx,output_handle;BX=handle
mov ah,3eh
int 21h
ret
close_output endp
get_char proc near ;get one character from input buffer
mov bx,input_ptr
cmp bx,blksize
jne get_char1
call read_block
mov bx,0
get_char1:
mov al,[input_buffer+bx]
inc bx
mov input_ptr,bx
ret
get_char endp
put_char proc near ;put one character into output buffer
mov bx,output_ptr
mov [output_buffer+bx],al
inc bx
mov output_ptr,bx
cmp bx,blksize ;buffer full yet?
jne put_char1 ;no,jump
call write_block ;yes,write the block
ret ;return CY as status code
put_char1:
clc ;return CY clear for OK status
ret
put_char endp
read_block proc near
mov bx,input_handle ;read first block of input
mov cx,blksize
mov dx,offset input_buffer
mov ah,3fh
int 21h
jnc read_block1 ;jump if no error status
mov ax,0 ;simulate a zero length read if error
read_block1:
cmp ax,blksize ;was full buffer read in?
je read_block2 ;yes,jump
mov bx,ax ;no, store End-of-File mark
mov byte ptr [input_buffer+bx],eof
read_block2:
xor ax,ax ;initialize input buffer pointer
mov input_ptr,ax
ret
read_block endp
write_block proc near ;write blocked output (blksize bytes)
mov dx,offset output_buffer
mov cx,blksize
mov bx,output_handle
mov ah,40h
int 21h
xor bx,bx ;initialize pointer to blocking buffer
mov output_ptr,bx
cmp ax,blksize ;was correct length written?
jne write_block1 ;no,disk must be full
clc ;yes,return CY=0 indicating all OK
ret
write_block1: ;disk is full, return CY =1
stc ;as error code
ret
write_block endp
init_buffs proc near
call read_block ;read 1st block of input
xor ax,ax ;initialize pointer to output
mov output_ptr,ax ;output blocking buffer
ret
init_buffs endp
flush_buffs proc near ;write any data in output buffer to disk
mov cx,output_ptr
or cx,cx
jz flush_buffs1 ;jump,buffer is empty
mov bx,output_handle
mov dx,offset output_buffer
mov ah,40h
int 21h
cmp ax,output_ptr ;was write successful?
jnz flush_buffs2 ;no,jump
flush_buffs1:
clc ;yes,return CY=0 for
ret ;success flag
flush_buffs2: ;disk was full so write failed,
stc ;return CY=1 as error flag
ret
flush_buffs endp
sign_on proc near ;print sign-on message
mov dx,offset msg6 ;title...
mov ah,9
int 21h
mov dx,offset msg7 ;input file:
mov ah,9
int 21h
mov dx,offset input_name
call pasciiz
mov dx,offset msg8 ;output file:
mov ah,9
int 21h
mov dx,offset output_name
call pasciiz
mov dx,offset msg9
mov ah,9
int 21h
ret
sign_on endp
pasciiz proc near ;call DX=offset of ASCIIZ string
mov bx,dx ;which will be printed on standard output
pasciiz1:
mov dl,[bx]
or dl,dl
jz pasciiz9
cmp dl,'A'
jb pasciiz2
cmp dl,'Z'
ja pasciiz2
or dl,20h
pasciiz2:
mov ah,2
int 21h
inc bx
jmp pasciiz1
pasciiz9:
ret
pasciiz endp
cseg ends
data segment para public 'DATA'
input_name db 64 dup (0) ;buffer for input filespec
output_name db 64 dup (0) ;buffer for output filespec
input_handle dw 0 ;token returned by PCDOS
output_handle dw 0 ;token returned by PCDOS
input_ptr dw 0 ;pointer to input blocking buffer
output_ptr dw 0 ;pointer to output blocking buffer
outfile_ext db '.CLN',0 ;extension for filtered file
column dw 0 ;column count for tab processing
msg1 db cr,lf
db 'Cannot find input file.'
db cr,lf,'$'
msg2 db cr,lf
db 'Failed to open output file.'
db cr,lf,'$'
msg3 db cr,lf
db 'File processing completed'
db cr,lf,'$'
msg4 db cr,lf
db 'Missing file name.'
db cr,lf,'$'
msg5 db cr,lf
db 'Disk is full.'
db cr,lf,'$'
msg6 db cr,lf
db 'Clean Word Processing File'
db cr,lf
db 'WWW.'
db cr,lf,'$'
msg7 db cr,lf,'Input file: $'
msg8 db cr,lf,'Output file: $'
msg9 db cr,lf,'$'
input_buffer db blksize dup (?) ;buffer for deblocking of data
;from input file
output_buffer db blksize dup (?) ;buffer for blocking of data
;sent to output file
data ends
stack segment para stack 'STACK'
db 64 dup (?)
stack ends
end clean