;
; CLEAN --- a utility to filter text files.
;
; This program removes all control codes except for line feeds,
; carriage returns, and form feeds, strips off the high bit of all
; characters, and expands tabs.  Can be used to make a Wordstar file
; acceptable for other screen or line editors, and vice versa.
;
; Syntax:      MASM, 8086 real mode, PC-DOS .EXE (far entry, RET to PSP:0).
; Usage:       CLEAN filespec
;              Output is written to the same path with extension .CLN.
; Conventions: every helper that can fail returns CY=1 on failure and
;              CY=0 on success; helpers freely clobber AX,BX,CX,DX,SI,DI.
;

cr      equ     0dh             ;ASCII carriage return
lf      equ     0ah             ;ASCII line feed
ff      equ     0ch             ;ASCII form feed
eof     equ     01ah            ;End of file marker
tab     equ     09h             ;ASCII tab character
blank   equ     20h             ;ASCII space
bslash  equ     5ch             ;ASCII backslash (path separator)

command equ     80h             ;buffer for command tail (in PSP)
blksize equ     1024            ;blocking/deblocking size
namelen equ     64              ;size of input filespec buffer
extlen  equ     5               ;length of '.CLN' plus terminating zero

cseg    segment para public 'CODE'

        assume  cs:cseg,ds:data,es:data,ss:stack

;-----------------------------------------------------------------------
; clean -- program entry point.
; In:    DS = PSP segment (as set up by PC-DOS for an .EXE)
; Out:   returns to PC-DOS through the far RET to PSP:0000
;-----------------------------------------------------------------------
clean   proc    far             ;entry point from PC-DOS

        push    ds              ;save DS:0000 for final
        xor     ax,ax           ;return to PC-DOS
        push    ax

        mov     ax,data         ;make our data segment
        mov     es,ax           ;addressable via ES register

        call    infile          ;get path and file spec. for input
                                ;file (DS still = PSP here)

        mov     ax,es           ;set DS=ES for remainder
        mov     ds,ax           ;of program (MOV leaves CY intact)
        jnc     clean1          ;jump, got acceptable name
        mov     dx,offset msg4  ;missing or illegal filespec,
        jmp     clean9          ;print error message and exit.

clean1:
        call    outfile         ;set up output file name
        call    open_input      ;now try to open input file
        jnc     clean2          ;jump, opened input ok
        mov     dx,offset msg1  ;open of input file failed,
        jmp     clean9          ;print error msg and exit.

clean2:
        call    open_output     ;try to open output file.
        jnc     clean25         ;jump, opened ok
        call    close_input     ;do not leave the input handle open
        mov     dx,offset msg2  ;open of output file failed,
        jmp     clean9          ;print error message and exit.

clean25:
        call    init_buffs      ;set up buffers
        call    sign_on         ;print ident and file names

                                ;files successfully opened,
clean3:                         ;now filter the file.
        call    get_char        ;read 1 character from input.
        and     al,07fh         ;strip off the high bit
        cmp     al,blank        ;is it a control code?
        jae     clean4          ;no, write it to new file
                                ;yes it is control code,
        cmp     al,eof          ;is it end of file marker?
        je      clean6          ;yes, jump to close files.
        cmp     al,tab          ;is it a tab command?
        je      clean5          ;yes, jump to special processing.
        cmp     al,cr           ;if control code other than
        je      clean35         ;tab or end-of-file mark, throw
        cmp     al,ff           ;it away unless it is a
        je      clean35         ;form feed, carriage return,
        cmp     al,lf           ;or line feed.
        jne     clean3

clean35:                        ;If it is one of those three,
        mov     column,0        ;incidentally initialize
        jmp     clean45         ;column count for tab processor.

clean4:                         ;count alphanumeric chars. sent.
        inc     column

clean45:                        ;write this character to
        call    put_char        ;output file,
        jnc     clean3          ;if CY not set, write was
                                ;ok so go get next char.

clean47:
        call    close_input     ;if CY set, disk is full
        call    close_output    ;so close files and exit
        mov     dx,offset msg5  ;with error message.
        jmp     clean9

clean5:                         ;process tab character
        mov     ax,column       ;AX = column mod 8.  Masking is used
        and     ax,7            ;instead of a signed divide so the
                                ;result stays in 0..7 even if the
                                ;column count has wrapped past 32767.
        mov     cx,8            ;8 minus the remainder gives the
        sub     cx,ax           ;number of spaces to send out
        add     column,cx       ;update column pointer.

clean55:
        push    cx              ;save space count
        mov     al,blank        ;send a space to move toward
        call    put_char        ;the next tab position
        pop     cx              ;restore space count (POP keeps CY)
        jc      clean47         ;jump if disk is full
        loop    clean55
        jmp     short clean3    ;get next character

clean6:                         ;end of file detected,
        call    put_char        ;write end-of-file marker,
        jc      clean47         ;jump if disk was full
        call    flush_buffs     ;write remaining data to disk
        jc      clean47         ;if CY set, disk was full
                                ;otherwise file was written ok
        call    close_input     ;close input and output
        call    close_output    ;files.
        mov     dx,offset msg3  ;addr of success message,

clean9:                         ;print message and return
        mov     ah,9            ;control to PC-DOS
        int     21h
        ret

clean   endp

;-----------------------------------------------------------------------
; infile -- copy the input filespec from the command tail.
; In:    DS = PSP segment, ES = data segment
; Out:   CY=0 and input_name holds a zero-terminated filespec, or
;        CY=1 if the name is missing or does not fit in input_name.
; Clobb: AX, CX, SI, DI, direction flag cleared
;-----------------------------------------------------------------------
infile  proc    near            ;process name of input file

        mov     si,offset command       ;DS:SI <- addr command line
        mov     di,offset input_name    ;ES:DI <- addr filespec buffer
        mov     cx,namelen-1    ;CX = characters that still fit,
                                ;leaving room for the zero byte
        cld
        lodsb                   ;any command line present?
        or      al,al           ;return error status if not.
        jz      infile4

infile1:                        ;scan over leading blanks
        lodsb                   ;to file name
        cmp     al,cr           ;if we hit carriage return
        je      infile4         ;filename is missing.
        cmp     al,blank        ;is this a blank?
        je      infile1         ;if so keep scanning.

infile2:                        ;found a char of the name,
        jcxz    infile4         ;buffer exhausted: name too long
        stosb                   ;move char to file name buffer.
        dec     cx
        lodsb                   ;check next character, found
        cmp     al,cr           ;carriage return yet?
        je      infile3         ;yes, exit with success code
        cmp     al,blank        ;is this a blank?
        jne     infile2         ;if not keep moving chars.

infile3:
        xor     al,al           ;terminate the name explicitly
        stosb
        clc                     ;exit with carry =0 for success
        ret

infile4:                        ;exit with carry =1
        stc                     ;for error flag
        ret

infile  endp

;-----------------------------------------------------------------------
; outfile -- build the output filespec from the input filespec.
; Copies input_name to output_name and forces the extension of the
; final path component to '.CLN'.  A '.' that appears before the last
; '\', '/' or ':' belongs to a directory name and is left alone.
; In:    DS = ES = data segment, input_name zero-terminated
; Out:   output_name = zero-terminated output filespec
; Clobb: AX, CX, SI, DI, direction flag cleared
;-----------------------------------------------------------------------
outfile proc    near            ;set up path and file name

        cld
        mov     cx,namelen              ;length to move
        mov     si,offset input_name    ;source addr
        mov     di,offset output_name   ;dest addr
        rep     movsb                   ;transfer the string

        mov     si,offset output_name
outfile1:                       ;find the zero byte marking name end
        cmp     byte ptr [si],0
        je      outfile2
        inc     si
        jmp     outfile1

outfile2:
        mov     di,si           ;default: append extension at end

outfile3:                       ;scan backwards through the final
        cmp     si,offset output_name   ;path component for a '.'
        je      outfile5        ;reached start, no extension present
        dec     si
        mov     al,[si]
        cmp     al,bslash       ;a path or drive separator ends
        je      outfile5        ;the final component
        cmp     al,'/'
        je      outfile5
        cmp     al,':'
        je      outfile5
        cmp     al,'.'
        jne     outfile3
        mov     di,si           ;overwrite the existing extension

outfile5:                       ;force the extension of the
                                ;output file to '.CLN'
        mov     si,offset outfile_ext
        mov     cx,extlen
        rep     movsb
        ret                     ;back to caller

outfile endp

;-----------------------------------------------------------------------
; open_input -- open input_name for reading.
; Out:   CY=0 and input_handle = handle, or CY=1 on error
;-----------------------------------------------------------------------
open_input proc near            ;open input file

        mov     dx,offset input_name    ;DS:DX=addr filename
        mov     al,0            ;AL=0 for read only
        mov     ah,3dh          ;function 3dh=open
        int     21h             ;handle returned in AX,
        mov     input_handle,ax ;save it for later (MOV keeps CY).
        ret                     ;CY is set if error

open_input endp

;-----------------------------------------------------------------------
; open_output -- create (or truncate) output_name.
; Out:   CY=0 and output_handle = handle, or CY=1 on error
;-----------------------------------------------------------------------
open_output proc near           ;open output file

        mov     dx,offset output_name   ;DS:DX=addr filename
        xor     cx,cx           ;CX=0: normal file attribute.
                                ;Function 3Ch takes the attribute in
                                ;CX, so set it rather than relying on
                                ;whatever a previous routine left there.
        mov     ah,3ch          ;function 3ch=MAKE or
        int     21h             ;truncate existing file
                                ;handle returned in AX
        mov     output_handle,ax        ;save it for later.
        ret                     ;return CY=true if error

open_output endp

;-----------------------------------------------------------------------
; close_input -- close the handle stored in input_handle.
;-----------------------------------------------------------------------
close_input proc near           ;close input file

        mov     bx,input_handle ;BX=handle
        mov     ah,3eh
        int     21h
        ret

close_input endp

;-----------------------------------------------------------------------
; close_output -- close the handle stored in output_handle.
;-----------------------------------------------------------------------
close_output proc near          ;close output file

        mov     bx,output_handle        ;BX=handle
        mov     ah,3eh
        int     21h
        ret

close_output endp

;-----------------------------------------------------------------------
; get_char -- fetch the next byte from the input deblocking buffer,
; refilling the buffer from disk when it has been used up.
; Out:   AL = character
; Clobb: BX (plus whatever read_block clobbers)
;-----------------------------------------------------------------------
get_char proc   near            ;get one character from input buffer

        mov     bx,input_ptr
        cmp     bx,blksize      ;buffer used up?
        jne     get_char1       ;no, jump
        call    read_block      ;yes, read the next block
        mov     bx,0
get_char1:
        mov     al,[input_buffer+bx]
        inc     bx
        mov     input_ptr,bx
        ret

get_char endp

;-----------------------------------------------------------------------
; put_char -- append AL to the output blocking buffer, writing the
; buffer to disk when it becomes full.
; In:    AL = character
; Out:   CY=0 if ok, CY=1 if the block write failed
; Clobb: BX (plus whatever write_block clobbers)
;-----------------------------------------------------------------------
put_char proc   near            ;put one character into output buffer

        mov     bx,output_ptr
        mov     [output_buffer+bx],al
        inc     bx
        mov     output_ptr,bx
        cmp     bx,blksize      ;buffer full yet?
        jne     put_char1       ;no, jump
        call    write_block     ;yes, write the block
        ret                     ;return CY as status code
put_char1:
        clc                     ;return CY clear for OK status
        ret

put_char endp

;-----------------------------------------------------------------------
; read_block -- read up to blksize bytes into input_buffer.
; A short (or failed) read is marked by storing an end-of-file
; character just past the data that was read.
; Out:   input_ptr = 0
; Clobb: AX, BX, CX, DX
;-----------------------------------------------------------------------
read_block proc near

        mov     bx,input_handle ;read a block of input
        mov     cx,blksize
        mov     dx,offset input_buffer
        mov     ah,3fh
        int     21h
        jnc     read_block1     ;jump if no error status
        mov     ax,0            ;simulate a zero length read if error
read_block1:
        cmp     ax,blksize      ;was full buffer read in?
        je      read_block2     ;yes, jump
        mov     bx,ax           ;no, store End-of-File mark
        mov     byte ptr [input_buffer+bx],eof
read_block2:
        xor     ax,ax           ;initialize input buffer pointer
        mov     input_ptr,ax
        ret

read_block endp

;-----------------------------------------------------------------------
; write_block -- write the full output buffer (blksize bytes) to disk.
; Out:   CY=0 if all bytes were written; CY=1 if DOS reported an
;        error or wrote fewer bytes than requested (disk full).
;        output_ptr = 0 in either case.
; Clobb: AX, BX, CX, DX
;-----------------------------------------------------------------------
write_block proc near           ;write blocked output (blksize bytes)

        mov     dx,offset output_buffer
        mov     cx,blksize
        mov     bx,output_handle
        mov     ah,40h
        int     21h
        mov     word ptr output_ptr,0   ;reset pointer to blocking
                                ;buffer; MOV leaves CY from DOS intact
        jc      write_block1    ;DOS reported an error (AX=error code)
        cmp     ax,blksize      ;was correct length written?
        jne     write_block1    ;no, disk must be full
        clc                     ;yes, return CY=0 indicating all OK
        ret
write_block1:                   ;write failed, return CY =1
        stc                     ;as error code
        ret

write_block endp

;-----------------------------------------------------------------------
; init_buffs -- prime the input buffer and empty the output buffer.
;-----------------------------------------------------------------------
init_buffs proc near

        call    read_block      ;read 1st block of input
        xor     ax,ax           ;initialize pointer to
        mov     output_ptr,ax   ;output blocking buffer
        ret

init_buffs endp

;-----------------------------------------------------------------------
; flush_buffs -- write any data left in the output buffer to disk.
; Out:   CY=0 if the buffer was empty or was written completely;
;        CY=1 if DOS reported an error or wrote fewer bytes than asked.
; Clobb: AX, BX, CX, DX
;-----------------------------------------------------------------------
flush_buffs proc near           ;write any data in output buffer to disk

        mov     cx,output_ptr
        or      cx,cx
        jz      flush_buffs1    ;jump, buffer is empty
        mov     bx,output_handle
        mov     dx,offset output_buffer
        mov     ah,40h
        int     21h
        jc      flush_buffs2    ;DOS error: AX is an error code, not
                                ;a byte count, so do not compare it
        cmp     ax,output_ptr   ;was write successful?
        jne     flush_buffs2    ;no, jump
flush_buffs1:
        clc                     ;yes, return CY=0 for
        ret                     ;success flag
flush_buffs2:                   ;write failed,
        stc                     ;return CY=1 as error flag
        ret

flush_buffs endp

;-----------------------------------------------------------------------
; sign_on -- print the program title and both file names.
;-----------------------------------------------------------------------
sign_on proc    near            ;print sign-on message

        mov     dx,offset msg6  ;title...
        mov     ah,9
        int     21h
        mov     dx,offset msg7  ;input file:
        mov     ah,9
        int     21h
        mov     dx,offset input_name
        call    pasciiz
        mov     dx,offset msg8  ;output file:
        mov     ah,9
        int     21h
        mov     dx,offset output_name
        call    pasciiz
        mov     dx,offset msg9
        mov     ah,9
        int     21h
        ret

sign_on endp

;-----------------------------------------------------------------------
; pasciiz -- print a zero-terminated string on standard output,
; displaying the letters 'A'..'Z' in lower case.
; In:    DS:DX = address of ASCIIZ string
; Clobb: AX, BX, DX
;-----------------------------------------------------------------------
pasciiz proc    near

        mov     bx,dx           ;BX walks the string
pasciiz1:
        mov     dl,[bx]
        or      dl,dl           ;zero byte ends the string
        jz      pasciiz9
        cmp     dl,'A'
        jb      pasciiz2
        cmp     dl,'Z'
        ja      pasciiz2
        or      dl,20h          ;fold upper case to lower case
pasciiz2:
        mov     ah,2            ;function 2 = display character in DL
        int     21h
        inc     bx
        jmp     pasciiz1
pasciiz9:
        ret

pasciiz endp

cseg    ends

data    segment para public 'DATA'

input_name      db      namelen dup (0) ;buffer for input filespec

                                        ;buffer for output filespec; the
                                        ;extra bytes guarantee that '.CLN'
                                        ;and its zero byte always fit, even
                                        ;after a maximum-length input name
output_name     db      namelen+extlen-1 dup (0)

input_handle    dw      0       ;token returned by PCDOS
output_handle   dw      0       ;token returned by PCDOS

input_ptr       dw      0       ;pointer to input blocking buffer
output_ptr      dw      0       ;pointer to output blocking buffer

outfile_ext     db      '.CLN',0        ;extension for filtered file

column          dw      0       ;column count for tab processing

msg1            db      cr,lf
                db      'Cannot find input file.'
                db      cr,lf,'$'

msg2            db      cr,lf
                db      'Failed to open output file.'
                db      cr,lf,'$'

msg3            db      cr,lf
                db      'File processing completed'
                db      cr,lf,'$'

msg4            db      cr,lf
                db      'Missing file name.'
                db      cr,lf,'$'

msg5            db      cr,lf
                db      'Disk is full.'
                db      cr,lf,'$'

msg6            db      cr,lf
                db      'Clean Word Processing File'
                db      cr,lf
                db      'WWW.'
                db      cr,lf,'$'

msg7            db      cr,lf,'Input file:   $'

msg8            db      cr,lf,'Output file:  $'

msg9            db      cr,lf,'$'

input_buffer    db      blksize dup (?) ;buffer for deblocking of data
                                        ;from input file

output_buffer   db      blksize dup (?) ;buffer for blocking of data
                                        ;sent to output file

data    ends

stack   segment para stack 'STACK'
        db      64 dup (?)
stack   ends

        end     clean