#!/usr/bin/env python

# January 13, 2019, Dr. Brian Fristensky, University of Manitoba

# Description: Customize local copy of BIRCH documentation by 
# converting URLs and other strings in HTML files to correspond
# to local files and directory structures.

# Synopsis: customdoc.py oldstrings newstrings htmldirs

# Files: oldstrings      old strings to be replaced
#        newstrings      new strings to replace old strings
#        htmldirs        directories in which to change HTML files

# Automatically converted to Python3 using 2to3. Compliant with Python 2 and 3.

import sys
import os
import string
import re

###########
# GLOBALS #
###########
# Name of the scratch file that CHANGEFILE writes the rewritten HTML to.
# It is a bare (relative) file name built from the current process id.
TEMPFN = '{0}.TEMP'.format(os.getpid())

# A single line carrying this marker is exempt from string substitution.
DONTCHANGE = "<!-- DON'T CHANGE -->"

# Lines from BEGIN_DELETE through END_DELETE are dropped from the output.
BEGIN_DELETE = "<!-- BEGIN DELETE -->"
END_DELETE = "<!-- END DELETE -->"

# Lines from BEGIN_PROTECT through END_PROTECT are copied without substitution.
BEGIN_PROTECT = "<!-- BEGIN PROTECT -->"
END_PROTECT = "<!-- END PROTECT -->"

# Text between BEGIN_REPLACE and END_REPLACE is replaced by the contents of
# the file given in the name="..." attribute. Only the prefix up to the
# opening quote is matched here; the file name itself is extracted by
# splitting the line on double quotes.
BEGIN_REPLACE = '<!-- BEGIN REPLACE name="'
END_REPLACE = "<!-- END REPLACE -->"

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read in old and new strings, stripping
# leading and trailing whitespace, including
# newline characters.
def GETLIST(FN) :
    """Return the lines of the text file FN as a list of strings.

    Leading and trailing whitespace (including the newline) is removed
    from every line. Blank lines are kept as empty strings so that the
    position of each entry still corresponds to its line in the file.

    FN -- path of the file to read.

    Raises IOError/OSError if the file cannot be opened.
    """
    # str.strip() works on both Python 2 and 3; the string.strip() function
    # used previously no longer exists in Python 3.
    with open(FN, 'r') as FILE:
        return [LINE.strip() for LINE in FILE]

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def CHANGEFILE(HTMLFN, OLDLIST, NEWLIST, DIRLIST) :
    """Rewrite the HTML file HTMLFN, substituting old strings with new ones.

    The file is copied line by line to the temporary file TEMPFN. Marker
    comments in the HTML control what happens to each line:

      BEGIN_DELETE .. END_DELETE    the markers and the enclosed lines
                                    are dropped from the output
      BEGIN_PROTECT .. END_PROTECT  enclosed lines are copied unchanged
      BEGIN_REPLACE .. END_REPLACE  enclosed lines are replaced by the
                                    contents of the file named in the
                                    name="..." attribute, if it exists
      DONTCHANGE                    a line containing it is copied unchanged

    Every other line has each string in OLDLIST replaced by the string at
    the same position in NEWLIST. The substitutions are applied one after
    another, so a later pair sees the result of the earlier ones.

    HTMLFN  -- path of the HTML file to process
    OLDLIST -- strings to search for
    NEWLIST -- replacement strings, parallel to OLDLIST
    DIRLIST -- not used here; kept so existing callers keep working

    If anything changed, HTMLFN is replaced by the rewritten copy and its
    mode is set to 0644; otherwise the temporary file is simply removed.
    """
    CHANGED = False
    DELETE = False
    PROTECT = False
    REPLACE = False

    try:
        with open(HTMLFN, 'r') as HTMLFILE, open(TEMPFN, 'w') as TEMPFILE:
            # The for statement is the ONLY place that reads from HTMLFILE.
            # The flags DELETE, PROTECT and REPLACE, together with the
            # DONTCHANGE marker, decide what happens to each line.
            for LINE in HTMLFILE:

                # Turn DELETE on and off - - - - - - - - - - - - - - - -
                if BEGIN_DELETE in LINE:
                    DELETE = True
                    CHANGED = True
                elif END_DELETE in LINE:
                    DELETE = False

                # Turn PROTECT on and off - - - - - - - - - - - - - - - -
                elif BEGIN_PROTECT in LINE:
                    TEMPFILE.write(LINE)
                    PROTECT = True
                elif END_PROTECT in LINE:
                    PROTECT = False
                    # NOTE(review): this writes an extra blank line ahead of
                    # the END PROTECT marker on every rewrite; kept as-is to
                    # preserve existing output -- confirm it is intended.
                    TEMPFILE.write("\n")
                    TEMPFILE.write(LINE)

                # Turn REPLACE on and off, and copy in the replacement text
                # re.match returns None or a match object, so it must be
                # tested against None (comparing it with 0 fails on Python 3).
                elif re.match(BEGIN_REPLACE, LINE) is not None:
                    TEMPFILE.write(LINE)
                    REPLACE = True
                    # The file name is the text between the first pair of
                    # double quotes on the marker line.
                    # NOTE(review): this path is relative, so it is resolved
                    # against the current working directory -- confirm.
                    REPLACEFN = "local/public_html/" + LINE.split('"')[1]
                    if os.path.exists(REPLACEFN):
                        with open(REPLACEFN, 'r') as REPLACEFILE:
                            for REPLINE in REPLACEFILE:
                                TEMPFILE.write(REPLINE)
                                # Echo the line that was actually inserted.
                                print(REPLINE)
                    CHANGED = True
                elif END_REPLACE in LINE:
                    REPLACE = False
                    TEMPFILE.write("\n")
                    TEMPFILE.write(LINE)

                # Inside a REPLACE or DELETE block the old text is dropped.
                elif REPLACE or DELETE:
                    continue

                # Ordinary line: substitute unless protected
                else:
                    if not PROTECT and DONTCHANGE not in LINE:
                        for OLD, NEW in zip(OLDLIST, NEWLIST):
                            # An empty search string would match everywhere
                            # (e.g. from a blank line in the old-strings
                            # file), so it is skipped.
                            if OLD and OLD in LINE:
                                LINE = LINE.replace(OLD, NEW)
                                CHANGED = True
                                print(LINE)
                    TEMPFILE.write(LINE)
    except Exception:
        # Don't leave a stray temporary file behind if processing failed.
        if os.path.exists(TEMPFN):
            os.remove(TEMPFN)
        raise

    # If file has changed, overwrite original file.
    # Otherwise just delete the temporary file.
    if CHANGED:
        os.remove(HTMLFN)
        os.rename(TEMPFN, HTMLFN)
        os.chmod(HTMLFN, 0o644)
    else:
        os.remove(TEMPFN)
    return
      
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
def TRAVERSE(P, OLDLIST, NEWLIST, DIRLIST) :
    """Process every .html file in directory P and in all its subdirectories.

    The function changes into P, prints P, calls CHANGEFILE on each entry
    whose name ends in '.html', then calls itself on each subdirectory.
    Symbolic links to directories are not followed.

    P        -- directory to process; may be relative to the current
                working directory, multi-component, or absolute
    OLDLIST, NEWLIST, DIRLIST -- passed through unchanged to CHANGEFILE

    On return (or on an exception) the working directory is restored to
    what it was when the function was called.
    """
    # Remember where we started. Going back with os.pardir is only correct
    # when P is a single, non-symlinked path component, so the starting
    # directory is restored explicitly instead.
    STARTDIR = os.getcwd()
    os.chdir(P)
    try:
        print (P)

        # Make lists of all HTML files and directories in the
        # current directory.
        HTMLFILES = []
        DIRECTORIES = []
        for NAME in os.listdir(os.curdir) :
            if os.path.isdir(NAME) and not os.path.islink(NAME):
                DIRECTORIES.append(NAME)
            elif NAME.endswith('.html') :
                HTMLFILES.append(NAME)

        # Process the HTML files in this directory
        for FILE in HTMLFILES :
            CHANGEFILE(FILE, OLDLIST, NEWLIST, DIRLIST)

        # Visit all directories recursively
        for D in DIRECTORIES :
            TRAVERSE(D, OLDLIST, NEWLIST, DIRLIST)
    finally:
        # Don't forget to return to the directory we came from.
        os.chdir(STARTDIR)
    return

#======================== MAIN PROCEDURE ==========================
def customdoc(OLDSTRFN, NEWSTRFN, DIRFN):
    """Apply the string substitutions to the HTML files under the listed directories.

    OLDSTRFN -- file listing the old strings to be replaced, one per line
    NEWSTRFN -- file listing the new strings, one per line, parallel to OLDSTRFN
    DIRFN    -- file listing the directories whose HTML files are changed

    The three files are read with GETLIST. If any list is empty, or the
    old and new lists differ in length, a message is printed and no
    directory is processed. The old and new lists are printed at the end
    in every case.
    """
    # Read the three lists, in the same order as the arguments.
    OLDLIST, NEWLIST, DIRLIST = [GETLIST(FN) for FN in (OLDSTRFN, NEWSTRFN, DIRFN)]

    # Validate the inputs; the first failing check decides the message(s).
    if not OLDLIST:
        COMPLAINTS = [OLDSTRFN + " has 0 elements. Doing nothing."]
    elif not NEWLIST:
        COMPLAINTS = [NEWSTRFN + " has 0 elements. Doing nothing."]
    elif len(OLDLIST) != len(NEWLIST):
        COMPLAINTS = [
            OLDSTRFN + ' and ' + NEWSTRFN + ' must have the same number of elements',
            "Doing nothing.",
        ]
    elif not DIRLIST:
        COMPLAINTS = [DIRFN + " has 0 elements. Doing nothing."]
    else:
        COMPLAINTS = []

    if COMPLAINTS:
        for MESSAGE in COMPLAINTS:
            print(MESSAGE)
    else:
        # Traverse each directory tree recursively, changing all HTML
        # files to use the new strings.
        for PNAME in DIRLIST:
            TRAVERSE(PNAME, OLDLIST, NEWLIST, DIRLIST)

    # debugging
    print("OLDLIST:")
    print(OLDLIST)
    print("NEWLIST:")
    print(NEWLIST)

if __name__=="__main__":
    # All three file arguments are required; report usage instead of
    # failing with an IndexError when any of them is missing.
    if len(sys.argv) < 4:
        sys.stderr.write("Usage: customdoc.py oldstrings newstrings htmldirs\n")
        sys.exit(1)

    #---------- Set global variables
    OLDSTRFN = sys.argv[1]
    NEWSTRFN = sys.argv[2]
    DIRFN = sys.argv[3]

    customdoc(OLDSTRFN, NEWSTRFN, DIRFN)

