#!/usr/bin/python
#
# Copyright (C) 2009 Jan Essert
#
# This is shuffle_notation.py, a simple script to permute notation in a mathematical text written in LaTeX
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2, or (at your option) any
# later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA

import optparse, string, random, sys, re

# LaTeX commands for Greek letters.  Every entry carries a doubled backslash
# because the strings are used as regex patterns and as regex replacement
# templates, where "\\" stands for one literal backslash.
small_greek_letters = [
    r"\\alpha", r"\\beta", r"\\gamma", r"\\delta", r"\\epsilon", r"\\zeta",
    r"\\eta", r"\\theta", r"\\iota", r"\\kappa", r"\\lambda", r"\\mu",
    r"\\nu", r"\\xi", r"\\pi", r"\\rho", r"\\sigma", r"\\tau",
    r"\\chi", r"\\phi", r"\\psi", r"\\omega",
]
large_greek_letters = [
    r"\\Gamma", r"\\Delta", r"\\Theta", r"\\Lambda", r"\\Xi",
    r"\\Pi", r"\\Sigma", r"\\Phi", r"\\Psi", r"\\Omega",
]
# NOTE(review): \upsilon and \Upsilon are not listed, so they are never
# permuted -- confirm whether that is intentional.

# Everything that we permute: the Latin letters first, then the Greek commands
all_symbols = list(string.ascii_letters)
all_symbols += small_greek_letters
all_symbols += large_greek_letters

# Patterns finding math mode strings.
# Each entry is a pair (regex, probe).
#
# The regex always has exactly five groups:
#   (text before, delimiter, math mode text, delimiter, text afterwards)
# Groups 1 and 5 are deliberately empty.  A regex that swallows the
# surrounding text with a greedy ".*" matches the whole file as one single
# match, so one substitution pass would reach only one math block per
# delimiter kind.  With empty outer groups every math block is a separate
# match and a single substitution pass visits all of them; the text between
# the blocks is simply left alone by the substitution.
# The "$" and "$$" regexes also no longer refer back to their own, still open,
# first group: such a reference can never match and Python 3 refuses to
# compile it.
#
# The probe is a cheap regex that only tells whether the opening delimiter
# occurs in the text at all.
patterns = [ (r"()(\$)([^$]*?)(\$)()", r"\$"),
             (r"()(\$\$)([^$]*?)(\$\$)()", r"\$\$"),
             # the lookbehind keeps "\\[2pt]" (line break with spacing) from
             # being mistaken for the start of display math
             (r"()((?<!\\)\\\[)(.*?)(\\\])()", r"\\\["),
             (r"()(\\ensuremath\{)([^}]*?)(\})()", r"ensuremath\{") ] + \
           [ (r"()(\\begin\{"+env+r"\*?\})(.*?)(\\end\{"+env+r"\*?\})()", r"\{"+env)
             for env in [r"align", r"equation", r"eqnarray", r"multline"] ]

# Constructs that occur inside math mode but whose argument must not be
# permuted, e.g. \text{...}.
# Every regex yields (math mode before, escaped non-math-mode text, math mode afterwards).
# The order matters: "begin\{array\}" (which also swallows the following
# {...} argument) has to be tried before the plain "begin".
math_escape_patterns = [
    r"(.*)(\\" + command + r"\{.*?\})(.*)"
    for command in (
        r"text",
        r"intertext",
        r"begin\{array\}",
        r"begin",
        r"end",
        r"genfrac\{.*?\}\{.*?\}",
    )
]
# Commands starting with "\ar" that take a [...] argument
math_escape_patterns.extend(
    r"(.*)(\\" + command + r"\[.*?\])(.*)" for command in (r"ar.{,4}",)
)
# DOTALL lets the outer groups span line breaks
math_escape_regexes = []
for escape_pattern in math_escape_patterns:
    math_escape_regexes.append(re.compile(escape_pattern, re.DOTALL))

# Lookup tables keyed by symbol (an entry of all_symbols).
# first_regex and second_regex are filled by construct_regexes(),
# permutation is filled by shuffle_notation().
first_regex = {}    # symbol -> regex used for the first half of the shuffle
second_regex = {}   # symbol -> regex used for the second half of the shuffle
permutation = {}    # symbol -> symbol it is replaced with

def shuffle_notation(simple):
    """Fill the module-level permutation dict with a random symbol mapping.

    If simple is true, lowercase Latin letters, uppercase Latin letters,
    small Greek letters and large Greek letters are each shuffled only among
    themselves.  Otherwise all symbols are shuffled in one common pool.
    """
    if not simple:
        groups = [all_symbols]
    else:
        groups = [
            list(string.ascii_lowercase),
            list(string.ascii_uppercase),
            small_greek_letters,
            large_greek_letters,
        ]

    for group in groups:
        # Shuffle a copy so that the symbol tables themselves keep their order
        images = list(group)
        random.shuffle(images)
        permutation.update(zip(group, images))

def construct_regexes():
    """Construct all regexes for symbol replacement beforehand.

    For every permutable symbol two compiled regexes are stored in the
    module-level dicts:

    first_regex[symbol] finds occurrences of the symbol which do not yet
    carry the "%%%" tag; second_regex[symbol] finds tagged occurrences.
    """
    for symbol in string.ascii_letters:
        # We have to exclude letters which are already being permuted or which are part of a word, which starts with a backslash
        # (a letter directly after a backslash or after another letter is skipped)
        first_regex[symbol] = re.compile(r"(?<!%%%)(?<!\\)(?<![a-zA-Z])"+symbol)
        second_regex[symbol] = re.compile(r"%%%"+symbol)

    for symbol in small_greek_letters+large_greek_letters:
        # The lookahead makes sure the whole command name was matched.
        # Without it "\mu" would also match the beginning of "\multicolumn"
        # (likewise "\nu" in "\null" or "\pi" in "\pitchfork") and the longer
        # command would be torn apart.
        first_regex[symbol] = re.compile(r"(?<!%%%)"+symbol+r"(?![a-zA-Z])")
        second_regex[symbol] = re.compile(r"%%%"+symbol)

def shuffle_string(text):
    r"""Apply the symbol permutation to a math mode string and return the result.

    The string may still contain constructs which escape from math mode, such
    as \text{..}.  These are copied unchanged; only the text before and after
    them is permuted (recursively).

    The permutation works in two halves so that a symbol which has already
    been replaced is not replaced a second time: first every symbol is tagged
    with "%%%", then every tagged symbol is exchanged for its image.
    """
    for regex in math_escape_regexes:
        inner_match = regex.match(text)
        if inner_match is None:
            continue
        return shuffle_string(inner_match.group(1)) + inner_match.group(2) + shuffle_string(inner_match.group(3))

    # Write first half of shuffle regex.
    # A letter is only tagged when it does not follow another letter, so in a
    # run of letters just the first one is found; tagging it puts a space in
    # front of the next one.  One sweep over all_symbols therefore handles a
    # run completely only if its letters happen to appear in the order of
    # all_symbols ("ab" but not "ba").  Sweep again until nothing new is
    # tagged.  This terminates because a tagged symbol is never matched again.
    tagged_something = True
    while tagged_something:
        tagged_something = False
        for symbol in all_symbols:
            (text, count) = first_regex[symbol].subn(r" %%%"+symbol+" ",text)
            if count:
                tagged_something = True

    # Write second half of shuffle regex
    for symbol in all_symbols:
        text = second_regex[symbol].subn(permutation[symbol],text)[0]

    return text

def do_shuffle(match):
    """Substitution callback which permutes the math mode part of a match.

    The match has to provide five groups: (text before, delimiter, math mode,
    delimiter, text afterwards).  Only group 3 is passed through
    shuffle_string; the other four groups are returned unchanged around it.
    """
    shuffled = shuffle_string(match.group(3))
    (before, opening) = match.group(1, 2)
    (closing, after) = match.group(4, 5)
    return "".join([before, opening]) + shuffled + "".join([closing, after])

if __name__ == "__main__":
    # Parse the command line
    parser = optparse.OptionParser(usage="%prog [input_file] [output_file]",description="This script randomly shuffles the notation in the latex file [input_file] and writes the resulting LaTeX document to [output_file]. It overwrites the output file unconditionally, so beware!")
    parser.add_option("-s","--simple", dest="simple", default=False, action="store_true", help="Only mix letters of one alphabet and of the same case.")
    (options,args) = parser.parse_args()
    if len(args) < 2:
        # Report missing file names with the usage message instead of failing
        # later with an IndexError.  This has to happen before destroy().
        parser.error("expected an input file and an output file")
    parser.destroy()

    # Prepare everything
    shuffle_notation(options.simple)
    construct_regexes()

    # Read input file; the with block closes it even if reading fails
    with open(args[0],"r") as infile:
        content = infile.read()

    # Apply the patterns one after another
    for pattern in patterns:
        print("Searching for "+pattern[1])
        regex = re.compile(pattern[0],re.DOTALL)
        # Cheap check first: skip the expensive regex if the delimiter does not occur
        if re.search(pattern[1],content) is None:
            print("Pattern not present")
            continue
        content = regex.subn(do_shuffle, content)[0]

    # Write output file (overwritten unconditionally)
    with open(args[1],"w") as outfile:
        outfile.write(content)
