r"""
Copyright 2005-2020 Peter Gacs
Licensed under the Academic Free Licence version 2.1

                          DE-MACRO

Version 1.4.1 - A small typo corrected.

Version 1.4     - Luca Citi made it python2.7 and python3 compatible.
               Peter Gacs improved the parsing of \input{<filename>},
               and made @ a letter in the style files.
Version 1.3     - this version is much more conservative about deleting
               comments and inserting or deleting blank space: tries to
               leave in all comments, adds space only when necessary, and
               tries not to delete space in the main text.
               The motivating comments came from Daniel Webb.
Version 1.2     - a syntactical bug corrected, thanks Brian de Alwis!


PURPOSE

This program can eliminate most private macros from a LaTeX file.
Applications:
  - your publisher has difficulty dealing with many private macros
  - you cooperate with colleagues who do not understand your macros
  - preprocessing before a system like latex2html, which is somewhat
    unpredictable with private macros.

It cannot be used to eliminate more complex macros that rely on
more programming-like constructs in style files.  In particular, it will
not replace style files that have options.

FILES

<tex-file-1>.db
<tex-file>-clean.tex
<defs-file>-private.sty

For speed, a macro database file called <defs-file>.db is created.
If such a file exists already then it is used.
If <defs-file>-private.sty is older than <tex-file-1>.db then it will not
be used.

It is possible to specify another database filename via --defs <defs-db>.
Then <defs-db>.db will be used.

For each <tex-file-i>, a file <tex-file-i>-clean.tex will be produced.
If <tex-file-i>-clean.tex is newer than <tex-file-i>.tex then it stays.

INPUT COMMAND

If a tex file contains a command \input{<tex-file-j>} or \input <tex-file-j>
then <tex-file-j>.tex is processed recursively, and <tex-file-j>-clean.tex
will be inserted into the final output.
For speed, if <tex-file-j>-clean.tex is newer than <tex-file-j>.tex
then <tex-file-j>.tex will not be reprocessed.

The dependency checking is not sophisticated, so if you rewrite some macros
then remove all *-clean.tex files!

"""

import getopt
import logging
import os
import re
import shelve
import sys

# Utilities

class No_detail:
    """Stand-in for an exception object when no detail is available.

    It only carries an empty ``strerror`` attribute, the attribute that
    warn() reads from its ``detail`` argument.
    """

    strerror = ""


# Shared default meaning "no detail was supplied".
no_detail = No_detail()


class Error(Exception):
    """Root of the exception hierarchy raised by this module."""

class Empty_text_error(Error):
    """Signals that data which should have been processed was empty.

    Attributes:
        data -- the data that was found empty
        message -- explanation of the failure
    """

    def __init__(self, data, message):
        self.message, self.data = message, data

def warn(error_message, detail = no_detail):
    """Print error_message to stderr, followed by detail.strerror if given.

    Each piece goes on its own line.  When detail is the module-level
    no_detail placeholder (the default), only error_message is written.
    """
    emit = sys.stderr.write
    emit(error_message + "\n")
    if no_detail != detail:
        emit(detail.strerror + "\n")

def die(error_message, detail = no_detail):
    """Report an error on stderr via warn() and exit with status 1.

    error_message -- text to print
    detail -- optional object with a ``strerror`` attribute (e.g. an
              OSError); its text is printed on a second line.
    """
    # Forward the caller's detail; passing the no_detail placeholder here
    # would silently drop the underlying OS error text.
    warn(error_message, detail=detail)
    sys.exit(1)

def getopt_map(one_letter_opts, long_optlist):
    """Parse sys.argv[1:] with getopt and return (option map, arguments).

    The map goes from each option string found (as getopt reports it) to
    its value; options without a value are mapped to 1.
    """
    parsed, remaining = getopt.getopt(sys.argv[1:],
                                      one_letter_opts, long_optlist)
    return {flag: value or 1 for flag, value in parsed}, remaining

def newer(file1, file2):
    """Tell whether file1 has a later modification time than file2.

    Returns False right away if file1 is not an existing regular file.
    Modification times are read with os.lstat; if either lstat call fails,
    the program is terminated through die().
    """
    if not os.path.isfile(file1):
        return False

    mtimes = []
    for path in (file1, file2):
        try:
            mtimes.append(os.lstat(path).st_mtime)
        except OSError as detail:
            die("lstat " + path + " failed:", detail)

    return mtimes[0] > mtimes[1]

def cut_extension(filename, ext):
    """
    Return filename with a trailing ext removed.

    ext includes the dot, if any.  When filename does not end with ext,
    it is returned unchanged.
    """
    if filename.endswith(ext):
        return filename[:len(filename) - len(ext)]
    return filename


class Stream:
    """Cursor over an indexable sequence (a string or a list).

    Attributes:
        data -- the sequence being read, or None
        pos -- index of the current element; None until data is loaded
        item -- the element at pos; it keeps its last value once pos has
                moved past the end of data
    """
    data = None
    pos = None
    item = None

    def _loaded(self):
        """True when both data and a position are available."""
        return self.data is not None and self.pos is not None

    def legal(self):
        """Return True if pos is a valid index into data."""
        # An empty stream leaves pos at None; comparing None with an int
        # raises TypeError on Python 3, so answer False explicitly.
        if not self._loaded():
            return False
        return 0 <= self.pos < len(self.data)

    def uplegal(self):
        """Return True if pos has not run past the end of data."""
        if not self._loaded():
            return False
        return self.pos < len(self.data)

    def __init__(self, data = None):
        self.data = data
        if self.data:
            self.pos = 0
            self.item = self.data[self.pos]

    def next(self):
        """Advance by one and return the new item, or None past the end."""
        self.pos += 1
        if self.pos < len(self.data):
            self.item = self.data[self.pos]
            return self.item
        return None

    def reset(self):
        """Rewind to the first element and return it (None if no data)."""
        if self.data and 0 < len(self.data):
            self.pos = 0
            self.item = self.data[0]
            return self.item
        return None


# Basic classes

# A single whitespace character.
blank_re = re.compile(r"\s")
# Leading whitespace, a captured run of word characters, trailing whitespace.
# NOTE(review): not referenced elsewhere in the visible part of this file.
blanked_filename_re = re.compile(r"^\s+(\w*)\s+")
# Optional whitespace, then a brace group holding a captured run of word
# characters (blanks allowed inside the braces).
# NOTE(review): not referenced elsewhere in the visible part of this file.
braced_filename_re = re.compile(r"^\s*{\s*(\w*)\s*}")
# A whitespace character or a closing brace.
blank_or_rbrace_re = re.compile(r"[\s}]")
# A single nonzero digit (used for argument numbers 1-9).
pos_digit_re = re.compile(r"[1-9]")

def isletter(c, isatletter=False):
    """Tell whether c counts as a letter.

    "@" is a letter exactly when isatletter is set; every other character
    is judged by str.isalpha().
    """
    return isatletter if c == "@" else c.isalpha()

class Token:
    r"""One lexical token.

    Type 0 (simple_ty) means ordinary character, types 1 and 2
    (esc_symb_ty, esc_str_ty) mean escape sequence, stored without the
    leading \ character, and type 3 (comment_ty) means comment.
    """
    simple_ty = 0
    esc_symb_ty = 1
    esc_str_ty = 2
    comment_ty = 3

    type = simple_ty
    val = " "

    def __init__(self, type_v=simple_ty, val_v=" "):
        self.type = type_v
        self.val = val_v

    def show(self):
        """Return the source text of the token.

        Ordinary characters and comments are returned as stored; escape
        sequences get their backslash back.
        """
        # Use the class constants so the class does not depend on module
        # aliases that are only defined after it.
        if self.type in (self.simple_ty, self.comment_ty):
            return self.val
        return "\\" + self.val


# Constants

# Module-level aliases for the token type codes declared on Token, read
# off a throwaway instance so the rest of the file can use the bare names.
g_token = Token(0," ")    # generic token
simple_ty = g_token.simple_ty      # ordinary character
comment_ty = g_token.comment_ty    # comment
esc_symb_ty = g_token.esc_symb_ty  # escape sequence that is a symbol
esc_str_ty = g_token.esc_str_ty    # escape sequence that is a letter string


def detokenize(text, isatletter=False):
    """
    Turn a list of tokens back into a string.

    text -- list of tokens
    isatletter -- whether "@" counts as a letter when deciding on
                  separating spaces

    Returns the concatenated source text; an empty list gives "".
    """
    # An empty token list must yield an empty string, not None: callers
    # interpolate the result into "{%s}" and would otherwise emit "{None}".
    if not text:
        return ""
    pieces = [text[0].show()]
    for previtem, item in zip(text, text[1:]):
        # Insert a separating space after an escape sequence if it is a
        # string and is followed by a letter.
        if (esc_str_ty == previtem.type
            and simple_ty == item.type and isletter(item.val[0], isatletter)):
            pieces.append(" ")
        pieces.append(item.show())
    return "".join(pieces)


def strip_comments(text):
    """
    Return a new list holding the tokens of text that are not comments.

    The order of the remaining tokens is kept; text itself is not modified.
    """
    return [token for token in text if comment_ty != token.type]

class Group:
    """A scanned unit: type 0 (token_ty) means a token, type 1 (group_ty)
    means the contents of a group within {}.
    """
    token_ty = 0
    group_ty = 1
    type = token_ty
    val = [] # Value is a token list.

    def __init__(self, type_v, val_v):
        self.val = val_v
        self.type = type_v

    def show(self):
        """Return the source text: braces are put back around a group."""
        if self.type != token_ty:
            return "{%s}" % detokenize(self.val)
        return self.val.show()

# Constants

# Module-level aliases for the group type codes declared on Group, read
# off a throwaway instance.
g_group = Group(0, [])
token_ty = g_group.token_ty  # a single token
group_ty = g_group.group_ty  # contents of a {...} group


def tokenize(in_str, isatletter=False):
    """Split in_str into a list of Token objects and return it.

    Comments become comment tokens, a backslash starts an escape-sequence
    token, and every other character is an ordinary-character token.
    isatletter gives the initial status of "@"; it is switched on and off
    when the names makeatletter / makeatother are scanned.
    Raises Error if there is nothing to tokenize.
    """
    stream = Char_stream(in_str)
    stream.reset()
    if not stream.legal():
        raise Error("No string to tokenize.")
    tokens = []
    while stream.uplegal():
        ch = stream.item
        if ch == "%":
            tokens.append(Token(comment_ty, stream.scan_comment_token()))
            continue
        if ch != "\\":
            tokens.append(Token(simple_ty, ch))
            stream.next()
            continue
        # Escape sequence: step over the backslash, then read the name.
        stream.next()
        name = stream.scan_escape_token(isatletter)
        kind = esc_str_ty if isletter(name[0], isatletter) else esc_symb_ty
        tokens.append(Token(kind, name))
        if name == "makeatletter":
            isatletter = True
        elif name == "makeatother":
            isatletter = False
    return tokens


class Command_def:
    """A command definition: name, number of arguments, body token list."""
    name = "1"
    numargs = 0
    body= ""

    def __init__(self, name_v, numargs_v, body_v):
        self.name, self.numargs, self.body = name_v, numargs_v, body_v

    def show(self):
        """Return the definition written out as a newcommand statement."""
        pieces = ["\\newcommand{\\%s}" % (self.name)]
        if 0 < self.numargs:
            pieces.append("[%d]" % self.numargs)
        pieces.append("{%s}" % detokenize(self.body))
        return "".join(pieces)


class Env_def:
    """An environment definition: name, number of arguments, and the
    token lists that replace its begin and its end."""
    name = "1"
    numargs = 0
    begin = ""
    end = ""

    def __init__(self, name_v, numargs_v, begin_v, end_v):
        self.name, self.numargs = name_v, numargs_v
        self.begin, self.end = begin_v, end_v

    def show(self):
        """Return the definition written out as a newenvironment statement."""
        pieces = ["\\newenvironment{%s}" % self.name]
        if 0 < self.numargs:
            pieces.append("[%d]" % self.numargs)
        pieces.append("{%s}" % detokenize(self.begin))
        pieces.append("{%s}" % detokenize(self.end))
        return "".join(pieces)


class Command_instance:
    """One use of a command: its name and the list of its arguments,
    each argument being a token list."""
    name = "1"
    args = []

    def __init__(self, name_v, args_v):
        self.name, self.args = name_v, args_v

    def show(self):
        """Return the instance as source text, each argument in braces."""
        pieces = ["\\"+self.name]
        for arg in self.args:
            pieces.append("{%s}" % detokenize(arg))
        return "".join(pieces)


class Env_instance:
    """One use of an environment: its name, the list of its arguments
    (token lists) and its body (a token list)."""
    name = "1"
    args = []

    def __init__(self, name_v, args_v, body_v):
        self.name, self.args, self.body = name_v, args_v, body_v

    def show(self):
        """Return the instance as source text between begin and end."""
        pieces = ["\\begin{%s}" % self.name]
        for arg in self.args:
            pieces.append("{%s}" % detokenize(arg))
        pieces.append(detokenize(self.body))
        pieces.append("\\end{%s}" % self.name)
        return "".join(pieces)

class Char_stream(Stream):

    def scan_escape_token(self, isatletter=False):
        """
        Read the name of an escape sequence.

        Scanning starts on the character right after the escape sign.  If
        that character is a letter, the whole run of letters is the name;
        otherwise the name is that single character.  isatletter decides
        whether "@" counts as a letter.
        Returns the name as a string; the stream is left after the name.
        """
        name = self.item
        following = self.next()
        if not isletter(name, isatletter):
            return name
        while self.uplegal() and isletter(following, isatletter):
            name += following
            following = self.next()
        return name

    def scan_comment_token(self):
        """
        Read a comment, starting at the comment sign %.

        Everything up to the end of the line is taken, and then all the
        whitespace that follows (the newline included).
        Returns the whole comment string.
        """
        pieces = []
        # First the rest of the line ...
        while self.uplegal() and self.item != "\n":
            pieces.append(self.item)
            self.next()
        # ... then the run of whitespace after it.
        while self.uplegal() and blank_re.match(self.item):
            pieces.append(self.item)
            self.next()
        return "".join(pieces)

    def scan_input_filename(self):
        r"""Read the filename that follows an \input token.

        The next brace group or blank-delimited word is interpreted as a
        filename (possibly without .tex).  Blanks before it are skipped.
        Inside braces everything up to the closing } is taken verbatim, so
        filenames should not begin with spaces.

        Returns the filename as a string.
        """
        item = self.item
        while self.uplegal() and blank_re.match(self.item):
            item = self.next()
        chars = []
        if "{" == item:
            item = self.next()
            while self.uplegal() and "}" != item:
                chars.append(item)
                item = self.next()
            self.next()  # step past the closing brace
        else:
            while self.uplegal() and not blank_re.match(item):
                chars.append(item)
                item = self.next()
        return "".join(chars)

    def scan_package_filenames(self):
        r"""Read the package list that follows a \usepackage token.

        The next brace group is interpreted as a list of filenames
        (without .sty) separated by commas.  Blanks around the names are
        dropped, so "{a, b}" gives ["a", "b"].

        Returns the list of names.
        Raises Error if the next nonblank character is not "{".
        """
        item = self.item
        while self.uplegal() and blank_re.match(item):
            item = self.next()
        if not "{" == item:
            raise Error("\\usepackage not followed by brace.")
        item = self.next()
        chars = []
        # Read up to the closing brace.  Stopping at the first blank would
        # cut the list short after "a, " and leave "b}" in the stream.
        while self.uplegal() and "}" != item:
            chars.append(item)
            item = self.next()
        self.next()  # step past the closing brace
        return [name.strip() for name in "".join(chars).split(",")]


class TexDemacro(Stream):
    defs_db = "x"
    defs_db_file = "x.db"

    def __init__(self, macros=None, *args, **kwargs):
        """Set up empty definition tables and the underlying stream.

        macros -- optional string of macro definitions; if given (and not
                  empty) it is scanned with add_defs_str()
        The remaining arguments are passed on to the base class.
        """
        # self.defs is the pair (command definitions, environment definitions).
        self.defs = ({}, {})
        super().__init__(*args, **kwargs)
        if not macros:
            return
        self.add_defs_str(macros)

    def smart_tokenize(self, in_str, handle_inputs=False, isatletter=False):
        r"""Tokenize in_str into self.data and return that list.

        Besides plain tokenizing:
          - if handle_inputs is set, every \input command is carried out
            through process_if_newer() and replaced by what it returns;
          - \usepackage{...} lists are filtered: packages whose name ends
            in "-private" are loaded with add_defs() and dropped from the
            list, the others are kept;
          - \usepackage followed by [ (a package with options) is left
            untouched.
        isatletter gives the initial status of "@"; it is switched by the
        names makeatletter / makeatother.
        Raises Error if there is nothing to tokenize.
        """
        self.data = []
        text = self.data
        cs = Char_stream(in_str)
        cs.reset()
        if not cs.legal():
            raise Error("No string to tokenize.")
        while cs.uplegal():
            if "%" == cs.item:
                text.append(Token(comment_ty, cs.scan_comment_token()))
            elif "\\" != cs.item:
                text.append(Token(simple_ty, cs.item))
                cs.next()
            else:
                cs.next()
                name = cs.scan_escape_token(isatletter)
                if "input" == name and handle_inputs:
                    file = cs.scan_input_filename()
                    text.extend(self.process_if_newer(file))
                elif "usepackage" == name:
                    while cs.uplegal() and blank_re.match(cs.item):
                        cs.next()
                    if "[" == cs.item: # Packages with options will not be processed.
                        text.extend([Token(esc_str_ty, "usepackage"),
                                     Token(simple_ty, "[")])
                        cs.next()
                        continue
                    # Load private packages, keep the names of the others.
                    public = []
                    for file in cs.scan_package_filenames():
                        if file.endswith("-private"):
                            self.add_defs(file)
                        else:
                            public.append(file)
                    if public: # non-private packages left
                        to_add_str = "\\usepackage{%s}" % (",".join(public))
                        text.extend(tokenize(to_add_str, isatletter))
                else:
                    if isletter(name[0], isatletter):
                        token = Token(esc_str_ty, name)
                    else:
                        token = Token(esc_symb_ty, name)
                    text.append(token)
                    if "makeatletter" == name:
                        isatletter = True
                    elif "makeatother" == name:
                        isatletter = False
        self.reset()
        return self.data

    def smart_detokenize(self, isatletter=False, handle_inputs=False):
        r"""
        Turn the token list in self.data back into a string.

        If handle_inputs is set and the list contains an \input{file},
        then the content of file-clean.tex replaces it in the output.
        isatletter tells whether "@" counts as a letter when deciding on
        separating spaces.

        Returns the string; an empty token list gives "".
        """
        self.reset()
        # Test self.data first: with no data the position is still None.
        if not self.data or not self.legal():
            return ""
        pieces = []
        previtem = None
        while self.uplegal():
            item = self.item
            # Insert a separating space after an escape sequence if it is a
            # string and is followed by a letter.
            if (previtem is not None and esc_str_ty == previtem.type
                and simple_ty == item.type and isletter(item.val[0], isatletter)):
                pieces.append(" ")
            previtem = item
            if not (esc_str_ty == item.type and "input" == item.val and handle_inputs):
                pieces.append(item.show())
                self.next()
            else:
                self.next()
                group = self.scan_group()
                file = detokenize(group.val)
                clean_file = "%s-clean.tex" % (file)
                logging.info("Reading file %s" % (clean_file))
                # The with-block closes the file even if reading fails.
                with open(clean_file, "r") as fp:
                    pieces.append(fp.read())
        return "".join(pieces)

    # Basic tex scanning

    def skip_blank_tokens(self): # we also skip comment tokens.
        """Move past blank and comment tokens.

        Returns the token the stream stops on (None if the stream ran out
        while skipping).
        """
        current = self.item
        while self.uplegal():
            is_comment = comment_ty == current.type
            is_blank = simple_ty == current.type and blank_re.match(current.val)
            if not (is_comment or is_blank):
                break
            current = self.next()
        return current

    def scan_group(self):
        """Scan the unit at the current position and return it as a Group.

        If the current token is not "{", a token-type Group holding just
        that token is returned and the stream is not advanced.  Otherwise
        the tokens up to the matching "}" (nested braces are counted) form
        a group-type Group, and the stream is left after the closing brace.
        Raises Error if the position is not legal.
        """
        if not self.legal():
            raise Error("No group to scan.")
        first = self.item
        if simple_ty != first.type or "{" != first.val:
            return Group(token_ty, [self.item])
        depth = 1
        contents = []
        tok = self.next()
        while depth and self.uplegal():
            if simple_ty == tok.type:
                depth += {"{": 1, "}": -1}.get(tok.val, 0)
            # The brace that closes the group is not part of its contents.
            if depth != 0:
                contents.append(tok)
            tok = self.next()
        return Group(group_ty, contents)

    # Command and environment definitions

    def scan_command_name(self):
        """Scan the name in a command definition and return it.

        The name is either a bare control sequence or a control sequence
        within braces (blanks and comments inside the braces are skipped).
        The stream is left on the first nonblank token after the name.
        Raises Error on malformed input.
        """
        if not self.legal():
            raise Error("No command name to scan.")
        escapes = [esc_symb_ty, esc_str_ty]
        tok = self.item
        if tok.type in escapes:
            name = tok.val
        else:
            if "{" != tok.val:
                raise Error("Command definition misses first {.")
            self.next()
            tok = self.skip_blank_tokens()
            if tok.type not in escapes:
                raise Error("Command definition does not begin with control sequence.")
            name = tok.val
            self.next()
            tok = self.skip_blank_tokens()
            if "}" != tok.val:
                raise Error("Definition for commmand %s misses first }., %s" %
                       (name, tok.val))
        self.next()
        self.skip_blank_tokens()
        return name

    def scan_numargs(self, name):
        """
        Scan the optional [n] of a command or environment definition.

        name is the name of the definition being scanned; it is only used
        in error messages.  Scanning starts on a nonblank token.
        Returns the number of arguments: the single digit 1-9 found within
        the brackets, or 0 if there is no bracket (then the stream is not
        advanced).
        Raises Error on malformed input.
        """
        if not self.legal():
            raise Error("No numargs to scan.")
        tok = self.item
        if simple_ty != tok.type:
            raise Error("Illegal command or environment definition: "+name)
        if "[" != tok.val:
            return 0
        if len(self.data) <= 4:
            raise Error("Command or environment definition is illegal: "+name)
        tok = self.next()
        if simple_ty != tok.type:
            raise Error("Illegal command or environment definition: "+name)
        digit = tok.val
        if not pos_digit_re.match(digit):
            raise Error("%s must be argument number after %s" % (digit, name))
        count = int(digit)
        self.next()
        tok = self.skip_blank_tokens()
        if simple_ty != tok.type or "]" != tok.val:
            raise Error("Illegal command definition: "+name)
        self.next()
        self.skip_blank_tokens()
        return count

    def scan_command_def(self):
        """Scan a newcommand or renewcommand statement.

        Returns a Command_def whose body has its comment tokens removed.
        Assumes that the number of arguments is at most 9.
        Raises Error on malformed input.
        """
        if not self.legal():
            raise Error("No command definition to scan.")
        head = self.item
        if len(self.data) <= 2:
            raise Error("Command definition is illegal.")
        # newcommand or renewcommand
        if head.type not in [esc_symb_ty, esc_str_ty]:
            raise Error("Command definition should begin with control sequence: "+head.val)
        if head.val not in ["newcommand", "renewcommand"]:
            raise Error("Command definition should begin with control sequence.")
        self.next()
        self.skip_blank_tokens()

        cmd_name = self.scan_command_name()
        numargs = self.scan_numargs(cmd_name)

        body_group = self.scan_group()
        if group_ty != body_group.type:
            raise Error("Command body missing: "+cmd_name)
        return Command_def(cmd_name, numargs, strip_comments(body_group.val))

    def scan_env_name(self):
        """Scan the braced name in an environment definition.

        Starts on a {.  The name is made of ordinary characters up to the
        first blank or }.  The stream is left on the first nonblank token
        after the closing brace.
        Returns name.
        Raises Error on malformed or truncated input.
        """
        if not self.legal():
            raise Error("No environment name to scan.")
        item = self.item
        if not "{" == item.val:
            raise Error("Env. definition begins with %s, not with {" % (item.val))
        self.next()
        item = self.skip_blank_tokens()
        if item is None:
            # The input ended right after the opening brace.
            raise Error("Env. definition ends before the environment name.")
        name = ""
        if not simple_ty == item.type:
            raise Error("1. Env. def. begins with cont. seq. %s, not with env.name."
                 % (item.val))
        while self.uplegal() and not blank_or_rbrace_re.match(item.val):
            name += item.val
            item = self.next()
            if item is None:
                # Ran off the end of the input inside the name.
                raise Error("Env. definition for %s misses closing }." % (name))
            if not simple_ty == item.type:
                raise Error("2. Env. def. begins with cont. seq. %s, not with env.name."
                       % (item.val))
        item = self.skip_blank_tokens()
        if item is None or not "}" == item.val:
            raise Error("Env. definition for %s misses closing }." % (name))
        self.next()
        self.skip_blank_tokens()
        return name

    def scan_env_def(self):
        """Scan a newenvironment or renewenvironment statement.

        Returns an Env_def; comment tokens are removed from its begin and
        end parts.
        Assumes that the number of arguments is at most 9.
        Raises Error on malformed input.
        """
        if not self.legal():
            raise Error("No environment definition to scan.")
        head = self.item
        if len(self.data) <= 7:
            raise Error("Environment definition is illegal.")

        if head.type not in [esc_symb_ty, esc_str_ty]:
            raise Error("Env. definition does not begin with control sequence:"+
                   head.val)
        if head.val not in ["newenvironment", "renewenvironment"]:
            raise Error("Env. definition does not begin with control sequence.")
        self.next()
        self.skip_blank_tokens()

        env_name = self.scan_env_name()
        numargs = self.scan_numargs(env_name)
        self.skip_blank_tokens()

        # The two brace groups: replacement for the begin, then for the end.
        begin_group = self.scan_group()
        if group_ty != begin_group.type:
            raise Error("Begin body missing: "+env_name)
        begin_val = strip_comments(begin_group.val)

        self.skip_blank_tokens()

        end_group = self.scan_group()
        if group_ty != end_group.type:
            raise Error("End body missing:"+env_name)

        return Env_def(env_name, numargs, begin_val,
                       strip_comments(end_group.val))

    def scan_defs(self):
        """Collect all definitions found in the token list.

        The list is scanned from the start; every (re)newcommand and
        (re)newenvironment statement is stored in self.defs under its
        name, a later definition replacing an earlier one of that name.
        Raises Error if the position is not legal on entry.
        """
        if not self.legal():
            raise Error("No definitions to scan.")
        self.reset()
        command_defs, env_defs = self.defs
        command_words = ["newcommand", "renewcommand"]
        env_words = ["newenvironment", "renewenvironment"]
        while self.uplegal():
            tok = self.item
            is_word = esc_str_ty == tok.type
            if is_word and tok.val in command_words:
                found = self.scan_command_def()
                command_defs[found.name] = found
            elif is_word and tok.val in env_words:
                found = self.scan_env_def()
                env_defs[found.name] = found
            else:
                self.next()

    # Instances

    def scan_args(self, command_or_env_def):
        """Scan the arguments of a command or environment.

        As many arguments are read as the definition's numargs says.  An
        argument is the contents of a brace group, or else the single
        current token.
        Returns the list of arguments, each one a token list.
        Raises Error if the position is not legal on entry.
        """
        if not self.legal():
            raise Error("No arguments to scan.")

        args = []
        for _ in range(command_or_env_def.numargs):
            tok = self.item
            if simple_ty == tok.type and "{" == tok.val:
                args.append(self.scan_group().val)
            else:
                args.append([tok])
                self.next()
        return args

    def scan_command(self, command_def):
        """Scan one use of a command, arguments included.

        command_def tells how many arguments to read; blank and comment
        tokens before the first argument are skipped.
        Returns a Command_instance.
        Raises Error if the current token is not a control sequence.
        """
        if not self.legal():
            raise Error("No command to scan.")
        head = self.item
        if head.type not in [esc_symb_ty, esc_str_ty]:
            raise Error("Command does not begin with control sequence.")
        self.next()
        args = []
        if 0 < command_def.numargs:
            self.skip_blank_tokens()
            args = self.scan_args(command_def)
        return Command_instance(head.val, args)

    def test_env_boundary(self, item):
        r"""Check whether item is an environment begin or end token.

        Return 1 if \begin, -1 if \end, 0 otherwise.
        """
        if esc_str_ty != item.type:
            return 0
        return {"begin": 1, "end": -1}.get(item.val, 0)

    def scan_env_begin(self):
        """Scan the begin of an environment.

        The current token must be the control sequence begin; the unit
        after it is read with scan_group() and turned back into a string.
        Returns that string, the environment name.
        """
        if not self.legal():
            raise Error("No environment begin to scan.")
        tok = self.item
        if esc_str_ty != tok.type or "begin" != tok.val:
            raise Error("Environment does not begin with begin.")
        self.next()
        return detokenize(self.scan_group().val)

    def scan_env_end(self):
        """Scan the end of an environment.

        The current token must be the control sequence end; the unit
        after it is read with scan_group() and turned back into a string.
        Returns that string, the environment name.
        """
        if not self.legal():
            raise Error("No environment end to scan.")
        tok = self.item
        if esc_str_ty != tok.type or "end" != tok.val:
            raise Error("Environment does not end with end.")
        self.next()
        return detokenize(self.scan_group().val)

    def scan_env_rest(self, env_def):
        r"""Scan the remainder of an environment instance.

        Scanning starts after \begin{envname}.  First the arguments that
        env_def asks for are read, then tokens are collected into the body
        until the \end that closes the environment.  Nested \begin / \end
        pairs are tracked by depth only (the names are not compared) and
        stay in the body; the closing \end{...} itself is consumed but not
        included.

        Returns env_instance.
        """
        if not self.legal():
            raise Error("No environment rest to scan.")
        count = 1 # We are already within a boundary.
        args = self.scan_args(env_def)
        body = []
        while count and self.uplegal():
            old_pos = self.pos
            d = self.test_env_boundary(self.item)
            count += d
            if 1 == d:
                self.scan_env_begin()
            elif -1 == d:
                self.scan_env_end()
            else:
                self.next()
            # Copy what was just stepped over, unless it was the final \end.
            if 0 < count:
                body.extend(self.data[old_pos : self.pos])
        return Env_instance(env_def.name, args, body)

    # Definitions

    def restore_defs(self):
        """Load self.defs from the definitions database, if it exists.

        Nothing happens when the file self.defs_db_file is missing.
        """
        # NOTE(review): the existence check uses self.defs_db_file while the
        # shelf is opened as self.defs_db; the file name shelve really uses
        # depends on the dbm backend -- confirm that the two agree.
        if os.path.isfile(self.defs_db_file):
            logging.info("Using defs db %s" % (self.defs_db_file))
            # shelve stores pickled objects: only open databases you trust.
            # The with-block closes the shelf even if the lookup fails.
            with shelve.open(self.defs_db) as db_h:
                self.defs = db_h["defs"]

    def save_defs(self):
        """Store self.defs in the definitions database under the key "defs".

        An entry already stored under that key is replaced.
        """
        # The with-block closes the shelf even if writing fails.
        with shelve.open(self.defs_db) as db_h:
            db_h["defs"] = self.defs

    def add_defs_str(self, defs_str):
        """Add the definitions found in the string defs_str to self.defs.

        The string is tokenized with "@" counting as a letter.
        """
        scanner = TexDemacro()
        # The scanner shares our tables, so what it finds lands in self.defs.
        scanner.defs = self.defs
        scanner.smart_tokenize(defs_str, isatletter=True)
        scanner.scan_defs()

    def add_defs(self, defs_file):
        """Add the definitions of the style file defs_file.sty to self.defs.

        defs_file -- name of the style file without the .sty extension

        If the database file self.defs_db_file is newer than the style
        file, the style file is not read.
        Raises Error if the style file does not exist.
        """
        defs_file_compl = defs_file + ".sty"
        if not os.path.isfile(defs_file_compl):
            raise Error("%s does not exist" % (defs_file_compl))

        defs_db_file = self.defs_db_file
        if newer(defs_db_file, defs_file_compl):
            # NOTE(review): nothing is loaded on this path; presumably the
            # caller is expected to have used restore_defs() -- confirm.
            logging.info("Using defs db %s for %s" % (defs_db_file, defs_file))
            return
        # The with-block closes the file even if reading fails.
        with open(defs_file_compl, "r") as defs_fp:
            defs_str = defs_fp.read()
        self.add_defs_str(defs_str)

    # Applying definitions, recursively
    # (maybe not quite in Knuth order, so avoid tricks!)

    def subst_args(self, body, args):
        """Substitute arguments into a definition body.

        body -- token list in which "#" followed by a digit 1-9 stands
                for an argument
        args -- list of arguments, each one a token list

        Returns a new token list; body and args are not modified.
        Raises Error if a "#" is not followed by a digit, or if the digit
        is larger than the number of arguments.
        """
        out = []
        pos = 0
        while pos < len(body):
            item = body[pos]
            pos += 1
            if not (simple_ty == item.type and "#" == item.val):
                out.append(item)
                continue
            # A "#" that ends the body has no number after it.
            if pos >= len(body):
                raise Error("# is not followed by number.")
            token = body[pos]
            # The number must be an ordinary character, not the name of an
            # escape sequence that happens to be a digit.
            if simple_ty != token.type or not pos_digit_re.match(token.val):
                raise Error("# is not followed by number.")
            argnum = int(token.val)
            if argnum > len(args):
                raise Error("Too large argument number.")
            out += args[argnum-1]
            pos += 1
        return out

    def apply_command_recur(self, command_instance):
        """Expand one command instance, and then everything in the result.

        Returns the expanded token list.
        Raises Error if the text to expand turns out to be empty.
        """
        definition = self.defs[0][command_instance.name]
        expanded = self.subst_args(definition.body, command_instance.args)
        try:
            return self.apply_all_recur(expanded)
        except Empty_text_error as e:
            raise Error("apply_all_recur fails on command instance %s: %s, %s" % \
                  (command_instance.show(), detokenize(e.data), e.message))

    def apply_env_recur(self, env_instance):
        """Expand one environment instance, and then everything in the result.

        The instance is replaced by the begin part of its definition, its
        own body, and the end part of the definition, with the arguments
        substituted into both parts.
        Returns the expanded token list.
        """
        definition = self.defs[1][env_instance.name]
        opening = self.subst_args(definition.begin, env_instance.args)
        closing = self.subst_args(definition.end, env_instance.args)
        return self.apply_all_recur(opening + env_instance.body + closing)

    def apply_all_recur(self, data, report=False, return_macros=False):
        """Expand every known command and environment in a token list.

        data -- the token list to process
        report -- if set, the position reached is logged every 10000 tokens
        return_macros -- if set, also return the definitions that were
                         applied directly in data, as strings, each listed
                         once in order of first use

        Returns the expanded token list, or the pair (token list,
        definition strings) when return_macros is set.
        Raises Empty_text_error if data is empty.
        """
        ts = TexDemacro(data=data)
        ts.defs = self.defs
        command_defs, env_defs = self.defs
        out, macros = [], []
        progress_step = 10000
        progress = progress_step
        # Test data first: a stream built from empty data has no position.
        if not data or not ts.legal():
            raise Empty_text_error(data, "No text to process.")
        while ts.uplegal():
            # Progress is that of the stream being scanned (ts), not of self.
            if ts.pos > progress:
                if report:
                    logging.info(ts.pos)
                progress += progress_step
            if not ts.item.type in [esc_symb_ty, esc_str_ty]:
                out.append(ts.item)
                ts.next()
                continue
            if 1 == ts.test_env_boundary(ts.item):
                old_pos = ts.pos
                env_name = ts.scan_env_begin()
                if env_name not in env_defs:
                    # Unknown environment: copy its begin through unchanged.
                    out.extend(ts.data[old_pos : ts.pos])
                    continue
                else:
                    env_def = env_defs[env_name]
                    env_instance = ts.scan_env_rest(env_def)
                    result = ts.apply_env_recur(env_instance)
                    out.extend(result)
                    macros.append(env_def.show())
            elif ts.item.val not in command_defs:
                out.append(ts.item)
                ts.next()
                continue
            else:
                command_def = command_defs[ts.item.val]
                command_inst = ts.scan_command(command_def)
                result = ts.apply_command_recur(command_inst)
                out.extend(result)
                macros.append(command_def.show())
        # dict.fromkeys removes duplicates while keeping the order.
        return (out, list(dict.fromkeys(macros))) if return_macros else out

    # Processing files

    def process_file(self, file):
        """Expand the macros of a tex file and write the result.

        file -- name of the source file, with or without the .tex extension

        Reads <file>.tex, processes it with \\input handling switched on,
        and writes the outcome to <file>-clean.tex.
        """
        file = cut_extension(file, ".tex")
        source_file = "%s.tex" % (file)
        logging.info("File %s [" % (source_file))
        # The with-blocks close the files even if reading or writing fails.
        with open(source_file, "r") as source_fp:
            text_str = source_fp.read()
        text_str_clean = self.process(text_str, handle_inputs=True)

        result_fname = "%s-clean.tex" % (file)
        logging.info("Writing %s [" % (result_fname))
        with open(result_fname, "w") as result_fp:
            result_fp.write(text_str_clean)
        logging.info("] file %s" % (result_fname))
        logging.info("] file %s" % (source_file))

    def process(self, text_str, handle_inputs=False):
        """Expand the known macros in text_str and return the new text.

        handle_inputs is passed on to both the tokenizing and the
        detokenizing step.
        Raises Error if tokenizing gives nothing.
        """
        tokens = self.smart_tokenize(text_str, handle_inputs=handle_inputs)
        if not tokens:
            raise Error("Empty tokenization result.")
        self.reset()

        expanded = self.apply_all_recur(tokens, report=True)
        self.data = expanded
        return self.smart_detokenize(handle_inputs=handle_inputs)

    def find(self, text_str, handle_inputs=False):
        """Return the definitions that get applied in text_str.

        The text is tokenized and expanded as in process(), but only the
        list of applied definitions (as strings) is returned.
        Raises Error if tokenizing gives nothing.
        """
        tokens = self.smart_tokenize(text_str, handle_inputs=handle_inputs)
        if not tokens:
            raise Error("Empty tokenization result.")
        self.reset()

        _, macros = self.apply_all_recur(tokens, report=True, return_macros=True)
        return macros

    def process_if_newer(self, file):
        r"""
        Make sure that the clean version of an input file is up to date.

        Unless file-clean.tex is newer than file.tex, the file is processed
        by a fresh TexDemacro that shares our definitions.
        Returns tokenized \input{file}.
        """
        base = cut_extension(file, ".tex")
        clean_tex_file = base+"-clean.tex"
        if not newer(clean_tex_file, base+".tex"):
            worker = TexDemacro()
            worker.data = []
            worker.defs = self.defs
            worker.process_file(base)
        else:
            logging.info("Using %s." % (clean_tex_file))
        return tokenize("\\input{%s}" % (base))
