tools/scons-1.2.0/engine/SCons/Scanner/LaTeX.py

   1 """SCons.Scanner.LaTeX
   2
   3 This module implements the dependency scanner for LaTeX code.
   4
   5 """
   6
   7 #
   8 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 The SCons Foundation
   9 #
  10 # Permission is hereby granted, free of charge, to any person obtaining
  11 # a copy of this software and associated documentation files (the
  12 # "Software"), to deal in the Software without restriction, including
  13 # without limitation the rights to use, copy, modify, merge, publish,
  14 # distribute, sublicense, and/or sell copies of the Software, and to
  15 # permit persons to whom the Software is furnished to do so, subject to
  16 # the following conditions:
  17 #
  18 # The above copyright notice and this permission notice shall be included
  19 # in all copies or substantial portions of the Software.
  20 #
  21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
  22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
  23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
  25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  28 #
  29
  30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py 3842 2008/12/20 22:59:52 scons"
  31
  32 import os.path
  33 import string
  34 import re
  35
  36 import SCons.Scanner
  37 import SCons.Util
  38
# list of graphics file extensions for TeX and LaTeX
#
# TexGraphics is handed to the scanner created by LaTeXScanner() and
# LatexGraphics to the one created by PDFLaTeXScanner().  When an
# \includegraphics argument has no extension, the scanner tries these
# extensions in list order (see LaTeX._latex_names), so the order of the
# entries is significant.
TexGraphics   = ['.eps', '.ps']
LatexGraphics = ['.pdf', '.png', '.jpg', '.gif', '.tif']
  42
# Used as a return value of modify_env_var if the variable is not set,
# i.e. when env['ENV'][var] raised KeyError before the function changed it.
class _Null:
    pass
# Note: the sentinel is the class object itself, not an instance of it.
_null = _Null
  47
  48 # The user specifies the paths in env[variable], similar to other builders.
  49 # They may be relative and must be converted to absolute, as expected
  50 # by LaTeX and Co. The environment may already have some paths in
  51 # env['ENV'][var]. These paths are honored, but the env[var] paths have
  52 # higher precedence. All changes are un-done on exit.
  53 def modify_env_var(env, var, abspath):
  54     try:
  55         save = env['ENV'][var]
  56     except KeyError:
  57         save = _null
  58     env.PrependENVPath(var, abspath)
  59     try:
  60         if SCons.Util.is_List(env[var]):
  61             #TODO(1.5)
  62             #env.PrependENVPath(var, [os.path.abspath(str(p)) for p in env[var]])
  63             env.PrependENVPath(var, map(lambda p: os.path.abspath(str(p)), env[var]))
  64         else:
  65             # Split at os.pathsep to convert into absolute path
  66             #TODO(1.5) env.PrependENVPath(var, [os.path.abspath(p) for p in str(env[var]).split(os.pathsep)])
  67             env.PrependENVPath(var, map(lambda p: os.path.abspath(p), string.split(str(env[var]), os.pathsep)))
  68     except KeyError:
  69         pass
  70
  71     # Convert into a string explicitly to append ":" (without which it won't search system
  72     # paths as well). The problem is that env.AppendENVPath(var, ":")
  73     # does not work, refuses to append ":" (os.pathsep).
  74
  75     if SCons.Util.is_List(env['ENV'][var]):
  76         # TODO(1.5)
  77         #env['ENV'][var] = os.pathsep.join(env['ENV'][var])
  78         env['ENV'][var] = string.join(env['ENV'][var], os.pathsep)
  79     # Append the trailing os.pathsep character here to catch the case with no env[var]
  80     env['ENV'][var] = env['ENV'][var] + os.pathsep
  81
  82     return save
  83
  84 class FindENVPathDirs:
  85     """A class to bind a specific *PATH variable name to a function that
  86     will return all of the *path directories."""
  87     def __init__(self, variable):
  88         self.variable = variable
  89     def __call__(self, env, dir=None, target=None, source=None, argument=None):
  90         import SCons.PathList
  91         try:
  92             path = env['ENV'][self.variable]
  93         except KeyError:
  94             return ()
  95
  96         dir = dir or env.fs._cwd
  97         path = SCons.PathList.PathList(path).subst_path(env, target, source)
  98         return tuple(dir.Rfindalldirs(path))
  99
 100
 101
 102 def LaTeXScanner():
 103     """Return a prototype Scanner instance for scanning LaTeX source files
 104     when built with latex.
 105     """
 106     ds = LaTeX(name = "LaTeXScanner",
 107                suffixes =  '$LATEXSUFFIXES',
 108                # in the search order, see below in LaTeX class docstring
 109                graphics_extensions = TexGraphics,
 110                recursive = 0)
 111     return ds
 112
 113 def PDFLaTeXScanner():
 114     """Return a prototype Scanner instance for scanning LaTeX source files
 115     when built with pdflatex.
 116     """
 117     ds = LaTeX(name = "PDFLaTeXScanner",
 118                suffixes =  '$LATEXSUFFIXES',
 119                # in the search order, see below in LaTeX class docstring
 120                graphics_extensions = LatexGraphics,
 121                recursive = 0)
 122     return ds
 123
 124 class LaTeX(SCons.Scanner.Base):
 125     """Class for scanning LaTeX files for included files.
 126
 127     Unlike most scanners, which use regular expressions that just
 128     return the included file name, this returns a tuple consisting
 129     of the keyword for the inclusion ("include", "includegraphics",
 130     "input", or "bibliography"), and then the file name itself.
 131     Based on a quick look at LaTeX documentation, it seems that we
 132     should append .tex suffix for the "include" keywords, append .tex if
 133     there is no extension for the "input" keyword, and need to add .bib
 134     for the "bibliography" keyword that does not accept extensions by itself.
 135
 136     Finally, if there is no extension for an "includegraphics" keyword
 137     latex will append .ps or .eps to find the file, while pdftex may use .pdf,
 138     .jpg, .tif, .mps, or .png.
 139
 140     The actual subset and search order may be altered by
 141     DeclareGraphicsExtensions command. This complication is ignored.
 142     The default order corresponds to experimentation with teTeX
 143         $ latex --version
 144         pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
 145         kpathsea version 3.5.4
 146     The order is:
 147         ['.eps', '.ps'] for latex
 148         ['.png', '.pdf', '.jpg', '.tif'].
 149
 150     Another difference is that the search path is determined by the type
 151     of the file being searched:
 152     env['TEXINPUTS'] for "input" and "include" keywords
 153     env['TEXINPUTS'] for "includegraphics" keyword
 154     env['BIBINPUTS'] for "bibliography" keyword
 155     env['BSTINPUTS'] for "bibliographystyle" keyword
 156
 157     FIXME: also look for the class or style in document[class|style]{}
 158     FIXME: also look for the argument of bibliographystyle{}
 159     """
 160     keyword_paths = {'include': 'TEXINPUTS',
 161                      'input': 'TEXINPUTS',
 162                      'includegraphics': 'TEXINPUTS',
 163                      'bibliography': 'BIBINPUTS',
 164                      'bibliographystyle': 'BSTINPUTS',
 165                      'usepackage': 'TEXINPUTS'}
 166     env_variables = SCons.Util.unique(keyword_paths.values())
 167
 168     def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
 169
 170         # We have to include \n with the % we exclude from the first part
 171         # part of the regex because the expression is compiled with re.M.
 172         # Without the \n,  the ^ could match the beginning of a *previous*
 173         # line followed by one or more newline characters (i.e. blank
 174         # lines), interfering with a match on the next line.
 175         regex = r'^[^%\n]*\\(include|includegraphics(?:\[[^\]]+\])?|input|bibliography|usepackage){([^}]*)}'
 176         self.cre = re.compile(regex, re.M)
 177         self.graphics_extensions = graphics_extensions
 178
 179         def _scan(node, env, path=(), self=self):
 180             node = node.rfile()
 181             if not node.exists():
 182                 return []
 183             return self.scan(node, path)
 184
 185         class FindMultiPathDirs:
 186             """The stock FindPathDirs function has the wrong granularity:
 187             it is called once per target, while we need the path that depends
 188             on what kind of included files is being searched. This wrapper
 189             hides multiple instances of FindPathDirs, one per the LaTeX path
 190             variable in the environment. When invoked, the function calculates
 191             and returns all the required paths as a dictionary (converted into
 192             a tuple to become hashable). Then the scan function converts it
 193             back and uses a dictionary of tuples rather than a single tuple
 194             of paths.
 195             """
 196             def __init__(self, dictionary):
 197                 self.dictionary = {}
 198                 for k,n in dictionary.items():
 199                     self.dictionary[k] = ( SCons.Scanner.FindPathDirs(n),
 200                                            FindENVPathDirs(n) )
 201
 202             def __call__(self, env, dir=None, target=None, source=None,
 203                                     argument=None):
 204                 di = {}
 205                 for k,(c,cENV)  in self.dictionary.items():
 206                     di[k] = ( c(env, dir=None, target=None, source=None,
 207                                    argument=None) ,
 208                               cENV(env, dir=None, target=None, source=None,
 209                                    argument=None) )
 210                 # To prevent "dict is not hashable error"
 211                 return tuple(di.items())
 212
 213         class LaTeXScanCheck:
 214             """Skip all but LaTeX source files, i.e., do not scan *.eps,
 215             *.pdf, *.jpg, etc.
 216             """
 217             def __init__(self, suffixes):
 218                 self.suffixes = suffixes
 219             def __call__(self, node, env):
 220                 current = not node.has_builder() or node.is_up_to_date()
 221                 scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
 222                 # Returning false means that the file is not scanned.
 223                 return scannable and current
 224
 225         kw['function'] = _scan
 226         kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
 227         kw['recursive'] = 1
 228         kw['skeys'] = suffixes
 229         kw['scan_check'] = LaTeXScanCheck(suffixes)
 230         kw['name'] = name
 231
 232         apply(SCons.Scanner.Base.__init__, (self,) + args, kw)
 233
 234     def _latex_names(self, include):
 235         filename = include[1]
 236         if include[0] == 'input':
 237             base, ext = os.path.splitext( filename )
 238             if ext == "":
 239                 return [filename + '.tex']
 240         if (include[0] == 'include'):
 241             return [filename + '.tex']
 242         if include[0] == 'bibliography':
 243             base, ext = os.path.splitext( filename )
 244             if ext == "":
 245                 return [filename + '.bib']
 246         if include[0] == 'usepackage':
 247             base, ext = os.path.splitext( filename )
 248             if ext == "":
 249                 return [filename + '.sty']
 250         if include[0] == 'includegraphics':
 251             base, ext = os.path.splitext( filename )
 252             if ext == "":
 253                 #TODO(1.5) return [filename + e for e in self.graphics_extensions]
 254                 return map(lambda e, f=filename: f+e, self.graphics_extensions)
 255         return [filename]
 256
 257     def sort_key(self, include):
 258         return SCons.Node.FS._my_normcase(str(include))
 259
 260     def find_include(self, include, source_dir, path):
 261         try:
 262             sub_path = path[include[0]]
 263         except (IndexError, KeyError):
 264             sub_path = ()
 265         try_names = self._latex_names(include)
 266         for n in try_names:
 267             # see if we find it using the path in env[var]
 268             i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[0])
 269             if i:
 270                 return i, include
 271             # see if we find it using the path in env['ENV'][var]
 272             i = SCons.Node.FS.find_file(n, (source_dir,) + sub_path[1])
 273             if i:
 274                 return i, include
 275         return i, include
 276
 277     def scan(self, node, path=()):
 278         # Modify the default scan function to allow for the regular
 279         # expression to return a comma separated list of file names
 280         # as can be the case with the bibliography keyword.
 281
 282         # Cache the includes list in node so we only scan it once:
 283         path_dict = dict(list(path))
 284         noopt_cre = re.compile('\[.*$')
 285         if node.includes != None:
 286             includes = node.includes
 287         else:
 288             includes = self.cre.findall(node.get_contents())
 289             # 1. Split comma-separated lines, e.g.
 290             #      ('bibliography', 'phys,comp')
 291             #    should become two entries
 292             #      ('bibliography', 'phys')
 293             #      ('bibliography', 'comp')
 294             # 2. Remove the options, e.g., such as
 295             #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
 296             #    should become
 297             #      ('includegraphics', 'picture.eps')
 298             split_includes = []
 299             for include in includes:
 300                 inc_type = noopt_cre.sub('', include[0])
 301                 inc_list = string.split(include[1],',')
 302                 for j in range(len(inc_list)):
 303                     split_includes.append( (inc_type, inc_list[j]) )
 304             #
 305             includes = split_includes
 306             node.includes = includes
 307
 308         # This is a hand-coded DSU (decorate-sort-undecorate, or
 309         # Schwartzian transform) pattern.  The sort key is the raw name
 310         # of the file as specifed on the \include, \input, etc. line.
 311         # TODO: what about the comment in the original Classic scanner:
 312         # """which lets
 313         # us keep the sort order constant regardless of whether the file
 314         # is actually found in a Repository or locally."""
 315         nodes = []
 316         source_dir = node.get_dir()
 317         for include in includes:
 318             #
 319             # Handle multiple filenames in include[1]
 320             #
 321             n, i = self.find_include(include, source_dir, path_dict)
 322             if n is None:
 323                 # Do not bother with 'usepackage' warnings, as they most
 324                 # likely refer to system-level files
 325                 if include[0] != 'usepackage':
 326                     SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
 327                                         "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
 328             else:
 329                 sortkey = self.sort_key(n)
 330                 nodes.append((sortkey, n))
 331         #
 332         nodes.sort()
 333         nodes = map(lambda pair: pair[1], nodes)
 334         return nodes