#!/usr/bin/env python
# txt2tags - generic text conversion tool
# http://txt2tags.org
#
# Copyright 2001-2014 Aurelio Jargas
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# License: http://www.gnu.org/licenses/gpl-2.0.txt
# Subversion: http://svn.txt2tags.org
# Bug tracker: http://bugs.txt2tags.org
#
########################################################################
#
#   BORING CODE EXPLANATION AHEAD
#
# Just read it if you wish to understand how the txt2tags code works.
#
########################################################################
#
# The code that [1] parses the marked text is separated from the
# code that [2] inserts the target tags.
#
#   [1] made by: def convert()
#   [2] made by: class BlockMaster
#
# The structures of the marked text are identified and their contents
# are extracted into a data holder (Python lists and dictionaries).
#
# When parsing the source file, the blocks (para, lists, quote, table)
# are opened with BlockMaster, right when found. Then their contents,
# which span several lines, are fed into a special holder on the
# BlockMaster instance. Just when the block is closed, the target tags
# are inserted for the full block as a whole, in one pass. This way, we
# have better control over blocks. Much better than the previous line
# by line approach.
#
# In other words, whenever inside a block, the parser *holds* the tag
# insertion process, waiting until the full block is read.
# That was needed primarily to close paragraphs for the XHTML target,
# but proved to be a very good addition, improving many other
# processing steps.
#
# -------------------------------------------------------------------
#
# These important classes are all documented:
# CommandLine, SourceDocument, ConfigMaster, ConfigLines.
#
# There is a RAW Config format and all kinds of configuration are first
# converted to this format. Then a generic method parses it.
#
# These functions get information about the input file(s) and take
# care of the init processing:
# get_infiles_config(), process_source_file() and convert_this_files()
#
########################################################################

#XXX Python coding warning
# Avoid common mistakes:
# - do NOT use newlist=list instead of newlist=list[:]
# - do NOT use newdic=dic   instead of newdic=dic.copy()
# - do NOT use dic[key]     instead of dic.get(key)
# - do NOT use del dic[key] without checking "key in dic" before

#XXX Smart Image Align doesn't work if the image is a link
#    Can't fix that because the image is expanded together with the
#    link, at the linkbank filling moment. Only the image is passed
#    to parse_images(), not the full line, so it is always 'middle'.

#XXX Paragraph separation not valid inside Quote
#    Quote will not have <P></P> inside, instead will close and open
#    again the
#    <BLOCKQUOTE>. This really sux in CSS, when defining a
#    different background color. Still don't know how to fix it.

#XXX TODO (maybe)
#    New mark or macro which expands to an anchor full title.
#    It is necessary to parse the full document in this order:
#     DONE  1st scan: HEAD: get all settings, including %!includeconf
#     DONE  2nd scan: BODY: expand includes & apply %!preproc
#           3rd scan: BODY: read titles and compose TOC info
#           4th scan: BODY: full parsing, expanding [#anchor] 1st
#    Steps 2 and 3 can be made together, with no tag adding.
#    Two complete body scans will be *slow*, don't know if it worths.
#    One solution may be add the titles as postproc rules


##############################################################################

# User config (1=ON, 0=OFF)

USE_I18N = 1     # use gettext for i18ned messages?        (default is 1)
COLOR_DEBUG = 1  # show debug messages in colors?          (default is 1)
BG_LIGHT = 0     # your terminal background color is light (default is 0)
HTML_LOWER = 0   # use lowercased HTML tags instead upper? (default is 0)

##############################################################################


# These are all the core Python modules used by txt2tags (KISS!)
import re
import os
import sys
import locale
import time  # %%date, %%mtime
import getopt
import textwrap
import csv
import struct
import unicodedata
import base64  # embedImage()
import shlex  # CommandLine.tokenize()

# import urllib  # read remote files (URLs) -- postponed, see issue 96
# import email   # %%mtime for remote files -- postponed, see issue 96

# The "targets" module is optional: when it cannot be imported, the name
# is bound to None so later code can test for its presence.
try:
    import targets
except ImportError:
    targets = None

# NOTE(review): the collapsed source does not show whether this assignment
# belonged to the "except ImportError" branch above. Defining the empty
# list unconditionally keeps the name available in both readings -- confirm.
TARGETS_LIST = []

# Program information
my_url = 'http://txt2tags.org'
my_name = 'txt2tags'
my_email = 'verde@aurelio.net'
my_revision = '$Revision$'  # automatic, from SVN
my_version = '2.6'

# Add SVN revision number to version: 1.2.345
# Only the digits of my_revision are kept. While the SVN keyword is not
# expanded there are no digits, so the result is '2.6.' (trailing dot).
my_version = '%s.%s' % (my_version, re.sub(r'\D', '', my_revision))

# i18n - just use if available
# _() is the message translation function. Whenever gettext or the
# message catalog is not usable, it falls back to returning the message
# unchanged.
if USE_I18N:
    try:
        import gettext
        # If your locale dir is different, change it here
        # gettext.translation() is the documented spelling of the legacy
        # gettext.Catalog alias.
        cat = gettext.translation('txt2tags', localedir='/usr/share/locale/')
        _ = cat.gettext
    except Exception:
        # Best effort: a missing module or catalog must not stop the
        # program. Unlike a bare "except:", this lets SystemExit and
        # KeyboardInterrupt propagate.
        _ = lambda x: x
else:
    _ = lambda x: x

# FLAGS   : the conversion related flags  , may be used in %!options
# OPTIONS : the conversion related options, may be used in %!options
# ACTIONS : the other behavior modifiers, valid on command line only
# MACROS  : the valid macros with their default values for formatting
# SETTINGS: global miscellaneous settings, valid on RC file only
# NO_TARGET: actions that don't require a target specification
# NO_MULTI_INPUT: actions that don't accept more than one input file
# CONFIG_KEYWORDS: the valid %!key:val keywords
#
# FLAGS and OPTIONS are configs that affect the converted document.
# They usually have also a --no-