%(HEADER1)s <author>%(HEADER2)s <date>%(HEADER3)s """, 'html': """\ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <HTML> <HEAD> <META NAME="generator" CONTENT="http://txt2tags.org"> <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=%(ENCODING)s"> <LINK REL="stylesheet" TYPE="text/css" HREF="%(STYLE)s"> <TITLE>%(HEADER1)s

%(HEADER1)s

%(HEADER2)s
%(HEADER3)s """, 'htmlcss': """\ %(HEADER1)s """, # HTML5 reference code: # https://github.com/h5bp/html5-boilerplate/blob/master/index.html # https://github.com/murtaugh/HTML5-Reset/blob/master/index.html 'html5': """\ %(HEADER1)s

%(HEADER1)s

%(HEADER2)s

%(HEADER3)s

""", 'html5css': """\ %(HEADER1)s

%(HEADER1)s

%(HEADER2)s

%(HEADER3)s

""", 'htmls': """\ %(HEADER1)s

""", 'xhtml': """\ %(HEADER1)s

%(HEADER1)s

%(HEADER2)s

%(HEADER3)s

""", 'xhtmlcss': """\ %(HEADER1)s """, 'xhtmls': """\ %(HEADER1)s

%(HEADER1)s

%(HEADER2)s

%(HEADER3)s

""", 'xhtmlscss': """\ %(HEADER1)s """, 'dbk': """\

%(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'man': """\ .TH "%(HEADER1)s" 1 "%(HEADER3)s" "%(HEADER2)s" """, 'utmac': """\ .DT "%(HEADER1)s" .DA "%(HEADER2)s" .DI "%(HEADER3)s" .H1 "%(HEADER1)s" .H* "%(HEADER2)s" . .\\" txt2tags shortcuts .ds url \\W'\\\\$2'\\\\$1\\W .ds mail \\W'mailto:\\\\$2'\\\\$1\\W .ds underl \\Z'\\\\$*'\\v'.25m'\\l"\\w'\\\\$*'u"\\v'-.25m' .ds strike \\Z'\\\\$*'\\v'-.25m'\\l"\w'\\\\$*'u"\\v'.25m' .\\"ds underl \\X'SetColor blue'\\\\$1\\X'SetColor black' .\\"ds strike \\X'SetColor red'\\\\$1\\X'SetColor black' .\ """, # TODO style to

'pm6': """\

<@Normal=

><@Bullet=<@-PARENT "Normal"> ><@PreFormat=<@-PARENT "Normal"> ><@Title1=<@-PARENT "Normal"> ><@Title2=<@-PARENT "Title1"> ><@Title3=<@-PARENT "Title1"> ><@Title4=<@-PARENT "Title3"> ><@Title5=<@-PARENT "Title3"> ><@Quote=<@-PARENT "Normal">> %(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'vimwiki': """\ %%title %(HEADER1)s ## by %(HEADER2)s in %(HEADER3)s %%toc %(HEADER1)s """, 'mgp': """\ #!/usr/X11R6/bin/mgp -t 90 %%deffont "normal" xfont "utopia-medium-r", charset "iso8859-1" %%deffont "normal-i" xfont "utopia-medium-i", charset "iso8859-1" %%deffont "normal-b" xfont "utopia-bold-r" , charset "iso8859-1" %%deffont "normal-bi" xfont "utopia-bold-i" , charset "iso8859-1" %%deffont "mono" xfont "courier-medium-r", charset "iso8859-1" %%default 1 size 5 %%default 2 size 8, fore "yellow", font "normal-b", center %%default 3 size 5, fore "white", font "normal", left, prefix " " %%tab 1 size 4, vgap 30, prefix " ", icon arc "red" 40, leftfill %%tab 2 prefix " ", icon arc "orange" 40, leftfill %%tab 3 prefix " ", icon arc "brown" 40, leftfill %%tab 4 prefix " ", icon arc "darkmagenta" 40, leftfill %%tab 5 prefix " ", icon arc "magenta" 40, leftfill %%%%------------------------- end of headers ----------------------------- %%page %%size 10, center, fore "yellow" %(HEADER1)s %%font "normal-i", size 6, fore "white", center %(HEADER2)s %%font "mono", size 7, center %(HEADER3)s """, 'moin': """\ '''%(HEADER1)s''' ''%(HEADER2)s'' %(HEADER3)s """, 'gwiki': """\ *%(HEADER1)s* %(HEADER2)s _%(HEADER3)s_ """, 'adoc': """\ = %(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'doku': """\ ===== %(HEADER1)s ===== **//%(HEADER2)s//** //%(HEADER3)s// """, 'pmw': """\ (:Title %(HEADER1)s:) (:Description %(HEADER2)s:) (:Summary %(HEADER3)s:) """, 'wiki': """\ '''%(HEADER1)s''' %(HEADER2)s ''%(HEADER3)s'' """, 'red': """\ h1. %(HEADER1)s Author: %(HEADER2)s Date: %(HEADER3)s """, 'tex': \ r"""\documentclass{article} \usepackage{graphicx} \usepackage{paralist} %% needed for compact lists \usepackage[normalem]{ulem} %% needed by strike \usepackage[urlcolor=blue,colorlinks=true]{hyperref} \usepackage[%(ENCODING)s]{inputenc} %% char encoding \usepackage{%(STYLE)s} %% user defined \title{%(HEADER1)s} \author{%(HEADER2)s} \begin{document} \date{%(HEADER3)s} \maketitle \clearpage """, 'texs': \ r"""\documentclass{article} \usepackage{graphicx} \usepackage[urlcolor=black,colorlinks=true]{hyperref} \usepackage[%(ENCODING)s]{inputenc} %% char encoding \usepackage{%(STYLE)s} %% user defined \begin{document} """, 'lout': """\ @SysInclude { doc } @SysInclude { tbl } @Document @InitialFont { Times Base 12p } # Times, Courier, Helvetica, ... @PageOrientation { Portrait } # Portrait, Landscape @ColumnNumber { 1 } # Number of columns (2, 3, ...) @PageHeaders { Simple } # None, Simple, Titles, NoTitles @InitialLanguage { English } # German, French, Portuguese, ... @OptimizePages { Yes } # Yes/No smart page break feature // @Text @Begin @Display @Heading { %(HEADER1)s } @Display @I { %(HEADER2)s } @Display { %(HEADER3)s } #@NP # Break page after Headers """, # @SysInclude { tbl } # Tables support # setup: @MakeContents { Yes } # show TOC # setup: @SectionGap # break page at each section 'creole': """\ %(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'md': """\ %(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'bbcode': """\ %(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'spip': """\ {{{%(HEADER1)s}}} {{%(HEADER2)s}} {%(HEADER3)s} """, 'rtf': \ r"""{\rtf1\ansi\ansicpg1252\deff0 {\fonttbl {\f0\froman Times;} {\f1\fswiss Arial;} {\f2\fmodern Courier;} } {\colortbl;\red0\green0\blue255;} {\stylesheet {\s1\sbasedon222\snext1\f0\fs24\cf0 Normal;} {\s2\sbasedon1\snext2{\*\txttags paragraph}\f0\fs24\qj\sb0\sa0\sl480\slmult1\li0\ri0\fi360 Body Text;} {\s3\sbasedon2\snext3{\*\txttags verbatim}\f2\fs20\ql\sb0\sa240\sl240\slmult1\li720\ri720\fi0 Verbatim;} {\s4\sbasedon2\snext4{\*\txttags quote}\f0\fs24\qj\sb0\sa0\sl480\slmult1\li720\ri720\fi0 Block Quote;} {\s10\sbasedon1\snext10\keepn{\*\txttags maintitle}\f1\fs24\qc\sb0\sa0\sl480\slmult1\li0\ri0\fi0 Title;} {\s11\sbasedon1\snext2\keepn{\*\txttags title1}\f1\fs24\qc\sb240\sa240\sl480\slmult1\li0\ri0\fi0\b Heading 1;} {\s12\sbasedon11\snext2\keepn{\*\txttags title2}\f1\fs24\ql\sb240\sa240\sl480\slmult1\li0\ri0\fi0\b Heading 2;} {\s13\sbasedon11\snext2\keepn{\*\txttags title3}\f1\fs24\ql\sb240\sa240\sl480\slmult1\li360\ri0\fi0\b Heading 3;} {\s14\sbasedon11\snext2\keepn{\*\txttags title4}\f1\fs24\ql\sb240\sa240\sl480\slmult1\li360\ri0\fi0\b\i Heading 4;} {\s15\sbasedon11\snext2\keepn{\*\txttags title5}\f1\fs24\ql\sb240\sa240\sl480\slmult1\li360\ri0\fi0\i Heading 5;} {\s21\sbasedon2\snext21{\*\txttags list}\f0\fs24\qj\sb0\sa0\sl480\slmult1{\*\txttags list indent}\li720\ri0\fi-360 List;} } {\*\listtable {\list\listtemplateid1 {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li720\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li1080\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li1440\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li1800\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li2160\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li2520\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li2880\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li3240\ri0\fi-360} {\listlevel\levelnfc23\leveljc0\levelstartat1\levelfollow0{\leveltext \'01\'95;}{\levelnumbers;}{\*\txttags list indent}\li3600\ri0\fi-360} \listid1} {\list\listtemplateid2 {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'00.;}{\levelnumbers\'01;}{\*\txttags list indent}\li720\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'01.;}{\levelnumbers\'01;}{\*\txttags list indent}\li1080\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'02.;}{\levelnumbers\'01;}{\*\txttags list indent}\li1440\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'03.;}{\levelnumbers\'01;}{\*\txttags list indent}\li1800\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'04.;}{\levelnumbers\'01;}{\*\txttags list indent}\li2160\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'05.;}{\levelnumbers\'01;}{\*\txttags list indent}\li2520\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'06.;}{\levelnumbers\'01;}{\*\txttags list indent}\li2880\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'07.;}{\levelnumbers\'01;}{\*\txttags list indent}\li3240\ri0\fi-360} {\listlevel\levelnfc0\leveljc0\levelstartat1\levelfollow0{\leveltext \'02\'08.;}{\levelnumbers\'01;}{\*\txttags list indent}\li3600\ri0\fi-360} \listid2} {\list\listtemplateid3 {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'02\'00.;}{\levelnumbers\'01;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'04\'00.\'01.;}{\levelnumbers\'01\'03;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'06\'00.\'01.\'02.;}{\levelnumbers\'01\'03\'05;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'08\'00.\'01.\'02.\'03.;}{\levelnumbers\'01\'03\'05\'07;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'10\'00.\'01.\'02.\'03.\'04.;}{\levelnumbers\'01\'03\'05\'07\'09;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'02\'05.;}{\levelnumbers\'01;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'02\'06.;}{\levelnumbers\'01;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow1{\leveltext \'02\'07.;}{\levelnumbers\'01;}} {\listlevel\levelnfc0\leveljc1\levelstartat1\levelfollow0{\leveltext \'02\'08.;}{\levelnumbers\'01;}} \listid3} } {\listoverridetable {\listoverride\listid1\listoverridecount0\ls1} {\listoverride\listid2\listoverridecount0\ls2} {\listoverride\listid3\listoverridecount0\ls3} } {\info {\title %(HEADER1)s } {\author %(HEADER2)s } } \deflang1033\widowctrl\hyphauto\uc1\fromtext \paperw12240\paperh15840 \margl1440\margr1440\margt1440\margb1440 \sectd {\header\pard\qr\plain\f0 Page \chpgn\par} {\pard\plain\s10\keepn{\*\txttags maintitle}\f1\fs24\qc\sb2880\sa0\sl480\slmult1\li0\ri0\fi0 %(HEADER1)s\par} {\pard\plain\s10\keepn{\*\txttags maintitle}\f1\fs24\qc\sb0\sa0\sl480\slmult1\li0\ri0\fi0 %(HEADER2)s\par} {\pard\plain\s10\keepn{\*\txttags maintitle}\f1\fs24\qc\sb0\sa0\sl480\slmult1\li0\ri0\fi0 %(HEADER3)s\par} """, 'wp': """\ %(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'wpcss': """\ %(HEADER1)s %(HEADER2)s %(HEADER3)s """, 'tml': """\ ---+!! %(HEADER1)s *%(HEADER2)s* %%BR%% __%(HEADER3)s__ """, ## MOM ## # # "mom" is a sort of "LaTeX" for groff and has a lot of macro # commands and variables to customize for specific needs. # These few lines of commands are sufficient anyway for a good # postscript typesetted document (and so also pdf): the author # of "mom" is a professional typographer so the typesetting # defaults are pleasant and sane. See mom's author site: # http://www.schaffter.ca/mom/mom-01.html that's a good # example of documentation too! # NB: \# are commented lines in groff. # I put here a lot of options, commented or not, to let you # see the possibilities but there many more... # NB: use "-k" option for groff if input/output is UTF-8 # # usage: groff -k -m mom sample.mom > sample.ps # 'mom': """\ \# Cover and title .TITLE "%(HEADER1)s" .AUTHOR "%(HEADER2)s" \#.DOCTITLE \" ONLY to collate different files (sections, chapters etc.) .SUBTITLE "%(HEADER3)s" \# \# printstyle: typeset or typewrite it's MANDATORY! .PRINTSTYLE TYPESET \#.PRINTSTYLE TYPEWRITE \# \# doctype: default, chapter, user-defined, letter (commented is "default") \#.DOCTYPE DEFAULT \# \# copystyle: draft or final .COPYSTYLE FINAL \#.COPYSTYLE DRAFT \# \# Default values for some strings \# They're valid in every printstyle or copystyle \# Here are MY defaults (italian) \# For a more general use I think they should be groff commented \# \#.CHAPTER_STRING "Capitolo" \#.ATTRIBUTE_STRING "di" \#.TOC_HEADER_STRING "Indice" \#.ENDNOTE_TITLE "Note" \# \# section break char "#" for 1 time (LINEBREAK) \#.LINEBREAK_CHAR # 1 \# a null end string .FINIS_STRING "" \# \# Typesetting values \# These are all MY preferences! Comment out for default. \# .PAPER A4 \# Left margin (c=centimeters) \#.L_MARGIN 2.8c \# Length of line (it's for 62 chars a line for point size 12 in typewrite style) \#.LL 15.75c \# Palatino groff font, better than Times for reading. IMHO .FAMILY P .PT_SIZE 12 \# line spacing .LS 18 \# left aligned (mom macro defaults to "both aligned") .QUAD L \# No hyphenation .HY OFF \# Header and footer sizes .HEADER_SIZE -1 .FOOTER_SIZE -1 .PAGENUM_SIZE -2 \# \# Other options \# \# Indent space for "quote" and "blockquote" (defaults are good too!) \#.QUOTE_INDENT 2 \#.BLOCKQUOTE_INDENT 2 \# \# Footnotes \# \# Next gives you superscript numbers (use STAR for symbols, it's default) \# use additional argument NO_SUPERSCRIPT for typewrite printstyle \#.FOOTNOTE_MARKER_STYLE NUMBER \# Cover title at about 1/3 from top \#.DOCHEADER_ADVANCE 7.5c \# \# Double quotes italian style! aka << and >> It works only for "typeset" printstyle \#.SMARTQUOTES IT \# Next cmd is MANDATORY. .START """, } for target in TARGETS_LIST: HEADER_TEMPLATE[target] = getattr(getattr(targets, target), 'HEADER', '') HEADER_TEMPLATE[target + 'css'] = getattr(getattr(targets, target), 'HEADERCSS', '') # Generated files are easier to edit with the DZSlides engine at the end, but breaks W3C validator. AAPW_FOOT = """\ """ ############################################################################## def getTags(config): "Returns all the known tags for the specified target" keys = """ title1 numtitle1 title2 numtitle2 title3 numtitle3 title4 numtitle4 title5 numtitle5 title1Open title1Close title2Open title2Close title3Open title3Close title4Open title4Close title5Open title5Close blocktitle1Open blocktitle1Close blocktitle2Open blocktitle2Close blocktitle3Open blocktitle3Close paragraphOpen paragraphClose blockVerbOpen blockVerbClose blockVerbLine blockQuoteOpen blockQuoteClose blockQuoteLine blockVerbSep blockCommentOpen blockCommentClose fontMonoOpen fontMonoClose fontBoldOpen fontBoldClose fontItalicOpen fontItalicClose fontUnderlineOpen fontUnderlineClose fontStrikeOpen fontStrikeClose listOpen listClose listOpenCompact listCloseCompact listItemOpen listItemClose listItemLine numlistOpen numlistClose numlistOpenCompact numlistCloseCompact numlistItemOpen numlistItemClose numlistItemLine deflistOpen deflistClose deflistOpenCompact deflistCloseCompact deflistItem1Open deflistItem1Close deflistItem2Open deflistItem2Close deflistItem2LinePrefix bar1 bar2 url urlMark urlMarkAnchor urlImg email emailMark img imgAlignLeft imgAlignRight imgAlignCenter _imgAlignLeft _imgAlignRight _imgAlignCenter tableOpen tableClose _tableBorder _tableAlignLeft _tableAlignCenter tableRowOpen tableRowClose tableRowSep tableTitleRowOpen tableTitleRowClose tableCellOpen tableCellClose tableCellSep tableTitleCellOpen tableTitleCellClose tableTitleCellSep _tableColAlignLeft _tableColAlignRight _tableColAlignCenter tableCellAlignLeft tableCellAlignRight tableCellAlignCenter _tableCellAlignLeft _tableCellAlignRight _tableCellAlignCenter _tableCellAlignLeftBorder _tableCellAlignRightBorder _tableCellAlignCenterBorder _tableCellColSpan tableColAlignSep _tableCellColSpanChar tableCellCovered _tableCellBorder _tableCellMulticolOpen _tableCellMulticolClose tableCellHead tableTitleCellHead bodyOpen bodyClose cssOpen cssClose tocOpen tocClose TOC anchor comment pageBreak EOD """.split() # TIP: \a represents the current text inside the mark # TIP: ~A~, ~B~ and ~C~ are expanded to other tags parts alltags = { 'aat': { 'title1' : '\a' , 'title2' : '\a' , 'title3' : '\a' , 'title4' : '\a' , 'title5' : '\a' , 'blockQuoteLine' : AA['quote'] , 'listItemOpen' : AA['bullet'] + ' ' , 'numlistItemOpen' : '\a. ' , 'bar1' : aa_line(AA['bar1'], config['width']), 'bar2' : aa_line(AA['bar2'], config['width']), 'url' : '\a' , 'urlMark' : '\a[\a]' , 'email' : '\a' , 'emailMark' : '\a[\a]' , 'img' : '[\a]' , 'imgEmbed' : '\a' , 'fontBoldOpen' : '*' , 'fontBoldClose' : '*' , 'fontItalicOpen' : '/' , 'fontItalicClose' : '/' , 'fontUnderlineOpen' : '_' , 'fontUnderlineClose' : '_' , 'fontStrikeOpen' : '-' , 'fontStrikeClose' : '-' , }, 'rst': { 'title1' : '\a' , 'title2' : '\a' , 'title3' : '\a' , 'title4' : '\a' , 'title5' : '\a' , 'blockVerbOpen' : '::\n' , 'blockQuoteLine' : ' ' , 'listItemOpen' : RST['bullet'] + ' ' , 'numlistItemOpen' : '\a. ' , 'bar1' : aa_line(RST['bar1'], 10) , 'url' : '\a' , 'urlMark' : '`\a <\a>`_' , 'email' : '\a' , 'emailMark' : '`\a <\a>`_' , 'img' : '\n\n.. image:: \a\n :align: ~A~\n\nENDIMG', 'urlImg' : '\n :target: ' , '_imgAlignLeft' : 'left' , '_imgAlignCenter' : 'center' , '_imgAlignRight' : 'right' , 'fontMonoOpen' : '``' , 'fontMonoClose' : '``' , 'fontBoldOpen' : '**' , 'fontBoldClose' : '**' , 'fontItalicOpen' : '*' , 'fontItalicClose' : '*' , 'comment' : '.. \a' , 'TOC' : '\n.. contents::' , }, 'txt': { 'title1' : ' \a' , 'title2' : '\t\a' , 'title3' : '\t\t\a' , 'title4' : '\t\t\t\a' , 'title5' : '\t\t\t\t\a', 'blockQuoteLine' : '\t' , 'listItemOpen' : '- ' , 'numlistItemOpen' : '\a. ' , 'bar1' : '\a' , 'url' : '\a' , 'urlMark' : '\a (\a)' , 'email' : '\a' , 'emailMark' : '\a (\a)' , 'img' : '[\a]' , }, 'csv': { 'tableCellSep' : CSV['separator'] , 'tableCellOpen' : CSV.get('quotechar') or '' , 'tableCellClose' : CSV.get('quotechar') or '' , }, 'csvs': { #TIP csvs inherits all csv tags }, 'db': { }, 'txt2t': { 'title1' : ' = \a =~A~' , 'title2' : ' == \a ==~A~' , 'title3' : ' === \a ===~A~' , 'title4' : ' ==== \a ====~A~' , 'title5' : ' ===== \a =====~A~' , 'numtitle1' : ' + \a +~A~' , 'numtitle2' : ' ++ \a ++~A~' , 'numtitle3' : ' +++ \a +++~A~' , 'numtitle4' : ' ++++ \a ++++~A~' , 'numtitle5' : ' +++++ \a +++++~A~' , 'anchor' : '[\a]', 'blockVerbOpen' : '```' , 'blockVerbClose' : '```' , 'blockQuoteLine' : '\t' , 'blockCommentOpen' : '%%%' , 'blockCommentClose' : '%%%' , 'fontMonoOpen' : '``' , 'fontMonoClose' : '``' , 'fontBoldOpen' : '**' , 'fontBoldClose' : '**' , 'fontItalicOpen' : '//' , 'fontItalicClose' : '//' , 'fontUnderlineOpen' : '__' , 'fontUnderlineClose' : '__' , 'fontStrikeOpen' : '--' , 'fontStrikeClose' : '--' , 'listItemOpen' : '- ' , 'numlistItemOpen' : '+ ' , 'deflistItem1Open' : ': ' , 'listClose': '-', 'numlistClose': '+', 'deflistClose': ':', 'bar1' : '-------------------------' , 'bar2' : '=========================' , 'url' : '\a' , 'urlMark' : '[\a \a]' , #'urlMarkAnchor' : '' , 'email' : '\a' , 'emailMark' : '[\a \a]' , 'img' : '[\a]' , '_tableBorder' : '|' , '_tableAlignLeft' : '' , '_tableAlignCenter' : ' ' , 'tableRowOpen' : '~A~' , 'tableRowClose' : '~B~' , # 'tableRowSep' : '' , 'tableTitleRowOpen' : '~A~|' , 'tableCellOpen' : '| ' , 'tableCellClose' : ' ~S~' , # 'tableCellSep' : '' , 'tableCellAlignLeft' : '\a ' , 'tableCellAlignRight' : ' \a' , 'tableCellAlignCenter' : ' \a ' , # '_tableCellColSpan' : '' , '_tableCellColSpanChar' : '|' , 'comment' : '% \a' , }, 'ods': { 'tableOpen' : '', 'tableClose' : '' , 'tableRowOpen' : '' , 'tableRowClose' : '' , 'tableCellOpen' : '' , 'tableCellClose' : '' , 'tableTitleCellOpen' : '', 'tableTitleCellClose' : '', 'tableCellCovered' : '' , '_tableCellAlignCenter': ' table:style-name="ce1"', '_tableCellAlignRight' : ' table:style-name="ce2"', '_tableCellAlignLeftBorder' : ' table:style-name="ce3"', '_tableCellAlignCenterBorder': ' table:style-name="ce4"', '_tableCellAlignRightBorder' : ' table:style-name="ce5"', '_tableCellColSpan' : ' table:number-columns-spanned="\a"', 'EOD' : '', }, 'html': { 'paragraphOpen' : '

' , 'paragraphClose' : '

' , 'title1' : '\a' , 'title2' : '\a' , 'title3' : '\a' , 'title4' : '\a' , 'title5' : '\a' , 'anchor' : ' ID="\a"', 'blockVerbOpen' : '
' , 'blockVerbClose' : '
' , 'blockQuoteOpen' : '
' , 'blockQuoteClose' : '
' , 'fontMonoOpen' : '' , 'fontMonoClose' : '' , 'fontBoldOpen' : '' , 'fontBoldClose' : '' , 'fontItalicOpen' : '' , 'fontItalicClose' : '' , 'fontUnderlineOpen' : '' , 'fontUnderlineClose' : '' , 'fontStrikeOpen' : '~~' , 'fontStrikeClose' : '~~' , 'listOpen' : '
' , 'listClose' : '
' , 'listItemOpen' : '
' , 'numlistOpen' : '
' , 'numlistClose' : '
' , 'numlistItemOpen' : '
' , 'deflistOpen' : '
' , 'deflistClose' : '
' , 'deflistItem1Open' : '
' , 'deflistItem1Close' : '
' , 'deflistItem2Open' : '
' , 'bar1' : '
' , 'bar2' : '
' , 'url' : '\a' , 'urlMark' : '\a' , 'email' : '\a' , 'emailMark' : '\a' , 'img' : '', 'imgEmbed' : '', '_imgAlignLeft' : ' ALIGN="left"' , '_imgAlignCenter' : ' ALIGN="middle"', '_imgAlignRight' : ' ALIGN="right"' , 'tableOpen' : '', 'tableClose' : '' , 'tableRowOpen' : '' , 'tableRowClose' : '' , 'tableCellOpen' : '' , 'tableCellClose' : '' , 'tableTitleCellOpen' : '' , 'tableTitleCellClose' : '' , '_tableBorder' : ' BORDER="1"' , '_tableAlignCenter' : ' ALIGN="center"', '_tableCellAlignRight' : ' ALIGN="right"' , '_tableCellAlignCenter': ' ALIGN="center"', '_tableCellColSpan' : ' COLSPAN="\a"' , 'cssOpen' : '' , 'comment' : '' , 'EOD' : '' }, #TIP wp inherits all HTML tags 'wp': { # Exclusions to let the WordPress code cleaner 'bodyOpen' : '', 'bodyClose' : '', 'paragraphOpen' : '', 'paragraphClose' : '', 'comment' : '', 'EOD' : '', # All list items must be closed 'listItemClose' : '
' , 'numlistItemClose' : '' , 'deflistItem2Close' : '' , # WP likes tags this way 'bar1' : '
', 'bar2' : '
', 'fontBoldOpen' : '' , 'fontBoldClose' : '' , 'fontItalicOpen' : '' , 'fontItalicClose' : '' , }, #TIP xhtml inherits all HTML definitions (lowercased) #TIP http://www.w3.org/TR/xhtml1/#guidelines #TIP http://www.htmlref.com/samples/Chapt17/17_08.htm 'xhtml': { 'listItemClose' : '' , 'numlistItemClose' : '' , 'deflistItem2Close' : '' , 'bar1' : '
', 'bar2' : '
', 'img' : '', 'imgEmbed' : '' }, 'xhtmls': { 'fontBoldOpen' : '' , 'fontBoldClose' : '' , 'fontItalicOpen' : '' , 'fontItalicClose' : '' , 'fontUnderlineOpen' : '', 'fontUnderlineClose' : '' , 'fontStrikeOpen' : '', # use ~~instead ? 'fontStrikeClose' : '~~' , 'listItemClose' : '' , 'numlistItemClose' : '' , 'deflistItem2Close' : '' , 'bar1' : '
', 'bar2' : '
', 'img' : '', 'imgEmbed' : '', '_imgAlignLeft' : 'margin: 0 auto 0 0;' , '_imgAlignCenter' : 'margin: 0 auto 0 auto;', '_imgAlignRight' : 'margin: 0 0 0 auto;' , '_tableAlignCenter' : ' style="margin-left: auto; margin-right: auto;"', '_tableCellAlignRight' : ' style="text-align:right"' , '_tableCellAlignCenter': ' style="text-align:center"', }, 'html5': { 'title1Open' : '\n
\a
' , 'title1Close' : '' , 'title2Open' : '\n
\a
' , 'title2Close' : '' , 'title3Open' : '\n
\a
' , 'title3Close' : '' , 'title4Open' : '\n
\a
' , 'title4Close' : '' , 'title5Open' : '\n
\a
' , 'title5Close' : '' , 'fontBoldOpen' : '' , 'fontBoldClose' : '' , 'fontItalicOpen' : '' , 'fontItalicClose' : '' , 'fontUnderlineOpen' : '', 'fontUnderlineClose' : '' , 'fontStrikeOpen' : '~~' , 'fontStrikeClose' : '~~' , 'listItemClose' : '' , 'numlistItemClose' : '' , 'deflistItem2Close' : '' , 'bar1' : '
' , 'bar2' : '
' , 'img' : '' , 'imgEmbed' : '' , '_imgAlignLeft' : ' class="left"' , '_imgAlignCenter' : ' class="center"', '_imgAlignRight' : ' class="right"' , 'tableOpen' : '' , '_tableBorder' : ' class="tableborder"' , '_tableAlignCenter' : ' style="margin-left: auto; margin-right: auto;"', '_tableCellAlignRight' : ' class="right"' , '_tableCellAlignCenter': ' class="center"', 'cssOpen' : '', 'comment' : '\\# \a' , 'blockCommentOpen' : '.COMMENT' , 'blockCommentClose' : '.COMMENT OFF' , 'TOC' : '.TOC', # NB: it must be the last macro in file! 'EOD' : '.FINIS' }, } for target in TARGETS_LIST: if getattr(getattr(targets, target), 'RULES', {}).get('confdependenttags'): reload(getattr(targets, target)) alltags[target] = getattr(getattr(targets, target), 'TAGS', {}) # Exceptions for --css-sugar if (config['css-sugar'] and config['target'] in ('html', 'xhtml', 'xhtmls')) or config['target'] == 'wp': # Change just HTML because XHTML inherits it htmltags = alltags['html'] # Table with no cellpadding htmltags['tableOpen'] = htmltags['tableOpen'].replace(' CELLPADDING="4"', '') # DIVs htmltags['tocOpen'] = '
' htmltags['tocClose'] = '
' htmltags['bodyOpen'] = '
' htmltags['bodyClose'] = '
' # Make the HTML -> XHTML inheritance xhtml = alltags['html'].copy() for key in xhtml.keys(): xhtml[key] = xhtml[key].lower() # Some like HTML tags as lowercase, some don't... (headers out) if HTML_LOWER: alltags['html'] = xhtml.copy() if config['target'] == 'htmls': alltags['htmls'] = alltags['html5'].copy() if config['target'] == 'texs': alltags['texs'] = alltags['tex'].copy() if config['target'] == 'csvs': alltags['csvs'] = alltags['csv'].copy() if config['target'] in ('xhtml', 'xhtmls', 'html5', 'htmls', 'wp'): xhtml.update(alltags[config['target']]) alltags[config['target']] = xhtml.copy() if config['target'] == 'aat' and config['slides']: alltags['aat']['urlMark'] = alltags['aat']['emailMark'] = '\a (\a)' alltags['aat']['bar1'] = aa_line(AA['bar1'], config['width'] - 2) alltags['aat']['bar2'] = aa_line(AA['bar2'], config['width'] - 2) if not config['chars']: alltags['aat']['listItemOpen'] = '* ' if config['target'] == 'aat' and config['web']: alltags['aat']['url'] = alltags['aat']['urlMark'] = '\a' alltags['aat']['email'] = alltags['aat']['emailMark'] = '\a' alltags['aat']['img'] = '' alltags['aat']['anchor'] = '' alltags['aat']['comment'] = '' for beautifier in ['Bold', 'Italic', 'Underline', 'Strike']: _open, close = 'font' + beautifier + 'Open', 'font' + beautifier + 'Close' alltags['aat'][_open], alltags['aat'][close] = alltags['html'][_open].lower(), alltags['html'][close].lower() # Compose the target tags dictionary tags = {} target_tags = alltags[config['target']].copy() for key in keys: tags[key] = '' # create empty keys for key in target_tags.keys(): tags[key] = maskEscapeChar(target_tags[key]) # populate # Map strong line to pagebreak if rules['mapbar2pagebreak'] and tags['pageBreak']: tags['bar2'] = tags['pageBreak'] # Change img tag if embedding images in RTF if config['embed-images']: if tags.get('imgEmbed'): tags['img'] = tags['imgEmbed'] else: Error(_("Invalid --embed-images option with target '%s'." % config['target'])) # Map strong line to separator if not defined if not tags['bar2'] and tags['bar1']: tags['bar2'] = tags['bar1'] return tags ############################################################################## def getRules(config): "Returns all the target-specific syntax rules" ret = {} allrules = [ # target rules (ON/OFF) 'linkable', # target supports external links 'tableable', # target supports tables 'tableonly', # target computes only the tables 'spread', # target uses the spread.py engine 'spreadgrid', # target adds the reference grid to the sheet 'imglinkable', # target supports images as links 'imgalignable', # target supports image alignment 'imgasdefterm', # target supports image as definition term 'autonumberlist', # target supports numbered lists natively 'autonumbertitle', # target supports numbered titles natively 'stylable', # target supports external style files 'parainsidelist', # lists items supports paragraph 'compactlist', # separate enclosing tags for compact lists 'spacedlistitem', # lists support blank lines between items 'listnotnested', # lists cannot be nested 'listitemnotnested', # list items must be closed before nesting lists 'quotenotnested', # quotes cannot be nested 'verbblocknotescaped', # don't escape specials in verb block 'verbblockfinalescape', # do final escapes in verb block 'escapeurl', # escape special in link URL 'labelbeforelink', # label comes before the link on the tag 'onelinepara', # dump paragraph as a single long line 'onelinequote', # dump quote as a single long line (EXPERIMENTAL) 'notbreaklistitemclose', # do not break line before the list item close tag (EXPERIMENTAL) 'tabletitlerowinbold', # manually bold any cell on table titles 'tablecellstrip', # strip extra spaces from each table cell 'tablecellspannable', # the table cells can have span attribute 'tablecellcovered', # covered cell follows the cell span 'tablecellmulticol', # separate open+close tags for multicol cells 'tablecolumnsnumber', # set the number of columns in place of n_cols in tableOpen 'tablenumber', # set the number of the table in place of n_table in tableOpen 'barinsidequote', # bars are allowed inside quote blocks 'finalescapetitle', # perform final escapes on title lines 'autotocnewpagebefore', # break page before automatic TOC 'autotocnewpageafter', # break page after automatic TOC 'autotocwithbars', # automatic TOC surrounded by bars 'plaintexttoc', # TOC will be plain text (no links) 'mapbar2pagebreak', # map the strong bar to a page break 'titleblocks', # titles must be on open/close section blocks 'listlineafteropen', # put listItemLine after listItemOpen 'escapexmlchars', # escape the XML special chars: < > & 'listlevelzerobased', # list levels start at 0 when encoding into tags 'zerodepthparagraph', # non-nested paras have block depth of 0 instead of 1 'cellspancumulative', # cell span value adds up for each cell of a row 'keepblankheaderline', # template lines are not removed if headers are blank 'confdependenttags', # tags are configuration dependent 'confdependentrules', # rules are configuration dependent # Target code beautify (ON/OFF) 'indentverbblock', # add leading spaces to verb block lines 'breaktablecell', # break lines after any table cell 'breaktablelineopen', # break line after opening table line 'notbreaklistopen', # don't break line after opening a new list 'keepquoteindent', # don't remove the leading TABs on quotes 'keeplistindent', # don't remove the leading spaces on lists 'blankendautotoc', # append a blank line at the auto TOC end 'tagnotindentable', # tags must be placed at the line beginning 'spacedlistitemopen', # append a space after the list item open tag 'spacednumlistitemopen', # append a space after the numlist item open tag 'deflisttextstrip', # strip the contents of the deflist text 'blanksaroundpara', # put a blank line before and after paragraphs 'blanksaroundverb', # put a blank line before and after verb blocks 'blanksaroundquote', # put a blank line before and after quotes 'blanksaroundlist', # put a blank line before and after lists 'blanksaroundnumlist', # put a blank line before and after numlists 'blanksarounddeflist', # put a blank line before and after deflists 'blanksaroundnestedlist', # put a blank line before and after all type of nested lists 'blanksaroundtable', # put a blank line before and after tables 'blanksaroundbar', # put a blank line before and after bars 'blanksaroundtitle', # put a blank line before and after titles 'blanksaroundnumtitle', # put a blank line before and after numtitles 'iswrapped', # wrap with the --width value # Value settings 'listmaxdepth', # maximum depth for lists 'quotemaxdepth', # maximum depth for quotes 'tablecellaligntype', # type of table cell align: cell, column 'blockdepthmultiply', # block depth multiple for encoding 'depthmultiplyplus', # add to block depth before multiplying 'cellspanmultiplier', # cell span is multiplied by this value 'spreadmarkup', # the markup spread engine option: 'txt', 'html' or 'tex' ] rules_bank = { 'txt': { 'indentverbblock': 1, 'spacedlistitem': 1, 'parainsidelist': 1, 'keeplistindent': 1, 'barinsidequote': 1, 'autotocwithbars': 1, 'plaintexttoc': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, 'iswrapped': 1, }, 'txt2t': { 'linkable': 1, 'tableable': 1, 'imglinkable': 1, # 'imgalignable', 'imgasdefterm': 1, 'autonumberlist': 1, 'autonumbertitle': 1, 'stylable': 1, 'spacedlistitem': 1, 'labelbeforelink': 1, 'tablecellstrip': 1, 'tablecellspannable': 1, 'keepblankheaderline': 1, 'barinsidequote': 1, 'keeplistindent': 1, 'blankendautotoc': 1, 'blanksaroundpara': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, 'tablecellaligntype': 'cell', }, 'rst': { 'indentverbblock': 1, 'spacedlistitem': 1, 'parainsidelist': 1, 'keeplistindent': 1, 'barinsidequote': 1, 'imgalignable': 1, 'imglinkable': 1, 'tableable': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, 'blanksaroundnestedlist': 1, }, 'aat': { #TIP art inherits all TXT rules }, 'csv': { 'tableable': 1, 'tableonly': 1, 'tablecellstrip': 1, }, 'csvs': { #TIP csvs inherits all csv rules 'spread': 1, 'spreadmarkup': 'txt', }, 'db': { 'tableable': 1, 'tableonly': 1, }, 'ods': { 'escapexmlchars': 1, 'tableable': 1, 'tableonly': 1, 'tablecellstrip': 1, 'tablecellspannable': 1, 'tablecellcovered': 1, 'tablecellaligntype': 'cell', }, 'html': { 'escapexmlchars': 1, 'indentverbblock': 1, 'linkable': 1, 'stylable': 1, 'escapeurl': 1, 'imglinkable': 1, 'imgalignable': 1, 'imgasdefterm': 1, 'autonumberlist': 1, 'spacedlistitem': 1, 'parainsidelist': 1, 'tableable': 1, 'tablecellstrip': 1, 'breaktablecell': 1, 'breaktablelineopen': 1, 'keeplistindent': 1, 'keepquoteindent': 1, 'barinsidequote': 1, 'autotocwithbars': 1, 'tablecellspannable': 1, 'tablecellaligntype': 'cell', # 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'xhtml': { #TIP xhtml inherits all HTML rules }, 'wp': { #TIP wp inherits all HTML rules 'onelinepara': 1, 'onelinequote': 1, 'tagnotindentable': 1, 'blanksaroundpara': 1, 'quotemaxdepth': 1, 'keepquoteindent': 0, 'keeplistindent': 0, 'notbreaklistitemclose': 1, }, 'xhtmls': { #TIP xhtmls inherits all HTML rules }, 'html5': { #TIP html5 inherits all HTML rules 'titleblocks' : 1, }, 'htmls': { #TIP htmls inherits all HTML rules 'tableonly': 1, 'spread': 1, 'spreadgrid': 1, 'spreadmarkup': 'html', }, 'sgml': { 'escapexmlchars': 1, 'linkable': 1, 'escapeurl': 1, 'autonumberlist': 1, 'spacedlistitem': 1, 'tableable': 1, 'tablecellstrip': 1, 'blankendautotoc': 1, 'keeplistindent': 1, 'keepquoteindent': 1, 'barinsidequote': 1, 'finalescapetitle': 1, 'tablecellaligntype': 'column', 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, 'quotemaxdepth': 1, }, 'dbk': { 'escapexmlchars': 1, 'linkable': 1, 'tableable': 1, 'imglinkable': 1, 'imgalignable': 1, 'imgasdefterm': 1, 'autonumberlist': 1, 'autonumbertitle': 1, 'parainsidelist': 1, 'spacedlistitem': 1, 'titleblocks': 1, 'tablecolumnsnumber': 1, }, 'vimwiki': { 'linkable':1, 'tableable':1, #'spacedlistitem':1, #'tablecellstrip':1, #'autotocwithbars':1, #'spacedlistitemopen':1, #'spacednumlistitemopen':1, #'deflisttextstrip':1, 'autonumberlist':1, 'autonumbertitle':1, 'imgalignable':1, 'keeplistindent':1, 'blanksaroundpara':1, 'blanksaroundverb':1, # 'blanksaroundquote':1, #'blanksaroundlist':1, #'blanksaroundnumlist':1, #'blanksarounddeflist':1, 'blanksaroundtable':1, 'blanksaroundbar':1, 'blanksaroundtitle':1, 'blanksaroundnumtitle':1, }, 'mgp': { 'tagnotindentable': 1, 'spacedlistitem': 1, 'imgalignable': 1, 'autotocnewpagebefore': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'tableable': 1, # 'blanksaroundtitle': 1, # 'blanksaroundnumtitle': 1, }, 'tex': { 'stylable': 1, 'escapeurl': 1, 'autonumberlist': 1, 'autonumbertitle': 1, 'spacedlistitem': 1, 'compactlist': 1, 'parainsidelist': 1, 'tableable': 1, 'tablecellstrip': 1, 'tabletitlerowinbold': 1, 'verbblocknotescaped': 1, 'keeplistindent': 1, 'listmaxdepth': 4, # deflist is 6 'quotemaxdepth': 6, 'barinsidequote': 1, 'finalescapetitle': 1, 'autotocnewpageafter': 1, 'mapbar2pagebreak': 1, 'tablecellaligntype': 'column', 'tablecellmulticol': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'texs': { #TIP texs inherits all tex rules 'tableonly': 1, 'spread': 1, 'spreadgrid': 1, 'spreadmarkup': 'tex', }, 'lout': { 'tableable': 1, 'tablecolumnsnumber': 1, 'tablecellspannable': 1, 'tablecellaligntype': 'cell', 'tablecellstrip': 1, 'breaktablecell': 1, 'keepquoteindent': 1, 'deflisttextstrip': 1, 'escapeurl': 1, 'verbblocknotescaped': 1, 'imgalignable': 1, #'mapbar2pagebreak': 1, 'titleblocks': 1, 'autonumberlist': 1, 'parainsidelist': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'moin': { 'spacedlistitem': 1, 'linkable': 1, 'keeplistindent': 1, 'tableable': 1, 'barinsidequote': 1, 'tabletitlerowinbold': 1, 'tablecellstrip': 1, 'autotocwithbars': 1, 'tablecellspannable': 1, 'tablecellaligntype': 'cell', 'deflisttextstrip': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, # 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'gwiki': { 'spacedlistitem': 1, 'linkable': 1, 'keeplistindent': 1, 'tableable': 1, 'tabletitlerowinbold': 1, 'tablecellstrip': 1, 'autonumberlist': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, # 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'adoc': { 'spacedlistitem': 1, 'linkable': 1, 'keeplistindent': 0, 'autonumberlist': 1, 'autonumbertitle': 1, 'listnotnested': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'doku': { 'indentverbblock': 1, # DokuWiki uses ' ' to mark verb blocks 'spacedlistitem': 1, 'linkable': 1, 'keeplistindent': 1, 'tableable': 1, 'barinsidequote': 1, 'tablecellstrip': 1, 'autotocwithbars': 1, 'autonumberlist': 1, 'imgalignable': 1, 'tablecellaligntype': 'cell', 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'pmw': { 'indentverbblock': 1, 'spacedlistitem': 1, 'linkable': 1, 'labelbeforelink': 1, # 'keeplistindent': 1, 'tableable': 1, 'barinsidequote': 1, 'tablecellstrip': 1, 'autotocwithbars': 1, 'autonumberlist': 1, 'spacedlistitemopen': 1, 'spacednumlistitemopen': 1, 'imgalignable': 1, 'tabletitlerowinbold': 1, 'tablecellaligntype': 'cell', 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'wiki': { 'escapexmlchars': 1, 'linkable': 1, 'tableable': 1, 'tablecellstrip': 1, 'autotocwithbars': 1, 'spacedlistitemopen': 1, 'spacednumlistitemopen': 1, 'deflisttextstrip': 1, 'autonumberlist': 1, 'imgalignable': 1, 'tablecellspannable': 1, 'tablecellaligntype': 'cell', 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'red': { 'linkable': 1, 'tableable': 1, 'tablecellstrip': 1, 'tablecellspannable': 1, 'tablecellaligntype': 'cell', 'autotocwithbars': 1, 'spacedlistitemopen': 1, 'spacednumlistitemopen': 1, 'deflisttextstrip': 1, 'autonumberlist': 1, 'imgalignable': 1, 'labelbeforelink': 1, 'quotemaxdepth': 1, 'autonumbertitle': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'man': { 'spacedlistitem': 1, 'tagnotindentable': 1, 'tableable': 1, 'tablecellaligntype': 'column', 'tabletitlerowinbold': 1, 'tablecellstrip': 1, 'barinsidequote': 1, 'parainsidelist': 0, 'plaintexttoc': 1, 'blanksaroundpara': 0, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, # 'blanksaroundbar': 1, 'blanksaroundtitle': 0, 'blanksaroundnumtitle': 1, }, 'utmac': { 'tagnotindentable': 1, 'autonumbertitle': 1, 'quotenotnested' : 1, 'barinsidequote': 1, 'parainsidelist': 0, 'spacedlistitem': 0, 'labelbeforelink' : 0, # is that work ? 'imgalignable': 1, 'plaintexttoc': 0, 'tableable': 1, 'tablecellaligntype': 'column', 'tabletitlerowinbold': 1, 'tablecellstrip': 1, 'blanksaroundpara': 0, 'blanksaroundverb': 0, 'blanksaroundquote': 0, 'blanksaroundlist': 0, 'blanksaroundnumlist': 0, 'blanksarounddeflist': 0, 'blanksaroundtable': 0, 'blanksaroundbar': 0, 'blanksaroundtitle': 0, 'blanksaroundnumtitle': 0, }, 'pm6': { 'keeplistindent': 1, 'verbblockfinalescape': 1, #TODO add support for these # maybe set a JOINNEXT char and do it on addLineBreaks() 'notbreaklistopen': 1, 'barinsidequote': 1, 'autotocwithbars': 1, 'onelinepara': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, # 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, # 'blanksaroundtable': 1, # 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'creole': { 'linkable': 1, 'tableable': 1, 'imglinkable': 1, 'tablecellstrip': 1, 'autotocwithbars': 1, 'spacedlistitemopen': 1, 'spacednumlistitemopen': 1, 'deflisttextstrip': 1, 'verbblocknotescaped': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, }, 'md': { #'keeplistindent': 1, 'linkable': 1, 'labelbeforelink': 1, 'tableable': 1, 'imglinkable': 1, 'tablecellstrip': 1, 'autonumberlist': 1, 'spacedlistitemopen': 1, 'spacednumlistitemopen': 1, 'deflisttextstrip': 1, 'blanksaroundpara': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, #'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, }, 'bbcode': { #'keeplistindent': 1, 'keepquoteindent': 1, #'indentverbblock': 1, 'linkable': 1, #'labelbeforelink': 1, #'tableable': 1, 'imglinkable': 1, 'tablecellstrip': 1, #'autotocwithbars': 1, 'autonumberlist': 1, 'spacedlistitemopen': 1, 'spacednumlistitemopen': 1, 'deflisttextstrip': 1, #'verbblocknotescaped': 1, 'blanksaroundpara': 1, #'blanksaroundverb': 1, #'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, #'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, }, 'spip': { 'spacedlistitem': 1, 'spacedlistitemopen': 1, 'linkable': 1, 'blankendmotherlist': 1, 'tableable': 1, 'barinsidequote': 1, 'keepquoteindent': 1, 'blankendtable': 1, 'tablecellstrip': 1, 'imgalignable': 1, 'tablecellaligntype': 'cell', 'listlineafteropen': 1, 'labelbeforelink': 1, 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundquote': 1, 'blanksaroundlist': 1, 'blanksaroundnumlist': 1, 'blanksarounddeflist': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'rtf': { 'linkable': 1, 'tableable': 1, 'autonumbertitle': 1, 'parainsidelist': 1, 'listnotnested': 1, 'listitemnotnested': 1, 'quotenotnested': 1, 'onelinepara': 1, 'tablecellstrip': 1, 'tablecellspannable': 1, 'tagnotindentable': 1, 'deflisttextstrip': 1, 'encodeblockdepth': 1, 'zerodepthparagraph': 1, 'cellspancumulative': 1, 'blockdepthmultiply': 360, 'depthmultiplyplus': 1, 'cellspanmultiplier': 1080, 'listmaxdepth': 9, 'tablecellaligntype': 'cell', }, 'tml': { 'escapexmlchars': 1, 'linkable': 1, 'tableable': 1, 'tablecellstrip': 1, 'tablecellspannable': 1, 'spacedlistitem': 1, 'autonumberlist': 1, 'notbreaklistopen': 1, 'imgalignable': 1, 'imglinkable': 1, 'tablecellaligntype': 'cell', 'blanksaroundpara': 1, 'blanksaroundverb': 1, 'blanksaroundtable': 1, 'blanksaroundbar': 1, 'blanksaroundtitle': 1, 'blanksaroundnumtitle': 1, }, 'mom': { 'autonumberlist': 1, # target supports numbered lists natively 'autonumbertitle': 1, # target supports numbered titles natively 'imgalignable': 1, # target supports image alignment # 'stylable': 1, # target supports external style files 'parainsidelist': 1, # lists items supports paragraph 'spacedlistitem': 1, # lists support blank lines between items 'labelbeforelink': 1, # label comes before the link on the tag 'barinsidequote': 1, # bars are allowed inside quote blocks 'quotenotnested': 1, # quotes cannot be nested 'autotocnewpagebefore': 1, # break page before automatic TOC 'autotocnewpageafter': 1, # break page after automatic TOC 'mapbar2pagebreak': 1, # map the strong bar to a page break 'tableable': 1, # target supports tables 'tablecellaligntype': 'column', 'tabletitlerowinbold': 1, 'tablecellstrip': 1, 'blanksaroundlist': 1, # put a blank line before and after lists # 'blanksaroundnumlist': 1, # put a blank line before and after numlists # 'blanksarounddeflist': 1, # put a blank line before and after deflists # 'blanksaroundnestedlist': 1, # put a blank line before and after all type of nested lists # 'blanksaroundquote', # put a blank line before and after quotes 'blanksaroundtable': 1, # put a blank line before and after tables 'blankendautotoc': 1, # append a blank line at the auto TOC end 'tagnotindentable': 1, # tags must be placed at the line beginning }, } for target in TARGETS_LIST: if getattr(getattr(targets, target), 'RULES', {}).get('confdependentrules'): reload(getattr(targets, target)) rules_bank[target] = getattr(getattr(targets, target), 'RULES', {}) # Exceptions for --css-sugar if (config['css-sugar'] and config['target'] in ('html', 'xhtml', 'xhtmls', 'html5')) or config['target'] == 'wp': rules_bank['html']['indentverbblock'] = 0 rules_bank['html']['autotocwithbars'] = 0 # Get the target specific rules if config['target'] in ('xhtml', 'xhtmls', 'html5', 'htmls', 'wp'): myrules = rules_bank['html'].copy() # inheritance myrules.update(rules_bank[config['target']]) # get specific elif config['target'] == 'aat': myrules = rules_bank['txt'].copy() # inheritance myrules['tableable'] = 1 if config['slides']: myrules['blanksaroundtitle'] = 0 myrules['blanksaroundnumtitle'] = 0 myrules['blanksaroundlist'] = 0 myrules['blanksaroundnumlist'] = 0 myrules['blanksarounddeflist'] = 0 if config['web']: myrules['linkable'] = 1 myrules['imglinkable'] = 1 myrules['escapexmlchars'] = 1 if config['spread']: myrules['tableonly'] = 1 myrules['spread'] = 1 myrules['spreadgrid'] = 1, myrules['spreadmarkup'] = 'txt' if config['web']: myrules['spreadmarkup'] = 'html' elif config['target'] == 'texs': myrules = rules_bank['tex'].copy() # inheritance myrules.update(rules_bank[config['target']]) # get specific elif config['target'] == 'csvs': myrules = rules_bank['csv'].copy() # inheritance myrules.update(rules_bank[config['target']]) # get specific else: myrules = rules_bank[config['target']].copy() # Populate return dictionary for key in allrules: ret[key] = 0 # reset all ret.update(myrules) # get rules if ret['iswrapped'] and not config['width']: config['width'] = DFT_TEXT_WIDTH return ret ############################################################################## def getRegexes(): "Returns all the regexes used to find the t2t marks" bank = { 'blockVerbOpen': re.compile(r'^```\s*$'), 'blockVerbClose': re.compile(r'^```\s*$'), 'blockRawOpen': re.compile(r'^"""\s*$'), 'blockRawClose': re.compile(r'^"""\s*$'), 'blockTaggedOpen': re.compile(r"^'''\s*$"), 'blockTaggedClose': re.compile(r"^'''\s*$"), 'blockCommentOpen': re.compile(r'^%%%\s*$'), 'blockCommentClose': re.compile(r'^%%%\s*$'), 'quote': re.compile(r'^\t+'), '1lineVerb': re.compile(r'^``` (?=.)'), '1lineRaw': re.compile(r'^""" (?=.)'), '1lineTagged': re.compile(r"^''' (?=.)"), # mono, raw, bold, italic, underline: # - marks must be glued with the contents, no boundary spaces # - they are greedy, so in ****bold****, turns to **bold** 'fontMono': re.compile(r'``([^\s](|.*?[^\s])`*)``'), 'raw': re.compile(r'""([^\s](|.*?[^\s])"*)""'), 'tagged': re.compile(r"''([^\s](|.*?[^\s])'*)''"), 'math': re.compile(r'\$\$([^\s](|.*?[^\s])\$*)\$\$'), 'fontBold': re.compile(r'\*\*([^\s](|.*?[^\s])\**)\*\*'), 'fontItalic': re.compile(r'//([^\s](|.*?[^\s])/*)//'), 'fontUnderline': re.compile(r'__([^\s](|.*?[^\s])_*)__'), 'fontStrike': re.compile(r'--([^\s](|.*?[^\s])-*)--'), 'list': re.compile(r'^( *)(-) (?=[^ ])'), 'numlist': re.compile(r'^( *)(\+) (?=[^ ])'), 'deflist': re.compile(r'^( *)(:) (.*)$'), 'listclose': re.compile(r'^( *)([-+:])\s*$'), 'bar': re.compile(r'^(\s*)([_=-]{20,})\s*$'), 'table': re.compile(r'^ *\|([|_/])? '), 'blankline': re.compile(r'^\s*$'), 'comment': re.compile(r'^%'), # Auxiliary tag regexes '_imgAlign' : re.compile(r'~A~', re.I), '_tableAlign' : re.compile(r'~A~', re.I), '_anchor' : re.compile(r'~A~', re.I), '_tableBorder' : re.compile(r'~B~', re.I), '_tableColAlign' : re.compile(r'~C~', re.I), '_tableCellColSpan' : re.compile(r'~S~', re.I), '_tableCellAlign' : re.compile(r'~A~', re.I), '_tableAttrDelimiter': re.compile(r'~Z~', re.I), '_blockDepth' : re.compile(r'~D~', re.I), '_listLevel' : re.compile(r'~L~', re.I), } # Special char to place data on TAGs contents (\a == bell) bank['x'] = re.compile('\a') # %%macroname [ (formatting) ] bank['macros'] = re.compile(r'%%%%(?P%s)\b($(?P.*?)$)?' % ( '|'.join(MACROS.keys())), re.I) # %%TOC special macro for TOC positioning bank['toc'] = re.compile(r'^ *%%toc\s*$', re.I) # Almost complicated title regexes ;) titskel = r'^ *(?P%s)(?P%s)\1(\[(?P[\w-]*)\])?\s*$' bank['title'] = re.compile(titskel % ('[=]{1,5}', '[^=](|.*[^=])')) bank['numtitle'] = re.compile(titskel % ('[+]{1,5}', '[^+](|.*[^+])')) ### Complicated regexes begin here ;) # # Textual descriptions on --help's style: [...] is optional, | is OR ### First, some auxiliary variables # # [image.EXT] patt_img = r'\[([\w_,.+%$#@!?+~/-]+\.(png|jpe?g|gif|eps|bmp|svg))\]' # Link things # http://www.gbiv.com/protocols/uri/rfc/rfc3986.html # pchar: A-Za-z._~- / %FF / !$&'()*+,;= / :@ # Recomended order: scheme://user:pass@domain/path?query=foo#anchor # Also works : scheme://user:pass@domain/path#anchor?query=foo # TODO form: !'(): urlskel = { 'proto' : r'(https?|ftp|news|telnet|gopher|wais)://', 'guess' : r'(www[23]?|ftp)\.', # w/out proto, try to guess 'login' : r'A-Za-z0-9_.-', # for ftp://login@domain.com 'pass' : r'[^ @]*', # for ftp://login:pass@dom.com 'chars' : r'A-Za-z0-9%._/~:,=$@&+-', # %20(space), :80(port), D&D 'anchor': r'A-Za-z0-9%._-', # %nn(encoded) 'form' : r'A-Za-z0-9/%&=+:;.,$@*_-', # .,@*_-(as is) 'punct' : r'.,;:!?' } # username [ :password ] @ patt_url_login = r'([%s]+(:%s)?@)?' % (urlskel['login'], urlskel['pass']) # [ http:// ] [ username:password@ ] domain.com [ / ] # [ #anchor | ?form=data ] retxt_url = r'\b(%s%s|%s)[%s]+\b/*(\?[%s]+)?(#[%s]*)?' % ( urlskel['proto'], patt_url_login, urlskel['guess'], urlskel['chars'], urlskel['form'], urlskel['anchor']) # filename | [ filename ] #anchor retxt_url_local = r'[%s]+|[%s]*(#[%s]*)' % ( urlskel['chars'], urlskel['chars'], urlskel['anchor']) # user@domain [ ?form=data ] patt_email = r'\b[%s]+@([A-Za-z0-9_-]+\.)+[A-Za-z]{2,4}\b(\?[%s]+)?' % ( urlskel['login'], urlskel['form']) # Saving for future use bank['_urlskel'] = urlskel ### And now the real regexes # bank['email'] = re.compile(patt_email, re.I) # email | url bank['link'] = re.compile(r'%s|%s' % (retxt_url, patt_email), re.I) # \[ label | imagetag url | email | filename \] bank['linkmark'] = re.compile( r'\[(?P%s|[^]]+) (?P%s|%s|%s)\]' % ( patt_img, retxt_url, patt_email, retxt_url_local), re.L + re.I) # Image bank['img'] = re.compile(patt_img, re.L + re.I) # Special things bank['special'] = re.compile(r'^%!\s*') return bank ### END OF regex nightmares # The ASCII Art library def aa_line(char, width): return char * width def aa_under(txt, char, width, over): ret = [] if over: ret.append(aa_line(char, aa_lencjk(txt))) for lin in aa_wrap(txt, width, False): ret.extend([lin, aa_line(char, aa_lencjk(lin))]) return ret def aa_quote(txt, quote_char, prefix_char, width, depth, web, blank=False): if quote_char and quote_char in '123456789': prefix = int(quote_char) * depth * prefix_char else: prefix = quote_char * depth + prefix_char wrap_width = width - aa_lencjk(prefix) wrap_txt = aa_wrap(txt, wrap_width, web) if blank: blank_prefix = aa_lencjk(prefix) * ' ' block_txt = [prefix + wrap_txt[0]] + [blank_prefix + line for line in wrap_txt[1:]] else: block_txt = [prefix + line for line in wrap_txt] return block_txt def aa_box(txt, chars, width, centred=True, web=False, slides=False): wrap_txt = [] char_side = '' if slides: width = width - 2 char_side = ' ' for lin in txt: wrap_txt.extend(aa_wrap(lin, width - 4, web)) len_cjk = max([aa_lencjk(lin, web) for lin in wrap_txt]) tline_box = char_side + aa_center(chars['tlcorner'] + chars['border'] * (len_cjk + 2) + chars['trcorner'], width) + char_side bline_box = char_side + aa_center(chars['blcorner'] + chars['border'] * (len_cjk + 2) + chars['brcorner'], width) + char_side line_txt = [] for lin in wrap_txt: if centred: line_txt.append(char_side + aa_center(chars['side'] + ' ' + aa_center(lin, len_cjk, web) + ' ' + chars['side'], width, web) + char_side) else: line_txt.append(char_side + aa_center(chars['side'] + ' ' + lin + ' ' * (len_cjk - aa_lencjk(lin, web) + 1) + chars['side'], width, web) + char_side) return [tline_box] + line_txt + [bline_box] def aa_header(header_data, chars, width, height, web, slides, printing): h = [header_data[v] for v in header_data if v.startswith("HEADER") and header_data[v]] n_h = len(h) height_box = sum([len(aa_box([header], chars, width, slides=slides)) for header in h]) if not n_h: return [] if not slides: n, end = 2, 0 else: x = height - 2 - height_box n = x / (n_h + 1) end = x % (n_h + 1) header = [aa_line(chars['bar2'], width)] header.extend([''] * n) for h in 'HEADER1', 'HEADER2', 'HEADER3': if header_data[h]: header.extend(aa_box([header_data[h]], chars, width, slides=slides)) header.extend([''] * n) header.extend([''] * end) header.append(aa_line(chars['bar2'], width)) if slides: if web: header = ['
' + header[0]] + header[1:-1] + [header[-1] + '
'] elif printing: header = header[:-1] + [header[-1] + ''] if not slides or printing: header = [''] + header return header def aa_slide(title, char, width, web): res = [aa_line(char, width)] res.append('') res.append(aa_slicecjk(aa_center(title, width), width)[0]) res.append('') res.append(aa_line(char, width)) if web: res = ['
' + res[0]] + res[1:] return res def aa_table(data, chars, width, borders, h_header, v_header, align, spread, web): n = max([len(lin[0]) for lin in data]) data3 = [] for lin in data: if max(lin[1]) == 1: data3.append(lin[0]) else: newline = [] for i, el in enumerate(lin[0]): if lin[1][i] == 1: newline.append(el) else: newline.extend(lin[1][i] * ['']) data3.append(newline) tab = [] for i in range(n): tab.append([lin[i] for lin in data3]) if web: length = [max([aa_lencjk(re.sub('|', '', el)) for el in lin]) for lin in tab] else: length = [max([aa_lencjk(el) for el in lin]) for lin in tab] if spread: data[0][0] = [data[0][0][i].center(length[i]) for i in range(n)] tcross, border, bcross, lcross, side, rcross, tlcorner, trcorner, cross, blcorner, brcorner, tvhead, vhead, vheadcross, bvhead ,headerscross, hhead, hheadcross, lhhead, rhhead= chars['tcross'], chars['border'], chars['bcross'], chars['lcross'], chars['side'], chars['rcross'], chars['tlcorner'], chars['trcorner'], chars['cross'], chars['blcorner'], chars['brcorner'], chars['tvhead'], chars['vhead'], chars['vheadcross'], chars['bvhead'], chars['headerscross'], chars['hhead'], chars['hheadcross'], chars['lhhead'], chars['rhhead'] if not v_header: tvhead, bvhead = tcross, bcross if borders: vheadcross = cross if h_header: headerscross = hheadcross if not borders: hhead, hheadcross, lhhead, rhhead, headerscross = border, cross, lcross, rcross, vheadcross if h_header and not v_header: headerscross = cross if v_header and not h_header: headerscross = vheadcross len0 = length[0] + 2 res = lcross + len0 * border + vheadcross resh = lhhead + len0 * hhead + headerscross rest = tlcorner + len0 * border + tvhead resb = blcorner + len0 * border + bvhead for i in range(1, n): res = res + (length[i] + 2) * border + cross resh = resh + (length[i] + 2) * hhead + hheadcross rest = rest + (length[i] + 2) * border + tcross resb = resb + (length[i] + 2) * border + bcross res = res[:-1] + rcross resh = resh[:-1] + rhhead rest = rest[:-1] + trcorner resb = resb[:-1] + brcorner ret = [] for i, lin in enumerate(data): aff = side if i == 1 and h_header: ret.append(resh) elif i == 0: ret.append(rest) elif borders: ret.append(res) for j, el in enumerate(lin[0]): if web: aff = aff + " " + el + (sum(length[j:(j + lin[1][j])]) + lin[1][j] * 3 - aa_lencjk(re.sub('|', '',el)) - 2) * " " + side else: aff = aff + " " + el + (sum(length[j:(j + lin[1][j])]) + lin[1][j] * 3 - aa_lencjk(el) - 2) * " " + side if j == 0 and v_header: aff = aff[:-1] + vhead ret.append(aff) ret.append(resb) if align == 'Left': ret = [' ' * 2 + lin for lin in ret] elif align == 'Center' and not (web and spread): ret = [aa_center(lin, width) for lin in ret] return ret def aa_image(image): art_table = '#$!;:,. ' art_image = [] for lin in image: art_line = '' for pixel in lin: art_line = art_line + art_table[pixel/32] art_image.append(art_line) return art_image def aa_wrap(txt, width, web): twcjk = TextWrapperCJK(width=width) if not web: return twcjk.wrap(txt) txt = re.split('()|()|()', txt) lin, length, ret = '', 0, [] for el in txt: if el: if el[0] != '<': if len(el) > width: lin = lin + el multi = twcjk.wrap(lin) ret.extend(multi[:-1]) lin = multi[-1] elif length + len(el) <= width: length = length + len(el) lin = lin + el else: ret.append(lin) lin, length = el, len(el) else: lin = lin + el ret.append(lin) return ret def aa_lencjk(txt, web=False): if web: txt = re.sub('()|()|()', '', txt) if isinstance(txt, str): return len(txt) l = 0 for char in txt: if unicodedata.east_asian_width(unicode(char)) in ('F', 'W'): l = l + 2 else: l = l + 1 return l def aa_slicecjk(txt, space_left): if isinstance(txt, str): return txt[:space_left], txt[space_left:] if aa_lencjk(txt) <= space_left: return txt, '' i = 1 while aa_lencjk(txt[:i]) <= space_left: # <= and index i-1 # to catch the last double length char of odd line i = i + 1 return txt[:i-1], txt[i-1:] class TextWrapperCJK(textwrap.TextWrapper): # CJK fix for the Greg Ward textwrap lib. def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): if width < 1: space_left = 1 else: space_left = width - cur_len if self.break_long_words: chunk_start, chunk_end = aa_slicecjk(reversed_chunks[-1], space_left) cur_line.append(chunk_start) reversed_chunks[-1] = chunk_end elif not cur_line: cur_line.append(reversed_chunks.pop()) def _wrap_chunks(self, chunks): lines = [] if self.width <= 0: raise ValueError("invalid width %r (must be > 0)" % self.width) if self.width == 1 and sum(aa_lencjk(chunk) for chunk in chunks) > sum(len(chunk) for chunk in chunks): raise ValueError("invalid width %r (must be > 1 if CJK chars)" % self.width) chunks.reverse() while chunks: cur_line = [] cur_len = 0 if lines: indent = self.subsequent_indent else: indent = self.initial_indent width = self.width - len(indent) if chunks[-1].strip() == '' and lines: del chunks[-1] while chunks: l = aa_lencjk(chunks[-1]) if cur_len + l <= width: cur_line.append(chunks.pop()) cur_len += l else: break if chunks and aa_lencjk(chunks[-1]) > width: self._handle_long_word(chunks, cur_line, cur_len, width) if cur_line and cur_line[-1].strip() == '': del cur_line[-1] if cur_line: lines.append(indent + ''.join(cur_line)) return lines def aa_center(txt, width, web=False): n_before = (width - aa_lencjk(txt, web)) / 2 n_after = width - aa_lencjk(txt, web) - n_before return ' ' * n_before + txt + ' ' * n_after # The Spreadsheet library ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' class SpreadSheet: """Raymond Hettinger's recipe http://code.activestate.com/recipes/355045-spreadsheet """ _cells = {} def __setitem__(self, key, formula): self._cells[key] = formula def getformula(self, key): return self._cells[key] def __getitem__(self, key): if self._cells[key].strip()[0] == '=': try: return eval(self._cells[key].strip()[1:], globals(), self) except Exception, e: return e else: return self._cells[key].strip() def spreadsheet(data, markup, grid): n = max([len(line[0]) for line in data]) if n > 676: Error("Spreadsheet tables are limited to 676 columns, and your table has %i columns." % n) s = SpreadSheet() for j, row in enumerate(data): for i, el in enumerate(row[0]): ind = ascii_uppercase[i/26 - 1].replace('Z', '') + ascii_uppercase[i%26] + str(j+1) if el and el.strip(): s[ind] = el for j, row in enumerate(data): for i, el in enumerate(row[0]): ind = ascii_uppercase[i/26 - 1].replace('Z', '') + ascii_uppercase[i%26] + str(j+1) if el and el.strip(): if markup == 'html': row[0][i] = '' + str(s[ind]) + '' elif markup == 'tex': if el.strip()[0] == '=': tooltip = 'formula: ' else: tooltip = 'value: ' row[0][i] = '\htmladdnormallink{' + str(s[ind]) + '}{' + tooltip + el.strip() + '}' elif markup == 'txt': row[0][i] = str(s[ind]) if grid: h = [(ascii_uppercase[i/26 - 1].replace('Z', '') + ascii_uppercase[i%26]) for i in range(n)] data = [[h, [1] * n]] + data data = [[[str(i)] + line[0], [1] + line[1]] for i, line in enumerate(data)] data[0][0][0] = '' return data def completes_table(table): data = [[row['cells'], row['cellspan']] for row in table] n = max([len(line[0]) for line in data]) data2 = [] for line in data: if not line[1]: data2.append([n * [''], n * [1]]) else: data2.append([line[0] + (n - sum(line[1])) * [''], line[1] + (n - sum(line[1])) * [1]]) return data2 def convert_to_table(itera, headers, borders, center): if center: row_ini = ' | ' else: row_ini = '| ' if borders: row_end = ' |' else: row_end = '' table = [] for row in itera: table.append(row_ini + ' | '.join(row).expandtabs() + row_end) if headers: table[0] = table[0].replace('|', '||', 1) return table def parse_convert_table(table, tableable, target): ret = [] # Note: cell contents is raw, no t2t marks are parsed if tableable: ret.extend(BLOCK.blockin('table')) if table: BLOCK.tableparser.__init__(table[0]) for row in table: tablerow = TableMaster().parse_row(row) BLOCK.tableparser.add_row(tablerow) # Very ugly, but necessary for escapes line = SEPARATOR.join(tablerow['cells']) BLOCK.holdadd(doEscape(target, line)) ret.extend(BLOCK.blockout()) # Tables are mapped to verb when target is not table-aware else: ret.extend(BLOCK.blockin('verb')) BLOCK.propset('mapped', 'table') for row in table: BLOCK.holdadd(row) ret.extend(BLOCK.blockout()) return ret def math_print(target, txt): try: import sympy except: Error(_("$$math$$: You need SymPy to use math formula")) if sympy.__version__ < '0.7.4': Error(_("$$math$$: You need SymPy 0.7.4 or > to use math formula")) exec('from sympy import *') if target == 'aat': return pretty(sympify(txt, evaluate=False), use_unicode=CONF['unicode_art']) elif target == "tex": return latex(sympify(txt, evaluate=False)) elif target == "html5": from sympy.printing.mathml import mathml from sympy.utilities.mathml import c2p return c2p(mathml(sympify(txt, evaluate=False))) ############################################################################## class error(Exception): pass def echo(msg): # for quick debug print '\033[32;1m%s\033[m' % msg def Quit(msg=''): if msg: print msg sys.exit(0) def Error(msg): msg = _("%s: Error: ") % my_name + msg raise error(msg) def getTraceback(): try: from traceback import format_exception etype, value, tb = sys.exc_info() return ''.join(format_exception(etype, value, tb)) except: pass def getUnknownErrorMessage(): msg = '%s\n%s (%s):\n\n%s' % ( _('Sorry! Txt2tags aborted by an unknown error.'), _('Please send the following Error Traceback to the author'), my_email, getTraceback()) return msg def Message(msg, level): if level <= VERBOSE and not QUIET: prefix = '-' * 5 print "%s %s" % (prefix * level, msg) def Debug(msg, id_=0, linenr=None): "Show debug messages, categorized (colored or not)" if QUIET or not DEBUG: return if int(id_) not in range(8): id_ = 0 # 0:black 1:red 2:green 3:yellow 4:blue 5:pink 6:cyan 7:white ;1:light ids = ['INI', 'CFG', 'SRC', 'BLK', 'HLD', 'GUI', 'OUT', 'DET'] colors_bgdark = ['7;1', '1;1', '3;1', '6;1', '4;1', '5;1', '2;1', '7;1'] colors_bglight = ['0' , '1' , '3' , '6' , '4' , '5' , '2' , '0' ] if linenr is not None: msg = "LINE %04d: %s" % (linenr, msg) if COLOR_DEBUG: if BG_LIGHT: color = colors_bglight[id_] else: color = colors_bgdark[id_] msg = '\033[3%sm%s\033[m' % (color, msg) print "++ %s: %s" % (ids[id_], msg) def Readfile(file_path, remove_linebreaks=0, ignore_error=0): data = [] # STDIN if file_path == '-': try: data = sys.stdin.readlines() except: if not ignore_error: Error(_('You must feed me with data on STDIN!')) # URL elif PathMaster().is_url(file_path): try: from urllib import urlopen f = urlopen(file_path) if f.getcode() == 404: # URL not found raise data = f.readlines() f.close() except: if not ignore_error: Error(_("Cannot read file:") + ' ' + file_path) # local file else: try: f = open(file_path) data = f.readlines() f.close() except: if not ignore_error: Error(_("Cannot read file:") + ' ' + file_path) if remove_linebreaks: data = map(lambda x: re.sub('[\n\r]+$', '', x), data) Message(_("File read (%d lines): %s") % (len(data), file_path), 2) return data def Savefile(file_path, contents): try: f = open(file_path, 'wb') except: Error(_("Cannot open file for writing:") + ' ' + file_path) if type(contents) == type([]): doit = f.writelines else: doit = f.write cont = [] if CONF['encoding'].lower() == 'utf-8' and CONF['target'] != 'mgp': for line in contents: if isinstance(line, unicode): cont.append(line.encode('utf-8')) else: cont.append(line) elif CONF['target'] == 'mgp': for line in contents: if isinstance(line, unicode): cont.append(line.encode('latin1', 'replace')) else: cont.append(line) else: cont = contents doit(cont) f.close() def showdic(dic): for k in dic.keys(): print "%15s : %s" % (k, dic[k]) def dotted_spaces(txt=''): return txt.replace(' ', '.') # TIP: win env vars http://www.winnetmag.com/Article/ArticleID/23873/23873.html def get_rc_path(): "Return the full path for the users' RC file" # Try to get the path from an env var. if yes, we're done user_defined = os.environ.get('T2TCONFIG') if user_defined: return user_defined # Env var not found, so perform automatic path composing # Set default filename according system platform rc_names = {'default': '.txt2tagsrc', 'win': '_t2trc'} rc_file = rc_names.get(sys.platform[:3]) or rc_names['default'] # The file must be on the user directory, but where is this dir? rc_dir_search = ['HOME', 'HOMEPATH'] for var in rc_dir_search: rc_dir = os.environ.get(var) if rc_dir: break # rc dir found, now we must join dir+file to compose the full path if rc_dir: # Compose path and return it if the file exists rc_path = os.path.join(rc_dir, rc_file) # On windows, prefix with the drive (%homedrive%: 2k/XP/NT) if sys.platform.startswith('win'): rc_drive = os.environ.get('HOMEDRIVE') rc_path = os.path.join(rc_drive, rc_path) return rc_path # Sorry, not found return '' ############################################################################## class PathMaster: """Handle paths. See issues: 27, 62, 63, 71, 85.""" def __init__(self): pass def is_url(self, text): return text.startswith('http://') or text.startswith('https://') def join(self, dirname, filename): """Join paths, unless filename is STDOUT, absolute or URL.""" if not dirname \ or not filename \ or filename in (STDOUT, MODULEOUT) \ or os.path.isabs(filename) \ or self.is_url(filename): return filename else: return os.path.join(dirname, filename) def relpath(self, path, start): """Unlike os.path.relpath(), never touch URLs""" if not path or self.is_url(path): return path else: return os.path.relpath(path, start) class CommandLine: """ Command Line class - Masters command line This class checks and extract data from the provided command line. The --long options and flags are taken from the global OPTIONS, FLAGS and ACTIONS dictionaries. The short options are registered here, and also their equivalence to the long ones. _compose_short_opts() -> str _compose_long_opts() -> list Compose the valid short and long options list, on the 'getopt' format. parse() -> (opts, args) Call getopt to check and parse the command line. It expects to receive the command line as a list, and without the program name (sys.argv[1:]). get_raw_config() -> [RAW config] Scans command line and convert the data to the RAW config format. See ConfigMaster class to the RAW format description. Optional 'ignore' and 'filter_' arguments are used to filter in or out specified keys. compose_cmdline(dict) -> [Command line] Compose a command line list from an already parsed config dictionary, generated from RAW by ConfigMaster(). Use this to compose an optimal command line for a group of options. The get_raw_config() calls parse(), so the typical use of this class is: raw = CommandLine().get_raw_config(sys.argv[1:]) """ def __init__(self): self.all_options = OPTIONS.keys() self.all_flags = FLAGS.keys() self.all_actions = ACTIONS.keys() # short:long options equivalence self.short_long = { 'C': 'config-file', 'h': 'help', 'H': 'no-headers', 'i': 'infile', 'n': 'enum-title', 'o': 'outfile', 'q': 'quiet', 't': 'target', 'T': 'template', 'v': 'verbose', 'V': 'version', } # Compose valid short and long options data for getopt self.short_opts = self._compose_short_opts() self.long_opts = self._compose_long_opts() def _compose_short_opts(self): "Returns a string like 'hVt:o' with all short options/flags" ret = [] for opt in self.short_long.keys(): long_ = self.short_long[opt] if long_ in self.all_options: # is flag or option? opt = opt + ':' # option: have param ret.append(opt) #Debug('Valid SHORT options: %s' % ret) return ''.join(ret) def _compose_long_opts(self, extra=True): "Returns a list with all the valid long options/flags" ret = map(lambda x: x + '=', self.all_options) # add = ret.extend(self.all_flags) # flag ON ret.extend(self.all_actions) # actions ret.extend(map(lambda x: 'no-' + x, self.all_flags)) # add no-* if extra: ret.extend(['no-style', 'no-encoding']) # turn OFF ret.extend(['no-outfile', 'no-infile']) # turn OFF ret.extend(['no-dump-config', 'no-dump-source']) # turn OFF ret.extend(['no-targets']) # turn OFF #Debug('Valid LONG options: %s' % ret) return ret def tokenize(self, cmdline=''): "Convert a command line string to a list" return shlex.split(cmdline) def parse(self, cmdline=[]): "Check/Parse a command line list TIP: no program name!" # Get the valid options short, long_ = self.short_opts, self.long_opts # Parse it! try: opts, args = getopt.getopt(cmdline, short, long_) except getopt.error, errmsg: Error(_("%s (try --help)") % errmsg) return (opts, args) def get_raw_config(self, cmdline=[], ignore=[], filter_=[], relative=0): "Returns the options/arguments found as RAW config" if not cmdline: return [] ret = [] # We need lists, not strings (such as from %!options) if type(cmdline) in (type(''), type(u'')): if isinstance(cmdline, unicode): cmdline = cmdline.encode('utf-8') cmdline = self.tokenize(cmdline) # Extract name/value pair of all configs, check for invalid names options, arguments = self.parse(cmdline[:]) # Needed when expanding %!options inside remote %!includeconf dirname = '' # Some cleanup on the raw config for name, value in options: # Remove leading - and -- name = re.sub('^--?', '', name) # Fix old misspelled --suGGar, --no-suGGar name = name.replace('suggar', 'sugar') # Translate short option to long if len(name) == 1: name = self.short_long[name] if name == 'dirname': dirname = value continue # Outfile exception: path relative to PWD if name == 'outfile' and value not in [STDOUT, MODULEOUT]: if relative: value = os.path.abspath(value) else: value = PathMaster().join(dirname, value) # -C, --config-file inclusion, path relative to PWD if name == 'config-file': value = PathMaster().join(dirname, value) ret.extend(ConfigLines().include_config_file(value)) continue # --style: path relative to PWD # Already OK, when comming from the command line # Needs fix when coming from %!options: --style foo.css if name == 'style': ret.append(['all', 'stylepath', PathMaster().join(dirname, value)]) # Save this config ret.append(['all', name, value]) # All configuration was read and saved # Get infile, if any while arguments: infile = arguments.pop(0) ret.append(['all', 'infile', infile]) # Apply 'ignore' and 'filter_' rules (filter_ is stronger) if (ignore or filter_): filtered = [] for target, name, value in ret: if (filter_ and name in filter_) or \ (ignore and name not in ignore): filtered.append([target, name, value]) else: fancykey = dotted_spaces("%12s" % name) Message(_("Ignored config") + (" %s : %s" % (fancykey, value)), 3) ret = filtered[:] # Add the original command line string as 'realcmdline' ret.append(['all', 'realcmdline', cmdline]) return ret def compose_cmdline(self, conf={}, no_check=0): "compose a full (and diet) command line from CONF dict" if not conf: return [] args = [] dft_options = OPTIONS.copy() cfg = conf.copy() valid_opts = self.all_options + self.all_flags use_short = {'no-headers': 'H', 'enum-title': 'n'} # Remove useless options if not no_check and cfg.get('toc-only'): if 'no-headers' in cfg: del cfg['no-headers'] if 'outfile' in cfg: del cfg['outfile'] # defaults to STDOUT if cfg.get('target') == 'txt': del cfg['target'] # already default args.append('--toc-only') # must be the first del cfg['toc-only'] # Add target type if 'target' in cfg: args.append('-t ' + cfg['target']) del cfg['target'] # Add other options for key in cfg.keys(): if key not in valid_opts: continue # may be a %!setting if key == 'outfile' or key == 'infile': continue # later val = cfg[key] if not val: continue # Default values are useless on cmdline if val == dft_options.get(key): continue # -short format if key in use_short: args.append('-' + use_short[key]) continue # --long format if key in self.all_flags: # add --option args.append('--' + key) else: # add --option=value args.append('--%s=%s' % (key, val)) # The outfile using -o if 'outfile' in cfg and \ cfg['outfile'] != dft_options.get('outfile'): args.append('-o ' + cfg['outfile']) # Place input file(s) always at the end if 'infile' in cfg: args.append(' '.join(cfg['infile'])) # Return as a nice list Debug("Diet command line: %s" % ' '.join(args), 1) return args class BaseOptions(CommandLine): def __init__(self, cmdline=None, dft_options={}, dft_flags={}, short_long={}): # Available options self.dft_options = dft_options self.dft_flags = dft_flags self.short_long = short_long # Default values for all options self.defaults = {} self.defaults.update(self.dft_options) self.defaults.update(self.dft_flags) # Needed by self._compose_*_opts() self.all_flags = self.dft_flags.keys() self.all_options = self.dft_options.keys() self.all_actions = [] # Compose valid short and long options data for getopt self.short_opts = self._compose_short_opts() self.long_opts = self._compose_long_opts(extra=False) # Got data? Parse it! if cmdline: self.raw = self.get_raw_config(cmdline) self.parsed = self.parse_raw() else: self.raw = [] self.parsed = {} def get(self, key): return self.parsed.get(key, self.defaults[key]) def parse(self, cmdline): try: opts, args = getopt.getopt(cmdline, self.short_opts, self.long_opts) except getopt.error, errmsg: Error(_("%s in %%!%s command") % (errmsg, self.__class__.__name__[:-7].upper())) return (opts, args) def parse_raw(self, raw=None): if not raw: raw = self.raw # Reset attributes to our modest needs cm = ConfigMaster(raw) cm.dft_options = self.dft_options.copy() cm.dft_flags = self.dft_flags.copy() cm.dft_actions = {} cm.dft_settings = {} cm.incremental = [] cm.numeric = [] cm.multi = [] # maybe in the future: ['infile'] cm.defaults = self.defaults.copy() cm.off = cm._get_off() return cm.parse() class CsvOptions(BaseOptions): """Tokenize and parse the %!CSV command arguments. When you find this line in the user document: %!CSV: -s tab foo.csv Just feed everything after the first : to this class, as a single string. It will be tokenized, parsed and saved to self.raw and self.parsed. Use the self.get() method to get the value of a config. If missing, the default value will be returned. Example: >>> import txt2tags, pprint >>> csvopt = txt2tags.CsvOptions('-s tab foo.csv') >>> pprint.pprint(csvopt.raw) [['all', 'separator', 'tab'], ['all', 'infile', 'foo.csv'], ['all', 'realcmdline', ['-s', 'tab', 'foo.csv']]] >>> pprint.pprint(csvopt.parsed) {'infile': 'foo.csv', 'realcmdline': ['-s', 'tab', 'foo.csv'], 'separator': 'tab'} >>> csvopt.get('separator') 'tab' >>> """ def __init__(self, cmdline=None): # Available options for %!CSV self.dft_options = { 'separator': ',', 'quotechar': '', 'infile': '', } self.dft_flags = { 'headers': 0, 'borders': 0, 'center': 0, 'utf8': 0, 'mailing': 0, } self.short_long = { 'b': 'borders', 'c': 'center', 'h': 'headers', 's': 'separator', 'q': 'quotechar', 'u': 'utf8', 'm': 'mailing', } BaseOptions.__init__(self, cmdline, self.dft_options, self.dft_flags, self.short_long) class DbOptions(BaseOptions): """Tokenize and parse the %!DB command arguments. When you find this line in the user document: %!DB: -q "select * from table" foo.db Just feed everything after the first : to this class, as a single string. It will be tokenized, parsed and saved to self.raw and self.parsed. Use the self.get() method to get the value of a config. If missing, the default value will be returned. Example: >>> import txt2tags, pprint >>> dbopt = txt2tags.DbOptions('-q "select * from table" foo.db') >>> pprint.pprint(dbopt.raw) [['all', 'query', 'select * from table'], ['all', 'infile', 'foo.db'], ['all', 'realcmdline', ['-q', 'select * from table', 'foo.db']]] >>> pprint.pprint(dbopt.parsed) {'infile': 'foo.db', 'query': 'select * from table'} >>> dbopt.get('query') 'select * from table' >>> """ def __init__(self, cmdline=None): # Available options for %!DB self.dft_options = { 'query': '', 'infile': '', } self.dft_flags = { 'borders': 0, 'center': 0, 'headers': 0, 'mailing': 0, } self.short_long = { 'b': 'borders', 'c': 'center', 'h': 'headers', 'q': 'query', 'm': 'mailing', } BaseOptions.__init__(self, cmdline, self.dft_options, self.dft_flags, self.short_long) class FenOptions(BaseOptions): def __init__(self, cmdline=None): # Available options for %!FEN self.dft_options = { 'infile': '', } self.dft_flags = { 'unicode': 0, } self.short_long = { 'u': 'unicode', } BaseOptions.__init__(self, cmdline, self.dft_options, self.dft_flags, self.short_long) ############################################################################## class SourceDocument: """ SourceDocument class - scan document structure, extract data It knows about full files. It reads a file and identify all the areas beginning (Head,Conf,Body). With this info it can extract each area contents. Note: the original line break is removed. DATA: self.arearef - Save Head, Conf, Body init line number self.areas - Store the area names which are not empty self.buffer - The full file contents (with NO \\r, \\n) METHODS: get() - Access the contents of an Area. Example: config = SourceDocument(file).get('conf') split() - Get all the document Areas at once. Example: head, conf, body = SourceDocument(file).split() RULES: * The document parts are sequential: Head, Conf and Body. * One ends when the next begins. * The Conf Area is optional, so a document can have just Head and Body Areas. These are the Areas limits: - Head Area: the first three lines - Body Area: from the first valid text line to the end - Conf Area: the comments between Head and Body Areas Exception: If the first line is blank, this means no header info, so the Head Area is just the first line. """ def __init__(self, filename='', contents=[]): self.areas = ['head', 'conf', 'body'] self.arearef = [] self.areas_fancy = '' self.filename = filename self.buffer = [] if filename: self.scan_file(filename) elif contents: self.scan(contents) def split(self): "Returns all document parts, splitted into lists." return self.get('head'), self.get('conf'), self.get('body') def get(self, areaname): "Returns head|conf|body contents from self.buffer" # Sanity if areaname not in self.areas: return [] if not self.buffer: return [] # Go get it bufini = 1 bufend = len(self.buffer) if areaname == 'head': ini = bufini end = self.arearef[1] or self.arearef[2] or bufend elif areaname == 'conf': ini = self.arearef[1] end = self.arearef[2] or bufend elif areaname == 'body': ini = self.arearef[2] end = bufend else: Error("Unknown Area name '%s'" % areaname) lines = self.buffer[ini:end] # Make sure head will always have 3 lines while areaname == 'head' and len(lines) < 3: lines.append('') return lines def scan_file(self, filename): Debug("source file: %s" % filename) Message(_("Loading source document"), 1) buf = Readfile(filename, remove_linebreaks=1) self.scan(buf) def scan(self, lines): "Run through source file and identify head/conf/body areas" buf = lines if len(buf) == 0: Error(_('The input file is empty: %s') % self.filename) cfg_parser = ConfigLines().parse_line buf.insert(0, '') # text start at pos 1 ref = [1, 4, 0] if not buf[1].strip(): # no header ref[0] = 0 ref[1] = 2 rgx = getRegexes() on_comment_block = 0 for i in xrange(ref[1], len(buf)): # find body init: # Handle comment blocks inside config area if not on_comment_block \ and rgx['blockCommentOpen'].search(buf[i]): on_comment_block = 1 continue if on_comment_block \ and rgx['blockCommentOpen'].search(buf[i]): on_comment_block = 0 continue if on_comment_block: continue if buf[i].strip() and ( # ... not blank and buf[i][0] != '%' or # ... not comment or rgx['macros'].match(buf[i]) or # ... %%macro rgx['toc'].match(buf[i]) or # ... %%toc cfg_parser(buf[i], 'include')[1] or # ... %!include cfg_parser(buf[i], 'csv')[1] or # ... %!csv cfg_parser(buf[i], 'db')[1] or # ... %!db cfg_parser(buf[i], 'fen')[1] # ... %!fen ): ref[2] = i break if ref[1] == ref[2]: ref[1] = 0 # no conf area for i in 0, 1, 2: # del !existent if ref[i] >= len(buf): ref[i] = 0 # title-only if not ref[i]: self.areas[i] = '' Debug('Head,Conf,Body start line: %s' % ref) self.arearef = ref # save results self.buffer = buf # Fancyness sample: head conf body (1 4 8) self.areas_fancy = "%s (%s)" % ( ' '.join(self.areas), ' '.join(map(str, map(lambda x: x or '', ref)))) Message(_("Areas found: %s") % self.areas_fancy, 2) def get_raw_config(self): "Handy method to get the CONF area RAW config (if any)" if not self.areas.count('conf'): return [] Message(_("Scanning source document CONF area"), 1) raw = ConfigLines( file_=self.filename, lines=self.get('conf'), first_line=self.arearef[1]).get_raw_config() Debug("document raw config: %s" % raw, 1) return raw ############################################################################## class ConfigMaster: """ ConfigMaster class - the configuration wizard This class is the configuration master. It knows how to handle the RAW and PARSED config format. It also performs the sanity checking for a given configuration. DATA: self.raw - Stores the config on the RAW format self.parsed - Stores the config on the PARSED format self.defaults - Stores the default values for all keys self.off - Stores the OFF values for all keys self.multi - List of keys which can have multiple values self.numeric - List of keys which value must be a number self.incremental - List of keys which are incremental RAW FORMAT: The RAW format is a list of lists, being each mother list item a full configuration entry. Any entry is a 3 item list, on the following format: [ TARGET, KEY, VALUE ] Being a list, the order is preserved, so it's easy to use different kinds of configs, as CONF area and command line, respecting the precedence. The special target 'all' is used when no specific target was defined on the original config. PARSED FORMAT: The PARSED format is a dictionary, with all the 'key : value' found by reading the RAW config. The self.target contents matters, so this dictionary only contains the target's config. The configs of other targets are ignored. The CommandLine and ConfigLines classes have the get_raw_config() method to convert the configuration found to the RAW format. Just feed it to parse() and get a brand-new ready-to-use config dictionary. Example: >>> raw = CommandLine().get_raw_config(['-n', '-H']) >>> print raw [['all', 'enum-title', ''], ['all', 'no-headers', '']] >>> parsed = ConfigMaster(raw).parse() >>> print parsed {'enum-title': 1, 'headers': 0} """ def __init__(self, raw=[], target=''): self.raw = raw self.target = target self.parsed = {} self.dft_options = OPTIONS.copy() self.dft_flags = FLAGS.copy() self.dft_actions = ACTIONS.copy() self.dft_settings = SETTINGS.copy() self.defaults = self._get_defaults() self.off = self._get_off() self.incremental = ['verbose'] self.numeric = ['toc-level', 'split', 'width', 'height'] self.multi = ['infile', 'preproc', 'postproc', 'postvoodoo', 'options', 'style', 'stylepath'] def _get_defaults(self): "Get the default values for all config/options/flags" empty = {} for kw in CONFIG_KEYWORDS: empty[kw] = '' empty.update(self.dft_options) empty.update(self.dft_flags) empty.update(self.dft_actions) empty.update(self.dft_settings) empty['realcmdline'] = '' # internal use only empty['sourcefile'] = '' # internal use only empty['currentsourcefile'] = '' # internal use only return empty def _get_off(self): "Turns OFF all the config/options/flags" off = {} for key in self.defaults.keys(): kind = type(self.defaults[key]) if kind == type(9): off[key] = 0 elif kind == type('') or kind == type(u''): off[key] = '' elif kind == type([]): off[key] = [] else: Error('ConfigMaster: %s: Unknown type' % key) return off def _check_target(self): "Checks if the target is already defined. If not, do it" if not self.target: self.target = self.find_value('target') def get_target_raw(self): "Returns the raw config for self.target or 'all'" ret = [] self._check_target() for entry in self.raw: if entry[0] == self.target or entry[0] == 'all': ret.append(entry) return ret def add(self, key, val): "Adds the key:value pair to the config dictionary (if needed)" # %!options if key == 'options': # Actions are not valid inside %!options ignoreme = self.dft_actions.keys() # --target inside %!options is not allowed (use %!target) ignoreme.append('target') # But there are some exceptions that are allowed (XXX why?) ignoreme.remove('dump-config') ignoreme.remove('dump-source') ignoreme.remove('targets') raw_opts = CommandLine().get_raw_config( val, ignore=ignoreme) for target, key, val in raw_opts: self.add(key, val) return # The no- prefix turns OFF this key if key.startswith('no-'): key = key[3:] # remove prefix val = self.off.get(key) # turn key OFF # Is this key valid? if key not in self.defaults: Debug('Bogus Config %s:%s' % (key, val), 1) return # Is this value the default one? if val == self.defaults.get(key): # If default value, remove previous key:val if key in self.parsed: del self.parsed[key] # Nothing more to do return # Flags ON comes empty. we'll add the 1 value now if val == '' and ( key in self.dft_flags or key in self.dft_actions): val = 1 # Multi value or single? if key in self.multi: # First one? start new list if key not in self.parsed: self.parsed[key] = [] self.parsed[key].append(val) # Incremental value? so let's add it elif key in self.incremental: self.parsed[key] = (self.parsed.get(key) or 0) + val else: self.parsed[key] = val fancykey = dotted_spaces("%12s" % key) Message(_("Added config") + (" %s : %s" % (fancykey, val)), 3) def get_outfile_name(self, config={}): "Dirname is the same for {in,out}file" infile, outfile = config['sourcefile'], config['outfile'] # Set output to STDOUT/MODULEOUT when no real inputfile if infile == STDIN and not outfile: outfile = STDOUT if infile == MODULEIN and not outfile: outfile = MODULEOUT # Automatic outfile name: infile.target if not outfile and (infile and config.get('target')): # .t2t and .txt are the only "official" source extensions basename = re.sub('\.t[2x]t$', '', infile) outfile = "%s.%s" % (basename, config['target']) if config['target'] == 'aat' and config['slides']: outfile = "%s.%s" % (basename, 'aap') if config['target'] == 'aat' and config['spread']: outfile = "%s.%s" % (basename, 'aas') if config['target'] == 'aat' and config['web']: outfile = "%s.%s" % (basename, 'aatw') if config['target'] == 'aat' and config['slides'] and config['web']: outfile = "%s.%s" % (basename, 'aapw') if config['target'] == 'aat' and config['spread'] and config['web']: outfile = "%s.%s" % (basename, 'aasw') if config['target'] == 'aat' and config['slides'] and config['print']: outfile = "%s.%s" % (basename, 'aapp') Debug(" infile: '%s'" % infile , 1) Debug("outfile: '%s'" % outfile, 1) return outfile def sanity(self, config, gui=0): "Basic config sanity checking" global AA global RST global CSV if not config: return {} target = config.get('target') # Some actions don't require target specification if not target: for action in NO_TARGET: if config.get(action): target = 'txt' break # On GUI, some checking are skipped if not gui: # We *need* a target if not target: Error(_('No target specified (try --help)') + '\n\n' + _('Please inform a target using the -t option or the %!target command.') + '\n' + _('Example:') + ' %s -t html %s' % (my_name, _('file.t2t')) + '\n\n' + _("Run 'txt2tags --targets' to see all the available targets.")) # And of course, an infile also # TODO#1: It seems that this checking is never reached if not config.get('infile'): Error(_('Missing input file (try --help)')) # Is the target valid? if not TARGETS.count(target): Error(_("Invalid target '%s'") % target + '\n\n' + _("Run 'txt2tags --targets' to see all the available targets.")) # Ensure all keys are present empty = self.defaults.copy() empty.update(config) config = empty.copy() # Check integers options for key in config.keys(): if key in self.numeric: try: config[key] = int(config[key]) except ValueError: Error(_('--%s value must be a number') % key) # Check split level value if config['split'] not in (0, 1, 2): Error(_('Option --split must be 0, 1 or 2')) if target == 'aap': target, config['slides'] = 'aat', True if target == 'aas': target, config['spread'] = 'aat', True if target == 'aatw': target, config['web'] = 'aat', True if target == 'aapw': target, config['slides'], config['web'] = 'aat', True, True if target == 'aasw': target, config['spread'], config['web'] = 'aat', True, True if target == 'aapp': target, config['slides'], config['print'] = 'aat', True, True # Slides needs width and height if config['slides'] and target == 'aat': if config['web']: if not config['width']: config['width'] = DFT_SLIDE_WEB_WIDTH if not config['height']: config['height'] = DFT_SLIDE_WEB_HEIGHT if config['print']: if not config['width']: config['width'] = DFT_SLIDE_PRINT_WIDTH if not config['height']: config['height'] = DFT_SLIDE_PRINT_HEIGHT if not config['width'] and not config['height'] and os.name == 'posix': import fcntl, termios data = fcntl.ioctl(sys.stdout.fileno(), termios.TIOCGWINSZ, 4*'00') term_height, term_width = struct.unpack('4H',data)[:2] config['height'], config['width'] = term_height - 1, term_width if not config['width']: config['width'] = DFT_SLIDE_WIDTH if not config['height']: config['height'] = DFT_SLIDE_HEIGHT # ASCII Art needs a width if target == 'aat' and not config['width']: config['width'] = DFT_TEXT_WIDTH if target == 'aat' and config['width'] < 5: Error(_("--width: Expected width > 4, got %i") % config['width']) # Check/set user ASCII Art formatting characters config['unicode_art'] = False if config['chars']: try: # Peace for ASCII 7-bits only config['chars'] = config['chars'].encode() except: if config['encoding'].lower() == 'utf-8' and locale.getpreferredencoding() != 'UTF-8': Error(_("--chars: Expected chars from an UTF-8 terminal for your UTF-8 file")) if config['encoding'].lower() != 'utf-8' and locale.getpreferredencoding() == 'UTF-8': if not config['encoding']: Error(_("--chars: Expected an UTF-8 file for your chars from an UTF-8 terminal, you could set %!encoding: UTF-8")) else: Error(_("--chars: Expected an UTF-8 file for your chars from an UTF-8 terminal")) if target == 'aat': if config['chars'] == 'unicode': config['unicode_art'] = True if config['encoding'].lower() != 'utf-8': if not config['encoding']: Error(_("--chars: Expected an UTF-8 file for the unicode chars set, you could set %!encoding: UTF-8")) else: Error(_("--chars: Expected an UTF-8 file for the unicode chars set")) config['chars'] = unichr(0x250c) + unichr(0x2510) + unichr(0x2514) + unichr(0x2518) + unichr(0x252C) + unichr(0x2534) + unichr(0x251c) + unichr(0x2524) + unichr(0x255e) + unichr(0x256a) + unichr(0x2561) + unichr(0x256c) + unichr(0x2565) + unichr(0x256b) + unichr(0x2568) + unichr(0x253c) + unichr(0x2500) + unichr(0x2502) + unichr(0x2500) + unichr(0x2550) + unichr(0x2550) + unichr(0x2500) + '^"' + unichr(0x2043) + unichr(0x2550) + unichr(0x2551) + '8' if len(config['chars']) != len(AA_SIMPLE) and len(config['chars']) != len(AA_ADVANCED): Error(_("--chars: Expected %i or %i chars, got %i") % ( len(AA_SIMPLE), len(AA_ADVANCED), len(config['chars']))) if isinstance(config['chars'], unicode): for char in config['chars']: if unicodedata.east_asian_width(char) in ('F', 'W'): Error(_("--chars: Expected no CJK double width chars, but got %s") % char.encode('utf-8')) if len(config['chars']) == len(AA_SIMPLE): config['chars'] = 15 * config['chars'][0] + config['chars'] AA = dict(zip(AA_KEYS, config['chars'])) elif target == 'rst': if len(config['chars']) != len(RST_VALUES): Error(_("--chars: Expected %i chars, got %i") % ( len(RST_VALUES), len(config['chars']))) else: # http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections chars_section = '!"#$%&\'()*+,-./:;<=>?@[\]^_`{|}~' for char in config['chars'][:7]: if char not in chars_section: if locale.getpreferredencoding() == 'UTF-8': char = char.encode('utf-8') Error(_("--chars: Expected chars in : %s but got %s") % (chars_section, char)) # http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#bullet-lists chars_bullet, char_8 = '*+-', config['chars'][7] if char_8 not in chars_bullet: if locale.getpreferredencoding() == 'UTF-8': char_8 = char_8.encode('utf-8') Error(_("--chars: Expected chars in : %s but got %s") % (chars_bullet, char_8)) RST = dict(zip(RST_KEYS, config['chars'])) elif target in ('csv', 'csvs'): if len(config['chars']) != len(CSV_VALUES) and len(config['chars']) != len(CSV_VALUES) + 1: Error(_("--chars: Expected %i or %i chars, got %i") % ( len(CSV_VALUES), len(CSV_VALUES) + 1, len(config['chars']))) else: CSV = dict(zip(CSV_KEYS, config['chars'])) if target == 'ods': if not config['encoding']: config['encoding'] = 'UTF-8' # --toc-only is stronger than others if config['toc-only']: config['headers'] = 0 config['toc'] = 0 config['split'] = 0 config['gui'] = 0 config['outfile'] = config['outfile'] or STDOUT # Splitting is disable for now (future: HTML only, no STDOUT) config['split'] = 0 # Restore target config['target'] = target # Set output file name config['outfile'] = self.get_outfile_name(config) # Checking suicide if os.path.abspath(config['sourcefile']) == os.path.abspath(config['outfile']) and \ config['outfile'] not in [STDOUT, MODULEOUT] and not gui: Error(_("Input and Output files are the same: %s") % config['outfile']) if target == 'db': try: import sqlite3 except: Error('No sqlite3 module') global DB, DBC try: os.remove(config['outfile']) except: pass DB = sqlite3.connect(config['outfile']) DBC = DB.cursor() return config def parse(self): "Returns the parsed config for the current target" raw = self.get_target_raw() for target, key, value in raw: if key == 'chars' and locale.getpreferredencoding() == 'UTF-8': self.add(key, value.decode('utf-8')) else: self.add(key, value) Message(_("Added the following keys: %s") % ', '.join(self.parsed.keys()), 2) return self.parsed.copy() def find_value(self, key='', target=''): "Scans ALL raw config to find the desired key" ret = [] # Scan and save all values found for targ, k, val in self.raw: if k == key and (targ == target or targ == 'all'): ret.append(val) if not ret: return '' # If not multi value, return only the last found if key in self.multi: return ret else: return ret[-1] ######################################################################## class ConfigLines: """ ConfigLines class - the config file data extractor This class reads and parse the config lines on the %!key:val format, converting it to RAW config. It deals with user config file (RC file), source document CONF area and %!includeconf directives. Call it passing a file name or feed the desired config lines. Then just call the get_raw_config() method and wait to receive the full config data on the RAW format. This method also follows the possible %!includeconf directives found on the config lines. Example: raw = ConfigLines(file=".txt2tagsrc").get_raw_config() The parse_line() method is also useful to be used alone, to identify and tokenize a single config line. For example, to get the %!include command components, on the source document BODY: target, key, value = ConfigLines().parse_line(body_line) """ # parse_line regexes, moved here to avoid recompilation _parse_cfg = re.compile(""" ^%!\s* # leading id with opt spaces (?P\w+)\s* # config name ($(?P\w*)$)? # optional target spec inside () \s*:\s* # key:value delimiter with opt spaces (?P\S.+?) # config value \s*$ # rstrip() spaces and hit EOL """, re.I + re.VERBOSE) _parse_prepost = re.compile(""" # ---[ PATTERN ]--- ^( "([^"]*)" # "double quoted" or | '([^']*)' # 'single quoted' or | ([^\s]+) # single_word ) \s+ # separated by spaces # ---[ REPLACE ]--- ( "([^"]*)" # "double quoted" or | '([^']*)' # 'single quoted' or | (.*) # anything ) \s*$ """, re.VERBOSE) _parse_guicolors = re.compile("^([^\s]+\s+){3}[^\s]+") # 4 tokens def __init__(self, file_='', lines=[], first_line=1): self.file = file_ or 'NOFILE' self.lines = lines self.first_line = first_line if file_: self.folder = os.path.dirname(self.file) else: self.folder = '' def load_lines(self): "Make sure we've loaded the file contents into buffer" if not self.lines and not self.file: Error("ConfigLines: No file or lines provided") if not self.lines: self.lines = self.read_config_file(self.file) def read_config_file(self, filename=''): "Read a Config File contents, aborting on invalid line" if not filename: return [] errormsg = _("Invalid CONFIG line on %s") + "\n%03d:%s" lines = Readfile(filename, remove_linebreaks=1) # Sanity: try to find invalid config lines for i in xrange(len(lines)): line = lines[i].rstrip() if not line: # empty continue if line[0] != '%': Error(errormsg % (filename, i + 1, line)) return lines def include_config_file(self, file_=''): "Perform the %!includeconf action, returning RAW config" if not file_: return [] # Fix config file path file_ = self.fix_config_relative_path(file_) # Read and parse included config file contents return ConfigLines(file_=file_).get_raw_config() def fix_config_relative_path(self, path_): """ The path for external files must be relative to the config file path. External files appear in: %!includeconf, %!style, %!template. See issue 71. """ return PathMaster().join(self.folder, path_) def get_raw_config(self): "Scan buffer and extract all config as RAW (including includes)" ret = [] self.load_lines() first = self.first_line def add(target, key, val): "Save the RAW config" ret.append([target, key, val]) Message(_("Added %s") % key, 3) for i in xrange(len(self.lines)): line = self.lines[i] Message(_("Processing line %03d: %s") % (first + i, line), 2) target, key, val = self.parse_line(line) if not key: # no config on this line continue # %!style # We need to fix the CSS files path. See issue 71. # # This stylepath config holds the fixed path for each CSS file. # This path is used when composing headers, inside doHeader(). # if key == 'style': stylepath = self.fix_config_relative_path(val) add(target, 'stylepath', stylepath) # Note: the normal 'style' config will be added later # %!options if key == 'options': # Prepend --dirname option to track config file original folder if self.folder: val = '--dirname %s %s' % (self.folder, val) # %!includeconf if key == 'includeconf': # Sanity err = _('A file cannot include itself (loop!)') if val == self.file: Error("%s: %%!includeconf: %s" % (err, self.file)) more_raw = self.include_config_file(val) ret.extend(more_raw) Message(_("Finished Config file inclusion: %s") % val, 2) # Normal config, except %!includeconf else: add(target, key, val) return ret def parse_line(self, line='', keyname='', target=''): "Detects %!key:val config lines and extract data from it" empty = ['', '', ''] if not line: return empty no_target = ['target', 'includeconf'] # XXX TODO \S.+? requires TWO chars, breaks %!include:a cfgregex = ConfigLines._parse_cfg prepostregex = ConfigLines._parse_prepost guicolors = ConfigLines._parse_guicolors # Give me a match or get out match = cfgregex.match(line) if not match: return empty if keyname and keyname != match.group('name'): return empty if target and match.group('target') not in (None, '', target): return empty # Save information about this config name = (match.group('name') or '').lower() target = (match.group('target') or 'all').lower() value = match.group('value') # %!keyword(target) not allowed for these if name in no_target and match.group('target'): Error( _("You can't use (target) with %s") % ('%!' + name) + "\n%s" % line) # Force no_target keywords to be valid for all targets if name in no_target: target = 'all' # Special config for GUI colors if name == 'guicolors': valmatch = guicolors.search(value) if not valmatch: return empty value = re.split('\s+', value) # Special config with two quoted values (%!preproc: "foo" 'bar') if name in ['preproc', 'postproc', 'postvoodoo']: valmatch = prepostregex.search(value) if not valmatch: return empty getval = valmatch.group patt = getval(2) or getval(3) or getval(4) or '' repl = getval(6) or getval(7) or getval(8) or '' value = (patt, repl) return [target, name, value] ############################################################################## class MaskMaster: """(Un)Protect important structures from escaping and formatting. Some inline markup must be protected, because its contents may match other markup, or because we should not escape or format its contents in any way. When the source line is read, we call the mask() method to identify those inliners (link, mono, macro, raw, tagged) and change each one for an (ugly) internal identifier. For example, ''this'' will become vvvTAGGED0vvv. The number increases as other inliners of the same type are found. The method undo() is called at the end of the line processing, expanding all masks back to their original (untouched) content. """ def __init__(self): self.linkmask = 'vvvLINKNNNvvv' # NNN will be replaced by the index self.monomask = 'vvvMONONNNvvv' self.macromask = 'vvvMACRONNNvvv' self.rawmask = 'vvvRAWNNNvvv' self.taggedmask = 'vvvTAGGEDNNNvvv' self.mathmask = 'vvvMATHNNNvvv' self.tocmask = 'vvvTOCvvv' self.linkmaskre = re.compile('vvvLINK(\d+)vvv') self.monomaskre = re.compile('vvvMONO(\d+)vvv') self.macromaskre = re.compile('vvvMACRO(\d+)vvv') self.rawmaskre = re.compile('vvvRAW(\d+)vvv') self.taggedmaskre = re.compile('vvvTAGGED(\d+)vvv') self.mathmaskre = re.compile('vvvMATH(\d+)vvv') self.macroman = MacroMaster() self.reset() def reset(self): self.linkbank = [] self.monobank = [] self.macrobank = [] self.rawbank = [] self.taggedbank = [] self.mathbank = [] self.math_masks = [] def mask(self, line=''): global AUTOTOC # The verbatim, raw, tagged and math inline marks are mutually exclusive. # This means that one can't appear inside the other. # If found, the inner marks must be ignored. # Example: ``foo ""bar"" ''baz''`` # In HTML: foo ""bar"" ''baz'' # # The trick here is to protect the mark who appears first on the line. # The four regexes are tried and the one with the lowest index wins. # If none is found (else), we get out of the loop. # while True: # Try to match the line for the three marks # Note: 'z' > 99999999... # t = r = v = m = 'z' try: t = regex['tagged'].search(line).start() except: pass try: r = regex['raw'].search(line).start() except: pass try: v = regex['fontMono'].search(line).start() except: pass try: m = regex['math'].search(line).start() except: pass # Protect tagged text if t >= 0 and t < r and t < v and t < m: txt = regex['tagged'].search(line).group(1) txt = doProtect(TARGET, txt) i = len(self.taggedbank) self.taggedbank.append(txt) mask = self.taggedmask.replace('NNN', str(i)) line = regex['tagged'].sub(mask, line, 1) # Protect raw text elif r >= 0 and r < t and r < v and r < m: txt = regex['raw'].search(line).group(1) txt = doEscape(TARGET, txt) i = len(self.rawbank) self.rawbank.append(txt) mask = self.rawmask.replace('NNN', str(i)) line = regex['raw'].sub(mask, line, 1) # Protect verbatim text elif v >= 0 and v < t and v < r and v < m: txt = regex['fontMono'].search(line).group(1) txt = doEscape(TARGET, txt) i = len(self.monobank) self.monobank.append(txt) mask = self.monomask.replace('NNN', str(i)) line = regex['fontMono'].sub(mask, line, 1) # Protect math formula elif m >= 0 and m < t and m < v and m < r: txt = regex['math'].search(line).group(1) txt = doEscape(TARGET, txt) i = len(self.mathbank) self.mathbank.append(txt) mask = self.mathmask.replace('NNN', str(i)) self.math_masks.append(mask) line = regex['math'].sub(mask, line, 1) else: break # Protect macros while regex['macros'].search(line): txt = regex['macros'].search(line).group() i = len(self.macrobank) self.macrobank.append(txt) mask = self.macromask.replace('NNN', str(i)) line = regex['macros'].sub(mask, line, 1) # Protect TOC location while regex['toc'].search(line): line = regex['toc'].sub(self.tocmask, line) AUTOTOC = 0 # Protect URLs and emails while regex['linkmark'].search(line) or \ regex['link'].search(line): # Try to match plain or named links match_link = regex['link'].search(line) match_named = regex['linkmark'].search(line) # Define the current match if match_link and match_named: # Both types found, which is the first? m = match_link if match_named.start() < match_link.start(): m = match_named else: # Just one type found, we're fine m = match_link or match_named # Extract link data and apply mask if m == match_link: # plain link link = m.group() label = '' link_re = regex['link'] else: # named link link = fix_relative_path(m.group('link')) label = m.group('label').rstrip() link_re = regex['linkmark'] # Save link data to the link bank i = len(self.linkbank) self.linkbank.append((label, link)) # Mask the link mark in the original line mask = self.linkmask.replace('NNN', str(i)) line = link_re.sub(mask, line, 1) return line def undo(self, line): # url & email matches = list(self.linkmaskre.finditer(line)) while matches: m = matches.pop() i = int(m.group(1)) label, url = self.linkbank[i] link = get_tagged_link(label, url) line = line[0:m.start()] + link + line[m.end():] # Expand macros matches = list(self.macromaskre.finditer(line)) while matches: m = matches.pop() i = int(m.group(1)) macro = self.macroman.expand(self.macrobank[i]) line = line[0:m.start()] + macro + line[m.end():] # Expand verb matches = list(self.monomaskre.finditer(line)) while matches: m = matches.pop() i = int(m.group(1)) open_, close = TAGS['fontMonoOpen'], TAGS['fontMonoClose'] line = line[0:m.start()] + open_ + self.monobank[i] + close + line[m.end():] # Expand raw matches = list(self.rawmaskre.finditer(line)) while matches: m = matches.pop() i = int(m.group(1)) line = line[0:m.start()] + self.rawbank[i] + line[m.end():] # Expand tagged matches = list(self.taggedmaskre.finditer(line)) while matches: m = matches.pop() i = int(m.group(1)) line = line[0:m.start()] + self.taggedbank[i] + line[m.end():] # Expand math matches = list(self.mathmaskre.finditer(line)) while matches: mask = self.math_masks.pop() m = matches.pop() i = int(m.group(1)) if TARGET in ['aat', 'tex', 'html5']: mathp = math_print(TARGET, self.mathbank[i]).split("\n") if not isinstance(line, list): line_start = line[0:m.start()] line_end = line[m.end():] else: midline_start = line[middle][0:m.start()] midline_end = line[middle][m.end():] if TARGET == 'aat': if len(mathp) > 1: middle = len(mathp) / 2 if not isinstance(line, list): line = [" " * len(line_start) + l for l in mathp[:middle]] + [line_start + mathp[middle] + line_end] + [" " * len(line_start) + l for l in mathp[middle+1:]] else: diff_mask = len(mask) - len(mathp[middle]) diff_line = len(line) - len(mathp) up_lines = [''] * (abs(diff_line)/2) down_lines = [''] * (abs(diff_line) - (abs(diff_line)/2)) if diff_line < 0: line = up_lines + line + down_lines else: mathp = up_lines + mathp + down_lines middle = len(mathp) / 2 if diff_mask < 0: line = [" " * len(midline_start) + m + " " * abs(diff_mask) + l[len(midline_start + m):] for m, l in zip(mathp[:middle], line[:middle])] + [midline_start + mathp[middle] + midline_end] + [" " * len(midline_start) + m + " " * abs(diff_mask) + l[len(midline_start + m):] for m, l in zip(mathp[middle+1:], line[middle+1:])] else: line = [" " * len(midline_start) + m + l[len(midline_start + m) + diff_mask:] for m, l in zip(mathp[:middle], line[:middle])] + [midline_start + mathp[middle] + midline_end] + [" " * len(midline_start) + m + l[len(midline_start + m) + diff_mask:] for m, l in zip(mathp[middle+1:], line[middle+1:])] else: if not isinstance(line, list): line = line_start + mathp[0] + line_end else: diff_mask = len(mask) - len(mathp[0]) if diff_mask < 0: line = [" " * abs(diff_mask) + l for l in line[:middle]] + [midline_start + mathp[0] + midline_end] + [" " * abs(diff_mask) + l for l in line[middle+1:]] else: line = [l[diff_mask:] for l in line[:middle]] + [midline_start + mathp[0] + midline_end] + [l[diff_mask:] for l in line[middle+1:]] elif TARGET == 'tex': line = line_start + '$' + mathp[0] + '$' + line_end elif TARGET == 'html5': line = '\n'.join([line_start, ' $'] + mathp[2:-1] + [line_end])
else:
line = line_start + self.mathbank[i] + line_end

return line

##############################################################################

class TitleMaster:
"Title things"
def __init__(self):
self.count = ['', 0, 0, 0, 0, 0]
self.toc = []
self.level = 0
self.kind = ''
self.txt = ''
self.label = ''
self.tag = ''
self.tag_hold = []
self.last_level = 0
self.count_id = ''
self.anchor_count = 0
self.anchor_prefix = 'toc'

def _open_close_blocks(self):
"Open new title blocks, closing the previous (if any)"
if not rules['titleblocks']:
return
tag = ''
last = self.last_level
curr = self.level

# Same level, just close the previous
if curr == last:
tag = TAGS.get('title%dClose' % last)
if tag:
self.tag_hold.append(tag)

# Section -> subsection, more depth
while curr > last:
last += 1

# Open the new block of subsections
tag = TAGS.get('blockTitle%dOpen' % last)
if tag:
self.tag_hold.append(tag)

# Jump from title1 to title3 or more
# Fill the gap with an empty section
if curr - last > 0:
tag = TAGS.get('title%dOpen' % last)
tag = regex['x'].sub('', tag) # del \a
if tag:
self.tag_hold.append(tag)

# Section <- subsection, less depth
while curr < last:
# Close the current opened subsection
tag = TAGS.get('title%dClose' % last)
if tag:
self.tag_hold.append(tag)

# Close the current opened block of subsections
tag = TAGS.get('blockTitle%dClose' % last)
if tag:
self.tag_hold.append(tag)

last -= 1

# Close the previous section of the same level
# The subsections were under it
if curr == last:
tag = TAGS.get('title%dClose' % last)
if tag:
self.tag_hold.append(tag)

def add(self, line):
"Parses a new title line."
if not line:
return
self._set_prop(line)
self._open_close_blocks()
self._set_count_id()
self._set_label()
self._save_toc_info()

def close_all(self):
"Closes all opened title blocks"
ret = []
ret.extend(self.tag_hold)
while self.level:
tag = TAGS.get('title%dClose' % self.level)
if tag:
ret.append(tag)
tag = TAGS.get('blockTitle%dClose' % self.level)
if tag:
ret.append(tag)
self.level -= 1
return ret

def _save_toc_info(self):
"Save TOC info, used by self.dump_marked_toc()"
self.toc.append((self.level, self.count_id, self.txt, self.label))

def _set_prop(self, line=''):
"Extract info from original line and set data holders."
# Detect title type (numbered or not)
id_ = line.lstrip()[0]
if id_ == '=':
kind = 'title'
elif id_ == '+':
kind = 'numtitle'
else:
Error("Unknown Title ID '%s'" % id_)
# Extract line info
match = regex[kind].search(line)
level = len(match.group('id'))
txt = match.group('txt').strip()
label = match.group('label')
# Parse info & save
if CONF['enum-title']:
kind = 'numtitle' # force
if rules['titleblocks']:
self.tag = TAGS.get('%s%dOpen' % (kind, level)) or \
TAGS.get('title%dOpen' % level)
else:
self.tag = TAGS.get(kind + str(level)) or \
TAGS.get('title' + str(level))
self.last_level = self.level
self.kind = kind
self.level = level
self.txt = txt
self.label = label

def _set_count_id(self):
"Compose and save the title count identifier (if needed)."
count_id = ''
if self.kind == 'numtitle' and not rules['autonumbertitle']:
# Manually increase title count
self.count[self.level] += 1
# Reset sublevels count (if any)
max_levels = len(self.count)
if self.level < max_levels - 1:
for i in xrange(self.level + 1, max_levels):
self.count[i] = 0
# Compose count id from hierarchy
for i in xrange(self.level):
count_id = "%s%d." % (count_id, self.count[i + 1])
self.count_id = count_id

def _set_label(self):
"Compose and save title label, used by anchors."
# Remove invalid chars from label set by user
self.label = re.sub('[^A-Za-z0-9_-]', '', self.label or '')
# Generate name as 15 first :alnum: chars
#TODO how to translate safely accented chars to plain?
#self.label = re.sub('[^A-Za-z0-9]', '', self.txt)[:15]
# 'tocN' label - sequential count, ignoring 'toc-level'
#self.label = self.anchor_prefix + str(len(self.toc) + 1)

def _get_tagged_anchor(self):
"Return anchor if user defined a label, or TOC is on."
ret = ''
label = self.label
if CONF['toc'] and self.level <= CONF['toc-level']:
# This count is needed bcos self.toc stores all
# titles, regardless of the 'toc-level' setting,
# so we can't use self.toc length to number anchors
self.anchor_count += 1
# Autonumber label (if needed)
label = label or '%s%s' % (self.anchor_prefix, self.anchor_count)
if label and TAGS['anchor']:
ret = regex['x'].sub(label, TAGS['anchor'])
return ret

def _get_full_title_text(self):
"Returns the full title contents, already escaped."
ret = self.txt
# Insert count_id (if any) before text
if self.count_id:
ret = '%s %s' % (self.count_id, ret)
# Escape specials
ret = doEscape(TARGET, ret)
# Same targets needs final escapes on title lines
# It's here because there is a 'continue' after title
if rules['finalescapetitle']:
ret = doFinalEscape(TARGET, ret)
return ret

def get(self):
"Returns the tagged title as a list."
global AA_TITLE, AA_COUNT
ret = []

# Maybe some anchoring before?
anchor = self._get_tagged_anchor()
self.tag = regex['_anchor'].sub(anchor, self.tag)

### Compose & escape title text (TOC uses unescaped)
full_title = self._get_full_title_text()

# Close previous section area
ret.extend(self.tag_hold)
self.tag_hold = []

tagged = regex['x'].sub(full_title, self.tag)

if rules['tableonly']:
AA_TITLE = full_title.replace(' ', '_')
AA_COUNT = 0

# Adds "underline" on TXT target
if TARGET == 'txt':
if BLOCK.count > 1:
ret.append('') # blank line before
ret.append(tagged)
i = aa_lencjk(full_title)
ret.append(regex['x'].sub('=' * i, self.tag))
elif TARGET == 'aat' and self.level == 1:
if CONF['slides']:
AA_TITLE = tagged
else :
if BLOCK.count > 1:
ret.append('') # blank line before
box = aa_box([tagged], AA, CONF['width'])
if CONF['web'] and CONF['toc']:
ret.extend([anchor] + box + [''])
else:
ret.extend(box)
elif TARGET == 'aat':
level = 'level' + str(self.level)
if BLOCK.count > 1:
ret.append('') # blank line before
if CONF['slides']:
under = aa_under(tagged, AA[level], CONF['width'] - 2, False)
else:
under = aa_under(tagged, AA[level], CONF['width'], False)
if CONF['web'] and CONF['toc'] and self.level <= CONF['toc-level'] and not CONF['slides']:
ret.extend([anchor] + under + [''])
else:
ret.extend(under)
elif TARGET == 'rst' and self.level == 1:
if BLOCK.count > 1:
ret.append('') # blank line before
ret.extend(aa_under(tagged, RST['level1'], 10000, True))
elif TARGET == 'rst':
level = 'level' + str(self.level)
if BLOCK.count > 1:
ret.append('') # blank line before
ret.extend(aa_under(tagged, RST[level], 10000, False))
else:
ret.append(tagged)
return ret

def dump_marked_toc(self, max_level=99):
"Dumps all toc itens as a valid t2t-marked list"
ret = []
toc_count = 1
head = 0
if CONF['headers'] and CONF['header1']:
head = 1
for level, count_id, txt, label in self.toc:
if level > max_level: # ignore
continue
indent = ' ' * level
id_txt = ('%s %s' % (count_id, txt)).lstrip()
if CONF['target'] == 'aat' and CONF['slides']:
indent = ' ' * (level - 1)
if CONF['web'] and not CONF['toc-only']:
label = str(AA_PW_TOC[txt] / CONF['height'] + head + 2) + '.0'
label = label or self.anchor_prefix + str(toc_count)
toc_count += 1

# TOC will have crosslinks to anchors
if TAGS['anchor']:
if CONF['enum-title'] and level == 1:
# 1. [Foo #anchor] is more readable than [1. Foo #anchor] in level 1.
# This is a stoled idea from Windows .CHM help files.
tocitem = '%s+ [""%s"" #%s]' % (indent, txt, label)
else:
tocitem = '%s- [""%s"" #%s]' % (indent, id_txt, label)

# TOC will be plain text (no links)
else:
if rules['plaintexttoc'] and not CONF['slides']:
# For these, the list is not necessary, just dump the text
tocitem = '%s""%s""' % (indent, id_txt)
elif TARGET == 'aat' and CONF['enum-title'] and level == 1:
tocitem = '%s+ ""%s""' % (indent, txt)
else:
tocitem = '%s- ""%s""' % (indent, id_txt)
ret.append(tocitem)
return ret

##############################################################################

# Table syntax reference for targets:
# http://www.mediawiki.org/wiki/Help:Tables
# http://moinmo.in/HelpOnMoinWikiSyntax#Tables
# http://moinmo.in/HelpOnTables
# http://www.wikicreole.org/wiki/Creole1.0#section-Creole1.0-Tables
# http://www.wikicreole.org/wiki/Tables
# http://www.pmwiki.org/wiki/PmWiki/Tables
# http://www.dokuwiki.org/syntax#tables
# http://michelf.com/projects/php-markdown/extra/#table
# http://code.google.com/p/support/wiki/WikiSyntax#Tables
# http://www.biblioscape.com/rtf15_spec.htm
# http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#tables
#
# Good reading:
# http://www.wikicreole.org/wiki/ListOfTableMarkups
#
# See also:
# test/marks/table.t2t
# test/target/table.t2t

#TODO check all this table mess
# It uses parse_row properties for table lines
# BLOCK.table() replaces the cells by the parsed content
#
class TableMaster:
def __init__(self, line=''):
self.rows = []
self.border = 0
self.align = 'Left'
self.cellalign = []
self.colalign = []
self.cellspan = []
if line:
prop = self.parse_row(line)
self.border = prop['border']
self.title = prop['title']
self.vert_head = prop['vert_head']
self.align = prop['align']
self.cellalign = prop['cellalign']
self.cellspan = prop['cellspan']
self.n_cols = sum(self.cellspan)
self.colalign = self._get_col_align()

def _get_col_align(self):
colalign = []
for cell in range(0, len(self.cellalign)):
align = self.cellalign[cell]
span = self.cellspan[cell]
colalign.extend([align] * span)
return colalign

def _get_full_tag(self, topen):
# topen = TAGS['tableOpen']
tborder = TAGS['_tableBorder']
talign = TAGS['_tableAlign' + self.align]
calignsep = TAGS['tableColAlignSep']
calign = ''

# The first line defines if table has border or not
if not self.border:
tborder = ''
# Set the columns alignment
if rules['tablecellaligntype'] == 'column':
calign = map(lambda x: TAGS['_tableColAlign%s' % x], self.colalign)
calign = calignsep.join(calign)
# Align full table, set border and Column align (if any)
topen = regex['_tableAlign'].sub(talign , topen)
topen = regex['_tableBorder'].sub(tborder, topen)
topen = regex['_tableColAlign'].sub(calign , topen)
# Tex table spec, border or not: {|l|c|r|} , {lcr}
if calignsep and not self.border:
# Remove cell align separator
topen = topen.replace(calignsep, '')
return topen

def _get_cell_align(self, cells):
ret = []
for cell in cells:
align = 'Left'
if cell.strip():
if cell[0] == ' ' and cell[-1] == ' ':
align = 'Center'
elif cell[0] == ' ':
align = 'Right'
ret.append(align)
return ret

def _get_cell_span(self, cells):
ret = []
for cell in cells:
span = 1
m = re.search('\a(\|+)$', cell)
if m:
span = len(m.group(1)) + 1
ret.append(span)
return ret

def _tag_cells(self, rowdata):
cells = rowdata['cells']
open_ = TAGS['tableCellOpen']
close = TAGS['tableCellClose']
sep = TAGS['tableCellSep']
head = TAGS['tableCellHead']
caligntag = map(lambda x: TAGS['tableCellAlign' + x], rowdata['cellalign'])
if TARGET == 'ods' and self.border:
rowdata['cellalign'] = [align + 'Border' for align in rowdata['cellalign']]
calign = map(lambda x: TAGS['_tableCellAlign' + x], rowdata['cellalign'])
calignsep = TAGS['tableColAlignSep']
ncolumns = len(self.colalign)

# Populate the span and multicol open tags
cspan = []
ccovered = []
multicol = []
colindex = 0

thisspan = 0
spanmultiplier = rules['cellspanmultiplier'] or 1
spanprofil = []

cellhead = []
cellbody = []

for cellindex in range(0, len(rowdata['cellspan'])):

span = rowdata['cellspan'][cellindex]
align = rowdata['cellalign'][cellindex]

# hack to get cell size/span into rtf, in twips
if rules['cellspancumulative']:
thisspan += span
else:
thisspan = span
span = thisspan * spanmultiplier

if span > 1:

if TAGS['_tableCellColSpanChar']:
# spanchar * n
cspan.append(TAGS['_tableCellColSpanChar'] * (span - 1))
# Note: using -1 for moin, where spanchar == cell delimiter
else:
# \a replaced by n
cspan.append(regex['x'].sub(str(span), TAGS['_tableCellColSpan']))
ccovered.append(regex['x'].sub(str(span - 1), TAGS['tableCellCovered']))

mcopen = regex['x'].sub(str(span), TAGS['_tableCellMulticolOpen'])
multicol.append(mcopen)
else:
cspan.append('')
ccovered.append('')

if colindex < ncolumns and align != self.colalign[colindex]:
mcopen = regex['x'].sub('1', TAGS['_tableCellMulticolOpen'])
multicol.append(mcopen)
else:
multicol.append('')

if not self.border:
multicol[-1] = multicol[-1].replace(calignsep, '')

colindex += span

# Maybe is it a title row?
if rowdata['title']:
# Defaults to normal cell tag if not found
open_ = TAGS['tableTitleCellOpen'] or open_
close = TAGS['tableTitleCellClose'] or close
sep = TAGS['tableTitleCellSep'] or sep
head = TAGS['tableTitleCellHead'] or head

# Should we break the line on *each* table cell?
if rules['breaktablecell']:
close = close + '\n'

# Cells pre processing
if rules['tablecellstrip']:
cells = map(lambda x: x.strip(), cells)
if rowdata['title'] and rules['tabletitlerowinbold']:
cells = map(lambda x: enclose_me('fontBold', x), cells)

# Add cell BEGIN/END tags
for i, cell in enumerate(cells):

# Lout requires special routines, cells are labeled according alphabet
if TARGET == 'lout':
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
if len(rowdata['cellspan']) < self.n_cols:
tblabel = alphabet[sum(rowdata['cellspan'][0:i])]
else:
tblabel = alphabet[i]
copen = re.sub('[\x07]', '', tblabel + open_ )
else:
copen = open_
cclose = close
chead = head
if self.vert_head and i == 0:
if TARGET == 'lout':
copen = copen + TAGS['tableTitleCellOpen']
cclose = cclose + TAGS['tableTitleCellClose']
else:
copen = TAGS['tableTitleCellOpen'] or copen
cclose = TAGS['tableTitleCellClose'] or cclose

if rules['breaktablecell']:
cclose = cclose + '\n'

# Make sure we will pop from some filled lists
# Fixes empty line bug '| |'
this_align = this_cell = this_span = this_mcopen = ''
if calign:
this_align = calign.pop(0)
if caligntag:
this_cell = caligntag.pop(0)
if cspan:
this_span = cspan.pop(0)
if ccovered:
this_covered = ccovered.pop(0)
if multicol:
this_mcopen = multicol.pop(0)

# Insert cell align into open tag (if cell is alignable)
if rules['tablecellaligntype'] == 'cell':
copen = regex['_tableCellAlign'].sub(this_align, copen)
cclose = regex['_tableCellAlign'].sub(this_align, cclose)
chead = regex['_tableCellAlign'].sub(this_align, chead)

# Insert cell data into cellAlign tags
if this_cell:
cell = regex['x'].sub(cell, this_cell)

# Insert cell span into open tag (if cell is spannable)
if rules['tablecellspannable']:
copen = regex['_tableCellColSpan'].sub(this_span, copen)
cclose = regex['_tableCellColSpan'].sub(this_span, cclose)
chead = regex['_tableCellColSpan'].sub(this_span, chead)

if rules['tablecellcovered']:
cclose = cclose + this_covered

# Use multicol tags instead (if multicol supported, and if
# cell has a span or is aligned differently to column)
if rules['tablecellmulticol']:
if this_mcopen:
copen = regex['_tableColAlign'].sub(this_align, this_mcopen)
cclose = TAGS['_tableCellMulticolClose']

# RTF target requires the border in each cell
border = ''
if self.border:
border = TAGS['_tableCellBorder']
copen = regex['_tableBorder'].sub(border, copen)
cclose = regex['_tableBorder'].sub(border, cclose)
chead = regex['_tableBorder'].sub(border, chead)

# Attribute delimiter, added when align/span attributes were used
# Example: Wikipedia table cell, without and with attributes:
# | cell contents
# | align="right" colspan="2" | cell contents
#
if regex['_tableAttrDelimiter'].search(copen):
if this_align or this_span:
copen = regex['_tableAttrDelimiter'].sub(TAGS['_tableAttrDelimiter'], copen)
else:
copen = regex['_tableAttrDelimiter'].sub('', copen) # remove marker

if chead:
cellhead.append(chead)
cellbody.append(copen + cell + cclose)

# Maybe there are cell separators?
return ''.join(cellhead) + sep.join(cellbody)

def add_row(self, cells):
self.rows.append(cells)

def parse_row(self, line):
# Default table properties
ret = {
'border': 0, 'title': 0, 'vert_head': 0, 'align': 'Left',
'cells': [], 'cellalign': [], 'cellspan': []
}
# Detect table align (and remove spaces mark)
if line[0] == ' ':
ret['align'] = 'Center'
line = line.lstrip()
# Detect vertical header mark
if line[1] == '_':
ret['vert_head'] = 1
line = line[0] + line[2:]
# Detect horizontal and vertical headers mark
if line[1] == '/':
ret['vert_head'] = 1
line = line[0] + '|' + line[2:]
# Detect title mark
if line[1] == '|':
ret['title'] = 1
# Detect border mark and normalize the EOL
m = re.search(' (\|+) *$', line)
if m:
line = line + ' '
ret['border'] = 1
else:
line = line + ' | '
# Delete table mark
line = regex['table'].sub('', line)
# Detect colspan | foo | bar baz |||
line = re.sub(' (\|+)\| ', '\a\\1 | ', line)
# Split cells (the last is fake)
ret['cells'] = line.split(' | ')[:-1]
# Find cells span
ret['cellspan'] = self._get_cell_span(ret['cells'])
# Remove span ID
ret['cells'] = map(lambda x: re.sub('\a\|+$', '', x), ret['cells'])
# Find cells align
ret['cellalign'] = self._get_cell_align(ret['cells'])
# Hooray!
Debug('Table Prop: %s' % ret, 7)
return ret

def dump(self):
open_ = self._get_full_tag(TAGS['tableOpen'])
if TARGET in ['ods', 'csv', 'csvs']:
global AA_COUNT
if AA_TITLE:
if AA_COUNT:
if TARGET == 'ods':
open_ = re.sub('table_name', AA_TITLE + '_', open_)
open_ = re.sub('n_table', str(AA_COUNT), open_)
else:
csv_file = AA_TITLE + '_' + str(AA_COUNT)
AA_COUNT += 1
else:
if TARGET == 'ods':
open_ = re.sub('table_name', AA_TITLE, open_)
open_ = re.sub('n_table', '', open_)
else:
csv_file = AA_TITLE
AA_COUNT = 2
else:
if TARGET == 'ods':
open_ = re.sub('n_table', str(BLOCK.tablecount), open_)
open_ = re.sub('table_name', _('Sheet'), open_)
else:
csv_file = _('Sheet') + str(BLOCK.tablecount)
if rules['tablenumber']:
open_ = re.sub('n_table', str(BLOCK.tablecount), open_)
if rules['tablecolumnsnumber']:
# Lout requires alphabetical index
alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
open_ = re.sub('n_cols', str(self.n_cols), open_)
rows = self.rows
close = self._get_full_tag(TAGS['tableClose'])

rowopen = self._get_full_tag(TAGS['tableRowOpen'])
rowclose = self._get_full_tag(TAGS['tableRowClose'])
rowsep = self._get_full_tag(TAGS['tableRowSep'])
titrowopen = self._get_full_tag(TAGS['tableTitleRowOpen']) or rowopen
titrowclose = self._get_full_tag(TAGS['tableTitleRowClose']) or rowclose

if rules['breaktablelineopen']:
rowopen = rowopen + '\n'
titrowopen = titrowopen + '\n'

# Tex gotchas
if TARGET in ['tex', 'texs']:
if not self.border:
rowopen = titrowopen = ''
else:
close = rowopen + close

# Now we tag all the table cells on each row
#tagged_cells = map(lambda x: self._tag_cells(x), rows) #!py15
tagged_cells = []
for cell in rows:
tagged_cells.append(self._tag_cells(cell))

# Add row separator tags between lines
tagged_rows = []
if rowsep:
#!py15
#tagged_rows = map(lambda x: x + rowsep, tagged_cells)
for cell in tagged_cells:
tagged_rows.append(cell + rowsep)
# Remove last rowsep, because the table is over
tagged_rows[-1] = tagged_rows[-1].replace(rowsep, '')
# Add row BEGIN/END tags for each line
else:
for rowdata in rows:
if rowdata['title']:
o, c = titrowopen, titrowclose
else:
o, c = rowopen, rowclose
row = tagged_cells.pop(0)
if TARGET == 'lout':
calign = map(lambda x: TAGS['_tableCellAlign' + x], rowdata['cellalign'])
element = rowdata['cellspan']
spandef = ''
regdef = ''
# In Lout the format of a row can be defined once after table
# setup or for each row separately. For automatization the
# second approach is chosen.
formatdef = 'format { '

for i in range(0, len(element)):
regdef += ' @Cell '+ calign[i] + alphabet[i] + ' |'
regdef = formatdef + re.sub('\|$','',regdef) + ' }\n'

# Set up Lout definition for spanning cells
if 1 <= len(element) < self.n_cols:
spandef += formatdef
inccol = 0
for i in range(0, len(element)):
if (element[i] == 1 and i == 0) or (element[i] == 1 and element[i-1] == 1):
spandef += ' @Cell ' + calign[i] + alphabet[i+inccol] + ' |'
if element[i] == 1 and i > 0 and element[i-1] > 1:
spandef += ' @Cell ' + calign[i] + alphabet[i+inccol] + ' |'
if element[i] > 1:
spandef += ' @StartHVSpan @Cell ' + calign[i] + alphabet[i+inccol] + ' |'
for j in range(1, element[i]):
spandef += ' @HSpan |'
inccol += 1
spandef = re.sub('\|$','',spandef)
spandef += '}\n'
regdef = spandef

if rowdata['title']:
# Necessary to get a heading over every page on Multi-page
# tables AND over first row
tagged_rows.append(titrowopen + regdef + row + titrowclose)
tagged_rows.append(rowopen + regdef + row + rowclose)
else:
tagged_rows.append(o + regdef + row + c)

else:
tagged_rows.append(o + row + c)

# Join the pieces together
fulltable = []
if open_:
fulltable.append(open_)
fulltable.extend(tagged_rows)
if close:
fulltable.append(close)

if TARGET in ['csv', 'csvs']:
file_dict[csv_file + '.' + TARGET] = fulltable

return fulltable

##############################################################################

class BlockMaster:
"TIP: use blockin/out to add/del holders"
def __init__(self):
self.BLK = []
self.HLD = []
self.PRP = []
self.depth = 0
self.count = 0
self.last = ''
self.tableparser = None
self.tablecount = 0
self.contains = {
'para' : ['comment', 'raw', 'tagged'],
'verb' : [],
'table' : ['comment'],
'raw' : [],
'tagged' : [],
'comment' : [],
'quote' : ['quote', 'comment', 'raw', 'tagged'],
'list' : ['list', 'numlist', 'deflist', 'para', 'verb', 'comment', 'raw', 'tagged'],
'numlist' : ['list', 'numlist', 'deflist', 'para', 'verb', 'comment', 'raw', 'tagged'],
'deflist' : ['list', 'numlist', 'deflist', 'para', 'verb', 'comment', 'raw', 'tagged'],
'bar' : [],
'title' : [],
'numtitle': [],
}
self.allblocks = self.contains.keys()

# If one is found inside another, ignore the marks
self.exclusive = ['comment', 'verb', 'raw', 'tagged']

# May we include bars inside quotes?
if rules['barinsidequote']:
self.contains['quote'].append('bar')

def block(self):
if not self.BLK:
return ''
return self.BLK[-1]

def isblock(self, name=''):
return self.block() == name

def prop(self, key):
if not self.PRP:
return ''
return self.PRP[-1].get(key) or ''

def propset(self, key, val):
self.PRP[-1][key] = val
#Debug('BLOCK prop ++: %s->%s' % (key, repr(val)), 1)
#Debug('BLOCK props: %s' % (repr(self.PRP)), 1)

def hold(self):
if not self.HLD:
return []
return self.HLD[-1]

def holdadd(self, line):
if self.block().endswith('list'):
line = [line]
self.HLD[-1].append(line)
Debug('HOLD add: %s' % repr(line), 4)
Debug('FULL HOLD: %s' % self.HLD, 4)

def holdaddsub(self, line):
self.HLD[-1][-1].append(line)
Debug('HOLD addsub: %s' % repr(line), 4)
Debug('FULL HOLD: %s' % self.HLD, 4)

def holdextend(self, lines):
if self.block().endswith('list'):
lines = [lines]
self.HLD[-1].extend(lines)
Debug('HOLD extend: %s' % repr(lines), 4)
Debug('FULL HOLD: %s' % self.HLD, 4)

def blockin(self, block):
ret = []
if block not in self.allblocks:
Error("Invalid block '%s'" % block)

# First, let's close other possible open blocks
while self.block() and block not in self.contains[self.block()]:
ret.extend(self.blockout())

# Now we can gladly add this new one
self.BLK.append(block)
self.HLD.append([])
self.PRP.append({})
self.count += 1
if block == 'table':
self.tableparser = TableMaster()
# Deeper and deeper
self.depth = len(self.BLK)
Debug('block ++ (%s): %s' % (block, self.BLK), 3)
return ret

def blockout(self):
if not self.BLK:
Error('No block to pop')
blockname = self.BLK.pop()
result = getattr(self, blockname)()
parsed = self.HLD.pop()
self.PRP.pop()
self.depth = len(self.BLK)

if rules['tableonly'] and blockname != 'table':
return []

if blockname == 'table':
del self.tableparser

# Inserting a nested block into mother
if self.block():
if blockname != 'comment': # ignore comment blocks
if self.block().endswith('list'):
self.HLD[-1][-1].append(result)
else:
self.HLD[-1].append(result)
# Reset now. Mother block will have it all
result = []

Debug('block -- (%s): %s' % (blockname, self.BLK), 3)
Debug('RELEASED (%s): %s' % (blockname, parsed), 3)

# Save this top level block name (produced output)
# The next block will use it
if result:
self.last = blockname
if rules['iswrapped']:
final = []
if TARGET != 'aat':
for line in result:
if not line or blockname in ('verb', 'tagged'):
final.append(line)
else:
final.extend(aa_wrap(line, CONF['width'], False))
elif CONF['slides'] and blockname in ('list', 'numlist', 'deflist'):
final.extend(aa_box(result, AA, CONF['width'], False, web=CONF['web'], slides=CONF['slides']))
else:
for line in result:
if not line or (blockname in ('table', 'tagged', 'verb') and not CONF['slides']) or (blockname == 'quote' and CONF['slides']):
final.append(line)
else:
if CONF['slides'] and blockname in ('table', 'tagged', 'verb'):
final.append(line[:CONF['width']])
elif CONF['slides']:
final.extend(' ' + line for line in aa_wrap(line, CONF['width'] - 2, CONF['web']))
else:
final.extend(aa_wrap(line, CONF['width'], CONF['web']))
result = final[:]

Debug('BLOCK: %s' % result, 6)

# ASCII Art processing
global AA_TITLE, AA_COUNT, AA_PW_TOC, AA_IMG
if TARGET == 'aat' and CONF['slides'] and not CONF['toc-only'] and not CONF.get('art-no-title'):
len_res = len(result)
for el in result:
if ' CONF['height'] - 6:
result[CONF['height'] - 7] = result[CONF['height'] - 7] + '$
' for i, line in enumerate(result[CONF['height'] - 6:]): j = i % CONF['height'] if j == 0: result[i + CONF['height'] - 6] = '
' + line elif j == CONF['height'] - 1: result[i + CONF['height'] - 6] = line + '
' result = spaces + [end_line + ''] + slide_head + [''] + result elif CONF['print']: result = spaces + [end_line + ''] + slide_head + [''] + result else: result = spaces + [end_line] + slide_head + [''] + result if (blockname in ["title", "numtitle"] and TITLE.level == 1) or not AA_TITLE: if not AA_TITLE: AA_TITLE = os.path.splitext(CONF['sourcefile'])[0].capitalize() end_line = aa_line(AA['bar2'], CONF['width']) aa_title = aa_slide(AA_TITLE, AA['bar2'], CONF['width'], CONF['web']) + [''] if AA_COUNT: if CONF['web']: aa_title = spaces + [end_line + ''] + aa_title elif CONF['print']: aa_title = spaces + [end_line + ''] + aa_title else: aa_title = spaces + [end_line] + aa_title result = aa_title + result AA_COUNT += len(result) + AA_IMG AA_IMG = 0 if blockname in ["title", "numtitle"]: AA_PW_TOC[TITLE.txt] = AA_COUNT return result def _last_escapes(self, line): return doFinalEscape(TARGET, line) def _get_escaped_hold(self): ret = [] for line in self.hold(): linetype = type(line) if linetype == type('') or linetype == type(u''): ret.append(self._last_escapes(line)) elif linetype == type([]): ret.extend(line) else: Error("BlockMaster: Unknown HOLD item type: %s" % linetype) return ret def _remove_twoblanks(self, lastitem): if len(lastitem) > 1 and lastitem[-2:] == ['', '']: return lastitem[:-2] return lastitem def _should_add_blank_line(self, where, blockname): "Validates the blanksaround* rules" # Nestable blocks: only mother blocks (level 1) are spaced if blockname.endswith('list') and self.depth > 1: return False # The blank line after the block is always added if where == 'after' \ and rules['blanksaround' + blockname]: return True # # No blank before if it's the first block of the body # elif where == 'before' \ # and BLOCK.count == 1: # return False # # No blank before if it's the first block of this level (nested) # elif where == 'before' \ # and self.count == 1: # return False # The blank line before the block is only added if # the previous block haven't added a blank line # (to avoid consecutive blanks) elif where == 'before' \ and rules['blanksaround' + blockname] \ and not rules.get('blanksaround' + self.last): return True # Nested quotes are handled here, # because the mother quote isn't closed yet elif where == 'before' \ and blockname == 'quote' \ and rules['blanksaround' + blockname] \ and self.depth > 1: return True return False # functions to help encode block depth into RTF formatting def _apply_depth(self, line, level): # convert block depth into an indent in twips depth = level multiply = rules['blockdepthmultiply'] if depth > 0 and rules['depthmultiplyplus']: depth = depth + rules['depthmultiplyplus'] if multiply: depth = depth * multiply return regex['_blockDepth'].sub(str(depth), line) def _apply_list_level(self, line, level): mylevel = level if rules['listlevelzerobased']: mylevel = mylevel - 1 return regex['_listLevel'].sub(str(mylevel), line) def comment(self): return '' def raw(self): lines = self.hold() return map(lambda x: doEscape(TARGET, x), lines) def tagged(self): return self.hold() def para(self): result = [] open_ = TAGS['paragraphOpen'] close = TAGS['paragraphClose'] lines = self._get_escaped_hold() # Blank line before? if self._should_add_blank_line('before', 'para'): result.append('') # RTF needs depth level encoded into nested paragraphs mydepth = self.depth if rules['zerodepthparagraph']: mydepth = 0 open_ = self._apply_depth(open_, mydepth) # Open tag if open_: result.append(open_) # Pagemaker likes a paragraph as a single long line if rules['onelinepara']: result.append(' '.join(lines)) # Others are normal :) else: result.extend(lines) # Close tag if close: result.append(close) # Blank line after? if self._should_add_blank_line('after', 'para'): result.append('') # Very very very very very very very very very UGLY fix # Needed because can't appear inside
try: if len(lines) == 1 and \ TARGET in ('html', 'xhtml', 'xhtmls', 'wp') and \ re.match('^\s*
.*\s*$', lines[0]): result = [lines[0]] except: pass return result def verb(self): "Verbatim lines are not masked, so there's no need to unmask" result = [] open_ = TAGS['blockVerbOpen'] close = TAGS['blockVerbClose'] sep = TAGS['blockVerbSep'] # Blank line before? if self._should_add_blank_line('before', 'verb'): result.append('') # Open tag if open_: result.append(open_) # Get contents for line in self.hold(): if self.prop('mapped') == 'table': line = MacroMaster().expand(line) if not rules['verbblocknotescaped']: line = doEscape(TARGET, line) if TAGS['blockVerbLine']: line = TAGS['blockVerbLine'] + line if rules['indentverbblock']: line = ' ' + line if rules['verbblockfinalescape']: line = doFinalEscape(TARGET, line) result.append(line) if sep: result.append(sep) if sep: result.pop() # Close tag if close: result.append(close) # Blank line after? if self._should_add_blank_line('after', 'verb'): result.append('') return result def numtitle(self): return self.title('numtitle') def title(self, name='title'): result = [] # Blank line before? if self._should_add_blank_line('before', name): result.append('') # Get contents result.extend(TITLE.get()) # Blank line after? if self._should_add_blank_line('after', name): result.append('') return result def table(self): self.tablecount += 1 result = [] if TARGET == 'aat' and self.tableparser.rows: if CONF['spread']: data = spreadsheet(completes_table(self.tableparser.rows), rules['spreadmarkup'], rules['spreadgrid']) return aa_table(data, AA, CONF['width'], True, True, True, 'Center', True, CONF['web']) + [''] else: return aa_table(completes_table(self.tableparser.rows), AA, CONF['width'], self.tableparser.border, self.tableparser.title, self.tableparser.vert_head, self.tableparser.align, False, False) + [''] if TARGET == 'rst' and self.tableparser.rows: chars = AA.copy() if not self.tableparser.border: chars['border'] = '=' chars['tlcorner'] = chars['trcorner'] = chars['cross'] = chars['blcorner'] = chars['brcorner'] = chars['lcross'] = chars['side'] = chars['rcross'] = chars['tcross'] = chars['bcross'] = chars['lhhead'] = chars['rhhead'] = ' ' return aa_table(completes_table(self.tableparser.rows), chars, CONF['width'], self.tableparser.border, self.tableparser.title, False, 'Left', False, False) + [''] if TARGET == 'mgp' and self.tableparser.rows: aa_t = aa_table(completes_table(self.tableparser.rows), AA, CONF['width'], True, self.tableparser.title, False, 'Left', False, False) try: import aafigure t_name = 'table_' + str(self.tablecount) + '.png' aafigure.render(unicode('\n'.join(aa_t)), t_name, {'format':'png', 'background':'#000000', 'foreground':'#FFFFFF', 'textual':True}) return ['%center', '%newimage "' + t_name + '"'] except: return ['%font "mono"'] + aa_t + [''] if TARGET == 'db' and self.tableparser.rows: data = completes_table(self.tableparser.rows) n = max([len(line[0]) for line in data]) table = 'table_' + str(self.tablecount) if self.tableparser.title: cols = [s.strip().replace(' ', '_') for s in data[0][0]] del data[0] else: cols = [] for i in range(n): cols.append('col_' + str(i + 1)) cols_insert = ', '.join(cols) cols_create = ' text, '.join(cols) + ' text' sql_create = 'create table ' + table + ' (id integer primary key, ' + cols_create + ')' DBC.execute(sql_create) sql_insert = 'insert into ' + table + ' (' + cols_insert + ') values' + ' (' + ('?,' * n)[:-1] + ')' for line in data: DBC.execute(sql_insert, line[0]) DB.commit() # Blank line before? if self._should_add_blank_line('before', 'table'): result.append('') # Rewrite all table cells by the unmasked and escaped data lines = self._get_escaped_hold() for i in xrange(len(lines)): cells = lines[i].split(SEPARATOR) self.tableparser.rows[i]['cells'] = cells if rules['spread']: data = spreadsheet(completes_table(self.tableparser.rows), rules['spreadmarkup'], rules['spreadgrid']) self.tableparser.border, len_line = True, len(data[0][0]) self.tableparser.cellalign = len_line self.tableparser.colalign = len_line * ['Left'] if rules['spreadgrid']: self.tableparser.vert_head = True self.tableparser.rows = [{'cells':data[0][0], 'cellspan':data[0][1], 'cellalign':['Left']*len_line, 'title':1}] + self.tableparser.rows for i, row in enumerate(self.tableparser.rows[1:]): row['cells'], row['cellspan'], row['cellalign'], row['title'] = data[i+1][0], data[i+1][1], ['Left']*len_line, 0 else: for i, row in enumerate(self.tableparser.rows): row['cells'], row['cellspan'], row['cellalign'], row['title'] = data[i][0], data[i][1], ['Left']*len_line, 0 result.extend(self.tableparser.dump()) # Blank line after? if self._should_add_blank_line('after', 'table'): result.append('') return result def quote(self): result = [] open_ = TAGS['blockQuoteOpen'] # block based close = TAGS['blockQuoteClose'] qline = TAGS['blockQuoteLine'] # line based indent = tagindent = '\t' * self.depth # Apply rules if rules['tagnotindentable']: tagindent = '' if not rules['keepquoteindent']: indent = '' # Blank line before? if self._should_add_blank_line('before', 'quote'): result.append('') # RTF needs depth level encoded into almost everything open_ = self._apply_depth(open_, self.depth) # Open tag if open_: result.append(tagindent + open_) itemisclosed = False # Get contents if rules['onelinequote']: # XXX Dirty hack, won't work for nested blocks inside quote (when TABS are used in your t2t source), even subquotes result.append(' '.join([regex['quote'].sub('', x) for x in self.hold()])) else: for item in self.hold(): if type(item) == type([]): if close and rules['quotenotnested']: result.append(tagindent + close) itemisclosed = True result.extend(item) # subquotes else: if open_ and itemisclosed: result.append(tagindent + open_) item = regex['quote'].sub('', item) # del TABs item = self._last_escapes(item) if CONF['target'] == 'aat' and not CONF['slides']: result.extend(aa_quote(item, qline, ' ', CONF['width'], self.depth, CONF['web'])) elif CONF['target'] == 'aat' and CONF['slides']: result.extend(aa_box([item], AA, CONF['width'], web=CONF['web'], slides=CONF['slides'])) else: item = qline * self.depth + item result.append(indent + item) # quote line # Close tag if close and not itemisclosed: result.append(tagindent + close) # Blank line after? if self._should_add_blank_line('after', 'quote'): result.append('') return result def bar(self): result = [] bar_tag = '' # Blank line before? if self._should_add_blank_line('before', 'bar'): result.append('') # Get the original bar chars bar_chars = self.hold()[0].strip() # Set bar type if bar_chars.startswith('='): bar_tag = TAGS['bar2'] else: bar_tag = TAGS['bar1'] # To avoid comment tag confusion like (sgml) if TAGS['comment'].count('--'): bar_chars = bar_chars.replace('--', '__') # Get the bar tag (may contain \a) result.append(regex['x'].sub(bar_chars, bar_tag)) # Blank line after? if self._should_add_blank_line('after', 'bar'): result.append('') return result def deflist(self): return self.list('deflist') def numlist(self): return self.list('numlist') def list(self, name='list'): result = [] items = self.hold() indent = self.prop('indent') tagindent = indent listline = TAGS.get(name + 'ItemLine') itemcount = 0 if name == 'deflist': itemopen = TAGS[name + 'Item1Open'] itemclose = TAGS[name + 'Item2Close'] itemsep = TAGS[name + 'Item1Close'] +\ TAGS[name + 'Item2Open'] else: itemopen = TAGS[name + 'ItemOpen'] itemclose = TAGS[name + 'ItemClose'] itemsep = '' # Apply rules if rules['tagnotindentable']: tagindent = '' if not rules['keeplistindent']: indent = tagindent = '' # RTF encoding depth itemopen = self._apply_depth(itemopen, self.depth) itemopen = self._apply_list_level(itemopen, self.depth) # ItemLine: number of leading chars identifies list depth if listline: if rules['listlineafteropen']: itemopen = itemopen + listline * self.depth else: itemopen = listline * self.depth + itemopen # Adds trailing space on opening tags if (name == 'list' and rules['spacedlistitemopen']) or \ (name == 'numlist' and rules['spacednumlistitemopen']): itemopen = itemopen + ' ' # Remove two-blanks from list ending mark, to avoid
items[-1] = self._remove_twoblanks(items[-1]) # Blank line before? if self._should_add_blank_line('before', name): result.append('') if rules['blanksaroundnestedlist']: result.append('') # Tag each list item (multiline items), store in listbody itemopenorig = itemopen listbody = [] widelist = 0 if CONF['slides']: width = CONF['width'] - 6 else: width = CONF['width'] for item in items: # Add "manual" item count for noautonum targets itemcount += 1 if name == 'numlist' and not rules['autonumberlist']: n = str(itemcount) itemopen = regex['x'].sub(n, itemopenorig) del n # Tag it item[0] = self._last_escapes(item[0]) if name == 'deflist': z, term, rest = item[0].split(SEPARATOR, 2) item[0] = rest if not item[0]: del item[0] # to avoid
listbody.append(tagindent + itemopen + term + itemsep) else: fullitem = tagindent + itemopen if TARGET == 'aat': listbody.extend(aa_quote(item[0].replace(SEPARATOR, ''), tagindent, itemopen, width, 1, CONF['web'], True)) else: listbody.append(item[0].replace(SEPARATOR, fullitem)) del item[0] itemisclosed = False # Process next lines for this item (if any) for line in item: if type(line) == type([]): # sublist inside if rules['listitemnotnested'] and itemclose: listbody.append(tagindent + itemclose) itemisclosed = True if TARGET == 'rst' and name == 'deflist': del line[0] listbody.extend(line) else: line = self._last_escapes(line) # Blank lines turns to
if not line and rules['parainsidelist']: line = indent + TAGS['paragraphOpen'] + TAGS['paragraphClose'] line = line.rstrip() widelist = 1 elif not line and TARGET == 'rtf': listbody.append(TAGS['paragraphClose']) line = TAGS['paragraphOpen'] line = self._apply_depth(line, self.depth) # Some targets don't like identation here (wiki) if not rules['keeplistindent'] or (name == 'deflist' and rules['deflisttextstrip']): line = line.lstrip() # Maybe we have a line prefix to add? (wiki) if name == 'deflist' and TAGS['deflistItem2LinePrefix']: line = TAGS['deflistItem2LinePrefix'] + line if TARGET == 'aat': indent = ' ' * (len(line) - len(line.lstrip())) listbody.extend(aa_quote(line.lstrip(), indent, '', width, 1, CONF['web'])) else: listbody.append(line) # Close item (if needed) if itemclose and not itemisclosed: if rules['notbreaklistitemclose']: listbody[-1] += itemclose else: listbody.append(tagindent + itemclose) if not widelist and rules['compactlist']: listopen = TAGS.get(name + 'OpenCompact') listclose = TAGS.get(name + 'CloseCompact') else: listopen = TAGS.get(name + 'Open') listclose = TAGS.get(name + 'Close') # Open list (not nestable lists are only opened at mother) if listopen and not \ (rules['listnotnested'] and BLOCK.depth != 1): result.append(tagindent + listopen) result.extend(listbody) # Close list (not nestable lists are only closed at mother) if listclose and not \ (rules['listnotnested'] and self.depth != 1): result.append(tagindent + listclose) # Blank line after? if self._should_add_blank_line('after', name): result.append('') if rules['blanksaroundnestedlist']: if result[-1]: result.append('') return result ############################################################################## class MacroMaster: def __init__(self, config={}): self.name = '' self.config = config or CONF self.infile = self.config['sourcefile'] self.outfile = self.config['outfile'] self.currentfile = self.config['currentsourcefile'] self.currdate = time.gmtime(int(os.environ.get('SOURCE_DATE_EPOCH', time.time()))) self.rgx = regex.get('macros') or getRegexes()['macros'] self.fileinfo = {'infile': None, 'outfile': None} self.dft_fmt = MACROS def walk_file_format(self, fmt): "Walks the %%{in/out}file format string, expanding the % flags" i = 0 ret = '' while i < len(fmt): # char by char c = fmt[i] i += 1 if c == '%': # hot char! if i == len(fmt): # % at the end ret = ret + c break c = fmt[i] # read next i += 1 ret = ret + self.expand_file_flag(c) else: ret = ret + c # common char return ret def expand_file_flag(self, flag): "%f: filename %F: filename (w/o extension)" "%d: dirname %D: dirname (only parent dir)" "%p: file path %e: extension" info = self.fileinfo[self.name] # get dict if flag == '%': x = '%' # %% -> % elif flag == 'f': x = info['name'] elif flag == 'F': x = os.path.splitext(info['name'])[0] elif flag == 'd': x = info['dir'] elif flag == 'D': x = os.path.split(info['dir'])[-1] elif flag == 'p': x = info['path'] elif flag == 'e': x = os.path.splitext(info['name'])[1].replace('.', '') else: x = '%' + flag # false alarm return x def set_file_info(self, macroname): if (macroname == 'currentfile'): self.currentfile = self.config['currentsourcefile'] else: if self.fileinfo.get(macroname): # already done return file_ = getattr(self, self.name) # self.infile if file_ == STDOUT or file_ == MODULEOUT: dir_ = '' path = name = file_ else: path = os.path.abspath(file_) dir_ = os.path.dirname(path) name = os.path.basename(path) self.fileinfo[macroname] = {'path': path, 'dir': dir_, 'name': name} def expand(self, line=''): if CONF.get('encoding') and CONF.get('encoding').lower() == 'utf-8' and not isinstance(line, unicode): line = line.decode('utf-8') "Expand all macros found on the line" while self.rgx.search(line): m = self.rgx.search(line) name = self.name = m.group('name').lower() fmt = m.group('fmt') or self.dft_fmt.get(name) if name == 'date': txt = time.strftime(fmt, self.currdate) elif name == 'mtime': if self.infile in (STDIN, MODULEIN): fdate = self.currdate elif PathMaster().is_url(self.infile): try: # Doing it the easy way: fetching the URL again. # The right way would be doing it in Readfile(). # But I'm trying to avoid yet another global var # or fake 'sourcefile_mtime' config. # # >>> f= urllib.urlopen('http://txt2tags.org/index.t2t') # >>> f.info().get('last-modified') # 'Thu, 18 Nov 2010 22:42:11 GMT' # >>> # from urllib import urlopen from email.Utils import parsedate f = urlopen(self.infile) mtime_rfc2822 = f.info().get('last-modified') fdate = parsedate(mtime_rfc2822) except: # If mtime cannot be found, defaults to current date fdate = self.currdate else: mtime = os.path.getmtime(self.infile) fdate = time.gmtime(mtime) txt = time.strftime(fmt, fdate) elif name in ('infile', 'outfile', 'currentfile'): self.set_file_info(name) txt = self.walk_file_format(fmt) elif name == 'appurl': txt = my_url elif name == 'appname': txt = my_name elif name == 'appversion': txt = my_version elif name == 'target': txt = TARGET elif name == 'encoding': txt = self.config['encoding'] elif name == 'cmdline': txt = '%s %s' % (my_name, ' '.join(self.config['realcmdline'])) elif name in ('header1', 'header2', 'header3'): txt = self.config[name] elif name == 'cc': txt = cc_formatter(self.config, fmt) else: # Never reached because the macro regex list the valid keys Error("Unknown macro name '%s'" % name) line = self.rgx.sub(txt, line, 1) return line ############################################################################## def cc_formatter(conf, size): cc, target = conf['cc'].lower(), conf['target'] licenses = 'by, by-sa, by-nc-sa, by-nd, by-nc-nd, by-nc' if cc not in licenses.split(', '): Error(_('Please, choose one of the six valid Creative Commons licenses : %s.') % licenses) if target in ('html', 'xhtml', 'xhtmls', 'html5') or (target == 'aat' and conf['web']): if size == 'small': end_img = '/3.0/80x15.png' else: end_img = '/3.0/88x31.png' url = 'http://creativecommons.org/licenses/' + cc + '/3.0' img = 'http://i.creativecommons.org/l/' + cc + end_img alt = 'Creative Commons ' + cc ret = '' else: if size == 'small': ret = 'Creative Commons %s' % cc else: ret = 'Creative Commons %s' % cc.upper() return ret def listTargets(): """list all available targets""" for typ in TARGET_TYPES: targets = list(TARGET_TYPES[typ][1]) targets.sort() print print TARGET_TYPES[typ][0] + ':' for target in targets: print "\t%s\t%s" % (target, TARGET_NAMES.get(target)) if OTHER_TARGETS: print print _('OTHERS:') for target in OTHER_TARGETS: print "\t%s\t%s" % (target, TARGET_NAMES.get(target)) print if NOT_LOADED: print _('Targets %s from the targets directory not loaded, because there is already targets with the same name in txt2tags core.') % ', '.join(NOT_LOADED) print def dumpConfig(source_raw, parsed_config): onoff = {1: _('ON'), 0: _('OFF')} data = [ (_('RC file') , RC_RAW ), (_('source document'), source_raw ), (_('command line') , CMDLINE_RAW) ] # First show all RAW data found for label, cfg in data: print _('RAW config for %s') % label for target, key, val in cfg: target = '(%s)' % target key = dotted_spaces("%-14s" % key) val = val or _('ON') print ' %-8s %s: %s' % (target, key, val) print # Then the parsed results of all of them print _('Full PARSED config') keys = parsed_config.keys() keys.sort() # sorted for key in keys: val = parsed_config[key] # Filters are the last if key in ['preproc', 'postproc', 'postvoodoo']: continue # Flag beautifier if key in FLAGS or key in ACTIONS: val = onoff.get(val) or val # List beautifier if type(val) == type([]): if key == 'options': sep = ' ' else: sep = ', ' val = sep.join(val) print "%25s: %s" % (dotted_spaces("%-14s" % key), val) print print _('Active filters') for filter_ in ['preproc', 'postproc', 'postvoodoo']: for rule in parsed_config.get(filter_) or []: print "%25s: %s -> %s" % ( dotted_spaces("%-14s" % filter_), rule[0], rule[1]) def get_file_body(file_): "Returns all the document BODY lines" return process_source_file(file_, noconf=1)[1][2] def post_voodoo(lines, config): r''' %!postvoodoo handler - Beware! Voodoo here. For advanced users only. Your entire output document will be put in a single string, to your search/replace pleasure. Line breaks are single \n's in all platforms. You can change multiple lines at once, or even delete them. This is the last txt2tags processing in your file. All %!postproc's were already applied. It's the same as: $ txt2tags myfile.t2t | postvoodoo Your regex will be compiled with no modifiers. The default behavior is: ^ and $ match begin/end of entire string . doesn't match \n \w is not locale aware \w is not Unicode aware You can use (?...) in the beginning of your regex to change behavior: (?s) the dot . will match \n, so .* will get everything (?m) the ^ and $ match begin/end of EACH inner line (?u) the \w, \d, \s and friends will be Unicode aware You can also use (?smu) or any combination of those. Learn more in http://docs.python.org/library/re.html ''' loser1 = _('No, no. Your PostVoodoo regex is wrong. Maybe you should call mommy?') loser2 = _('Dear PostVoodoo apprentice: You got the regex right, but messed the replacement') subject = '\n'.join(lines) spells = compile_filters(config['postvoodoo'], loser1) for (magic, words) in spells: try: subject = magic.sub(words, subject) except: Error("%s: '%s'" % (loser2, words)) return subject.split('\n') def finish_him(outlist, config): "Writing output to screen or file" outfile = config['outfile'] outlist = unmaskEscapeChar(outlist) outlist = expandLineBreaks(outlist) # Apply PostProc filters if config['postproc']: filters = compile_filters(config['postproc'], _('Invalid PostProc filter regex')) postoutlist = [] errmsg = _('Invalid PostProc filter replacement') for line in outlist: for rgx, repl in filters: try: line = rgx.sub(repl, line) except: Error("%s: '%s'" % (errmsg, repl)) postoutlist.append(line) outlist = postoutlist[:] if config['postvoodoo']: outlist = post_voodoo(outlist, config) if MAILING and not rules['tableonly']: reader = MAILING repl_dict = {} for i, val in enumerate(reader): if i == 0: for j, el in enumerate(val): repl_dict[el] = j else: write_file = outfile for key in repl_dict: write_file = write_file.replace('<<%s>>' % key, val[repl_dict[key]]) point = write_file.rfind('.') if write_file == outfile or write_file in file_dict: if point == -1: write_file = write_file + '_' + str(i) else: write_file = write_file[:point] + '_' + str(i) + write_file[point:] newout = [] for line in outlist: for key in repl_dict: line = line.replace('<<%s>>' % key, val[repl_dict[key]]) newout.append(line) file_dict[write_file] = newout elif config['target'] not in ['csv', 'csvs']: file_dict[outfile] = outlist if config['target'] == 'db': DBC.close() if outfile in [MODULEOUT, STDOUT]: outlist = [open(config['outfile']).read()] os.remove(config['outfile']) outlist = [] if outfile == MODULEOUT: for write_file in file_dict: outlist.append(file_dict[write_file]) return outlist elif outfile == STDOUT: for write_file in file_dict: outlist.extend(file_dict[write_file]) if GUI: return outlist, config else: for line in outlist: if isinstance(line, unicode): line = line.encode('utf-8') print line else: if not config['target'] == 'db': for write_file in file_dict: Savefile(write_file, addLineBreaks(file_dict[write_file])) if not GUI and not QUIET: for write_file in file_dict: print _('%s wrote %s') % (my_name, write_file) if config['split']: if not QUIET: print "--- html..." sgml2html = 'sgml2html -s %s -l %s %s' % ( config['split'], config['lang'] or lang, outfile) if not QUIET: print "Running system command:", sgml2html os.system(sgml2html) def toc_inside_body(body, toc, config): ret = [] if AUTOTOC: return body # nothing to expand toc_mark = MaskMaster().tocmask # Expand toc mark with TOC contents flag, n = False, 0 for i,line in enumerate(body): if line.count(toc_mark): # toc mark found if config['toc']: if config['target'] == 'aat' and config['slides']: j = i % config['height'] title = body[i - j + 2 + n] ret.extend([''] * (config['height'] - j - 1 + n)) ret.extend([aa_line(AA['bar1'], config['width'])] + toc + aa_slide(title, AA['bar2'], config['width'], CONF['web']) + ['']) flag = True else: ret.extend(toc) # include if --toc else: pass # or remove %%toc line else: if flag and config['target'] == 'aat' and config['slides'] and body[i] == body[i + 4] == aa_line(AA['bar2'], config['width']): end = [ret[-1]] del ret[-1] ret.extend([''] * (j - 6 - n) + end) flag, n = False, n + 1 ret.append(line) # common line else: ret.append(line) # common line return ret def toc_tagger(toc, config): "Returns the tagged TOC, as a single tag or a tagged list" ret = [] # Convert the TOC list (t2t-marked) to the target's list format if config['toc-only'] or (config['toc'] and not TAGS['TOC']): fakeconf = config.copy() fakeconf['headers'] = 0 fakeconf['toc-only'] = 0 fakeconf['mask-email'] = 0 fakeconf['preproc'] = [] fakeconf['postproc'] = [] fakeconf['postvoodoo'] = [] fakeconf['css-sugar'] = 0 fakeconf['fix-path'] = 0 fakeconf['art-no-title'] = 1 # needed for --toc and --slides together, avoids slide title before TOC ret, foo = convert(toc, fakeconf) set_global_config(config) # restore config # Our TOC list is not needed, the target already knows how to do a TOC elif config['toc'] and TAGS['TOC']: ret = [TAGS['TOC']] return ret def toc_formatter(toc, config): "Formats TOC for automatic placement between headers and body" if config['toc-only']: return toc # no formatting needed if not config['toc']: return [] # TOC disabled ret = toc # Art: An automatic "Table of Contents" header is added to the TOC slide if config['target'] == 'aat' and config['slides']: n = config['height'] - (len(toc) + 6) % config['height'] toc = aa_slide(config['toc-title'] or _("Table of Contents"), AA['bar2'], config['width'], CONF['web']) + toc + ([''] * n) end_toc = aa_line(AA['bar2'], config['width']) if config['web']: end_toc = end_toc + '' toc.append(end_toc) return toc if config['target'] == 'aat' and not config['slides']: ret = aa_box([config['toc-title'] or _("Table of Contents")], AA, config['width']) + toc # TOC open/close tags (if any) if TAGS['tocOpen']: ret.insert(0, TAGS['tocOpen']) if TAGS['tocClose']: ret.append(TAGS['tocClose']) # Autotoc specific formatting if AUTOTOC: if rules['autotocwithbars']: # TOC between bars para = TAGS['paragraphOpen'] + TAGS['paragraphClose'] bar = regex['x'].sub('-' * DFT_TEXT_WIDTH, TAGS['bar1']) tocbar = [para, bar, para] if config['target'] == 'aat' and config['headers']: # exception: header already printed a bar ret = [para] + ret + tocbar else: ret = tocbar + ret + tocbar if rules['blankendautotoc']: # blank line after TOC ret.append('') if rules['autotocnewpagebefore']: # page break before TOC ret.insert(0, TAGS['pageBreak']) if rules['autotocnewpageafter']: # page break after TOC ret.append(TAGS['pageBreak']) return ret # XXX change function name. Now it's called at the end of the execution, dumping the full template. def doHeader(headers, config): if not config['headers']: return config['fullBody'] if not headers: empty_headers = True headers = ['', '', ''] else: empty_headers = False target = config['target'] if target not in HEADER_TEMPLATE: Error("doHeader: Unknown target '%s'" % target) # Use default templates if config['template'] == '' : if target in ('html', 'xhtml', 'xhtmls', 'html5') and config.get('css-sugar'): template = HEADER_TEMPLATE[target + 'css'].split('\n') else: template = HEADER_TEMPLATE[target].split('\n') template.append('%(BODY)s') if TAGS['EOD']: template.append(TAGS['EOD'].replace('%', '%%')) # escape % chars # Read user's template file else: if PathMaster().is_url(config['template']): template = Readfile(config['template'], remove_linebreaks=1) else: templatefile = '' names = [config['template'] + '.' + target, config['template']] for filename in names: if os.path.isfile(filename): templatefile = filename break if not templatefile: Error(_("Cannot find template file:") + ' ' + config['template']) template = Readfile(templatefile, remove_linebreaks=1) head_data = {'STYLE': [], 'ENCODING': ''} # Fix CSS files path config['stylepath_out'] = fix_css_out_path(config) # Populate head_data with config info for key in head_data.keys(): val = config.get(key.lower()) if key == 'STYLE' and 'html' in target: val = config.get('stylepath_out') or [] # Remove .sty extension from each style filename (freaking tex) # XXX Can't handle --style foo.sty, bar.sty if target in ['tex', 'texs'] and key == 'STYLE': val = map(lambda x: re.sub('(?i)\.sty$', '', x), val) if key == 'ENCODING': val = get_encoding_string(val, target) head_data[key] = val # Parse header contents for i in 0, 1, 2: # Expand macros contents = MacroMaster(config=config).expand(headers[i]) # Escapes - on tex, just do it if any \tag{} present if target not in ['tex', 'texs'] or \ (target in ['tex', 'texs'] and re.search(r'\\\w+{', contents)): contents = doEscape(target, contents) if target == 'lout': contents = doFinalEscape(target, contents) head_data['HEADER%d' % (i + 1)] = contents # When using --css-inside, the template's