""" MediaWiki-style markup parse(text) -- returns safe-html from wiki markup code based off of mediawiki """
import re, random, math, locale
from base64 import b64encode, b64decode
from trac.core import *
from trac.wiki.api import IWikiMacroProvider

# Trac component that exposes this module's parse() as the `mediawiki` macro.
class MediaWikiRenderer(Component):
    """ Renders plain text in MediaWiki format as HTML """
    implements(IWikiMacroProvider)

    def get_macros(self):
        """Return a list of provided macros"""
        # Generator: the single macro name this component provides.
        yield 'mediawiki'

    def get_macro_description(self, name):
        # The same placeholder text is returned whatever `name` is.
        return '''desc'''

    def expand_macro(self, formatter, name, content):
        # Only the 'mediawiki' macro is handled; for any other name the method
        # falls off the end and returns None. `formatter` is not used here.
        # NOTE(review): parse() is not defined in the visible part of the file.
        if name == 'mediawiki':
            return parse(content)

# Script entry point: print the rendering of the file named by sys.argv[1].
# NOTE(review): the bare `except: pass` hides every failure (missing argument,
# unreadable file, and a NameError if parse() is only defined further down the
# module), and the file object is never closed explicitly.
if __name__ == "__main__":
    import sys
    try:
        file = open(sys.argv[1])
        print parse(file.read())
    except:
        pass

# Registry filled by registerTagHook(): tag -> registered callable.
mTagHooks = {}

def registerTagHook(tag, function):
    # Store `function` under `tag`, replacing any hook already stored for it.
    mTagHooks[tag] = function

def removeHtmlComments(text):
    """remove comments from given text"""
    # NOTE(review): this function is damaged in the visible source. `end` and
    # `last` are read but never assigned, `break` has no enclosing loop, the
    # first find() passes `start` before it is bound, and the last statement
    # runs straight into the tail of a different definition (the one that
    # returns `text, showToc`). Text was presumably lost before this review;
    # the statements below are reproduced unchanged -- restore the function
    # from a pristine copy of the file before relying on it.
    sb = []
    start = text.find(u'', start)
    if end == -1:
        break
    end += 3
    # Widen the span [start, end) over any spaces directly before and after it.
    # NOTE(review): text[spaceEnd] is not bounds-checked, so a span that
    # reaches the end of `text` would raise IndexError here.
    spaceStart = max(0, start-1)
    spaceEnd = end
    while text[spaceStart] == u' ' and spaceStart > 0:
        spaceStart -= 1
    while text[spaceEnd] == u' ':
        spaceEnd += 1
    # Span bounded by newlines on both sides: drop it together with one of the
    # newlines. Otherwise keep the character just before the span.
    if text[spaceStart] == u'\n' and text[spaceEnd] == u'\n':
        sb.append(text[last:spaceStart])
        sb.append(u'\n')
        last = spaceEnd+1
    else:
        sb.append(text[last:spaceStart+1])
        last = spaceEnd
    # NOTE(review): the next line is kept exactly as found; it is not valid
    # Python and marks the point where source text is missing.
    start = text.find(u'") showToc = True return text, showToc

# Bracketed external link: "[" + target (starting with http://, https://,
# ftp:// or "/") + optional whitespace + label + "]".
# Group 1 is the target, group 2 the (possibly empty) label.
_bracketedLinkPat = re.compile(ur'(?:\[((?:https?://|ftp://|/)[^<>\]\[' + u"\x00-\x20\x7f" + ur']+)\s*(.*?)\])', re.UNICODE)

def replaceExternalLinks(text):
    # Rewrite bracketed links found by _bracketedLinkPat and return the joined
    # result. Because the pattern has two groups, split() yields
    # [text, target, label, text, target, label, ...]: every index divisible
    # by 3 is plain text, the two entries after it describe one link.
    # NOTE(review): the u'' appends do nothing and bits[i] (the link target) is
    # never emitted -- this looks like markup lost from the string literals;
    # confirm against a pristine copy.
    # NOTE(review): to_unicode() is not defined in the visible source.
    sb = []
    bits = _bracketedLinkPat.split(text)
    l = len(bits)
    i = 0
    num_links = 0
    while i < l:
        if i%3 == 0:
            # Plain text between bracketed links: still scanned for bare URLs.
            sb.append(replaceFreeExternalLinks(bits[i]))
            i += 1
        else:
            sb.append(u'')
            if not bits[i+1]:
                # No label given: emit a running number in square brackets.
                sb.append(u'[')
                num_links += 1
                sb.append(to_unicode(num_links))
                sb.append(u']')
            else:
                sb.append(bits[i+1])
            sb.append(u'')
            i += 2
    return ''.join(sb)

# Protocol prefix at a word boundary; captured so that split() keeps it.
_protocolPat = re.compile(ur'(\b(?:https?://|ftp://))', re.UNICODE)
# Group 1: leading run of characters allowed in a URL; group 2: the rest.
# NOTE(review): compiled without re.DOTALL, so `(.*)$` cannot span a newline
# and the pattern fails on multi-line remainders -- confirm this is intended.
_specialUrlPat = re.compile(ur'^([^<>\]\[' + u"\x00-\x20\x7f" + ur']+)(.*)$', re.UNICODE)
# The right-hand side of this assignment is on the following source line.
_protocolsPat = 
re.compile(u'^(https?://|ftp://)$', re.UNICODE)  # right-hand side of the `_protocolsPat = ` assignment that ends the previous line


def replaceFreeExternalLinks(text):
    """Turn bare http://, https:// and ftp:// URLs in *text* into HTML links.

    The text is split on the protocol prefix. For each prefix, the run of
    URL characters that follows becomes the link target; whatever comes after
    it (the "trail") is emitted unchanged. Trailing punctuation is moved from
    the URL into the trail so that "see http://example.com." does not link
    the full stop.

    Returns the rewritten text as a single string.
    """
    # One capture group in the pattern, so split() yields
    # [leading text, protocol, remainder, protocol, remainder, ...].
    bits = _protocolPat.split(text)
    sb = [bits.pop(0)]
    i = 0
    l = len(bits)
    while i < l:
        protocol = bits[i]
        remainder = bits[i + 1]
        i += 2
        match = _specialUrlPat.match(remainder)
        if not match:
            # Nothing URL-like follows the protocol: keep it as plain text.
            sb.append(protocol)
            sb.append(remainder)
            continue

        # Found some characters after the protocol that look promising
        url = protocol + match.group(1)
        trail = match.group(2)

        # special case: handle urls as url args:
        # http://www.example.com/foo?=http://www.example.com/bar
        # The nested URL lives in the *next* protocol/remainder pair, so that
        # pair is matched and consumed here.
        if not trail and i < l and _protocolsPat.match(bits[i]):
            match = _specialUrlPat.match(bits[i + 1])
            if match:
                url += bits[i] + match.group(1)
                i += 2
                trail = match.group(2)

        # The characters '<' and '>' (which were escaped by
        # removeHTMLtags()) should not be included in
        # URLs, per RFC 2396. Raw '<' / '>' can never be part of `url`
        # (_specialUrlPat excludes them), so it is the escaped forms that
        # are searched for; the URL is cut at the earliest one.
        # NOTE(review): confirm the entity spelling against removeHTMLtags().
        cuts = [p for p in (url.find(u'&lt;'), url.find(u'&gt;')) if p != -1]
        if cuts:
            pos = min(cuts)
            trail = url[pos:] + trail
            url = url[:pos]

        # Trailing punctuation belongs to the sentence, not the URL. A
        # closing paren is only stripped when the URL has no opening one.
        sep = u',;.:!?'
        if u'(' not in url:
            sep += u')'
        # rstrip() keeps the outer loop index `i` untouched.
        stripped = url.rstrip(sep)
        trail = url[len(stripped):] + trail
        url = stripped

        url = cleanURL(url)

        # NOTE(review): the anchor markup below was empty-string literals in
        # the reviewed source (apparently lost markup); restored as a plain
        # link whose label is the URL itself -- confirm against upstream.
        sb.append(u'<a href="')
        sb.append(url)
        sb.append(u'">')
        sb.append(url)
        sb.append(u'</a>')
        sb.append(trail)
    return ''.join(sb)


def urlencode(char):
    """Percent-encode a single character; a space becomes '+'.

    *char* may be a one-character string or a regex match object. The match
    form is what re.sub() passes when this function is used as the
    replacement callable; its whole matched text is encoded.
    """
    if hasattr(char, 'group'):
        char = char.group(0)
    num = ord(char)
    if num == 32:
        return '+'
    return "%%%02x" % num


# Characters that must never appear raw in a URL: brackets, angle brackets,
# double quote, and the ASCII control/space range. One character per match.
_controlCharsPat = re.compile(u'[\\]\\[<>"\\x00-\\x20\\x7F]', re.UNICODE)
# Group 1: scheme including the colon; group 2: optional "//host"; group 3: rest.
_hostnamePat = re.compile(u'^([^:]+:)(//[^/]+)?(.*)$', re.UNICODE)
# Whitespace plus code points that are stripped from the host part.
_stripPat = re.compile(u'\\s|\u00ad|\u1806|\u200b|\u2060|\ufeff|\u03f4|\u034f|\u180b|\u180c|\u180d|\u200c|\u200d|[\ufe00-\ufe0f]', re.UNICODE)


def cleanURL(url):
    """Normalise *url* for use as a link target and return it.

    Character references are decoded, unsafe characters are percent-encoded
    and ignorable code points are removed from the host part. A URL without
    a scheme is returned after the first two steps only.
    """
    # Normalize any HTML entities in input. They will be
    # re-escaped by makeExternalLink().
    url = decodeCharReferences(url)

    # Escape any control characters introduced by the above step
    url = _controlCharsPat.sub(urlencode, url)

    # Validate hostname portion
    match = _hostnamePat.match(url)
    if not match:
        return url
    protocol, host, rest = match.groups()
    if host is None:
        # No "//host" part (e.g. "http:///x"): nothing to strip.
        return url

    # Characters that will be ignored in IDNs.
    # http://tools.ietf.org/html/3454#section-3.1
    # Strip them before further processing so blacklists and such work.
    host = _stripPat.sub(u'', host)

    # @fixme: validate hostnames here

    return protocol + host + rest


_zomgPat = re.compile(u'^(:*)\\{\\|(.*)$', re.UNICODE)

def doTableStuff(text, state):
    t = text.split(u"\n")
    td = [] # Is currently a td tag open?
    ltd = [] # Was it TD or TH?
    tr = [] # Is currently a tr tag open?
    ltr = [] # tr attributes
    has_opened_tr = [] # Did this table open a