| 1 | # -*- coding: utf-8 -*- |
|---|
| 2 | """ |
|---|
| 3 | Copyright (c) 2010 by Martin Scharrer <martin@scharrer-online.de> |
|---|
| 4 | """ |
|---|
| 5 | |
|---|
| 6 | import StringIO |
|---|
| 7 | from gzip import GzipFile |
|---|
| 8 | |
|---|
| 9 | from trac.config import Option, ListOption, BoolOption, IntOption |
|---|
| 10 | from trac.core import Component, implements |
|---|
| 11 | from trac.util.datefmt import format_datetime |
|---|
| 12 | from trac.util.html import html as tag |
|---|
| 13 | from trac.web.api import IRequestHandler, RequestDone |
|---|
| 14 | |
|---|
| 15 | |
|---|
class GoogleSitemapPlugin(Component):
    """Generates a Google compatible sitemap with all wiki pages and/or
    tickets.

    The sitemap can be compressed with the `compress_sitemap` option.
    In this case the XML file can be sent compressed in two different
    ways:
     * If the XML file (.xml) is requested it will be sent with a
       gzip `content-encoding` if the requesting HTTP client supports
       it, i.e. sent an `accept-encoding` header which either includes
       '`gzip`' or is identical to '`*`'.
     * If a gzipped XML file is requested (.xml.gz) directly the
       compressed sitemap will be sent as gzip file (mime-type
       `application/x-gzip`). This is also done if the `sitemappath`
       ends in '`.gz`'.
    """
    implements(IRequestHandler)

    # Request path (without leading slash) under which the sitemap is
    # served; match_request() compares '/' + sitemappath to req.path_info.
    sitemappath = Option(
        'googlesitemap', 'sitemappath', 'sitemap.xml',
        """Path of sitemap relative to Trac main URL (default: "sitemap.xml").
        If this path ends in `.gz` the sidemap will automatically be
        compressed.
        """)

    # Wiki page versions authored by one of these users are left out of the
    # wiki query (used in its `author NOT IN (...)` condition).
    ignoreusers = ListOption(
        'googlesitemap', 'ignore_users', 'trac',
        doc="""Do not list wiki pages from this users (default: "trac")""")

    # Wiki page names/patterns to exclude; `*` and `?` are treated as
    # wildcards by _get_sql_exclude().
    ignorewikis = ListOption(
        'googlesitemap', 'ignore_wikis', '',
        doc="List of wiki pages to not be included in sitemap")

    # Realms to include; process_request() checks for 'wiki' and 'ticket'.
    listrealms = ListOption(
        'googlesitemap', 'list_realms', 'wiki,ticket',
        doc="""Which realms should be listed. Supported are "wiki"
        and "ticket".
        """)

    # Enables the additional '<sitemappath>.gz' URL and gzip
    # content-encoding for clients that announce support for it.
    compress_sitemap = BoolOption(
        'googlesitemap', 'compress_sitemap', False,
        doc="Send sitemap compressed. Useful for larger sitemaps.")

    # Passed as `compresslevel` to GzipFile when the sitemap is compressed.
    compression_level = IntOption(
        'googlesitemap', 'compression_level', 6,
        doc="Compression level. Value range: 1 (low) to 9 (high). Default: 6")

    # When non-empty, emitted as <changefreq> for every wiki page entry
    # (ticket entries do not get one).
    changefreq = Option(
        'googlesitemap', 'change_frequency', '',
        """Change frequency of URLs. Valid values: always, hourly, daily,
        weekly, monthly, yearly, never. Disabled if empty.
        """)

    # Attributes set on the root <urlset> element of the generated document.
    _urlset_attrs = {
        'xmlns': "http://www.sitemaps.org/schemas/sitemap/0.9",
        'xmlns:xsi': "http://www.w3.org/2001/XMLSchema-instance",
        'xsi:schemaLocation': "http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
    }
|---|
| 74 | |
|---|
| 75 | def _get_sql_exclude(self, list): |
|---|
| 76 | import re |
|---|
| 77 | if not list: |
|---|
| 78 | return '' |
|---|
| 79 | star = re.compile(r'(?<!\\)\*') |
|---|
| 80 | ques = re.compile(r'(?<!\\)\?') |
|---|
| 81 | sql_excludelist = [] |
|---|
| 82 | sql_excludepattern = '' |
|---|
| 83 | for pattern in list: |
|---|
| 84 | pattern = pattern.replace('%', r'\%').replace('_', r'\_') |
|---|
| 85 | npattern = star.sub('%', pattern) |
|---|
| 86 | npattern = ques.sub('_', npattern) |
|---|
| 87 | if pattern == npattern: |
|---|
| 88 | sql_excludelist.append(pattern) |
|---|
| 89 | else: |
|---|
| 90 | sql_excludepattern = sql_excludepattern + \ |
|---|
| 91 | " AND name NOT LIKE '%s' " % npattern |
|---|
| 92 | sql_excludename = " AND name NOT in ('%s') " \ |
|---|
| 93 | % "','".join(sql_excludelist) |
|---|
| 94 | return sql_excludename + sql_excludepattern |
|---|
| 95 | |
|---|
| 96 | # IRequestHandler methods |
|---|
| 97 | |
|---|
def match_request(self, req):
    """Decide whether `req` asks for the sitemap.

    The request matches when its path equals `/<sitemappath>`, or, if
    `compress_sitemap` is enabled, `/<sitemappath>.gz`.
    """
    requested = req.path_info
    sitemap_url = '/' + self.sitemappath
    if requested == sitemap_url:
        return True
    # The explicit ".gz" variant is only offered when compression is on.
    return self.compress_sitemap and requested == sitemap_url + '.gz'
|---|
| 102 | |
|---|
| 103 | def _fixtime(self, timestring): |
|---|
| 104 | """Ensure that the timestring has a colon between hours and minute""" |
|---|
| 105 | if not timestring.endswith('Z') and timestring[-3] != ':': |
|---|
| 106 | return timestring[:-2] + ':' + timestring[-2:] |
|---|
| 107 | else: |
|---|
| 108 | return timestring |
|---|
| 109 | |
|---|
def process_request(self, req):
    """Generate the sitemap and send it to the client.

    Depending on `list_realms`, one `<url>` entry is created for the
    latest version of every wiki page (honouring `ignore_users` and
    `ignore_wikis`) and for every ticket. The resulting `<urlset>`
    document is sent gzip-compressed when the `.gz` URL was requested,
    or when `compress_sitemap` is enabled and the client accepts gzip;
    otherwise it is sent as plain `text/xml`.

    :param req: the Trac request object
    :return: None once the sitemap has been sent
    :raises RequestDone: after a 500 response was sent because building
                         or sending the sitemap failed
    """
    try:
        db = self.env.get_db_cnx()
        cursor = db.cursor()
        urls = []

        if 'wiki' in self.listrealms:
            sql_exclude = self._get_sql_exclude(self.ignorewikis)
            # Double single quotes so a user name cannot break out of
            # the SQL string literal.
            ignored_authors = "','".join(
                user.replace("'", "''") for user in self.ignoreusers)

            sql = ("SELECT name,time,version FROM wiki AS w1 WHERE "
                   "author NOT IN ('%s') " % ignored_authors
                   + sql_exclude +
                   "AND version=(SELECT MAX(version) FROM wiki AS w2 "
                   "WHERE w1.name=w2.name) ORDER BY name")
            # self.log.debug(sql)
            cursor.execute(sql)
            for name, mtime, _version in cursor:
                children = [
                    tag.loc(self.env.abs_href.wiki(name)),
                    tag.lastmod(self._fixtime(
                        format_datetime(mtime, 'iso8601'))),
                ]
                if self.changefreq:
                    children.append(tag.changefreq(self.changefreq))
                urls.append(tag.url(*children))

        if 'ticket' in self.listrealms:
            cursor.execute(
                "SELECT id,changetime FROM ticket"
            )
            # Build absolute URLs the same way as for wiki pages.
            # Joining req.base_url and req.href would repeat the base
            # path when Trac is not mounted at the server root.
            urls.extend(
                tag.url(
                    tag.loc(self.env.abs_href.ticket(ticketid)),
                    tag.lastmod(self._fixtime(
                        format_datetime(changetime, 'iso8601')))
                ) for ticketid, changetime in cursor)

        xml = tag.urlset(urls, **self._urlset_attrs)
        content = xml.generate().render('xml', 'utf-8')

        accept_enc = req.get_header('accept-encoding')
        accept_gzip = bool(accept_enc) and (
            'gzip' in accept_enc or accept_enc == '*')
        # "compressed" means the client asked for a gzip *file*, as
        # opposed to a gzip transfer encoding of the XML document.
        compressed = self.sitemappath.endswith('.gz') or \
            req.path_info == '/' + self.sitemappath + '.gz'
        if compressed or (self.compress_sitemap and accept_gzip):
            # Keep a misconfigured level inside the documented 1..9 range.
            level = min(9, max(1, self.compression_level))
            gzbuf = StringIO.StringIO()
            gzfile = GzipFile(mode='wb', fileobj=gzbuf,
                              compresslevel=level)
            try:
                gzfile.write(content)
            finally:
                # Closing flushes the gzip trailer into the buffer.
                gzfile.close()
            zcontent = gzbuf.getvalue()
            gzbuf.close()

            req.send_response(200)
            req.send_header('Cache-control', 'must-revalidate')
            if compressed:
                req.send_header('Content-Type', 'application/x-gzip')
            else:
                req.send_header('Content-Type', 'text/xml;charset=utf-8')
                req.send_header('Content-Encoding', 'gzip')
            req.send_header('Content-Length', len(zcontent))
            req.end_headers()

            if req.method != 'HEAD':
                req.write(zcontent)
            raise RequestDone
        else:
            req.send(content, content_type='text/xml', status=200)

    except RequestDone:
        # The response has been sent completely; nothing left to do.
        pass
    except Exception as e:
        self.log.error(e)
        req.send_response(500)
        req.end_headers()
        raise RequestDone
|---|