| 1 | #!/usr/pkg/bin/python2.5 |
|---|
| 2 | |
|---|
| 3 | # drupal2fullblog uses Html2Wiki here. |
|---|
| 4 | # find my code below |
|---|
| 5 | |
|---|
| 6 | # Copyright (C) 2006 Samuel Abels, http://debain.org |
|---|
| 7 | # |
|---|
| 8 | # This program is free software; you can redistribute it and/or modify |
|---|
| 9 | # it under the terms of the GNU General Public License version 2, as |
|---|
| 10 | # published by the Free Software Foundation. |
|---|
| 11 | # |
|---|
| 12 | # This program is distributed in the hope that it will be useful, |
|---|
| 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 15 | # GNU General Public License for more details. |
|---|
| 16 | # |
|---|
| 17 | # You should have received a copy of the GNU General Public License |
|---|
| 18 | # along with this program; if not, write to the Free Software |
|---|
| 19 | # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|---|
| 20 | |
|---|
| 21 | import HTMLParser, re, sys |
|---|
| 22 | |
|---|
class Cell:
    """A single table cell gathered by Html2Wiki while it parses a row.

    The values below are class-level defaults; Html2Wiki overrides them on
    the instance when the tag carries span attributes or when cell text is
    collected.
    """
    # Wiki text collected for this cell.
    data = ''
    # Number of rows / columns the cell covers.
    rowspan = colspan = 1
|---|
| 27 | |
|---|
class Html2Wiki(HTMLParser.HTMLParser):
    """Translate an HTML fragment into wiki markup.

    Usage: create an instance, pass the HTML to feed(), call close() and
    read the converted text from the ``wiki`` attribute.  Calling close()
    matters: markup produced by the last closing tag sits in ``buffer``
    until something flushes it, and close() is what does that at the end
    of the input.

    Markup produced:
      h1/h2/h3     text wrapped in '=', '==' or '===' plus a blank line
      ul/ol + li   lines starting with '* ' or '# '
      i, b, u      text wrapped in '/', '*' or '_'
      strike       text wrapped in '-'
      span         like u/b/i for class 'underline', 'bold' or 'italic'
      a            '[href text]'
      pre          text between '#Text' and '#End' lines
      table rows   a '#Row' or '#Heading' line followed by '|'-prefixed
                   cells, one '|' per spanned column
      br           a newline
    Tags not listed here are dropped; their text is kept.
    """

    # Tags whose start handler receives the attribute list.
    _ATTR_TAGS = ('th', 'td', 'a', 'span')
    # Tags whose start handler takes no argument.
    _PLAIN_TAGS = ('table', 'tr', 'h1', 'h2', 'h3', 'ul', 'ol', 'li',
                   'i', 'b', 'u', 'pre', 'strike')

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.wiki = ''          # finished output
        self.buffer = ''        # pending output, moved to wiki by __flush
        self.indent = 0         # indentation level used by __output
        self.linebreak = '\n'
        self.rows = []
        self.cells = []         # Cell objects of the row being parsed
        self.in_table = False
        self.in_td = False
        self.in_heading = False
        self.in_ul = False
        self.in_ol = False
        self.in_li = False
        self.in_a = False
        self.in_pre = False
        self.last_href = ''
        self.span_path = []     # closing markers of the open <span> tags

    def __output(self, text, linebreak = True):
        """Append text to the buffer, indented by two spaces per level.

        A line break follows unless linebreak is false.
        """
        self.buffer += (' ' * self.indent * 2)
        self.buffer += text
        if linebreak:
            self.buffer += self.linebreak

    def __flush(self):
        """Move the pending buffer into the finished wiki text."""
        self.wiki += self.buffer
        self.buffer = ''

    def __span_count(self, value):
        """Return a rowspan/colspan attribute as an int, or 1 if unusable."""
        try:
            return int(value)
        except (TypeError, ValueError):
            # Attribute without a value, or something like "2px".
            return 1

    def close(self):
        """Finish parsing and flush markup that is still pending."""
        HTMLParser.HTMLParser.close(self)
        self.__flush()

    def handle_starttag(self, tag, attrs):
        """Dispatch an opening tag to its start_<tag> handler, if any."""
        if tag == 'br':
            self.newline()
        elif tag in self._ATTR_TAGS:
            getattr(self, 'start_' + tag)(attrs)
        elif tag in self._PLAIN_TAGS:
            getattr(self, 'start_' + tag)()

    def handle_endtag(self, tag):
        """Dispatch a closing tag to its end_<tag> handler, if any."""
        if tag in self._ATTR_TAGS or tag in self._PLAIN_TAGS:
            getattr(self, 'end_' + tag)()

    def start_h1(self):
        self.buffer += '='

    def end_h1(self):
        self.buffer += '=\n\n'

    def start_h2(self):
        self.buffer += '=='

    def end_h2(self):
        self.buffer += '==\n\n'

    def start_h3(self):
        self.buffer += '==='

    def end_h3(self):
        self.buffer += '===\n\n'

    def start_ul(self):
        self.in_ul = True

    def end_ul(self):
        self.in_ul = False

    def start_ol(self):
        self.in_ol = True

    def end_ol(self):
        self.in_ol = False

    def start_li(self):
        """Open a list item; an ordered list wins if both kinds are open."""
        self.in_li = True
        if self.in_ol:
            self.buffer += '# '
        elif self.in_ul:
            self.buffer += '* '

    def end_li(self):
        self.in_li = False

    def start_i(self):
        self.buffer += '/'

    def end_i(self):
        self.buffer += '/'

    def start_b(self):
        self.buffer += '*'

    def end_b(self):
        self.buffer += '*'

    def start_u(self):
        self.buffer += '_'

    def end_u(self):
        self.buffer += '_'

    def start_a(self, attrs):
        """Open a link: emit '[' followed by the href (last one wins)."""
        self.in_a = True
        self.last_href = ''
        for key, value in attrs:
            if key == 'href':
                # The parser reports a valueless attribute as None.
                self.last_href = value or ''
        self.buffer += '[' + self.last_href

    def end_a(self):
        self.in_a = False
        self.buffer += ']'

    def start_pre(self):
        self.in_pre = True
        self.buffer += '#Text\n'

    def end_pre(self):
        self.in_pre = False
        self.buffer += '#End\n'

    def start_strike(self):
        self.buffer += '-'

    def end_strike(self):
        self.buffer += '-'

    def start_span(self, attrs):
        """Open a span, emitting a marker for the classes we understand.

        The marker (possibly empty) is remembered so that end_span can
        emit the matching closing marker.
        """
        cls = None
        for key, value in attrs:
            if key == 'class':
                cls = value
        if cls == 'underline':
            char = '_'
        elif cls == 'bold':
            char = '*'
        elif cls == 'italic':
            char = '/'
        else:
            char = ''
        self.buffer += char
        self.span_path.append(char)

    def end_span(self):
        """Close the innermost open span; a stray </span> is ignored."""
        if self.span_path:
            self.buffer += self.span_path.pop()

    def start_table(self):
        self.in_table = True

    def start_tr(self):
        pass

    def start_th(self, attrs):
        """Open a heading cell; marks the whole row as a heading row."""
        self.in_heading = True
        self.start_td(attrs)

    def start_td(self, attrs):
        """Open a cell and record its rowspan/colspan."""
        # Push out whatever is pending so the buffer holds only cell text.
        self.__flush()
        self.in_td = True
        cell = Cell()
        for key, value in attrs:
            if key == 'rowspan':
                cell.rowspan = self.__span_count(value)
            elif key == 'colspan':
                cell.colspan = self.__span_count(value)
        self.cells.append(cell)

    def handle_data(self, data):
        """Route text to the buffer according to the current context."""
        if not self.in_pre:
            data = data.replace('\n', '')
        if self.in_a:
            # Link text equal to the href would only repeat the target.
            if data == self.last_href:
                return
            self.buffer += ' '
        if self.in_li:
            self.buffer += data.strip() + '\n'
            if self.in_ul or self.in_ol:
                self.__flush()
        elif self.in_td:
            # Cell text stays in the buffer until end_td collects it.
            self.buffer += data
        elif not self.in_table:
            self.buffer += data
            self.__flush()
        # Text inside a table but outside any cell is dropped.

    def end_td(self):
        """Close a cell, moving the buffered text into it."""
        if self.cells:
            self.cells[-1].data += self.buffer.strip()
            self.buffer = ''
        # else: stray </td> with no open cell; leave the buffer alone.
        self.in_td = False

    def end_th(self):
        self.end_td()

    def end_tr(self):
        """Emit the collected cells of a row.

        The cells go on a single line when that line is at most 80
        characters long, otherwise one cell per line.
        """
        if not self.cells:
            return
        if self.in_heading:
            self.__output('#Heading')
            self.in_heading = False
        else:
            self.__output('#Row')
        self.indent += 1
        parts = [('|' * cell.colspan) + ' ' + cell.data.strip()
                 for cell in self.cells]
        line = ' '.join(parts)
        if len(line) <= 80:
            self.__output(line)
        else:
            for part in parts:
                self.__output(part)
        self.cells = []
        self.indent -= 1
        self.__flush()

    def end_table(self):
        self.in_table = False
        self.__flush()

    def newline(self):
        self.buffer += '\n'
|---|
| 272 | |
|---|
| 273 | |
|---|
# drupal2fullblog
# Copyright 2009 Roy Marples <roy@marples.name>
# BSD-2 licensed
# I'm crap at python, but this works for me :)
#
# Copies the Drupal blog posts and their comments into the fullblog_posts
# and fullblog_comments tables of the Trac database, converting the HTML
# to wiki markup with Html2Wiki.  Both target tables are emptied first and
# everything is committed in one go at the end.

import re, time, datetime
from pyPgSQL import PgSQL

# Tunables
drupal = PgSQL.connect(database='drupal')
trac = PgSQL.connect(database='trac.blog')
home = "http://roy.marples.name"
node = home + "/node/"
blog = home + "/projects/blog"

dc = drupal.cursor()
tc = trac.cursor()

# One relative Drupal node link; group 1 is whatever follows /node/.
node_link = re.compile(r'<a href="/node/([^"]*)"')


def blog_path(created, title):
    """Return the fullblog name of a post.

    The name is the creation date as YYYY/MM/DD/ followed by the first
    word of the lowercased title.  The whole title is used when it holds
    no space or starts with one.
    """
    path = datetime.datetime.fromtimestamp(created).strftime("%Y/%m/%d/")
    slug = title.lower()
    sp = slug.find(" ")
    if sp >= 1:
        slug = slug[0:sp]
    return path + slug


def node_to_blog(match):
    """Rewrite one /node/<nid> link to the /blog/ path of that post.

    The link is returned unchanged when <nid> is not a plain number or no
    such node exists, so a bad link cannot abort the migration.
    """
    nid = match.group(1)
    if nid.isdigit():
        dc.execute("SELECT title, created FROM node WHERE nid=%s", (nid,))
        nr = dc.fetchone()
        if nr is not None:
            return "<a href=\"/blog/" + blog_path(nr["created"], nr["title"]) + "\""
    return match.group(0)


def html_to_wiki(html):
    """Fix up the links in a post or comment and convert it to wiki text."""
    # Make relative for the below code
    html = html.replace("<a href=\"" + node, "<a href=\"/node/")

    # Convert /node/n links to blog links.  Each link is handled on its
    # own so that /node/1 cannot clobber the start of /node/12.
    html = node_link.sub(node_to_blog, html)

    # trac does not like relative links
    html = html.replace("<a href=\"/blog/", "<a href=\"" + blog + "/blog/")
    # The pattern swallows the leading slash, so put it back after home.
    html = html.replace("<a href=\"/", "<a href=\"" + home + "/")

    # Code tags
    html = html.replace("<code>", "{{{\r\n")
    html = html.replace("</code>", "\r\n}}}")

    # Special case stuff
    html = html.replace("NetworkManager", "!NetworkManager")

    # Markup
    parser = Html2Wiki()
    parser.feed(html)
    # Let the parser process whatever input it is still holding back.
    parser.close()
    return parser.wiki


tc.execute("DELETE FROM fullblog_posts")
dc.execute("SELECT users.name AS author, node.title, body, node.created, node.changed, term_data.name AS category"
           " FROM node"
           " JOIN node_revisions ON node_revisions.nid=node.nid"
           " JOIN term_node ON term_node.nid=node.nid"
           " JOIN term_data ON term_data.tid=term_node.tid"
           " JOIN users ON users.uid=node.uid"
           " WHERE type='blog'"
           " ORDER BY created")
r = dc.fetchall()
for author, title, body, created, changed, category in r:
    # Only the post name is lowercased; the title keeps its original case.
    tc.execute("INSERT INTO fullblog_posts (name, version, title, body, publish_time, version_time, version_comment, version_author, author, categories)"
               " VALUES(%s, 1, %s, %s, %s, %s, '', %s, %s, %s)",
               (blog_path(created, title), title, html_to_wiki(body), created, changed, author, author, category))

tc.execute("DELETE FROM fullblog_comments")
dc.execute("SELECT name, mail, node.title, node.created, comments.comment, timestamp"
           " FROM comments"
           " JOIN node ON node.nid=comments.nid"
           " WHERE type='blog'"
           " ORDER BY timestamp")
r = dc.fetchall()
for name, mail, title, created, comment, timestamp in r:
    # Name of the post this comment belongs to
    npath = blog_path(created, title)

    # Author is "name <mail>", or whichever of the two is present
    if mail:
        if name:
            name += " <" + mail + ">"
        else:
            name = mail

    # Comments are numbered per post, in timestamp order
    tc.execute("SELECT COUNT(*) FROM fullblog_comments WHERE name=%s", (npath,))
    nr = tc.fetchone()
    tc.execute("INSERT INTO fullblog_comments (name, number, comment, author, time)"
               " VALUES(%s, %s, %s, %s, %s)",
               (npath, nr[0] + 1, html_to_wiki(comment), name, timestamp))

trac.commit()
|---|