import libxslt, urllib2, libxml2, sys, SocketServer, re
from os import makedirs
# --- Trac server locations --------------------------------------------------
# Root URL of the Trac server; the relative URLs below are appended to it.
base_url = "http://trac.server.com/"
# Location of the project's wiki pages, relative to base_url.
project_wiki_url = "myproject/wiki/"
# Location of the project's wiki attachments. The length of this prefix is
# cut off the front of each image URL when the local file name is derived.
project_attachments_url = "myproject/attachment/wiki/"
# 'Table of contents' wiki page, which contains the links to each chapter.
contents_url = "manuals/usermanual"

# --- Query-string suffixes --------------------------------------------------
# Appended to a wiki page URL when the page is downloaded for DocBook parsing.
docbook_url_suffix = "?format=docbook"
# Raw-format suffix. Its length is cut off the end of each image URL when the
# local file name is derived.
raw_url_suffix = "?format=raw"

# --- Local output paths -----------------------------------------------------
# Prefix of the local paths under which downloaded images are saved.
images_base_path = "figure/"
# NOTE(review): presumably the prefix for saved chapter files; it is not used
# in the part of the script visible here - confirm against the rest.
chapters_base_path = "chapter/"

# --- Behaviour switches -----------------------------------------------------
# When True, the ".svg" variant of every image URL is tried first and the
# original URL is only used if that request fails with an HTTP error.
useSVGsIfAvailable = False
# Download the 'table of contents' wiki page with the DocBook suffix, parse
# it, and collect the url attribute of every <ulink> element; each of these
# is later treated as the link to one chapter.
contentsDocbook_stream = urllib2.urlopen(
    "".join((base_url, project_wiki_url, contents_url, docbook_url_suffix))
).read()
# NOTE(review): parseDoc is presumed to always receive UTF-8 here - confirm.
contentsDocbook_doc = libxml2.parseDoc(contentsDocbook_stream)
contentsDocbook_xpc = contentsDocbook_doc.xpathNewContext()
nodes = contentsDocbook_xpc.xpathEval("//ulink/@url")
if len(nodes) == 0:
print "no results"
sys.exit(1)
else:
getOriginalUrl_re = re.compile("""""")
for result in nodes:
lastSlashIdx = str(result).rfind("/")
chapter_url = base_url + str(result)[7:-1]
chapter_slug = str(result)[lastSlashIdx+1:-1]
try:
chapterDocbook_stream = urllib2.urlopen(chapter_url + docbook_url_suffix).read()
except urllib2.HTTPError:
continue
#find image urls, change them, and determine the new paths
images_original_urls = getOriginalUrl_re.findall(chapterDocbook_stream) #myproject/attachment/wiki/manuals/usermanual/section1/untitled.png?format=raw
images_modified_urls = [url.replace(".png", ".svg") for url in images_original_urls] #myproject/attachment/wiki/manuals/usermanual/section1/untitled.svg?format=raw
images_newpath_pngfilenames = [images_base_path + url[len(project_attachments_url):-len(raw_url_suffix)] for url in images_original_urls] #figures/manuals/usermanual/untitled.png
images_newpath_svgfilenames = [images_base_path + url[len(project_attachments_url):-len(raw_url_suffix)] for url in images_modified_urls] #figures/manuals/usermanual/untitled.svg
images_newpath_filenames = []
#save images
for i in range(len(images_modified_urls)):
image_stream = None
if useSVGsIfAvailable:
try:
image_stream = urllib2.urlopen(base_url + images_modified_urls[i]).read()
images_newpath_filenames.append(images_newpath_svgfilenames[i])
except urllib2.HTTPError:
print "Could not retrieve image resource: " + images_modified_urls[i]
if image_stream==None:
try:
image_stream = urllib2.urlopen(base_url + images_original_urls[i]).read()
images_newpath_filenames.append(images_newpath_pngfilenames[i])
except urllib2.HTTPError:
print "Could not retrieve image resource: " + images_original_urls[i]
sys.exit(1)
dirEndIdx = images_newpath_filenames[i].rfind("/")+1
try:
makedirs(images_newpath_filenames[i][0:dirEndIdx])
except OSError, x:
#[Errno 17] File exists:
pass
image_file = file(images_newpath_filenames[i], "wb")
image_file.write(image_stream)
image_file.close()
#save docbook files, changing them to have the right image paths, and unique ids
for i in range(len(images_original_urls)):
chapterDocbook_stream = chapterDocbook_stream.replace("/" + images_original_urls[i], images_newpath_filenames[i])
chapterDocbook_stream = chapterDocbook_stream.replace("