Generating SeeAlso Indexes for the Python Library Reference

December 10, 2005 | Fredrik Lundh

Here’s a simple script that uses the global module index (dead link) to generate a seealso index for the Python library reference.

Note that the script fetches every reference page listed in the index (one request per module), so you shouldn’t run it more often than needed. If you just want a sample file to play with, you can use this copy.


Required components (elementtree and elementtidy) can be found at the downloads site.

import os, re, urllib, urlparse

from elementtidy.TidyHTMLTreeBuilder import parse
from elementtree.ElementTree import dump

# Address of the global module index page.  It is opened with
# urllib.urlopen() further down and also serves as the base against which
# the links found in the index are resolved.
# NOTE(review): the value is empty here -- presumably the original URL was
# lost; set it to the module index URL before running the script.
URI = ""

# helpers

class NS:
    """Namespace helper that builds qualified tag names on attribute access.

    ``NS("{uri}").tag`` evaluates to the string ``"{uri}tag"``: the prefix
    given to the constructor concatenated with the attribute name.
    """

    def __init__(self, uri):
        # prefix that is put in front of every requested name
        self.uri = uri

    def __getattr__(self, name):
        """Return the stored prefix followed by *name*.

        Raises AttributeError for special (double-underscore) names and for
        ``uri`` itself.
        """
        # __getattr__ only runs when normal lookup has failed.  Protocol
        # probes such as hasattr(obj, "__setstate__") (used by copy and
        # pickle) must see a missing attribute rather than a string, and a
        # missing "uri" must not send self.uri back into this method forever.
        if name == "uri" or (name.startswith("__") and name.endswith("__")):
            raise AttributeError(name)
        return self.uri + name

# elementtidy delivers the page as XHTML, so every tag used in a search
# path must carry the XHTML namespace; an empty "{}" prefix matches no
# element at all.
XHTML = NS("{http://www.w3.org/1999/xhtml}")

def innertext(elem):
    """Return all text contained in *elem*, in document order.

    The result is the element's own text followed, for each child, by the
    child's inner text and the child's tail.  The tail of *elem* itself is
    not included, since that text lies outside the element.
    """
    parts = [elem.text or ""]
    # Iterating an element yields its direct children; recursing keeps the
    # text in document order and counts the element's own text only once.
    for child in elem:
        parts.append(innertext(child))
        if child.tail:
            parts.append(child.tail)
    return "".join(parts)

def gettitle(href, default):
    """Return the title of the page at *href*, or *default* if none is found.

    Only the first 1024 bytes of the page are read.  The title is taken
    from a ``<title>`` element whose content starts with a run of digits,
    whitespace and dots; that leading run is dropped and the remainder of
    the title is returned.  If no such element is found in the bytes read,
    *default* is returned unchanged.
    """
    stream = urllib.urlopen(href)
    try:
        s = stream.read(1024)
    finally:
        # release the connection even if the read fails
        stream.close()
    m = re.search(r"<title>[\d\s.]+(.*)</title>", s)
    if m:
        return m.group(1)
    return default

# get going

# Fetch the module index page and parse it into an element tree.
index_page = urllib.urlopen(URI)
try:
    tree = parse(index_page)
finally:
    index_page.close()

# the index is in the second table on the page
tables = tree.findall(".//" + XHTML.table)
if len(tables) < 2:
    raise SystemExit("no module index table found at %r" % URI)

# Single-argument print(...) behaves exactly like the print statement under
# Python 2, which the rest of this script requires.
print("<seealso target-domain='python'>")

# The dc: prefix must be bound to a non-empty namespace URI, otherwise the
# output is not namespace-well-formed XML.
# NOTE(review): dc:identifier is emitted empty, as in the original; fill in
# the identifier of the reference if it is known.
print(" <info xmlns:dc='http://purl.org/dc/elements/1.1/'>")
print("  <dc:title>Python Library Reference</dc:title>")
print("  <dc:identifier></dc:identifier>")
print("  <dc:language>en</dc:language>")
print(" </info>")

# One <item> per link in the index table.
# NOTE(review): values are interpolated without XML escaping; a link or
# title containing "'", "<" or "&" would produce malformed output.
for a in tables[1].findall(".//" + XHTML.a):
    link = a.get("href")
    text = innertext(a)
    words = text.split()
    if not link or not words:
        # anchors without a destination or without any label text cannot
        # be turned into an item
        continue
    href = urlparse.urljoin(URI, link)
    print(" <item href='%s'>" % href)
    # the link text is the fallback title if the page title cannot be read
    print("  <title>%s</title>" % gettitle(href, text))
    # the first word of the link text is the lookup target
    print("  <target>%s</target>" % words[0])
    print(" </item>")

print("</seealso>")

A Django site, rendered by a Django application and hosted by WebFaction.