#!/usr/local/bin/io

// Scrapes the Project Gutenberg author index (one page per letter, a-z) and
// writes one tab-separated record per book to standard output:
//
//     author <TAB> book title <TAB> URL of the first plain-text (.txt) link
//
// ugly, but it works

// Bare reference to the SGML addon. Presumably this makes Io load the addon so
// that asSGML / elementsWithName used below are available -- TODO confirm.
SGML

Gutenberg := Object clone do(
	// Entry point: processes the author index page of every letter a-z in turn.
	run := method(
		"abcdefghijklmnopqrstuvwxyz" foreach(letter, authorsAt(letter asCharacter))
	)

	// Fetches the author index page for `letter` (a one-character string) and
	// prints a record for every book link on it that has a plain-text download.
	//
	// The page is read as a flat list of <a> elements inside the
	// div class="pgdbbyauthor" container: anchors carrying a name attribute
	// that begins with "a" introduce an author, anchors carrying an href
	// beginning with "/etext" are that author's books.
	authorsAt := method(letter,
		url := "http://www.gutenberg.org/browse/authors/" .. letter
		// The single-argument select evaluates its message on each element,
		// so `attributes` here is the attributes of each div.
		URL with(url) fetch asSGML elementsWithName("div") select(attributes at("class") == "pgdbbyauthor") first elementsWithName("a") foreach(t,
			// keep track of which author we're on
			// (?beginsWithSeq is a safe send: anchors without a name attribute
			// yield nil and are simply not treated as author markers)
			if(t attributes at("name") ?beginsWithSeq("a"),
				author := t subitems first text strip

				// remove the date from the author name: when the last
				// comma-separated part does not start with a letter, drop it
				authorParts := author split(",")
				if(authorParts last strip at(0) isLetter == false,
					authorParts pop
					author = authorParts join(",")
				)
			)

			// print a book record if found
			if(t attributes at("href"),
				book := t subitems first text strip
				bookUrl := t attributes at("href")
				if(bookUrl and bookUrl beginsWithSeq("/etext"),
					contentUrl := contentUrlForBookUrl("http://www.gutenberg.org" .. bookUrl)
					// Books without a plain-text link produce no output line.
					if(contentUrl,
						writeln(author, "\t", book, "\t", contentUrl asMutable strip)
						// Flush after every record so output appears as it is found.
						File standardOutput flush
					)
				)
			)
		)
	)

	// Scrapes the book page at `bookUrl` for the first plain text (.txt) book
	// link and returns it as an absolute URL, or nil when the page has no such
	// link or anything raised while fetching/parsing it.
	contentUrlForBookUrl := method(bookUrl,
		contentUrl := nil
		// Best effort: an exception inside try is swallowed and leaves
		// contentUrl nil, which the caller treats as "no text version".
		try(
			links := URL with(bookUrl) fetch asSGML elementsWithName("a") map(attributes at("href"))

			// Anchors without an href map to nil. The safe send (?) skips them;
			// a plain endsWithSeq would raise on nil, abort the whole lookup
			// inside try, and silently lose the book.
			textLinks := links select(h, h ?endsWithSeq(".txt"))

			// Only site-relative links need the host prefix; an href that is
			// already absolute is returned unchanged.
			contentUrl = textLinks map(h,
				if(h beginsWithSeq("http"), h, "http://www.gutenberg.org" .. h)
			) first
		)
		contentUrl
	)
) run // run is sent directly to the newly built object, starting the scrape