LJ Friendsmap Code
as of 9 July 2007
Python Data Collector
#LJ Friends Map Python module
#(c) 2005 Jon Evans
#hereby released under terms of the GNU General Public License, version 2
import urllib
import os.path
import sys
import time
import xml.dom.minidom
#Constants
# Text prepended to every XML document this module writes.
# NOTE(review): in this copy the literal is only a newline. addXMLHeaderIfNecessary
# looks for the word "encoding" in a file's first line, so the original value
# was presumably an XML declaration with an encoding attribute that was
# stripped when this listing was extracted -- confirm against the real source.
xmlHeaderString = "\n"
#Entry-point methods
def generate(username, forceRegenerate=0):
    """Convenience alias for generateFriendsMapXML (saves typing).

    Both arguments are passed through unchanged and the callee's return
    value is handed back to the caller.
    """
    result = generateFriendsMapXML(username, forceRegenerate)
    return result
def generateFriendsMapXML(username, forceRegenerate=0):
    """Build the mappable XML file for one user and all of that user's relations.

    username        -- journal name; must be non-empty and at most 256 characters.
    forceRegenerate -- non-zero forces cached location data to be refreshed.

    Returns whatever writeMappableXML returns.
    Raises UsernameError for an empty or over-long username. Any exception
    raised while collecting data is logged through logError and re-raised.
    """
    try:
        # check input
        if len(username) == 0:
            raise UsernameError("Empty string passed as username")
        if len(username) > 256:
            raise UsernameError("Excessive-length username "+username+".")
        rootUser = LJUser(username)
        rootUser.findLocation(forceRegenerate)
        friends = getUserRelations(username)
        for friend in friends:
            # NOTE(review): in this copy the literal was split across two
            # lines (probably an HTML line-break tag that was stripped); it
            # is reproduced here as a newline -- confirm the original text.
            print("\nParsing "+friend.name)
            friend.findLocation(forceRegenerate)
        filename = writeMappableXML(rootUser, friends)
    except:
        # Deliberately catches everything: the error is logged and then
        # re-raised unchanged, so nothing is swallowed here.
        logError(username, sys.exc_info()[0])
        raise
    return filename
#Heavy resource hog, to be used with caution
def generateFriendsMapXMLForAllFriendsOf(username):
    """Run generateFriendsMapXML once for every relation of username.

    Heavy resource hog: each relation triggers a full collection run of its
    own. Any failure is logged through logError and then re-raised.
    """
    try:
        for relation in getUserRelations(username):
            generateFriendsMapXML(relation.name)
    except:
        logError("friendsfriends error for "+username, sys.exc_info()[0])
        raise
#Classes
#Can't seem to find FancyURLopener, alas.
class LJScrapeURLOpener(urllib.FancyURLopener):
    """URL opener that identifies this scraper to the sites it fetches from."""

    def __init__(self, *args):
        # version is the user-agent string of a urllib opener; it is set
        # before the base initialiser runs so that the base class sees it.
        self.version = "rezendi.zigamorph.net/lj-friendsmap.htm: jonemevans@yahoo.com"
        # Direct call instead of the deprecated apply() builtin.
        urllib.FancyURLopener.__init__(self, *args)
class LJLocation:
    """A named place together with its latitude/longitude, cached as XML.

    Attributes:
        locationName -- place name, with "&" and "/" replaced by "|".
        latitude     -- latitude string, or "unknown".
        longitude    -- longitude string, or "unknown".
        asof         -- "as-of" attribute read from the cache file ("" if none).
    """

    def __init__(self, locname):
        # The name is later used to build a file name and a query string.
        self.locationName = locname.replace("&", "|").replace("/", "|")
        self.latitude = "unknown"
        self.longitude = "unknown"
        self.asof = ""

    def __str__(self):
        """Return "name: latitude: longitude: asof"; never raises."""
        output = ""
        try:
            output = output+str(self.locationName)
            output = output+": "+str(self.latitude)
            output = output+": "+str(self.longitude)
            output = output+": "+str(self.asof)
        except Exception:
            # Keep whatever was built so far and append the error type.
            output = output+"---Error printing: "+str(sys.exc_info()[0])
        return output

    #XML methods
    def getXMLPath(self):
        """Return the cache file path for this location.

        The file lives in "lj-location-xml" when that directory exists,
        otherwise the bare file name is returned.
        """
        xmlFileName = self.locationName+"-location.xml"
        if os.path.isdir("lj-location-xml"):
            xmlFileName = os.path.normpath("lj-location-xml/"+xmlFileName)
        return xmlFileName

    def toXML(self):
        """Return this location serialised for the cache / mappable file.

        NOTE(review): every literal below is empty in this copy. loadFromFile
        reads "location" (with an "as-of" attribute), "latitude" and
        "longitude" elements, so the element tags were presumably stripped
        when this listing was extracted -- restore them from the real source.
        """
        xml = ""
        xml = xml+""+self.locationName+""
        xml = xml+""+self.latitude+""
        xml = xml+""+self.longitude+""
        xml = xml+""
        return xml

    def writeXML(self):
        """Write the cache file; failures are logged, not raised."""
        try:
            xmlFileName = self.getXMLPath()
            xmlToWrite = xmlHeaderString+self.toXML()
            xmlFile = open(xmlFileName, "w")
            try:
                xmlFile.write(xmlToWrite.encode("utf-8"))
            finally:
                # Close even when the write fails, so the handle is not leaked.
                xmlFile.close()
        except Exception:
            logError("Error writing XML for "+self.locationName, sys.exc_info()[0])

    #Application methods
    def isLatLongUnknown(self):
        """Return 1 if either coordinate is still "unknown", else 0."""
        if self.latitude == "unknown" or self.longitude == "unknown":
            return 1
        return 0

    def findLatitudeLongitude(self, forceRegenerate=0):
        """Fill in latitude/longitude from the cache or from the geocoders.

        The cache file is loaded when present. If forceRegenerate is non-zero
        or the cached data is stale/invalid, Google is queried first and
        Multimap only if the coordinates are still unknown; lookup errors are
        logged, and the cache file is then rewritten.
        """
        #First of all, see if location data is locally cached
        xmlFileName = self.getXMLPath().encode("utf-8")
        if os.path.exists(xmlFileName):
            self.loadFromFile(xmlFileName)
        if forceRegenerate != 0 or isStaleOrInvalidData(self.asof, "latitude-longitude"):
            try:
                self.loadFromGoogle()
                if self.isLatLongUnknown():
                    self.loadFromMultimap()
            except Exception:
                logError("Error finding latitude/longitude for "+self.locationName, sys.exc_info()[0])
            # NOTE(review): the listing has lost its indentation; writing the
            # cache only after a refresh mirrors LJUser.findLocation -- confirm.
            self.writeXML()

    def loadFromFile(self, xmlFileName):
        """Load asof, latitude and longitude from a cache file.

        Errors are logged and leave the attributes read so far in place.
        """
        try:
            addXMLHeaderIfNecessary(xmlFileName)
            dom = xml.dom.minidom.parse(xmlFileName)
            locations = dom.getElementsByTagName("location")
            self.asof = locations[0].getAttribute("as-of")
            latitudes = dom.getElementsByTagName("latitude")
            self.latitude = latitudes[0].firstChild.nodeValue
            longitudes = dom.getElementsByTagName("longitude")
            self.longitude = longitudes[0].firstChild.nodeValue
            dom.unlink()
        except Exception:
            logError("Error loading lat-long from file for "+self.locationName, sys.exc_info()[0])

    def loadFromGoogle(self):
        """Scrape latitude/longitude for this location from Google Maps.

        Raises GeocodeError when the expected markers are not in the page.
        """
        queryString = self.locationName.replace(" ", "+")
        #www.google.com
        latLongPageURL = "http://72.14.207.99/maps?q="+queryString
        latLongPageSource = getHTMLPage(latLongPageURL)
        # NOTE(review): this copy has lost several lines here. The head-marker
        # literal is empty, the head-marker search is gone and markerFootText
        # is never assigned, so the find() below fails with NameError (the
        # caller logs it). Restore the markers from the real source; they are
        # not guessed at here.
        markerHeadText = ""
        markerFootLocation = latLongPageSource.find(markerFootText)
        if markerFootLocation == -1: #highly unlikely this will ever happen
            raise GeocodeError("Error finding lat-long in "+latLongPageSource)
        latLongPageSource = latLongPageSource[:markerFootLocation]
        # Split hack: the coordinates are the 2nd and 4th quote-delimited fields.
        latLongList = latLongPageSource.split("\"")
        if len(latLongList) < 4:
            raise GeocodeError("Unexpected latitude-longitude text "+str(latLongList))
        self.latitude = latLongList[1]
        self.longitude = latLongList[3]

    def _findQueryValue(self, pageSource, key, description):
        """Return the text between key and the next "&" in pageSource.

        Returns None when key is absent; raises GeocodeError when key is
        present but no "&" follows it.
        """
        start = pageSource.find(key)
        if start == -1:
            return None
        end = pageSource.find("&", start)
        if end == -1: #highly unlikely this will ever happen
            raise GeocodeError("Error finding "+description+" in "+pageSource)
        return pageSource[start+len(key):end]

    def loadFromMultimap(self):
        """Scrape latitude/longitude for this location from Multimap.

        Returns without changing anything when the page has no usable result.
        Raises GeocodeError if a coordinate parameter is found but unterminated.
        """
        #If we've only got city/country or state/country, Multimap is
        #currently unreliable: give up immediately
        if self.locationName.count(",") < 2:
            return
        queryString = self.locationName.replace(" ", "+")
        #www.multimap.com
        latLongPageURL = "http://146.101.143.10/map/places.cgi?client=public&db=w3&place="+queryString
        latLongPageSource = getHTMLPage(latLongPageURL)
        #Usual screen scraper problem: vulnerable to HTML format changes
        if "lon=" not in latLongPageSource or "lat=" not in latLongPageSource:
            return
        #If there are no matches, leave it as unknown
        if "no exact matches" in latLongPageSource:
            return
        if "no matching places" in latLongPageSource:
            return

        #If there's only one result, go get it
        geoLocation = latLongPageSource.find("geo.position")
        if geoLocation != -1:
            # Only slice when the marker exists: slicing from -1 used to
            # reduce the page to its last character and end in an IndexError
            # after latitude had already been overwritten with "".
            latLongPageSource = latLongPageSource[geoLocation:]
            contentStart = latLongPageSource.find("content")+9
            contentEnd = latLongPageSource.find("\" />", contentStart)
            geoPosition = latLongPageSource[contentStart:contentEnd]
            if geoPosition != "0;0":
                geoPositions = geoPosition.split(";")
                if len(geoPositions) >= 2:
                    self.latitude = geoPositions[0]
                    self.longitude = geoPositions[1]
                    return

        #If we're here, we have multiple results
        #First of all, make sure the country matches, because Multimap
        #does some pretty weird things.
        selfCountryName = self.locationName[self.locationName.rfind(",")+1:]
        #zoom down to results list
        browseLocation = latLongPageSource.find("browse.cgi")
        if browseLocation == -1:
            return
        latLongPageSource = latLongPageSource[browseLocation:]
        # NOTE(review): the end-of-link marker below is empty in this copy
        # (presumably a stripped HTML tag), which makes mmCountry always ""
        # and the country check a no-op -- restore from the real source.
        markerFootLocation = latLongPageSource.find("")
        countryHrefChunk = latLongPageSource[:markerFootLocation]
        countryChunkTwo = countryHrefChunk[countryHrefChunk.rfind(",")+2:]
        mmCountry = countryChunkTwo[:countryChunkTwo.rfind("\"")]
        #If one country includes the other, OK; otherwise no
        if selfCountryName.find(mmCountry) == -1 and mmCountry.find(selfCountryName) == -1:
            return

        # Look up both values before assigning either, so a missing latitude
        # cannot leave a longitude-only result behind.
        longitude = self._findQueryValue(latLongPageSource, "lon=", "longitude")
        if longitude is None:
            return
        latitude = self._findQueryValue(latLongPageSource, "lat=", "latitude")
        if latitude is None:
            return
        self.longitude = longitude
        self.latitude = latitude
class LJUser:
    """One LiveJournal account and its relation to the root user.

    Attributes:
        name         -- journal name.
        isFriend     -- the string "True" or "False" (not a boolean).
        isFriendedBy -- the string "True" or "False" (not a boolean).
        location     -- an LJLocation, or None until findLocation has run.
        asof         -- "as-of" attribute read from the cache file ("" if none).
    """

    def __init__(self, username):
        self.name = username
        self.isFriend = "False"
        self.isFriendedBy = "False"
        self.location = None
        self.asof = ""

    def __str__(self):
        """Return a one-line description of the user and its relation flags."""
        output = ""
        output = output+self.name
        # The flags are the strings "True"/"False"; a plain truth test would
        # treat "False" as true, so compare the text instead.
        if str(self.isFriend) == "True":
            output = output+": Friend "
        else:
            output = output+": NotFriend"
        if str(self.isFriendedBy) == "True":
            output = output+": FriendedBy "
        else:
            output = output+": NotFriendedBy"
        output = output+": "+str(self.location)
        output = output+": "+str(self.asof)
        return output

    #XML methods
    def getUserLocationXMLPath(self):
        """Return the cache file path for this user's location.

        The file lives in "lj-user-location-xml" when that directory exists,
        otherwise the bare file name is returned.
        """
        xmlFileName = self.name+"-location.xml"
        if os.path.isdir("lj-user-location-xml"):
            xmlFileName = os.path.normpath("lj-user-location-xml/"+xmlFileName)
        return xmlFileName

    def toUserLocationXML(self):
        """Return the user-location cache document body.

        NOTE(review): every literal below is empty in this copy.
        getUserLocationFromFile reads "lj-userlocation" (with an "as-of"
        attribute) and "location-name" elements, so the tags were presumably
        stripped when this listing was extracted -- restore from the real source.
        """
        xml = ""
        xml = xml+""
        xml = xml+""+self.name+""
        xml = xml+""+self.location.locationName+""
        xml = xml+""
        return xml

    def writeUserLocationXML(self):
        """Write the user-location cache file; failures are logged, not raised."""
        try:
            xmlFileName = self.getUserLocationXMLPath()
            xmlToWrite = xmlHeaderString+self.toUserLocationXML()
            xmlFile = open(xmlFileName, "w")
            try:
                xmlFile.write(xmlToWrite.encode("utf-8"))
            finally:
                # Close even when the write fails, so the handle is not leaked.
                xmlFile.close()
        except Exception:
            logError("Error writing user location XML for "+self.name, sys.exc_info()[0])

    def toMappableXML(self):
        """Return this user (and location, if any) serialised for the map file.

        NOTE(review): the element-tag literals are empty in this copy
        (presumably stripped markup) -- restore from the real source.
        """
        xml = ""
        xml = xml+""
        xml = xml+""+self.name+""
        xml = xml+""+str(self.isFriend)+""
        xml = xml+""+str(self.isFriendedBy)+""
        if self.location is not None:
            xml = xml+self.location.toXML()
        xml = xml+"\n"
        return xml

    #Application methods
    def findLocation(self, forceRegenerate=0):
        """Determine this user's location and its coordinates.

        The cached location is used when present. If forceRegenerate is
        non-zero or the cache is stale/invalid, the location is fetched from
        LiveJournal and re-cached. Coordinates are then looked up.
        """
        #First of all, see if location data is locally cached
        xmlFileName = self.getUserLocationXMLPath()
        if os.path.exists(xmlFileName):
            self.location = self.getUserLocationFromFile(xmlFileName)
        if forceRegenerate != 0 or isStaleOrInvalidData(self.asof, "user location"):
            self.location = self.getUserLocationFromLJ()
            self.writeUserLocationXML()
        self.location.findLatitudeLongitude(forceRegenerate)

    def getUserLocationFromFile(self, xmlFileName):
        """Read the cached location name (and as-of stamp) from xmlFileName.

        Returns an LJLocation; on any error the error is logged and a
        location named "blank" is returned.
        """
        locationName = ""
        try:
            addXMLHeaderIfNecessary(xmlFileName)
            dom = xml.dom.minidom.parse(xmlFileName)
            userLocations = dom.getElementsByTagName("lj-userlocation")
            self.asof = userLocations[0].getAttribute("as-of")
            locationNames = dom.getElementsByTagName("location-name")
            locationName = locationNames[0].firstChild.nodeValue
            dom.unlink()
        except Exception:
            logError("Error getting user location from file for "+self.name, sys.exc_info()[0])
            locationName = "blank"
        return LJLocation(locationName)

    def getUserLocationFromLJ(self):
        """Scrape the user's location from the LiveJournal user-info page.

        Returns an LJLocation; the name is "blank" when no location is found.
        Raises InfopageError when the end of the location section is missing.
        """
        #www.livejournal.com
        infoPageURL = "http://66.150.15.150/userinfo.bml?user="+self.name
        infoPageSource = getHTMLPage(infoPageURL)
        #get the location
        # NOTE(review): both marker literals look damaged in this copy (HTML
        # apparently rendered to "|" and line breaks); they are reproduced as
        # found -- restore the real markup from the original source.
        markerHeadText = "| Location:"
        markerHeadLocation = infoPageSource.find(markerHeadText)
        #if no location keyword found, just return
        if markerHeadLocation == -1:
            return LJLocation("blank")
        infoPageSource = infoPageSource[markerHeadLocation:]
        markerFootText = " |\n"
        markerFootLocation = infoPageSource.find(markerFootText)
        if markerFootLocation == -1: #highly unlikely this will ever happen
            raise InfopageError("Unable to find end of location section")
        infoPageSource = infoPageSource[:markerFootLocation]
        #use LJ's links to isolate the location names
        # NOTE(review): the lines from the split() call to the slice below
        # are fused into one garbled line in this copy. The loop is
        # reconstructed from the surviving fragments; the "<a href" separator
        # and the initial empty locationName are assumptions -- verify.
        locationList = infoPageSource.split("<a href")
        locationName = ""
        for entry in locationList:
            locationValue = entry[entry.find(">")+1:entry.find("<")]
            locationName = locationName + locationValue
            locationName = locationName + ","
        # str.strip() returns a new string; the result must be kept.
        locationName = locationName.strip()
        #if all we've got is a comma (or nothing), make it blank
        if len(locationName) <= 1:
            locationName = "blank"
        else:
            #strip the last comma
            locationName = locationName[:-1]
        return LJLocation(unicode(locationName, "utf-8", 'replace'))
class LJFMError(Exception):
    """Base exception class for this module."""


class UsernameError(LJFMError):
    """Raised for an empty or excessively long username."""


class InfopageError(LJFMError):
    """Raised when an expected marker is missing from a scraped info page."""


class GeocodeError(LJFMError):
    """Raised when coordinates cannot be located in a geocoder page."""
#Utility methods
def isStaleOrInvalidData(asOf, dataType):
    """Return 1 if cached data must be refreshed, else 0.

    asOf     -- the cache's timestamp as text (seconds since the epoch);
                empty or non-numeric text counts as invalid.
    dataType -- label for the kind of data; not used by the check itself.

    Data older than 30 days is stale.
    """
    if len(asOf) == 0:
        return 1
    try:
        dateWritten = float(asOf)
    except ValueError:
        # A malformed stamp is "invalid" data, not a reason to crash.
        return 1
    ageOfData = time.time() - dateWritten
    if ageOfData > (30*24*60*60):
        return 1
    return 0
def getHTMLPage(urlString):
    """Fetch urlString with LJScrapeURLOpener and return the response body."""
    urlOpener = LJScrapeURLOpener()
    htmlPage = urlOpener.open(urlString.encode("utf-8"))
    try:
        htmlPageSource = htmlPage.read()
    finally:
        # Release the connection even if the read fails.
        htmlPage.close()
    return htmlPageSource
def addXMLHeaderIfNecessary(xmlFileName):
    """Prepend xmlHeaderString to a file whose first line lacks "encoding".

    Files whose first line already mentions "encoding" are left untouched.
    """
    xmlFile = open(xmlFileName, "r")
    try:
        wholeFile = xmlFile.read()
    finally:
        xmlFile.close()
    firstLine = wholeFile.split("\n", 1)[0]
    if firstLine.find("encoding") != -1:
        return
    # Rewrite the file from scratch: the old code wrote to a handle opened
    # for reading, called a nonexistent file.open() and used an undefined
    # header name, so it could never succeed.
    xmlFile = open(xmlFileName, "w")
    try:
        xmlFile.write(xmlHeaderString+wholeFile)
    finally:
        xmlFile.close()
def getUserRelations(username):
    """Return the list of LJUser objects related to username.

    Fetches the fdata page for username. Lines starting with ">" mark the
    named account as a friend (isFriend), lines starting with "<" mark it as
    friending the user (isFriendedBy); other lines are ignored. When the page
    reports "! not a person account" the result of getCommunityUserData is
    returned instead.
    """
    friendInfoPageURL = "http://66.150.15.150/misc/fdata.bml?user="+username
    friendInfoPageSource = getHTMLPage(friendInfoPageURL)
    if friendInfoPageSource.find("! not a person account") >= 0:
        return getCommunityUserData(username)
    friends = []
    # Name -> LJUser index, so each lookup is O(1) instead of a list scan.
    friendsByName = {}
    friendInfoLines = friendInfoPageSource.split("\n")
    friendInfoLines.sort()
    for infoLine in friendInfoLines:
        if len(infoLine) == 0:
            continue
        direction = infoLine[0]
        if direction != ">" and direction != "<":
            continue
        # The name starts after the direction character and one separator.
        friendName = infoLine[2:]
        friend = friendsByName.get(friendName)
        if friend is None:
            friend = LJUser(friendName)
            friendsByName[friendName] = friend
            friends.append(friend)
        if direction == ">":
            friend.isFriend = "True"
        else:
            friend.isFriendedBy = "True"
    return friends
def getCommunityUserData(username):
    """Return the members and watchers of a community as LJUser objects.

    Scrapes the community profile page. Accounts in the "Members" list get
    isFriend set, accounts in the "Watched by" list get isFriendedBy set.
    Raises InfopageError when the members list cannot be found.
    """
    # The parameter is "username"; the old code read an undefined "userName".
    infoPageURL = "http://community.livejournal.com/"+username+"/profile"
    # The path carries no query string yet, so the parameter starts with "?".
    infoPageURL = infoPageURL+"?mode=full"
    infoPageSource = getHTMLPage(infoPageURL)
    # Find start of members list, excise everything before
    # NOTE(review): the marker literals in this function look damaged in this
    # copy (HTML apparently rendered to "|", line breaks or nothing); they
    # are reproduced as found -- restore the real markup from the source.
    markerHeadText = "| Members: | "
    markerHeadLocation = infoPageSource.find(markerHeadText)
    if markerHeadLocation == -1:
        raise InfopageError("Unable to find friendslist on info page for "+username)
    infoPageSource = infoPageSource[markerHeadLocation:]
    # Now that we've truncated to head of current list, pass it to the processor
    # with the name of the list and the end-of-list marker
    markerFootText = "\n"
    friends = []
    processRawFriendText(friends, infoPageSource, "Members", markerFootText)
    # Get 'Watched by' list, if any
    markerHeadText = "Watched by:"
    markerHeadLocation = infoPageSource.find(markerHeadText)
    if markerHeadLocation >= 0:
        markerFootText = ""
        pageSource = infoPageSource[markerHeadLocation:]
        # Pass the slice that starts at "Watched by:"; the old code computed
        # it but then passed the text that still began at the members list.
        processRawFriendText(friends, pageSource, "Watched by", markerFootText)
    #all done
    return friends
# Given an existing list of friends, some HTML source, a list name, a marker
# for the end of the source, and a delimeter, add to and/or correctly modify
# the friends list as per the new source data.
def processRawFriendText(friends, friendText, listName, markerFootText, delimeter=">", delimeter2="<"):
    """Add the accounts named in a scraped list to friends, in place.

    friends        -- existing list of user objects; extended and returned.
    friendText     -- HTML source starting at the head of the list.
    listName       -- "Members" sets isFriend, "Watched by" sets isFriendedBy.
    markerFootText -- text that marks the end of the list.
    delimeter      -- character immediately before each account name.
    delimeter2     -- character immediately after each account name.

    Raises InfopageError when markerFootText is not found in friendText.
    """
    markerFootLocation = friendText.find(markerFootText)
    if markerFootLocation == -1:
        raise InfopageError("Unable to find end of "+listName +" list on info page, looking for "+markerFootText+ " in text "+friendText)
    friendText = friendText[:markerFootLocation]
    initialList = friendText.split(".livejournal.com/profile")
    # remove first entry, which is HTML garbage
    initialList.pop(0)
    # Name -> object index so each lookup is O(1) instead of a list scan.
    friendsByName = {}
    for existing in friends:
        friendsByName.setdefault(existing.name, existing)
    for entry in initialList:
        # The name sits between the first delimeter and the next delimeter2.
        # (The old code assigned both positions to entryEnd, leaving
        # entryStart undefined.)
        entryStart = entry.find(delimeter)+1
        entryEnd = entry.find(delimeter2, entryStart)
        friendName = entry[entryStart:entryEnd]
        friend = friendsByName.get(friendName)
        if friend is None:
            friend = LJUser(friendName)
            friendsByName[friendName] = friend
            friends.append(friend)
        if listName == "Members":
            friend.isFriend = "True"
        elif listName == "Watched by":
            friend.isFriendedBy = "True"
    return friends
def getExistingEntry(friends, friendName):
    """Return the first entry of friends whose name equals friendName.

    Returns None when no entry matches.
    """
    matches = [candidate for candidate in friends if candidate.name == friendName]
    if matches:
        return matches[0]
    return None
def writeMappableXML(rootUser, friends):
    """Write the mappable XML file for rootUser and friends.

    The file is named after rootUser.name and placed in "mappable-xml" when
    that directory exists. A friend that fails to serialise is logged and
    skipped. Returns 1.

    NOTE(review): the empty write("") calls below presumably carried
    element tags that were stripped from this copy -- restore from the real
    source.
    """
    #Create and open file
    xmlFileName = rootUser.name+".xml"
    if os.path.isdir("mappable-xml"):
        xmlFileName = os.path.normpath("mappable-xml/"+xmlFileName)
    xmlFile = open(xmlFileName, "w")
    try:
        xmlFile.write(xmlHeaderString)
        xmlFile.write("")
        #Write root user
        xmlFile.write("")
        xmlFile.write(rootUser.toMappableXML().encode("utf-8"))
        xmlFile.write("")
        #Write all friendings
        xmlFile.write("")
        for friend in friends:
            try:
                xmlFile.write(friend.toMappableXML().encode("utf-8"))
            except Exception:
                logError("Error writing final XML for "+friend.name, sys.exc_info()[0])
        xmlFile.write("")
        xmlFile.write("\n")
    finally:
        # Close the file even when a write outside the per-friend guard fails.
        xmlFile.close()
    return 1
def logError(username, errorMessage):
    """Report a non-fatal error on stdout and in a timestamped file.

    username     -- label for the failing item; also the file-name prefix.
    errorMessage -- anything str() accepts.

    The file goes into "error" when that directory exists. Logging is
    best-effort: any failure while logging is deliberately ignored.
    """
    try:
        # NOTE(review): in this copy the literal was split across two lines
        # (probably a stripped HTML line-break tag); reproduced as a newline.
        print("\nNon-fatal error: "+username+", message "+str(errorMessage))
        xmlFileName = username+"-"+str(time.time())+".xml"
        if os.path.isdir("error"):
            xmlFileName = os.path.normpath("error/"+xmlFileName)
        # Build the content before opening the file, so a conversion failure
        # does not leave an empty, unclosed file behind.
        xmlToWrite = xmlHeaderString+str(errorMessage)
        xmlToWrite = unicode(xmlToWrite, "utf-8")
        xmlFile = open(xmlFileName, "w")
        try:
            xmlFile.write(xmlToWrite.encode("utf-8"))
        finally:
            xmlFile.close()
    except Exception: # give up
        pass
#TODO:
# XML Generation:
# Fix O(n^2) processing
#
# Map:
# Intelligent error message if file not found
# UK parsing
# More clustering/scaling
# Fix O(n^2) processing
# Friends/friends-of different colours
#
# Other:
# Check regenerate box if coming from regenerate page
intermediate.cgi CGI Python Script
#!/usr/bin/python
# intermediate.cgi: prints an HTML response that tells the visitor data is
# being collected for the requested user.
# NOTE(review): this listing has lost its indentation and the HTML inside
# most string literals; the code below is left exactly as found.
print "Content-type: text/html"
print
print "Collecting friendsmap data..."
from cgi import escape
import os
# Defaults used when the query string carries no parameters.
userName="nobody"
forceRegenerate=0
# Scan the environment for QUERY_STRING and pull the parameters out by hand.
keys = os.environ.keys()
keys.sort()
for k in keys:
if (escape(k)=="QUERY_STRING"):
queryString=escape(os.environ[k])
userNameLoc=queryString.find("userName=")
if (userNameLoc>=0):
# Takes everything after "userName=" up to the end of the query string.
# NOTE(review): that includes any parameter following userName -- confirm
# the form always sends userName last.
userName=queryString[userNameLoc+9:]
forceRegenerateLoc=queryString.find("forceRegenerate=on")
if (forceRegenerateLoc>=0):
forceRegenerate=1
# NOTE(review): the next four lines look like the remains of an if/else on
# forceRegenerate that printed two variants of an HTML tag; the "if" line and
# the markup are missing from this copy -- restore from the real source.
print "\n"
else:
print "?userName="+userName+"\">\n"
print "\n"
print "Collecting friendsmap data for "+userName+"\n"
# NOTE(review): the literal below is split across two lines in this copy,
# presumably where an HTML tag was stripped.
print "
Kick back and have a beer..."
print ""
collectData.cgi CGI Python Script
#!/usr/bin/python
# collectData.cgi: reads userName / forceRegenerate from the query string and
# runs ljfm9.generate for that user, printing progress as an HTML response.
# NOTE(review): this listing has lost its indentation and the HTML inside
# most string literals; the code below is left exactly as found.
print "Content-type: text/html"
print
print "Friendsmap XML Generator"
import os, sys, ljfm9
from cgi import escape
# Defaults used when the query string carries no parameters.
userName="nobody"
forceRegenerate=0
# Scan the environment for QUERY_STRING and pull the parameters out by hand.
keys = os.environ.keys()
keys.sort()
for k in keys:
if (escape(k)=="QUERY_STRING"):
queryString=escape(os.environ[k])
userNameLoc=queryString.find("userName=")
if (userNameLoc>=0):
# Takes everything after "userName=" up to the end of the query string.
# NOTE(review): that includes any parameter following userName -- confirm
# the caller always sends userName last.
userName=queryString[userNameLoc+9:]
forceRegenerateLoc=queryString.find("forceRegenerate=on")
if (forceRegenerateLoc>=0):
forceRegenerate=1
print "Collecting friendsmap data for "+userName+"\n"
# Does the actual collection; an exception here ends the script.
ljfm9.generate(userName, forceRegenerate)
# NOTE(review): the two literals below are split across lines in this copy,
# presumably where HTML tags (including a link to the map page) were stripped.
print "
Finished!\n"
print "
Now go on to view "+userName+"'s friendsmap.\n"
print ""
friendsmap.htm Google Maps API HTML/JavaScript
LJ Friendsmap
|
| (Missile launch functionality coming in version 2.0, contingent on DoD funding approval) |