# dland_to_lj.py
#
# Takes a Diaryland archive page and, for each entry listed,
# posts a corresponding back-dated entry to LiveJournal.
#
# Assumes your archive page is located at http://{your name}.diaryland.com/older.html
# Assumes your entry filenames are all default Diaryland names.
#
import urllib2      # http://www.python.org/doc/2.3.3/lib/urllib2-examples.html
import re           # http://www.amk.ca/python/howto/regex/regex.html
import xmlrpclib    # http://docs.python.org/lib/module-xmlrpclib.html
import sys
import time
import os
from md5 import md5

host = 'http://yourdiary.diaryland.com'  # REPLACE THIS WITH YOUR DIARYLAND PAGE
username = 'yourljaccount'               # REPLACE THIS WITH YOUR LJ USERNAME
pwd = 'yourljpassword'                   # REPLACE THIS WITH YOUR LJ PASSWORD

# Patterns.

# Archive index rows, e.g.:  051014_8.html">Title of post
entryInfoPatternMatcher = re.compile(r'\d{6,6}_\d+\.html\">.*')

# Entry timestamps, e.g.:  7:29 p.m.  OR  00:08:18
timePatternMatcher = re.compile(r'\d{1,2}:\d\d [ap]\.m\.|\d\d:\d\d:\d\d')

# Leading junk before the entry body (blank lines are part of the match), e.g.:
#
# 05.10.29 - 12:15 a.m.
#
# OR
#
# 05.10.29 - 23:12:12
#
startingCruftPatternMatcher = re.compile(
    r'\n\n\d\d.\d\d.\d\d - (\d{1,2}:\d\d [ap]\.m\.|\d\d:\d\d:\d\d)\n\n')

# Trailing junk after the entry body (blank lines are part of the match), e.g.:
#
# previous -- next
#
endingCruftPatternMatcher = re.compile(r'\n\nprevious -- next\n\n')
ljServer = xmlrpclib.ServerProxy('http://livejournal.com/interface/xmlrpc') ############################################################################ # getDiarylandPage # input: string output: either False or a string (page contents) # # prints exceptions and diaryland page errors # def getDiarylandPage(url): print 'Loading ' + url + '...' try: page = urllib2.urlopen(url) except URLError, e: print "Failed to open " + url + "; URLError: " + e.reason return False except HTTPError, e: print "Failed to open " + url + "; HTTPError: " + e.code return False else: pageContents = page.read() if pageContents.find('You\'ve just reached the DiaryLand error page!') != -1: print "Failed to open " + url + "; got the Diaryland error page" return False; return pageContents ############################################################################ # doLiveJournalCommand # input: string, dict output: either False or dict (LJ response) # # prints exceptions, retries up to 3 times if error = 'Connection reset by peer' # # http://www.bookmark-master.com/socket-error-10054.html - # this appears to be result of server overload, hopefully retrying will get past it # def doLiveJournalCommand(command, args=None): attempts = 0 if args is None: fullCommand = 'ljServer.LJ.XMLRPC.' + command + '()' else: fullCommand = 'ljServer.LJ.XMLRPC.' + command + '(args)' while attempts < 3: try: return eval(fullCommand) except xmlrpclib.Fault, f: if f.faultString.find('Connection reset by peer'): print 'Attempt %d for %s failed with socket error 10054; retrying...' 
% (attempts, fullCommand) time.sleep(5) # sleep 5 seconds attempts = attempts + 1 continue except: print sys.exc_info() return False print 'Failed to execute %s after 3 attempts; aborting' % (fullCommand) return False ############################################################################ # doLiveJournalChallengeResponse # input: none output: either False or dict # # http://www.livejournal.com/doc/server/ljp.csp.auth.challresp.html # def doLiveJournalChallengeResponse(): getChallengeResult = doLiveJournalCommand('getchallenge') if getChallengeResult is False: return False # sys.exit('Couldn\'t log in to livejournal; aborting') challenge = getChallengeResult['challenge'] response = md5(challenge + md5(pwd).hexdigest()).hexdigest() return { 'challenge': challenge, 'response': response } ############################################################################ # postLiveJournalEntry # input: strings output: either False or dict (LJ response) # # http://www.livejournal.com/doc/server/ljp.csp.xml-rpc.postevent.html # def postLiveJournalEntry(entry, subject, year, month, day, hour, minutes): print 'Posting entry to LiveJournal...' 
challengeResponse = doLiveJournalChallengeResponse() # login #print "username: %s \\nnentry: %s \n\nsubject: %s\n\n year: %s mon: %s day: %s hour: %s min: %s" % (username, entry, subject, year, month, day, hour, minutes) postArgs = { 'username': username, 'auth_method': 'challenge', 'auth_challenge': challengeResponse['challenge'], 'auth_response': challengeResponse['response'], 'event' : xmlrpclib.Binary(entry), # must specify binary since some dland entries are not UTF-8 encoded 'subject' : subject, 'year' : year, 'mon' : month, 'day' : day, 'hour' : hour, 'min' : minutes, 'lineendings' : 'pc', 'security' : 'usemask', 'allowmask' : 1, 'props' : { 'opt_backdated': True } } return doLiveJournalCommand('postevent', postArgs) ############################################################################ # deleteLiveJournalEntry # input: string output: either False or dict (LJ response) # # see http://www.livejournal.com/doc/server/ljp.csp.xml-rpc.editevent.html # def deleteLiveJournalEntry(itemId): print 'Deleting LJ item %s...' 
% (itemId[:len(itemId)]) challengeResponse = doLiveJournalChallengeResponse() # login editArgs = { 'username': username, 'auth_method': 'challenge', 'auth_challenge': challengeResponse['challenge'], 'auth_response': challengeResponse['response'], 'event': '', 'subject': '', 'itemid': itemId } return doLiveJournalCommand('editevent', editArgs) ############################################################################ # deleteLiveJournalEntries # input: string output: none # # takes a newline delimited list of itemids, deletes corresponding LJ entries # if no list provided, uses most recent log file in working dir # def deleteLiveJournalEntries(logFilePath=''): if logFilePath == '': newestCreationTime = 0 for currentFile in os.listdir('.'): if os.path.splitext(currentFile)[1] == '.log': if os.stat(currentFile).st_ctime > newestCreationTime: logFilePath = currentFile if logFilePath == '': sys.exit('No log file provided, couldn\'t find one in working directory; aborting') try: logFile = open(logFilePath) except IOError: sys.exit("Couldn't open " + logFilePath + "; aborting") for itemId in logFile.readlines(): deleteLiveJournalEntry(itemId) ############################################################################ # transferDiarylandEntryToLiveJournal # input: strings output: either False or string (LJ response) # # loads diaryland url, strips out dland junk, # posts page contents to livejournal. returns either False or string # def transferDiarylandEntryToLiveJournal(url, subject, year, month, day): # load entry getResult = getDiarylandPage(url) if getResult is False: return False # get time for entry - format is either either 10:05 p.m or 23:04:10 # we assume first time string found = time of entry searchResult = timePatternMatcher.search(getResult) if searchResult is None: print "Couldn't find time for entry " + url + "; ignoring" return False timeString = searchResult.group() if timeString.find('.m.') != -1: # only entries of type 9:32 a.m have ".m." 
in them timeString = timeString.replace('.', '') # get rid of periods, date formatter doesn't like them entryTime = time.strptime(timeString, "%I:%M %p") else: entryTime = time.strptime(timeString, "%H:%M:%S") # for details of struct_time, see http://www.python.org/doc/current/lib/module-time.html hour = entryTime[3] minutes = entryTime[4] # strip extraneous stuff off entry searchResult = startingCruftPatternMatcher.search(getResult) if searchResult is None: print "Couldn't find starting junk to strip off entry; ignoring" return False entryStart = searchResult.end() # we want the LJ entry to start where startingCruft ends searchResult = endingCruftPatternMatcher.search(getResult) if searchResult is None: print "Couldn't find ending junk to strip off entry; ignoring" return False entryEnd = searchResult.start() # we want to LJ entry to end where endingCruft begins entry = getResult[entryStart:entryEnd] return postLiveJournalEntry(entry, subject, year, month, day, hour, minutes) ############################################################################ # doBatchTransferFromDiarylandToLiveJournal # input: none output: none # # loads diaryland archive page, extracts entry filenames, does batch transfer # def doBatchTransferFromDiarylandToLiveJournal(): # start logfile logFile = file(("C:\logFile" + time.strftime('%Y%m%d%H%M%S', time.gmtime()) + '.log'), 'w+') # load the archive index getResult = getDiarylandPage(host + "/older.html") if getResult is False: sys.exit("Couldn't open archive page; aborting") print 'Extracting entry filenames...' 
entryList = entryInfoPatternMatcher.findall(getResult) # rows are of form: 051014_8.html">Title of post for row in entryList: # we get date from filename instead of the date field in entry html, # because that field is not consistently formatted & there's no way # to consistently disambiguate MM/DD/YY from DD/MM/YY year = '20' + row[0:2] month = row[2:4] day = row[4:6] subject = row[(row.find('>') + 1):row.find('<')] url = host + '/' + row[0:row.find('\"')] transferResult = transferDiarylandEntryToLiveJournal(url, subject, year, month, day) if transferResult is False: print 'Failed to transfer ' + url else: logFile.write('%d\n' % (transferResult['itemid']) ) print transferResult # clean up logFile.close() # this kicks things off... doBatchTransferFromDiarylandToLiveJournal() raw_input("Done, hit 'enter' to quit")