#!/usr/bin/perl -w

# migrate blosxom to WordPress
#
# v1.0 - 2006/12/15
#
#    Copyright (C) 2006 Dirk Hohndel (dirk at hohndel d0t org)
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License version 2 as published by
#    the Free Software Foundation.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
# This script makes a LOT of assumptions
# - it assumes that you have shell access to the machine that blosxom runs
#   on and that you can connect to the WP mysql database from that machine
# - you should run it from the main directory of your blosxom blog
#   I used something like "find . -name \*.blog | xargs blosxomtowp.pl"
#   (note that my blosxom posts have the suffix ".blog" - this might be
#   ".txt" for you)
# - it assumes that you use directories under that main directory for your
#   category hierarchy - just as with using the "categorytree" plugin
# - it assumes that you use the "meta" and "metadate" plugin to set the date
#   on your postings (but it's easy to change this to use the timestamp
#   instead)
# - it assumes that you are using the feedback plugin for comments
# - it assumes that you have already created /all/ categories that you have
#   in blosxom in your WP database
# - it assumes the database table layout in WP-2.0.5
#
# I suggest creating the database, creating the categories and then making a
# backup using the nice wp-backup plugin. If something goes wrong with this
# script (and it will) then you can simply restore the backup, fix the
# script and start again.
#
# Below are a few variables to set up things 
# - debug = 1 gets you a printout of some of the variables and all of the
#             queries
# - debug > 1 also gets you a dump of the wordpress database at the end
# - run = 0   is a dry-run
# - run > 0   actually executes the queries
# The rest of the variables should be self explanatory or are explained below
#
# you need a couple of CPAN modules installed: DateTime, DBI, and DBD::mysql

use DateTime;
use DBI;

# ---- settings you are expected to edit before running ----

# Verbosity: 1 prints the parsed values and every query; anything above 1
# additionally dumps the wp_posts table once all postings are processed.
my $debug = 1;

# 0 is a dry run (queries are only printed); any other value executes them.
my $run = 1;

# The path from the current directory to the comment files.
my $relative_commentpath = 'data/blosxom/plugins/state/feedback/';

# Base URL of the new WordPress blog.
my $blogurl = 'http://www.hohndel.org/communitymatters/';

# Extension of the blosxom post files (this might be ".txt" for many).
my $post_suffix = '.blog';

# MySQL database name and credentials used for the connection.
my $database         = 'communitymatters';
my $databaseuser     = 'wordpress';
my $databasepassword = 'this-is-not-it';

# Time zone used to interpret post dates and to render comment timestamps.
my $local_tz = 'America/Los_Angeles';

# while I tried to put everything that needs to be configured up above this
# comment, it might be a smart idea to read the code below to make sure that
# it doesn't make any additional assumptions that I forgot to mention...
#

# Lookup table from a WordPress category_nicename to its numeric cat_ID;
# it is filled from the wp_categories table below.
my %category;

# Scratch variables shared by the SQL statements in the rest of the script.
my $query;
my $sth;

# Open the connection to the MySQL server on localhost.  RaiseError is
# switched on for the handle.
my $dsn = "DBI:mysql:database=" . $database . ";host=localhost";
my $dbh = DBI->connect( $dsn, $databaseuser, $databasepassword,
    { 'RaiseError' => 1 } );

# Read every category that WordPress already knows about.
$sth = $dbh->prepare("SELECT cat_ID, category_nicename FROM wp_categories ")
  or die "Error:" . $dbh->errstr . "\n";
$sth->execute
  or die "Error:" . $sth->errstr . "\n";
while ( my ( $cat_id, $nicename ) = $sth->fetchrow_array ) {
    $category{$nicename} = $cat_id;
}

# now iterate through all the postings given as arguments to the script
# Migrate every blosxom posting named on the command line.  For each file:
#   1. read the title (first line), the optional meta-creation_date and the body
#   2. derive category, post name, URL and comment file from the file's path
#   3. insert the post into wp_posts, link it in wp_post2cat and bump the
#      category's post count
#   4. import the comments stored by the feedback plugin, if there are any
# With $run == 0 the queries are only built (and printed when $debug is set).
foreach my $blogposting (@ARGV) {
    my $posttitle  = "";
    my $posttext   = "";
    my $have_title = 0;
    my $ptl;    # creation time of the post in $local_tz

    open( my $post_fh, '<', $blogposting )
      or die "Error: cannot open $blogposting: $!\n";
    while ( my $line = <$post_fh> ) {
        if ( !$have_title ) {    # first line is the title of the posting
            # drop the line ending so it does not end up in post_title
            chomp( $posttitle = $line );
            $have_title = 1;
            if ($debug) { print "Title: $posttitle \n"; }
            next;
        }

        # a parsable meta-creation_date line sets the date and is not part
        # of the text; anything else is appended to the body
        my $parsed = _parse_creation_date( $line, $local_tz );
        if ( defined $parsed ) {
            $ptl = $parsed;
            next;
        }

        # the body is stored as one long line
        $line =~ s/\n/ /;
        $posttext .= $line;
    }
    close($post_fh);

    if ( !defined $ptl ) {
        # no usable meta-creation_date: use the file's timestamp instead of
        # failing later on an undefined date object
        warn "Warning: no meta-creation_date in $blogposting, "
          . "using the file modification time\n";
        $ptl = DateTime->from_epoch(
            epoch     => ( stat $blogposting )[9],
            time_zone => $local_tz
        );
    }
    my $ptgmt         = $ptl->clone()->set_time_zone('GMT');
    my $post_time     = $ptl->date() . " " . $ptl->time();
    my $post_time_GMT = $ptgmt->date() . " " . $ptgmt->time();
    if ($debug) { print "Posted on: $post_time / $post_time_GMT GMT\n"; }

    # determine the correct category, post_name from the blosxom path/filename
    my ( $post_stem, $category_path, $postcategory, $post_name ) =
      _split_post_path( $blogposting, $post_suffix );
    my $commentpath = $relative_commentpath . $post_stem . ".wb";
    my $posturl     = $blogurl . $category_path . "/" . $post_name;
    if ($debug) { print "Post filename: $post_name \n"; }
    if ($debug) { print "Post URL: $posturl \n"; }
    if ($debug) { print "Post category: $postcategory \n"; }
    if ($debug) { print "Comment path: $commentpath \n"; }

    die "Error: category " . $postcategory . " doesn't exist in wordpress!\n"
      unless ( $category{$postcategory} );

    # first insert the posting into the wp_posts table
    # (both modification dates are quoted inside the format string itself)
    $query = sprintf(
      "INSERT INTO wp_posts VALUES ( 0, 1, '%s', '%s',\
       %s, %s, %d, '',\
       %s, %s, %s, '', %s, '', '',\
       '%s', '%s', '',\
       %d, %s, %d, '', '', %d )",
      $post_time, $post_time_GMT,
      $dbh->quote($posttext), $dbh->quote($posttitle),
      $category{$postcategory},
      $dbh->quote('publish'), $dbh->quote('open'), $dbh->quote('open'),
      $dbh->quote($post_name),
      $post_time, $post_time_GMT,
      0, $dbh->quote($posturl), 0, 0
    );
    if ($debug) { print "Query: $query \n"; }
    if ($run)   { $dbh->do($query); }

    # that's the post-ID in WP; nothing was inserted during a dry run, so
    # use 0 there instead of whatever the handle happens to report
    my $post_insertid = $run ? $dbh->{'mysql_insertid'} : 0;

    # assign a category
    $query = sprintf( "INSERT INTO wp_post2cat VALUES (0, %d, %d)",
        $post_insertid, $category{$postcategory} );
    if ($debug) { print "Query: $query \n"; }
    if ($run)   { $dbh->do($query); }

    # increment the posting count of that category
    $query = sprintf(
        "UPDATE wp_categories SET category_count=category_count+1 WHERE cat_ID = %d",
        $category{$postcategory} );
    if ($debug) { print "Query: $query \n"; }
    if ($run)   { $dbh->do($query); }

    # now let's look at the comments
    if ( open( my $comment_fh, '<', $commentpath ) ) {

        # fields of the comment record currently being read; they start out
        # empty so a record without e.g. a url neither inserts NULL nor
        # inherits the value of the previous comment
        my ( $comment, $comment_name, $comment_url, $comment_date,
            $comment_date_GMT, $comment_ip ) = ('') x 6;
        while ( my $line = <$comment_fh> ) {
            if ( $line =~ /^comment:\s*(.*)/ ) { $comment      = $1; }
            if ( $line =~ /^name:\s*(.*)/ )    { $comment_name = $1; }
            if ( $line =~ /^url:\s*(.*)/ )     { $comment_url  = $1; }
            if ( $line =~ /^date:\s*(.*)/ ) {
                # the value is handed to DateTime as seconds since the epoch
                my $dtlocal = DateTime->from_epoch( epoch => $1, time_zone => $local_tz );
                my $dtgmt = $dtlocal->clone()->set_time_zone('GMT');
                $comment_date     = $dtlocal->date() . " " . $dtlocal->time();
                $comment_date_GMT = $dtgmt->date() . " " . $dtgmt->time();
            }
            if ( $line =~ /^ip:\s*(.*)/ ) { $comment_ip = $1; }
            if ( $line =~ /^-----/ ) {    # a line of dashes ends one comment record
                if ($debug) {
                    print "Comment: $comment\n From: $comment_name ($comment_url) on $comment_date at $comment_ip\n";
                }

                # add to comment database
                $query = sprintf("INSERT INTO wp_comments VALUES (0,%d,%s,'not tracked',%s,%s,%s,%s,%s,0,'1','not tracked','',0,1)",
                  $post_insertid,$dbh->quote($comment_name),$dbh->quote($comment_url),$dbh->quote($comment_ip),
                  $dbh->quote($comment_date),$dbh->quote($comment_date_GMT),$dbh->quote($comment));
                if ($debug) { print "Query: $query \n"; }
                if ($run)   { $dbh->do($query); }

                # increment the comment count of the post
                $query = sprintf( "UPDATE wp_posts SET comment_count=comment_count+1 WHERE ID = %d", $post_insertid );
                if ($debug) { print "Query: $query \n"; }
                if ($run)   { $dbh->do($query); }

                # start the next record with a clean slate
                ( $comment, $comment_name, $comment_url, $comment_date,
                    $comment_date_GMT, $comment_ip ) = ('') x 6;
            }
        }
        close($comment_fh);
    }
    else {
        if ($debug) { print "No comments for $posttitle\n"; }
    }
}

# Turn a "meta-creation_date: mm/dd/yy[yy] [hh:mm[:ss]]" line into a DateTime
# in time zone $tz.  A missing time means 18:00:00, missing seconds mean 0 and
# years below 100 are taken to be 20xx.  Returns nothing for any other line.
sub _parse_creation_date {
    my ( $line, $tz ) = @_;
    my ( $month, $day, $year, $hour, $minute, $second );
    if ( $line =~ m{^meta-creation_date:\s*(\d+)/(\d+)/(\d+)\s*(\d+):(\d+)(?::(\d+))?} ) {
        ( $month, $day, $year, $hour, $minute, $second ) = ( $1, $2, $3, $4, $5, $6 );
        $second = 0 unless defined $second;
    }
    elsif ( $line =~ m{^meta-creation_date:\s*(\d+)/(\d+)/(\d+)} ) {
        ( $month, $day, $year ) = ( $1, $2, $3 );
        ( $hour, $minute, $second ) = ( 18, 0, 0 );    # if we have no time, assume 18:00:00
    }
    else {
        return;
    }
    $year += 2000 if $year < 100;
    return DateTime->new(
        year      => $year,
        month     => $month,
        day       => $day,
        hour      => $hour,
        minute    => $minute,
        second    => $second,
        time_zone => $tz
    );
}

# Split the path of a post file into the pieces the migration needs.
#   $file   - path as given on the command line (a leading "./" is ignored)
#   $suffix - literal file extension to strip, e.g. ".blog"
# Returns a list of four strings:
#   the path without "./" and suffix,
#   the lower-cased directory part of that path,
#   the last component of that directory part (the category),
#   the file name without directory and suffix (the post name).
sub _split_post_path {
    my ( $file, $suffix ) = @_;

    my $stem = $file;
    $stem =~ s/\Q$suffix\E$//;    # \Q..\E: the suffix is text, not a pattern
    $stem =~ s/^\.\///;

    my $category_path = $stem;
    $category_path =~ s|^(.*)/[^/]*$|$1|;
    $category_path =~ tr/A-Z/a-z/;

    my $post_name = $stem;
    $post_name =~ s|.*/([^/]*)|$1|;

    my $category = $category_path;
    $category =~ s|.*/([^/]*)|$1|;

    return ( $stem, $category_path, $category, $post_name );
}

# Optional debugging aid: with $debug above 1, print the complete wp_posts
# table as comma-separated lines, column names first.
if ( $debug > 1 ) {

    # dump the posts database
    $sth = $dbh->prepare("SELECT * FROM wp_posts ");
    if ( !$sth ) {
        die "Error:" . $dbh->errstr . "\n";
    }
    if ( !$sth->execute ) {
        die "Error:" . $sth->errstr . "\n";
    }
    print join( ",", @{ $sth->{'NAME'} } ), "\n";
    while ( my $row = $sth->fetchrow_arrayref ) {

        # NULL columns arrive as undef; print them as NULL so they neither
        # trigger warnings nor look like empty strings
        print join( ",", map { defined $_ ? $_ : 'NULL' } @$row ), "\n";
    }
}

# close the database connection explicitly before leaving
$dbh->disconnect;
exit 0;

