#!/usr/bin/perl
# convert.pl
#
# Fills in a dadaimc mysql database by reading a postrgres "active" database.
# Also makes soft links from dada's usermedia directory to the media files
# in active's upload directory, so that you don't need two copies of all your
# media lying around.  (Note: won't handle remote storage of media.)
# Takes care of image sizing for jpgs and gifs (if you have jpeginfo and
# gifsicle), links between related stories, and parenting comments.  
# Also creates a table to allow redirects from # active articles to dada 
# articles.  Doesn't handle creating thumbnails.
#
# This script needs DBI and the mysql and pg DBD modules.
#
# You should only need to change the following two lines to reflect the
# location of both software installations

my $dada_root = '/data/www/dcdada';
my $active_root = '/data/www/washingtondc/webcast';
my $media_root = '/data/www/dcdada/olduploads';
use DBI;
use File::stat;
use File::Basename;
use strict;


my ($dada_db_name,$dada_db_host,$dada_db_user,$dada_db_pass) = 
    map { /=\s*'(.*)'/ }
    split "\n", `grep g_db_ $dada_root/imc_dbinfo.inc`;

my ($active_db_user,$active_db_pass,$active_db_name) =  
    map { /=\s*"(.*)"/ } 
    split "\n", `grep db_setup $active_root/local/db-setup.php3`;

# We're going to be making links from subdirectories of here
# to places in active's media upload directories
#
chdir "$dada_root/usermedia" or die $!;

my $dada = DBI->connect(
    "dbi:mysql:$dada_db_name", $dada_db_user, $dada_db_pass, 
    { RaiseError => 1, AutoCommit => 0 }) || die $!;

my $active = DBI->connect(
    "dbi:Pg:dbname=$active_db_name", $active_db_user, $active_db_pass,
    { RaiseError => 1, AutoCommit => 0 }) || die $!;


my $sth = $active->prepare("SELECT * FROM webcast WHERE id > 79525 ORDER by id"); 

# We add a new table to dada which maps active ids to dada ids, so
# create it and prepare the statement to insert rows into it
#
#

{ 
  local $dada->{RaiseError};
  $dada->do("CREATE TABLE activemap (".
      "old mediumint(9) NOT NULL default '0',".
      "new mediumint(9) NOT NULL default '0',".
      "PRIMARY KEY  (old)".
      ") TYPE=MyISAM COMMENT='Maps old active ids to new dada ids';");

  $dada->do("DELETE from activemap");
}
my $a_map = $dada->prepare("INSERT INTO activemap(old,new) VALUES (?,?)");


$sth->execute;

my (%id_map,%parents);

my @field_names = qw(picturebook summary local_interest rating displayable body submitted embedded_media author feature contact_email created_datetime contact_phone contact_address mime_type edit_datetime heading parentid related_url1 media relationships);

my @media_fields = qw(created_datetime  filesize  mime_type  mime_class height width orig_filename  sequence_number  filename  parentid  artist  caption  keywords  displayable  email_verified);

my $ins = $dada->prepare("INSERT INTO articles(" . join(",",@field_names).
              ") VALUES (". "?,"x(scalar(@field_names)-1)."?)");

my $media_ins = $dada->prepare("INSERT INTO media(" . join(",",@media_fields).
              ") VALUES (". "?,"x(scalar(@media_fields)-1)."?)");

my $fields;

# This assumes that your dada dadabase has no existing articles, or
# that you'll remove them first.  If you want to keep existing articles, 
# hard code the number that exist, or write a bit of code to count the rows 
# in the existing articles table
#
my $id = 1;

while ( $fields = $sth->fetchrow_hashref) {

  my $new_fields = {};

  print "Starting record $id...\n" unless $id % 1000;
 
  next unless $fields->{created} =~ /^(.*)(\..*)?-(.*)$/;
  $new_fields->{created_datetime} = $1;
  next unless $fields->{modified} =~ /^(.*)(\..*)?-(.*)$/;
  $new_fields->{edit_datetime} = $1;

  $new_fields->{author} = $fields->{author};
  $new_fields->{contact_email} = $fields->{contact};
  $new_fields->{contact_phone} = $fields->{phone};
  $new_fields->{contact_address} = $fields->{address};
  $new_fields->{heading} = $fields->{heading};
  $new_fields->{summary} = $fields->{summary};
  $new_fields->{body} = $fields->{article};
  $new_fields->{mime_type} = $fields->{mime_type};
  $new_fields->{submitted} = 1;
  $new_fields->{displayable} = $fields->{display};
  $new_fields->{rating} = 0;
  $new_fields->{embedded_media} = 0;
  $new_fields->{feature} = 0;
  $new_fields->{picturebook} = 0;
  $new_fields->{local_interest} = 0;
  $new_fields->{parentid} = 0;
  $new_fields->{related_url1} = $fields->{link} || "";
  $new_fields->{relationships} = "";
  $new_fields->{media} = 'text';

  # Save the parent id for later; if articles are rethreaded to
  # later parents, we may not have the parent id under dada yet, so
  # we need to do parenting in a separate loop
  #
  $parents{$id} = $fields->{parent_id} if $fields->{parent_id};

  $fields->{html_file} =~ s/\s*$//;
  if ($fields->{html_file}) {
    my $file = "$active_root/".$fields->{html_file};
    if (-f $file) {
     undef $/;
     open FILE, $file || die $!;
     $new_fields->{body} = <FILE>;
     close FILE;
    }
    else {
      print "Can't find html_file $fields->{html_file} for active id $fields->{id}\n";
    }
  }
 
  # Now deal with embedded media
  #
  $fields->{linked_file} =~ s/\s*$//;
  my $file;
  if ($fields->{linked_file}) {
    #my $file = "$active_root/".$fields->{linked_file};
    #unless (-f $file) {
      # Sometimes our active database had screwy prefixes for embedded
      # media, so if I can't find them with the above method, I strip
      # off the prefix and try to find the base.  NOTE: this could 
      # conceivably lead to linking the wrong media into an article,
      # so disable if you like
      #
      my $base = $fields->{linked_file};
      $base =~ s/.*\///;
      $file = "$media_root/$base";
      unless (-f $file) {
        print "Couldn't find media $file $fields->{linked_file} for $fields->{id}\n";
     # }
    }

    if (-f $file) {
      my $simple = $fields->{linked_file};
      $simple =~ /(\.[^\.]*)$/;
      my $suf = $1;
      $simple =~ s/.*\///;
      die $fields->{mime_type} unless $fields->{mime_type} =~ /(.*)\//;
      my $class = $1;
      
      my $thumb = $file;
      $thumb =~ s/(\.[^\.]*)$/-thumb$1/;
		   
      # Make a link in the correct mime-class subdirectory to the original
      # media file
      #

      my @chars = split //, "$id$suf";
      my $sum = 0;
      for my $i (@chars){
	  $sum += ord($i);
      }
      my $filehash = ($sum % 13) + 1;

      my $basename = basename($file);
      chdir "./$class/$filehash" || die $!;
      `ln -s $file $id$suf`;
      
      if(-f $thumb && ($thumb ne $file)){
	  my $thumbbase= basename($thumb);
	  chdir "./thumb" || die $!;
	  `ln -s $media_root/$thumbbase $id$suf`;
	  chdir "..";
      }
      
      # Now we need to add an entry into the media table
      #
      my $mf = {};
      $mf->{created_datetime} = $new_fields->{created_datetime};
      $mf->{filesize} = stat($file)->size;
      $mf->{mime_type} = $fields->{mime_type};
      $mf->{mime_class} = $class;
      $mf->{height} = $mf->{width} = 0;
      $new_fields->{media} .= ",$class";
      $mf->{orig_filename} = $simple;
      $mf->{sequence_number} = 1;
      $mf->{filename} = $id.$suf;
      $mf->{parentid} = $id;
      $mf->{artist} = $fields->{author};
      $mf->{caption} = $fields->{summary};
      $mf->{keywords} = "";
      $mf->{displayable} = $fields->{display};
      $mf->{email_verified} = "";

      # If it's an image, try to find its size.  This is dependent
      # on two programs I use for jpgs/gifs...if you don't have them
      # you won't get image sizes, but things should still work.
      #
      if ($mf->{mime_class} eq 'image') {
        if ($suf =~ /(jpg)|(jpeg)/i &&
            `jpeginfo $mf->{filename}` =~ /(\d+) x (\d+)/) {
          ($mf->{width},$mf->{height}) = ($1,$2);
        }
        elsif ($suf =~ /(gif)/i &&
            `gifsicle -I $mf->{filename} | grep logical` =~ /(\d+)x(\d+)/) {
          ($mf->{width},$mf->{height}) = ($1,$2);
        }
      }
      $media_ins->execute(map $mf->{$_},@media_fields);
      chdir "../..";
  }
}
  
  # Add the new entry in the articles table
  #
  $ins->execute(map $new_fields->{$_},@field_names);
  
  # Add an entry to the activemap table
  # 
  $a_map->execute($fields->{id},$id);
  
  # Make a hash of old id to new id
  #
  $id_map{$fields->{id}} = $id++;

}

print "Creating links...\n";

# Walk the weblink table, and build the relationships field 
# (which dada annoyingly stores as a serialized php array)
#
for my $id (keys %id_map) { 
  my $links_stmt = 
    $active->prepare("SELECT * FROM weblink WHERE fromlink = $id"); 
  $links_stmt->execute;
  my ($links, @links);
  while ( $links = $links_stmt->fetchrow_hashref) {
    if (exists $id_map{$links->{tolink}}) {
      push @links, $id_map{$links->{tolink}};
      $dada->do(
		"UPDATE media SET parentid=$id_map{$id} where parentid = $id_map{$links->{tolink}}");

    }
    else {
      print "Bad link $links->{tolink}\n";
    }
  }
  if (@links) {
    # The format of the relationships field is 
    # a:[length]:(i:[index];s:4:"[related-id]";[...]} 
    my $str = "";
    my $cnt = 0;
    map { $str .= "i:".($cnt++).";s:(strlen($_)):\"$_\";" } @links;
    $str = "a:".scalar(@links).':{'.$str.'}';
    $dada->do(
        "UPDATE articles SET relationships='$str' WHERE objectid=$id_map{$id}");
  }
}

print "Linking parents...\n";

# Walk the parent map, and update the parentid field now that we know
# the dadaimc objectid for every active id
#
for my $id (keys %parents) {
  unless (exists $id_map{$parents{$id}}) {
    print "Can't find parent $parents{$id} for $id\n";
    next;
  }
  $dada->do(
      "UPDATE articles SET parentid=$id_map{$parents{$id}} WHERE objectid=$id");
}

$dada->disconnect;
$active->disconnect;


