#!/usr/bin/perl # convert.pl # # Fills in a dadaimc mysql database by reading a postrgres "active" database. # Also makes soft links from dada's usermedia directory to the media files # in active's upload directory, so that you don't need two copies of all your # media lying around. (Note: won't handle remote storage of media.) # Takes care of image sizing for jpgs and gifs (if you have jpeginfo and # gifsicle), links between related stories, and parenting comments. # Also creates a table to allow redirects from # active articles to dada # articles. Doesn't handle creating thumbnails. # # This script needs DBI and the mysql and pg DBD modules. # # You should only need to change the following two lines to reflect the # location of both software installations my $dada_root = '/data/www/dcdada'; my $active_root = '/data/www/washingtondc/webcast'; my $media_root = '/data/www/dcdada/olduploads'; use DBI; use File::stat; use File::Basename; use strict; my ($dada_db_name,$dada_db_host,$dada_db_user,$dada_db_pass) = map { /=\s*'(.*)'/ } split "\n", `grep g_db_ $dada_root/imc_dbinfo.inc`; my ($active_db_user,$active_db_pass,$active_db_name) = map { /=\s*"(.*)"/ } split "\n", `grep db_setup $active_root/local/db-setup.php3`; # We're going to be making links from subdirectories of here # to places in active's media upload directories # chdir "$dada_root/usermedia" or die $!; my $dada = DBI->connect( "dbi:mysql:$dada_db_name", $dada_db_user, $dada_db_pass, { RaiseError => 1, AutoCommit => 0 }) || die $!; my $active = DBI->connect( "dbi:Pg:dbname=$active_db_name", $active_db_user, $active_db_pass, { RaiseError => 1, AutoCommit => 0 }) || die $!; my $sth = $active->prepare("SELECT * FROM webcast WHERE id > 79525 ORDER by id"); # We add a new table to dada which maps active ids to dada ids, so # create it and prepare the statement to insert rows into it # # { local $dada->{RaiseError}; $dada->do("CREATE TABLE activemap (". "old mediumint(9) NOT NULL default '0',". "new mediumint(9) NOT NULL default '0',". "PRIMARY KEY (old)". ") TYPE=MyISAM COMMENT='Maps old active ids to new dada ids';"); $dada->do("DELETE from activemap"); } my $a_map = $dada->prepare("INSERT INTO activemap(old,new) VALUES (?,?)"); $sth->execute; my (%id_map,%parents); my @field_names = qw(picturebook summary local_interest rating displayable body submitted embedded_media author feature contact_email created_datetime contact_phone contact_address mime_type edit_datetime heading parentid related_url1 media relationships); my @media_fields = qw(created_datetime filesize mime_type mime_class height width orig_filename sequence_number filename parentid artist caption keywords displayable email_verified); my $ins = $dada->prepare("INSERT INTO articles(" . join(",",@field_names). ") VALUES (". "?,"x(scalar(@field_names)-1)."?)"); my $media_ins = $dada->prepare("INSERT INTO media(" . join(",",@media_fields). ") VALUES (". "?,"x(scalar(@media_fields)-1)."?)"); my $fields; # This assumes that your dada dadabase has no existing articles, or # that you'll remove them first. If you want to keep existing articles, # hard code the number that exist, or write a bit of code to count the rows # in the existing articles table # my $id = 1; while ( $fields = $sth->fetchrow_hashref) { my $new_fields = {}; print "Starting record $id...\n" unless $id % 1000; next unless $fields->{created} =~ /^(.*)(\..*)?-(.*)$/; $new_fields->{created_datetime} = $1; next unless $fields->{modified} =~ /^(.*)(\..*)?-(.*)$/; $new_fields->{edit_datetime} = $1; $new_fields->{author} = $fields->{author}; $new_fields->{contact_email} = $fields->{contact}; $new_fields->{contact_phone} = $fields->{phone}; $new_fields->{contact_address} = $fields->{address}; $new_fields->{heading} = $fields->{heading}; $new_fields->{summary} = $fields->{summary}; $new_fields->{body} = $fields->{article}; $new_fields->{mime_type} = $fields->{mime_type}; $new_fields->{submitted} = 1; $new_fields->{displayable} = $fields->{display}; $new_fields->{rating} = 0; $new_fields->{embedded_media} = 0; $new_fields->{feature} = 0; $new_fields->{picturebook} = 0; $new_fields->{local_interest} = 0; $new_fields->{parentid} = 0; $new_fields->{related_url1} = $fields->{link} || ""; $new_fields->{relationships} = ""; $new_fields->{media} = 'text'; # Save the parent id for later; if articles are rethreaded to # later parents, we may not have the parent id under dada yet, so # we need to do parenting in a separate loop # $parents{$id} = $fields->{parent_id} if $fields->{parent_id}; $fields->{html_file} =~ s/\s*$//; if ($fields->{html_file}) { my $file = "$active_root/".$fields->{html_file}; if (-f $file) { undef $/; open FILE, $file || die $!; $new_fields->{body} = ; close FILE; } else { print "Can't find html_file $fields->{html_file} for active id $fields->{id}\n"; } } # Now deal with embedded media # $fields->{linked_file} =~ s/\s*$//; my $file; if ($fields->{linked_file}) { #my $file = "$active_root/".$fields->{linked_file}; #unless (-f $file) { # Sometimes our active database had screwy prefixes for embedded # media, so if I can't find them with the above method, I strip # off the prefix and try to find the base. NOTE: this could # conceivably lead to linking the wrong media into an article, # so disable if you like # my $base = $fields->{linked_file}; $base =~ s/.*\///; $file = "$media_root/$base"; unless (-f $file) { print "Couldn't find media $file $fields->{linked_file} for $fields->{id}\n"; # } } if (-f $file) { my $simple = $fields->{linked_file}; $simple =~ /(\.[^\.]*)$/; my $suf = $1; $simple =~ s/.*\///; die $fields->{mime_type} unless $fields->{mime_type} =~ /(.*)\//; my $class = $1; my $thumb = $file; $thumb =~ s/(\.[^\.]*)$/-thumb$1/; # Make a link in the correct mime-class subdirectory to the original # media file # my @chars = split //, "$id$suf"; my $sum = 0; for my $i (@chars){ $sum += ord($i); } my $filehash = ($sum % 13) + 1; my $basename = basename($file); chdir "./$class/$filehash" || die $!; `ln -s $file $id$suf`; if(-f $thumb && ($thumb ne $file)){ my $thumbbase= basename($thumb); chdir "./thumb" || die $!; `ln -s $media_root/$thumbbase $id$suf`; chdir ".."; } # Now we need to add an entry into the media table # my $mf = {}; $mf->{created_datetime} = $new_fields->{created_datetime}; $mf->{filesize} = stat($file)->size; $mf->{mime_type} = $fields->{mime_type}; $mf->{mime_class} = $class; $mf->{height} = $mf->{width} = 0; $new_fields->{media} .= ",$class"; $mf->{orig_filename} = $simple; $mf->{sequence_number} = 1; $mf->{filename} = $id.$suf; $mf->{parentid} = $id; $mf->{artist} = $fields->{author}; $mf->{caption} = $fields->{summary}; $mf->{keywords} = ""; $mf->{displayable} = $fields->{display}; $mf->{email_verified} = ""; # If it's an image, try to find its size. This is dependent # on two programs I use for jpgs/gifs...if you don't have them # you won't get image sizes, but things should still work. # if ($mf->{mime_class} eq 'image') { if ($suf =~ /(jpg)|(jpeg)/i && `jpeginfo $mf->{filename}` =~ /(\d+) x (\d+)/) { ($mf->{width},$mf->{height}) = ($1,$2); } elsif ($suf =~ /(gif)/i && `gifsicle -I $mf->{filename} | grep logical` =~ /(\d+)x(\d+)/) { ($mf->{width},$mf->{height}) = ($1,$2); } } $media_ins->execute(map $mf->{$_},@media_fields); chdir "../.."; } } # Add the new entry in the articles table # $ins->execute(map $new_fields->{$_},@field_names); # Add an entry to the activemap table # $a_map->execute($fields->{id},$id); # Make a hash of old id to new id # $id_map{$fields->{id}} = $id++; } print "Creating links...\n"; # Walk the weblink table, and build the relationships field # (which dada annoyingly stores as a serialized php array) # for my $id (keys %id_map) { my $links_stmt = $active->prepare("SELECT * FROM weblink WHERE fromlink = $id"); $links_stmt->execute; my ($links, @links); while ( $links = $links_stmt->fetchrow_hashref) { if (exists $id_map{$links->{tolink}}) { push @links, $id_map{$links->{tolink}}; $dada->do( "UPDATE media SET parentid=$id_map{$id} where parentid = $id_map{$links->{tolink}}"); } else { print "Bad link $links->{tolink}\n"; } } if (@links) { # The format of the relationships field is # a:[length]:(i:[index];s:4:"[related-id]";[...]} my $str = ""; my $cnt = 0; map { $str .= "i:".($cnt++).";s:(strlen($_)):\"$_\";" } @links; $str = "a:".scalar(@links).':{'.$str.'}'; $dada->do( "UPDATE articles SET relationships='$str' WHERE objectid=$id_map{$id}"); } } print "Linking parents...\n"; # Walk the parent map, and update the parentid field now that we know # the dadaimc objectid for every active id # for my $id (keys %parents) { unless (exists $id_map{$parents{$id}}) { print "Can't find parent $parents{$id} for $id\n"; next; } $dada->do( "UPDATE articles SET parentid=$id_map{$parents{$id}} WHERE objectid=$id"); } $dada->disconnect; $active->disconnect;