#!/usr/bin/perl
# this perl script talks to a dada db(mysql)
# in order to write out postgresql meant to populate a mir db
# the fact that it is one big perl script is intended
# primarily to annoy Zapata :)
use DBI;
use strict;
############CONFIG HERE#########################
my $mysql_host="localhost";
my $mysql_port="";
my $mysql_user="blah";
my $mysql_pass="blah";
my $mysql_database="blah";
my $limit = ""; # set to nothing if it's the real thing,
# otherwise we use this to debug quickly
my $pgsql_host="localhost";
my $pgsql_port="";
my $pgsql_user="foo";
my $pgsql_pass="bar";
my $pgsql_database="baz";
my $dadamirmapfile="dadamirmap.txt"; #file in which we will write out the mapping between dada ids and mir path+ids. this will get plugged into apache mod_rewrite, see http://httpd.apache.org/docs/mod/mod_rewrite.html#RewriteMap for details
my %articletypes = (
dadanewswire => 1,
dadafeature => 2,
otherpress => 11
);
my $dadacategorymap = "dadacategorymap.txt";
my $dadaregionmap = "dadaregionmap.txt";
my $dadasectionmap = "dadasectionmap.txt";
my $produceMaps = 1;
my $op_mediafolder = 7;
my $image_mediatype = 5;
###########END CONFIG###########################
##connect to dada db
my $mysql_dsn = "DBI:mysql:database=$mysql_database;host=$mysql_host;port=$mysql_port";
my $mysql_dbh = DBI->connect($mysql_dsn, $mysql_user, $mysql_pass) or die "can't connect to dada db";
##connect to mir db
my $pgsql_dsn = "DBI:Pg:dbname=$pgsql_database";
$pgsql_dsn = "DBI:Pg:dbname=$pgsql_database;host=$pgsql_host;port=$pgsql_port" if ($pgsql_host && $pgsql_port);
my $pgsql_dbh = DBI->connect($pgsql_dsn, $pgsql_user, $pgsql_pass) or die "can't connect to mir db";
##get topics
my @sections=('News','Announcement','Commentary','Review','Interview');
my @categories;
my @regions=('local','elsewhere');
my %category2topic_map; #maps dada topics to mir topics for use later
my %region2topic_map; #maps dada regions to mir topics for use later
my %section2topic_map; #maps dada sections to mir topics for use later
# if($produceMaps) {
#
# &generateMap("categories","dada_category", $dadacategorymap);
#
# }
print "*"x15 . "doing dada sections" . "*"x15 . "\n";
foreach my $sec (@sections){
my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')");
$psth1->execute();
my $mir_topic_id=$psth1->fetchrow_array();
$psth1->finish();
my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,filename,archiv_url) values (?,?,?,'dada_section')");
$psth2->execute($mir_topic_id,$sec,lc($sec));
$section2topic_map{$sec}=$mir_topic_id;
}
print "*"x15 . "doing dada regions" . "*"x15 . "\n";
foreach my $reg (@regions){
my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')");
$psth1->execute();
my $mir_topic_id=$psth1->fetchrow_array();
$psth1->finish();
my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,filename,archiv_url) values (?,?,?,'dada_region')");
$psth2->execute($mir_topic_id,$reg,lc($reg));
$region2topic_map{$reg}=$mir_topic_id;
}
print "*"x15 . "doing dada categories" . "*"x15 . "\n";
my $sth=$mysql_dbh->prepare("select * from categories");
$sth->execute();
while (my $cat=$sth->fetchrow_hashref()){
my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')");
$psth1->execute();
my $mir_topic_id=$psth1->fetchrow_array();
$psth1->finish();
my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,description,filename,archiv_url) values (?,?,?,?,'dada_category')");
$psth2->execute($mir_topic_id,$cat->{catname},$cat->{description},$cat->{shortname});
$category2topic_map{$cat->{objectid}}=$mir_topic_id;
}
open DADAMIRMAP,">$dadamirmapfile" or die "couldn't open dadamirmapfile $dadamirmapfile";
&insertArticles('articles', 'Article', 0);
#&insertArticles('otherpress','OtherPress', 1);
close DADAMIRMAP;
sub generateMap {
my($table, $category, $filename);
($table, $category, $filename) = @_;
print "*"x15 . "doing dada " . $category . "*"x15 . "\n";
my $sth=$mysql_dbh->prepare("select * from $table");
$sth->execute();
open MAPFILE, ">$filename" or die "couldn't open filename: $filename";
while (my $cat=$sth->fetchrow_hashref()){
my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')");
$psth1->execute();
my $mir_topic_id=$psth1->fetchrow_array();
$psth1->finish();
my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,description,filename,archiv_url) values (?,?,?,?,'$category')");
$psth2->execute($mir_topic_id,$cat->{catname},$cat->{description},$cat->{shortname});
print MAPFILE "" . $cat->{objectid} . " " . $mir_topic_id . "\n";
}
close MAPFILE;
}
sub insertArticles {
my($table, $articleType, $isOtherPress);
($table, $articleType, $isOtherPress) = @_;
#get toplevel articles and grab comments,media for each one
print "*"x15 . "doing dada $articleType" . "*"x15 . "\n";
my $extraSQL = "";
if(!$isOtherPress){
$extraSQL = "and displayable=1";
}
my $sth=$mysql_dbh->prepare("select * from $table where parentid=0 " . $extraSQL . " and deleted=0 $limit");
$sth->execute();
while (my $art=$sth->fetchrow_hashref()){
my $psth1=$pgsql_dbh->prepare_cached("select nextval('media_id_seq')");
$psth1->execute();
my $mir_content_id=$psth1->fetchrow_array();
$psth1->finish();
#establish dadamir mapping
$art->{created_datetime} =~ /^(\d\d\d\d)-(\d\d)-(\d\d)/;
my $year=$1;
my $month=$2;
my $day=$3;
my $dadamod="newswire";
if($isOtherPress){
$dadamod = "otherpress";
}
if ($art->{feature}){
$dadamod="feature";
}
if(!$isOtherPress){
#grab active article if there is one and put it in the mapping file
my $sthActive = $mysql_dbh->prepare("select * from activemap where new = ?");
$sthActive->execute($art->{objectid});
if(my $active=$sthActive->fetchrow_hashref()){
print DADAMIRMAP "news/front.php3?article_id=$active->{old} /en/$year/$month/$mir_content_id\n";
}
$sthActive->finish();
}
print DADAMIRMAP "/$dadamod/display/$art->{objectid} /en/$year/$month/$mir_content_id\n";
my $psth;
if($isOtherPress){
$psth = $pgsql_dbh->prepare_cached(qq{INSERT INTO content (id,title,date,webdb_create,creator, description,to_article_type,is_published,to_publisher) values (?,?,?,?,?,?,?,'t',1)});
}
else {
$psth = $pgsql_dbh->prepare_cached(qq{INSERT INTO content (id,title,date,webdb_create,creator,creator_email,creator_phone,creator_address,description,content_data,comment,to_article_type,is_published,to_publisher) values (?,?,?,?,?,?,?,?,?,?,?,?,'t',1)});
}
my $date="$year$month$day";
my $articletype=$articletypes{dadanewswire};
$articletype = $articletypes{dadafeature} if $art->{feature};
my $is_html = 0;
$is_html == 1 if $art->{mime_type} eq 'text/html';
my $content_data=$art->{body};
if ($is_html){
if (my $rel1 = $art->{related_url1}){
$content_data .= "
$rel1";
}
if (my $rel2 = $art->{related_url2}){
$content_data .= "
$rel2";}
}
else{
if (my $rel1 = $art->{related_url1}){
$content_data .= "\n$rel1";}
if (my $rel2 = $art->{related_url2}){
$content_data .= "\n$rel2";}
}
if($isOtherPress){
##print "WE ARE INSERTING OTHERPRESS";
$psth->execute($mir_content_id,$art->{heading},$date, $art->{created_datetime},$art->{author},$art->{summary},$articletype);
}
else{
my $a_author = substr($art->{author},0,80);
my $a_email = substr($art->{contact_email},0,80);
my $a_address = substr($art->{contact_address},0,80);
##print "OOPS";
$psth->execute($mir_content_id,$art->{heading},$date, $art->{created_datetime},$a_author,$a_email,$art->{contact_phone},$a_address,$art->{summary},$content_data,"License:".$art->{license}."\nRating:".$art->{rating},$articletype);
}
#do the topic mappings
#categories
my $sth5=$mysql_dbh->prepare_cached("select * from hash_category where ref_id=? and ref_class='".$articleType."'");
$sth5->execute($art->{objectid});
while (my $cat=$sth5->fetchrow_hashref()){
my $mirtopic=$category2topic_map{$cat->{category_id}};
my $psth6=$pgsql_dbh->prepare_cached("insert into content_x_topic (content_id,topic_id) values (?,?)");
$psth6->execute($mir_content_id,$mirtopic);
}
#sections
my $mirtopic4section=$section2topic_map{$art->{section}};
my $psth6=$pgsql_dbh->prepare_cached("insert into content_x_topic (content_id,topic_id) values (?,?)");
$psth6->execute($mir_content_id,$mirtopic4section);
#regions
my $region="elsewhere";
$region="local" if $art->{local_interest};
my $mirtopic4region=$region2topic_map{$region};
my $psth7=$pgsql_dbh->prepare_cached("insert into content_x_topic (content_id,topic_id) values (?,?)");
$psth7->execute($mir_content_id,$mirtopic4region);
#now do the comments
my $sth2=$mysql_dbh->prepare_cached("select * from ".$table." where parentid=? ".$extraSQL . " and deleted=0");
$sth2->execute($art->{objectid});
while (my $comment = $sth2->fetchrow_hashref()){
my $psth3=$pgsql_dbh->prepare_cached("select nextval('comment_id_seq')");
$psth3->execute();
my $mir_comment_id=$psth3->fetchrow_array();
$psth3->finish();
my $psth4=$pgsql_dbh->prepare_cached("insert into comment (id,to_media,title,creator,description,webdb_create,is_html,is_published) values (?,?,?,?,?,?,?,'1')");
#my $c_is_html=0;
#$c_is_html = 1 if $comment->{mime_type} eq 'text/html';
my $c_author = substr($comment->{author},0,80);
my $c_title = substr($comment->{heading},0,80);
my $c_is_html='false';
$c_is_html = 'true' if $comment->{mime_type} eq 'text/html';
$psth4->execute($mir_comment_id,$mir_content_id,$c_title,$c_author,$comment->{summary}.$comment->{body},$comment->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $comment->{created_datetime},$c_is_html);
}
#now do the media items for this article
my $msth= $mysql_dbh->prepare_cached("select * from media where parentid=? and displayable=1 and deleted=0");
$msth->execute($art->{objectid});
#for each, insert into appropriate media tables
while(my $media = $msth->fetchrow_hashref()){
my $psth9=$pgsql_dbh->prepare_cached("select nextval('media_id_seq')");
$psth9->execute();
my $mir_media_id=$psth9->fetchrow_array();
$psth9->finish();
#cheap hack, but i'm tired
my $created_media = 0;
#parse created datetime to be used as date
my $date_created = $media->{created_datetime};
$date_created =~ s/(.+)-(.+)-(.+)\s.*/$1$2$3/;
my $caption = substr($media->{caption},0, 255);
my $artist = substr($media->{artist},0, 80);
my $main_url = substr($media->{related_url1},0, 255);
my $email = substr($media->{email_verified},0, 80);
#get subdir
my $subdir = &get_subdir($media->{filename});
my $publish_path = "/".$media->{mime_class}."/".$subdir."/".$media->{filename};
#check to see if its media/video/audio
if($media->{mime_class} eq "image") {
my $psth10 = $pgsql_dbh->prepare_cached("insert into images (to_publisher,id, title , date, creator, creator_main_url,creator_email, publish_server, publish_path,
is_published, is_produced, to_media_folder, to_media_type, icon_is_produced, icon_path,
size, img_width, img_height,webdb_create)
values ('0',?,?,?,?,?,?,'/usermedia',?,
?,'0', ?, ?, '0', ?,
?, ?, ?, ?)");
my $icon_path = "/".$media->{mime_class}."/".$subdir."/thumb/".$media->{filename};
$psth10->execute($mir_media_id, $caption, $date_created, $artist, $main_url, $email, $publish_path, $media->{displayable}, $op_mediafolder, $image_mediatype,
$icon_path, $media->{filesize}, $media->{width}, $media->{height}, $media->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $media->{created_datetime});
$created_media = 1;
$psth10->finish();
}
if($media->{mime_class} eq "audio") {
my $audio_mediatype = 4;
if($media->{mime_type} eq "audio/x-pn-realaudio"){
$audio_mediatype = 12;
}
elsif ($media->{mime_type} eq "audio/mpeg") {
$audio_mediatype = 14;
}
elsif ($media->{mime_type} eq "audio/x-mp3") {
$audio_mediatype = 13;
}
my $psth10 = $pgsql_dbh->prepare_cached("insert into audio (to_publisher, id, title, date, creator, creator_main_url, creator_email, publish_server, publish_path,
is_published, is_produced, to_media_folder, to_media_type, size, webdb_create)
values
('0', ?,?,?,?,?,?, '/usermedia',?,
?, '0', ?, ?, ?, ?)");
$psth10->execute($mir_media_id, $caption, $date_created, $artist, $main_url, $email, $publish_path, $media->{displayable}, $op_mediafolder, $audio_mediatype,$media->{filesize}, $media->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $media->{created_datetime});
$created_media = 1;
$psth10->finish();
}
if($media->{mime_class} eq "video") {
my $video_mediatype = 20;
if($media->{mime_type} eq "video/quicktime"){
$video_mediatype = 8;
}
elsif ($media->{mime_type} eq "video/x-msvideo") {
$video_mediatype = 9;
}
elsif ($media->{mime_type} eq "video/x-ms-wmv") {
$video_mediatype = 9;
}
if($media->{mime_type} eq "video/x-mpeg"){
$video_mediatype = 7;
}
elsif ($media->{mime_type} eq "video/x-ms-asf") {
$video_mediatype = 16;
}
elsif ($media->{mime_type} eq "video/mpeg") {
$video_mediatype = 7;
}
my $psth10 = $pgsql_dbh->prepare_cached("insert into video (to_publisher, id, title, date, creator, creator_main_url, creator_email, publish_server, publish_path,
is_published, is_produced, to_media_folder, to_media_type, size, webdb_create)
values
('0', ?,?,?,?,?,?, '/usermedia',?,
?, '0', ?, ?, ?, ?)");
$psth10->execute($mir_media_id, $caption, $date_created, $artist, $main_url, $email, $publish_path, $media->{displayable}, $op_mediafolder, $video_mediatype,$media->{filesize}, $media->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $media->{created_datetime});
$psth10->finish();
$created_media = 1;
}
if($created_media == 1){
#now insert into content_x_media table
my $psth11=$pgsql_dbh->prepare_cached("insert into content_x_media (content_id,media_id) values (?,?)");
$psth11->execute($mir_content_id,$mir_media_id);
$psth11->finish();
print "created media id $mir_media_id: $media->{filename}\n";
}
}
$msth->finish();
print "processed dada article ". $art->{objectid} ."\n";
}
}
#function to get the subdir. I have no idea why its done like this
sub get_subdir {
my $filename;
($filename) = @_;
my $total = 0;
my $n;
for($n = 0; $n < length($filename); $n++) {
$total += ord(substr($filename, $n, 1));
}
$total = ($total % 13) + 1;
return $total;
}
#grab orphan media
#deal with usermedia
#grab otherpress articles, recurse through comments
# grab razorwires? -- actually you can't get to them from dada so why bother?
# deal with activemap for the front.php3 servlet
# make servlets/redirects for
# media/all/display/?
# newswire/display/97074/index.php
# feature/display/9704/index.php
# otherpress/display/9704/index.php
####NOTES
# need article type "dada_newswire"
# need article type "dada_feature"
# need article type "dada_otherpress"
# need article type "dada_orphanmedia"
# should keep old dada id in the article db somewhere