#!/usr/bin/perl # this perl script talks to a dada db(mysql) # in order to write out postgresql meant to populate a mir db # the fact that it is one big perl script is intended # primarily to annoy Zapata :) use DBI; use strict; ############CONFIG HERE######################### my $mysql_host="localhost"; my $mysql_port=""; my $mysql_user="blah"; my $mysql_pass="blah"; my $mysql_database="blah"; my $limit = ""; # set to nothing if it's the real thing, # otherwise we use this to debug quickly my $pgsql_host="localhost"; my $pgsql_port=""; my $pgsql_user="foo"; my $pgsql_pass="bar"; my $pgsql_database="baz"; my $dadamirmapfile="dadamirmap.txt"; #file in which we will write out the mapping between dada ids and mir path+ids. this will get plugged into apache mod_rewrite, see http://httpd.apache.org/docs/mod/mod_rewrite.html#RewriteMap for details my %articletypes = ( dadanewswire => 1, dadafeature => 2, otherpress => 11 ); my $dadacategorymap = "dadacategorymap.txt"; my $dadaregionmap = "dadaregionmap.txt"; my $dadasectionmap = "dadasectionmap.txt"; my $produceMaps = 1; my $op_mediafolder = 7; my $image_mediatype = 5; ###########END CONFIG########################### ##connect to dada db my $mysql_dsn = "DBI:mysql:database=$mysql_database;host=$mysql_host;port=$mysql_port"; my $mysql_dbh = DBI->connect($mysql_dsn, $mysql_user, $mysql_pass) or die "can't connect to dada db"; ##connect to mir db my $pgsql_dsn = "DBI:Pg:dbname=$pgsql_database"; $pgsql_dsn = "DBI:Pg:dbname=$pgsql_database;host=$pgsql_host;port=$pgsql_port" if ($pgsql_host && $pgsql_port); my $pgsql_dbh = DBI->connect($pgsql_dsn, $pgsql_user, $pgsql_pass) or die "can't connect to mir db"; ##get topics my @sections=('News','Announcement','Commentary','Review','Interview'); my @categories; my @regions=('local','elsewhere'); my %category2topic_map; #maps dada topics to mir topics for use later my %region2topic_map; #maps dada regions to mir topics for use later my %section2topic_map; #maps dada sections to mir topics for use later # if($produceMaps) { # # &generateMap("categories","dada_category", $dadacategorymap); # # } print "*"x15 . "doing dada sections" . "*"x15 . "\n"; foreach my $sec (@sections){ my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')"); $psth1->execute(); my $mir_topic_id=$psth1->fetchrow_array(); $psth1->finish(); my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,filename,archiv_url) values (?,?,?,'dada_section')"); $psth2->execute($mir_topic_id,$sec,lc($sec)); $section2topic_map{$sec}=$mir_topic_id; } print "*"x15 . "doing dada regions" . "*"x15 . "\n"; foreach my $reg (@regions){ my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')"); $psth1->execute(); my $mir_topic_id=$psth1->fetchrow_array(); $psth1->finish(); my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,filename,archiv_url) values (?,?,?,'dada_region')"); $psth2->execute($mir_topic_id,$reg,lc($reg)); $region2topic_map{$reg}=$mir_topic_id; } print "*"x15 . "doing dada categories" . "*"x15 . "\n"; my $sth=$mysql_dbh->prepare("select * from categories"); $sth->execute(); while (my $cat=$sth->fetchrow_hashref()){ my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')"); $psth1->execute(); my $mir_topic_id=$psth1->fetchrow_array(); $psth1->finish(); my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,description,filename,archiv_url) values (?,?,?,?,'dada_category')"); $psth2->execute($mir_topic_id,$cat->{catname},$cat->{description},$cat->{shortname}); $category2topic_map{$cat->{objectid}}=$mir_topic_id; } open DADAMIRMAP,">$dadamirmapfile" or die "couldn't open dadamirmapfile $dadamirmapfile"; &insertArticles('articles', 'Article', 0); #&insertArticles('otherpress','OtherPress', 1); close DADAMIRMAP; sub generateMap { my($table, $category, $filename); ($table, $category, $filename) = @_; print "*"x15 . "doing dada " . $category . "*"x15 . "\n"; my $sth=$mysql_dbh->prepare("select * from $table"); $sth->execute(); open MAPFILE, ">$filename" or die "couldn't open filename: $filename"; while (my $cat=$sth->fetchrow_hashref()){ my $psth1=$pgsql_dbh->prepare_cached("select nextval('topic_id_seq')"); $psth1->execute(); my $mir_topic_id=$psth1->fetchrow_array(); $psth1->finish(); my $psth2=$pgsql_dbh->prepare_cached("insert into topic (id,title,description,filename,archiv_url) values (?,?,?,?,'$category')"); $psth2->execute($mir_topic_id,$cat->{catname},$cat->{description},$cat->{shortname}); print MAPFILE "" . $cat->{objectid} . " " . $mir_topic_id . "\n"; } close MAPFILE; } sub insertArticles { my($table, $articleType, $isOtherPress); ($table, $articleType, $isOtherPress) = @_; #get toplevel articles and grab comments,media for each one print "*"x15 . "doing dada $articleType" . "*"x15 . "\n"; my $extraSQL = ""; if(!$isOtherPress){ $extraSQL = "and displayable=1"; } my $sth=$mysql_dbh->prepare("select * from $table where parentid=0 " . $extraSQL . " and deleted=0 $limit"); $sth->execute(); while (my $art=$sth->fetchrow_hashref()){ my $psth1=$pgsql_dbh->prepare_cached("select nextval('media_id_seq')"); $psth1->execute(); my $mir_content_id=$psth1->fetchrow_array(); $psth1->finish(); #establish dadamir mapping $art->{created_datetime} =~ /^(\d\d\d\d)-(\d\d)-(\d\d)/; my $year=$1; my $month=$2; my $day=$3; my $dadamod="newswire"; if($isOtherPress){ $dadamod = "otherpress"; } if ($art->{feature}){ $dadamod="feature"; } if(!$isOtherPress){ #grab active article if there is one and put it in the mapping file my $sthActive = $mysql_dbh->prepare("select * from activemap where new = ?"); $sthActive->execute($art->{objectid}); if(my $active=$sthActive->fetchrow_hashref()){ print DADAMIRMAP "news/front.php3?article_id=$active->{old} /en/$year/$month/$mir_content_id\n"; } $sthActive->finish(); } print DADAMIRMAP "/$dadamod/display/$art->{objectid} /en/$year/$month/$mir_content_id\n"; my $psth; if($isOtherPress){ $psth = $pgsql_dbh->prepare_cached(qq{INSERT INTO content (id,title,date,webdb_create,creator, description,to_article_type,is_published,to_publisher) values (?,?,?,?,?,?,?,'t',1)}); } else { $psth = $pgsql_dbh->prepare_cached(qq{INSERT INTO content (id,title,date,webdb_create,creator,creator_email,creator_phone,creator_address,description,content_data,comment,to_article_type,is_published,to_publisher) values (?,?,?,?,?,?,?,?,?,?,?,?,'t',1)}); } my $date="$year$month$day"; my $articletype=$articletypes{dadanewswire}; $articletype = $articletypes{dadafeature} if $art->{feature}; my $is_html = 0; $is_html == 1 if $art->{mime_type} eq 'text/html'; my $content_data=$art->{body}; if ($is_html){ if (my $rel1 = $art->{related_url1}){ $content_data .= "
$rel1"; } if (my $rel2 = $art->{related_url2}){ $content_data .= "
$rel2";} } else{ if (my $rel1 = $art->{related_url1}){ $content_data .= "\n$rel1";} if (my $rel2 = $art->{related_url2}){ $content_data .= "\n$rel2";} } if($isOtherPress){ ##print "WE ARE INSERTING OTHERPRESS"; $psth->execute($mir_content_id,$art->{heading},$date, $art->{created_datetime},$art->{author},$art->{summary},$articletype); } else{ my $a_author = substr($art->{author},0,80); my $a_email = substr($art->{contact_email},0,80); my $a_address = substr($art->{contact_address},0,80); ##print "OOPS"; $psth->execute($mir_content_id,$art->{heading},$date, $art->{created_datetime},$a_author,$a_email,$art->{contact_phone},$a_address,$art->{summary},$content_data,"License:".$art->{license}."\nRating:".$art->{rating},$articletype); } #do the topic mappings #categories my $sth5=$mysql_dbh->prepare_cached("select * from hash_category where ref_id=? and ref_class='".$articleType."'"); $sth5->execute($art->{objectid}); while (my $cat=$sth5->fetchrow_hashref()){ my $mirtopic=$category2topic_map{$cat->{category_id}}; my $psth6=$pgsql_dbh->prepare_cached("insert into content_x_topic (content_id,topic_id) values (?,?)"); $psth6->execute($mir_content_id,$mirtopic); } #sections my $mirtopic4section=$section2topic_map{$art->{section}}; my $psth6=$pgsql_dbh->prepare_cached("insert into content_x_topic (content_id,topic_id) values (?,?)"); $psth6->execute($mir_content_id,$mirtopic4section); #regions my $region="elsewhere"; $region="local" if $art->{local_interest}; my $mirtopic4region=$region2topic_map{$region}; my $psth7=$pgsql_dbh->prepare_cached("insert into content_x_topic (content_id,topic_id) values (?,?)"); $psth7->execute($mir_content_id,$mirtopic4region); #now do the comments my $sth2=$mysql_dbh->prepare_cached("select * from ".$table." where parentid=? ".$extraSQL . " and deleted=0"); $sth2->execute($art->{objectid}); while (my $comment = $sth2->fetchrow_hashref()){ my $psth3=$pgsql_dbh->prepare_cached("select nextval('comment_id_seq')"); $psth3->execute(); my $mir_comment_id=$psth3->fetchrow_array(); $psth3->finish(); my $psth4=$pgsql_dbh->prepare_cached("insert into comment (id,to_media,title,creator,description,webdb_create,is_html,is_published) values (?,?,?,?,?,?,?,'1')"); #my $c_is_html=0; #$c_is_html = 1 if $comment->{mime_type} eq 'text/html'; my $c_author = substr($comment->{author},0,80); my $c_title = substr($comment->{heading},0,80); my $c_is_html='false'; $c_is_html = 'true' if $comment->{mime_type} eq 'text/html'; $psth4->execute($mir_comment_id,$mir_content_id,$c_title,$c_author,$comment->{summary}.$comment->{body},$comment->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $comment->{created_datetime},$c_is_html); } #now do the media items for this article my $msth= $mysql_dbh->prepare_cached("select * from media where parentid=? and displayable=1 and deleted=0"); $msth->execute($art->{objectid}); #for each, insert into appropriate media tables while(my $media = $msth->fetchrow_hashref()){ my $psth9=$pgsql_dbh->prepare_cached("select nextval('media_id_seq')"); $psth9->execute(); my $mir_media_id=$psth9->fetchrow_array(); $psth9->finish(); #cheap hack, but i'm tired my $created_media = 0; #parse created datetime to be used as date my $date_created = $media->{created_datetime}; $date_created =~ s/(.+)-(.+)-(.+)\s.*/$1$2$3/; my $caption = substr($media->{caption},0, 255); my $artist = substr($media->{artist},0, 80); my $main_url = substr($media->{related_url1},0, 255); my $email = substr($media->{email_verified},0, 80); #get subdir my $subdir = &get_subdir($media->{filename}); my $publish_path = "/".$media->{mime_class}."/".$subdir."/".$media->{filename}; #check to see if its media/video/audio if($media->{mime_class} eq "image") { my $psth10 = $pgsql_dbh->prepare_cached("insert into images (to_publisher,id, title , date, creator, creator_main_url,creator_email, publish_server, publish_path, is_published, is_produced, to_media_folder, to_media_type, icon_is_produced, icon_path, size, img_width, img_height,webdb_create) values ('0',?,?,?,?,?,?,'/usermedia',?, ?,'0', ?, ?, '0', ?, ?, ?, ?, ?)"); my $icon_path = "/".$media->{mime_class}."/".$subdir."/thumb/".$media->{filename}; $psth10->execute($mir_media_id, $caption, $date_created, $artist, $main_url, $email, $publish_path, $media->{displayable}, $op_mediafolder, $image_mediatype, $icon_path, $media->{filesize}, $media->{width}, $media->{height}, $media->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $media->{created_datetime}); $created_media = 1; $psth10->finish(); } if($media->{mime_class} eq "audio") { my $audio_mediatype = 4; if($media->{mime_type} eq "audio/x-pn-realaudio"){ $audio_mediatype = 12; } elsif ($media->{mime_type} eq "audio/mpeg") { $audio_mediatype = 14; } elsif ($media->{mime_type} eq "audio/x-mp3") { $audio_mediatype = 13; } my $psth10 = $pgsql_dbh->prepare_cached("insert into audio (to_publisher, id, title, date, creator, creator_main_url, creator_email, publish_server, publish_path, is_published, is_produced, to_media_folder, to_media_type, size, webdb_create) values ('0', ?,?,?,?,?,?, '/usermedia',?, ?, '0', ?, ?, ?, ?)"); $psth10->execute($mir_media_id, $caption, $date_created, $artist, $main_url, $email, $publish_path, $media->{displayable}, $op_mediafolder, $audio_mediatype,$media->{filesize}, $media->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $media->{created_datetime}); $created_media = 1; $psth10->finish(); } if($media->{mime_class} eq "video") { my $video_mediatype = 20; if($media->{mime_type} eq "video/quicktime"){ $video_mediatype = 8; } elsif ($media->{mime_type} eq "video/x-msvideo") { $video_mediatype = 9; } elsif ($media->{mime_type} eq "video/x-ms-wmv") { $video_mediatype = 9; } if($media->{mime_type} eq "video/x-mpeg"){ $video_mediatype = 7; } elsif ($media->{mime_type} eq "video/x-ms-asf") { $video_mediatype = 16; } elsif ($media->{mime_type} eq "video/mpeg") { $video_mediatype = 7; } my $psth10 = $pgsql_dbh->prepare_cached("insert into video (to_publisher, id, title, date, creator, creator_main_url, creator_email, publish_server, publish_path, is_published, is_produced, to_media_folder, to_media_type, size, webdb_create) values ('0', ?,?,?,?,?,?, '/usermedia',?, ?, '0', ?, ?, ?, ?)"); $psth10->execute($mir_media_id, $caption, $date_created, $artist, $main_url, $email, $publish_path, $media->{displayable}, $op_mediafolder, $video_mediatype,$media->{filesize}, $media->{created_datetime} eq '0000-00-00 00:00:00' ? '1969-01-01 12:00:01' : $media->{created_datetime}); $psth10->finish(); $created_media = 1; } if($created_media == 1){ #now insert into content_x_media table my $psth11=$pgsql_dbh->prepare_cached("insert into content_x_media (content_id,media_id) values (?,?)"); $psth11->execute($mir_content_id,$mir_media_id); $psth11->finish(); print "created media id $mir_media_id: $media->{filename}\n"; } } $msth->finish(); print "processed dada article ". $art->{objectid} ."\n"; } } #function to get the subdir. I have no idea why its done like this sub get_subdir { my $filename; ($filename) = @_; my $total = 0; my $n; for($n = 0; $n < length($filename); $n++) { $total += ord(substr($filename, $n, 1)); } $total = ($total % 13) + 1; return $total; } #grab orphan media #deal with usermedia #grab otherpress articles, recurse through comments # grab razorwires? -- actually you can't get to them from dada so why bother? # deal with activemap for the front.php3 servlet # make servlets/redirects for # media/all/display/? # newswire/display/97074/index.php # feature/display/9704/index.php # otherpress/display/9704/index.php ####NOTES # need article type "dada_newswire" # need article type "dada_feature" # need article type "dada_otherpress" # need article type "dada_orphanmedia" # should keep old dada id in the article db somewhere