You are here: Foswiki>Devel Web>SlashToMir (26 Jun 2005, AlsteR)Edit Attach
#This is the source code for slash2mir.pl, which we used to generate our Mir content.

#! /usr/bin/perl

use strict;
use DBI;
use URI::URL;
use Encode;
my ( $mir, $slash, %users );
#no limit!
my $limit; # = 100;
my ( %mirmedia );
my $unknownmimetype;

sub main {
   $slash = dbconnect ( "dbi:mysql:slash", ..., ... );
   $mir = dbconnect ( "dbi:Pg:dbname=philly", ..., ... );

#   moveusers ();

   movefeatures ();
   movemedia ();
   movestories ();

   $slash->disconnect() if $slash;
   $mir->disconnect() if $mir;
}

sub dbconnect {
   my ( $db, $u, $p ) = @_;
   my $conn;
   print "connecting to $db\n";
   if ( $conn = DBI->connect ( $db, $u, $p ) ) {
      print "connected\n";
   } else {
      warn "could not connect.\n";
   }
   
   return $conn;
}

sub die2 {
   if ($slash) {
      $slash->disconnect();
      print "slash disconnected\n";
   }
   if ($mir) {
      $mir->disconnect();
      print "mir disconnected\n";
   }
   die; 
}

sub movestories {
   #we are going to open up a big cursor. 
   my $sql = <<EOT;
SELECT DISTINCT stories.*, 
       story_info.*,
       DATE_FORMAT(time, '%Y%m%d') AS format_date 
  FROM stories 
       INNER JOIN story_info ON stories.sid = story_info.sid 
--       INNER JOIN comments ON comments.sid = stories.sid
 WHERE stories.displaystatus = 0
EOT

   $sql .= " LIMIT $limit" if $limit; 
   my $c = $slash->prepare_cached ( $sql ) ;
   my ( $story, $i, $media_id );
   $c->execute () or return 0;
   while ( $story = $c->fetchrow_hashref ) {
      $i ++;
      print "$i: moving story ".$story->{sid}." (".$story->{title}.")\n";
      $story->{to_article_type} = 1;
      my $content_id = insertcontent ( $story );
      # we are also going to create a row that associates the sid and 

   ## We're going to create a "Rewrite Map", which is just a text file
   ## with the old id and the new id listed per line.  We can use that
   ## in our apache rewrite directives. 
   my $original_sid = $story->{sid};
        # rolando, 28 mar 2005: the next six lines were josh's from 26-mar. i changed them slightly below to get some output.
   ## We only want the story id (the unique number) not the date part.
   ## We'll map the date part through mod_rewrite.
   #my $original_sid =~ /\/([^\/]*)$/;
   #my $sid_without_date = $1;
         #open (REWRITE_MAP, ">slash2mir_rewrite_map");
   #print REWRITE_MAP $sid_without_date ."\t".$content_id."\n";
         open (REWRITE_MAP, ">>slash2mir_rewrite_map");
   print REWRITE_MAP $original_sid ."\t".$content_id."\n";
   close (REWRITE_MAP);
      movecomments ( $story->{sid}, $content_id );
      moveratings ( $story->{sid}, $content_id );
      movestorymedia ( $story->{sid}, $content_id );
   }
   $c->finish;
}

sub movefeatures {
   my $sql = <<EOT;
SELECT BODY AS bodytext, 
       SUB_TITLE AS title, 
       DATE_FORMAT( timestamp, '%Y%m%d' ) AS format_date,
       CONCAT( 'feature ', ID ) AS sid,
       IMAGE_URL as location,
       IMAGE_URL as thumbnail,
       IMAGE_CAPTION as caption
  FROM FEATURES
EOT
   $sql .= "LIMIT $limit" if $limit;

   my $c = $slash->prepare_cached ( $sql );
   my ( $feature );
   $c->execute () or return 0;
   while ( $feature = $c->fetchrow_hashref ) {
      print "moving feature ".$feature->{ID}." (".$feature->{title}.")\n";
      $feature->{author} = 'Philly IMC Web Editorial Collective';
      $feature->{introtext} = $feature->{bodytext};
      $feature->{to_article_type} = 2;
      my $media_id = insertcontent ( $feature ); # need fo figure out how to make this look like a story. 
      if ( $feature->{location} ) {
         $feature->{mime_type} = "image/jpeg";
         my $image_id = insertmedia ( $feature );
         insertcontentxmedia ( $media_id, $image_id );
      }
   }
   $c->finish;
}

sub moveusers {
   my $sql = <<EOT;
SELECT users.*, 
       authors.aid 
  FROM users 
       LEFT OUTER JOIN authors ON users.nickname = authors.name "
EOT

   my $c = $slash->prepare_cached ( $sql );
   my ( $user, $media_id );
   $c->execute () or return 0;
   while ( $user = $c->fetchrow_hashref ) {
      $users{$user->{uid}} = $user->{nickname};
      print "moving user ".$user->{nickname}.( $user->{aid} ? " (admin)" : "" )."\n";
      my $media_id = insertuser ( $user );
   }
   $c->finish;
}

sub movecomments {
   my ( $sid, $media_id ) = @_;
   my $sql = <<EOT;
SELECT * 
  FROM comments 
 WHERE sid = '$sid'
EOT

   my ( $comment, $mircommentid );

   my $c = $slash->prepare_cached ( $sql );
   $c->execute () or return 0;
   while ( $comment = $c->fetchrow_hashref ) {
      print " moving comment (".$comment->{cid}.")\n";
      my $mircommentid = insertcomment ( $media_id, $comment );
      print " comment not moved.\n" unless $mircommentid;
   }

   $c->finish;
}

sub moveratings {
   my ( $sid, $media_id ) = @_;
   my $sql = <<EOT;
SELECT * 
  FROM individual_story_ratings 
 WHERE sid = '$sid'
EOT

   my $c = $slash->prepare_cached ( $sql );
   my ( $rating, $mirratingid );

   $c->execute () or die2 "error retrieving: $sql";
   while ( $rating = $c->fetchrow_hashref ) {
      # print " moving ".$users{ $rating->{uid} }."'s rating (".$rating->{rating}.")\n";
      my $mirratingid = insertrating ( $media_id, $rating );
   }
   $c->finish;

}

sub movestorymedia {
   my ( $sid, $content_id ) = @_;
   my $sql = <<EOT;
SELECT * 
  FROM mid2sid 
 WHERE sid = '$sid'
EOT

   my $c = $slash->prepare_cached ( $sql );
   my ( $item, $i, $miritemid );

   $c->execute () or return 0;
   while ( $item = $c->fetchrow_hashref ) {
      $i ++;
      print "attaching $item->{mid} to $sid...\n";
      my $miritemid = insertcontentxmedia ( $content_id, $mirmedia{$item->{mid}} );
   }
   $c->finish;
}

sub movemedia {
   my $sql = "SELECT *, CONCAT('20',left(mid,2),substring(mid,4,2),substring(mid,7,2)) AS format_date FROM multimedia";

   my $c = $slash->prepare_cached ( $sql ) or die2 "SQL failed (prepare): $sql\n";
   my ( $item, $i, $miritemid );

   print $sql;
   $c->execute () or die2 "SQL failed (execute): $sql\n";
   while ( $item = $c->fetchrow_hashref ) {
      $i ++;
      my $miritemid = insertmedia ( $item );
      print "Added ".$item->{mid}." (".$item->{mime_type}.")\n";
      $mirmedia{$item->{mid}} = $miritemid;
   }
   $c->finish;
}

sub getmirsequence {
   my ( $key ) = $mir->selectrow_array ( "SELECT nextval(".$mir->quote(shift()).")" );
   return $key;
}

sub mirquote {
   my $string = shift;
   $string = Encode::encode_utf8($string);
   #$string =~ tr/\x91\x92\x93\x94\x96\x97/""\-\-/;
   #$string =~ s/\x85/.../sg;
   #$string =~ tr/[\x80-\x9F]//d;
   return $mir->quote($string);
   #return $mir->quote(Encode::from_to( shift, "iso-8859-1", "utf8") );
}

sub insertcomment {
   #takes a mir content id and slash comment row, returns a sequence
   my ( $content_id, $comment ) = @_;
   my $comment_id = getmirsequence ( 'comment_id_seq' );
   $comment->{subject} = mirquote ( $comment->{subject} );
   $comment->{description} = mirquote ( $comment->{comment} );

   if ( $comment->{description} eq "NULL" ||  $comment->{subject} eq "NULL" ) {
      return 0;
   }

   my $sql = <<EOT;
INSERT INTO comment (
   id,
   title,
   description,
   creator,
   webdb_create,
   to_media
) VALUES (
   $comment_id, 
   $comment->{subject},
   $comment->{description},
   'Unknown',
   current_timestamp,
   $content_id
)   
EOT
   $mir->do ( $sql ) or die "failed to add comment ( $sql )\n";

   my $sql = <<EOT;
INSERT INTO comment_x_media (
   media_id,
   comment_id
) VALUES (
   $content_id,
   $comment_id
)
EOT
   $mir->do ( $sql ) or die "failed to add content-comment link ( $sql )\n";
   return $comment_id;
}

sub insertcontent {
   #takes a slash story/feature row, returns a sequence
   my ( $story ) = @_;
   my $media_id = getmirsequence ( "media_id_seq" );
   $story->{ title} = mirquote ( $story->{title} );
   $story->{ bodytext} = mirquote ( $story->{bodytext} );
   $story->{ description } = mirquote ( $story->{ introtext } );
   $story->{ author_name } = mirquote ( substr( $story->{ author_name }, 0, 80 ) );
   $story->{ author_email } = mirquote ( substr( $story->{ author_email }, 0, 80 ) );
   $story->{ author_address } = mirquote ( substr( $story->{ author_address }, 0, 80 ) );
   $story->{ author_phone } = mirquote ( $story->{ author_phone } );
   $story->{ date } = mirquote ( $story->{format_date} );
   $story->{ comments } = mirquote ( "Converted from slash sid ".$story->{sid} );
   my $sql = <<EOT;
INSERT INTO content (
   id,
   title,
   content_data,
   comment ,
   creator,
   creator_email,
   creator_address,
   creator_phone,
   is_html,
   date,
   to_publisher,
   webdb_create,
   is_published,
   to_article_type,
   description
) VALUES (
   $media_id,
   $story->{"title"},
   $story->{"bodytext"},
   $story->{"comments"},
   $story->{ author_name },
   $story->{ author_email },
   $story->{ author_address },
   $story->{ author_phone },
   '1',
   $story->{date},
   1,
   $story->{date},
   't',
   $story->{to_article_type},
   $story->{description}
)
EOT

   $mir->do ( $sql ) or die "failed to add content ( $sql )\n";
   return $media_id;
}

sub insertmedia {
   #takes a content id and a slash multimedia row
   my ( $media ) = @_;
   my $media_id = getmirsequence ( "media_id_seq" );
   my $tablename;
   for ( $media->{mime_type} ) {
      if ( m/^video/ ) {
         $tablename = "video";
      } elsif ( m/^audio/ ) {
         $tablename = "audio";
      } elsif ( m/^image/ ) {
         $tablename = "images";
      } else {
         warn "unknown media type: ($_, ".$media->{location}.")\n";
         $unknownmimetype->{$_} = $_;
      #   return 0;
         $tablename = "other_media"; 
      }
   }
   my $url = new URI::URL $media->{location};
   my $thumburl = new URI::URL $media->{thumbnail};
   my ( $server, $path, $thumbpath );
   $media->{title} = mirquote ( $media->{title} );
   $media->{format_date} = mirquote ( $media->{format_date});
   $media->{author_name} = mirquote ( $media->{author_name});
   $media->{caption} = mirquote ( $media->{caption} );
   if ( $url->scheme eq "http" ) {
      $server = mirquote ( "http://".$url->host );
      $path = mirquote $url->path;
      $thumbpath = mirquote $thumburl->path;
   } else {
      $server = mirquote ("http://www.phillyimc.org");
      $path = mirquote ( $media->{location} );
      $thumbpath = mirquote ( $media->{thumbnail} );
   }
   my $sql = <<EOT;
INSERT INTO $tablename (
  id,
  title,
  date,
  creator,
  description,
  publish_server,  
  publish_path,
  icon_path,
  webdb_create,
  to_publisher,
  is_published
) VALUES (
  $media_id,
  $media->{title}, 
  $media->{format_date},
  $media->{author_name},
  $media->{caption},
  $server,
  $path,
  $thumbpath,
  current_timestamp, 
  1,
  't'
)
EOT

   $mir->do ( $sql ) or die "failed to add media ( $sql )\n";
   return $media_id;
}

sub insertcontentxmedia {
   my ( $content_id, $media_id ) = @_;
   my $sql = <<EOT;
INSERT INTO content_x_media (
  content_id,
  media_id
) VALUES (
  $content_id,
  $media_id
)
EOT

   $mir->do ( $sql ) or die "failed to add media/content link ( $sql )\n";
   return 1;
}

sub insertwebdbuser {
   #takes a slash user row and creates one in mir
   my ( $user ) = @_;
   my $user_id = getmirsequence ( "webdb_users_id_seq" );
   return 0;
} 

sub insertrating {
}


main ();

-- RolandoDeAguiar - 26 Jun 2005
Topic revision: r2 - 26 Jun 2005, AlsteR
This site is powered by FoswikiCopyright © by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Foswiki? Send feedback