#!/usr/bin/perl # This is orkut2dot, a screenscraping tool to generate dotty graphs # Copyright (C) 2004 Simon Law # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # Usage: # orkut2dot username password userid [...] use strict; use warnings; use English; use Term::ReadKey; # Check the arguments my $username = shift (@ARGV); my $password = shift (@ARGV); my @userids = (@ARGV); unless ($username) { print (STDERR 'Username: '); $username = ReadLine(0); chomp ($username); } unless ($password) { print (STDERR 'Password: '); ReadMode('noecho'); $password = ReadLine(0); chomp ($password); ReadMode('restore'); print (STDERR "\n"); } # Initialize WWW::Mechanize use WWW::Mechanize; my $mech = WWW::Mechanize->new (autocheck => 1); $mech->agent_alias ('Linux Mozilla'); # Go to the homepage $mech->get ("http://www.orkut.com"); # Fill out the username and password $mech->set_visible ($username, $password); $mech->click ("Submit"); # Subroutines sub print_node ($) { # Get the page for this userid, print its node, and find its friends my $userid = shift (@_); # Grab the FriendsList page for the current node. $mech->get ("http://www.orkut.com/FriendsList.aspx?uid=$userid"); # Extract the name for this userid and print it my $link = $mech->find_link (url_regex => qr/uid=$userid/); my $name = $link->text (); print ("\"$userid\" [label=\"$name\"]\n"); # Get an array of its friends, as userids my @links = $mech->links (); my @friends; for my $link (@links) { if ($link->url () =~ m/FriendsList.aspx\?uid=(\d*)/) { push (@friends, $1) unless ($link->text () eq '[IMG]'); } } return @friends; } sub crawl_deeper (\%$) { # We just look at each friend, and populate %$friendsref as necessary. my $friendsref = shift (@_); my $userid = shift (@_); # Get the page for this userid, print its node, and find its friends my @friends = print_node ($userid); # Append these friends to the hash-table entry push (@{$$friendsref{$userid}}, @friends); } sub crawl (\%$) { # We'll look at the FriendsList and grab each friend. We'll draw # an oval for this friend, and return the array of their userids. my $friendsref = shift (@_); my $userid = shift (@_); # Get the page for this userid, print its node, and find its friends my @friends = print_node ($userid); # Append these friends to the hash-table entry push (@{$$friendsref{$userid}}, @friends); # Get the friendship information for each friend for my $friend (@friends) { crawl_deeper (%$friendsref, $friend); } # Return my list of visible friends return @friends; } # We should print the dotty header print <find_link (url_regex => qr/FriendsList.aspx/); my $userid = $link->url (); $userid =~ s/.*uid=(\d*).*/$1/; push (@userids, $userid); } my @visible = @userids; for my $userid (@userids) { push (@visible, crawl (%friends, $userid)); } # We've finished crawling through the list of friends, which means # we'll have drawn some ovals. Now we can express the relationships # between these ovals. However, since only @visible have ovals drawn, # we'll only specify those relationships. for my $userid (@visible) { # Draw relationships print "{"; # Only print nodes for @visible my @shown = grep { my $f = $_; grep ($f == $_, @visible); } @{$friends{$userid}}; map { print (" \"$_\""); } @shown; print (" } -> \"$userid\"\n"); } # Now print the footer print ("}\n");