理系学生日記

おまえはいつまで学生気分なのか

Parallel::ForkManager 使って並行ダウンローダ作った

Parallel::ForkManager はじめて使ったけど、これスゴい使いやすいなって思いました。
3 並列でダウンロードさせるにはこんなかんじ。

$ youtube-downloader -v -p 3 -f ~/tmp/url.txt

ソースこれ。

#!/usr/bin/perl
#-*- cperl -*-

use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use Parallel::ForkManager;
use WebService::Youtube::Download;

our $VERSION = "0.01";

# Parse command-line options. The lexicals declared here are file-scoped and
# are read by main() below.
GetOptions(
    'h|help'      => \my $help,       # print usage and exit
    'f|urlfile=s' => \my $file,       # file listing the URLs to download
    'p|process=i' => \my $process,    # number of parallel download processes
    'v|verbose'   => \my $verbose,    # print progress messages
) or pod2usage();
pod2usage() if $help;

# Call with explicit parentheses: a bare "&main;" implicitly shares the
# caller's @_ with the callee.
main();
exit;

# Entry point: gather the URLs from the -f file (if given) followed by the
# remaining command-line arguments, then download each one in a forked
# worker process managed by Parallel::ForkManager.
#
# Reads the file-scoped option variables $file, $process and $verbose.
sub main {
    my @urls;
    @urls = read_urlfile($file) if $file;
    push @urls, @ARGV;

    my $total     = scalar @urls;
    my $completed = 0;

    # Fall back to a single worker when -p was not given (or was 0), and use
    # the same effective value for both the manager and the verbose banner.
    my $workers = $process || 1;
    my $pm      = Parallel::ForkManager->new($workers);

    if ($verbose) {
        print "downloading ", $total, " files ...\n";
        print "parallel: $workers process\n";

        $pm->run_on_start(sub {
            my ($pid, $ident) = @_;
            print "$ident starts\n";
        });
        $pm->run_on_finish(sub {
            my ($pid, $exit, $ident) = @_;
            $completed++;
            print "$ident completes.($completed/$total)\n";
        });
    }

    for my $url (@urls) {
        # start() returns true in the parent, which moves on to the next URL;
        # the child falls through and performs the download. The URL doubles
        # as the identifier reported to the callbacks above.
        $pm->start($url) and next;

        WebService::Youtube::Download->new({
            url => $url
        })->download();

        $pm->finish;
    }
    $pm->wait_all_children;

    return;
}
    

# Read download URLs from a text file, one URL per line.
#
# Leading and trailing whitespace (including the CR of CRLF line endings) is
# stripped from every line. Blank lines and lines whose first non-blank
# character is '#' are skipped.
#
# Params:  $file - path of the URL list file
# Returns: the URLs in file order (the count in scalar context)
# Dies:    with "$file: $!" when the file cannot be opened
sub read_urlfile {
    my $file = shift;
    open my $fh, '<', $file or die "$file: $!";

    my @urls;
    # Use a lexical line variable so the caller's $_ is not clobbered.
    while (my $line = <$fh>) {
        $line =~ s/\A\s+|\s+\z//g;

        # skip blank lines and comments
        next if $line eq '';
        next if $line =~ /\A#/;

        push @urls, $line;
    }
    close $fh;

    return @urls;
}

__END__

=head1 NAME

youtube-downloader - download flv files from youtube

=head1 SYNOPSIS

    # provide urls directly
    youtube-downloader url1 url2 ... 

    # specify a filename which records URL line by line
    youtube-downloader -f file

    # specify number of download processes (parallel download)
    youtube-downloader -f file -p 3

=cut