Tekst ot links sys HTML::LinkExtor

anio at webgroup-bg.com anio at webgroup-bg.com
Thu Apr 15 07:07:01 CDT 2004


sofia-pm-list - An official of Sofia's Perl hackers
anio at webgroup-bg.com
Мерси Петиьо.
Видях го това, но малко късно. Оправих се със HTML::Parser след като се
разтърсих из Usenet. Ще го paste-на тук ако на някой му се наложи да не го
търси като мен:

# Fetch one page and print every hyperlink on it as "URL<TAB>link text",
# one link per line.  The actual extraction is done by the HTML::Parser
# callbacks a_start_handler / a_end_handler defined further down.
use strict;
use warnings;

# Loaded at run time; harmless if the modules are already loaded.
require LWP::UserAgent;
require HTTP::Request;
require HTML::Parser;

my $url = 'http://perl.com';

my $ua  = LWP::UserAgent->new;
my $req = HTTP::Request->new(GET => $url);
my $res = $ua->request($req);

# parse() returns the parser object, so "parse(...) || die" can never fire;
# the step that really can fail is the download, so check it here.
die "GET $url failed: " . $res->status_line . "\n"
  unless $res->is_success;

my $content = $res->content;
my $p = HTML::Parser->new(api_version => 3);

# State shared with the handlers: collected [ href, text ] pairs, plus the
# href and text of the link currently being parsed.
my @linklist;
my $href;
my $text;

$p->handler( start => \&a_start_handler, "tagname,self,attr" );
$p->unbroken_text( 1 );
$p->parse( $content );
$p->eof;    # signal end of document so buffered text is flushed

foreach my $link ( @linklist ){
  my( $link_url, $link_text ) = @$link;
  print "$link_url\t$link_text\n";
}

# Start-tag callback, registered with argspec "tagname,self,attr".
#
#   $tag  - name of the tag that was opened
#   $self - the parser object the handlers are installed on
#   $attr - hash reference of the tag's attributes
#
# For every <a> tag carrying an href it remembers the target in $href,
# clears $text, and installs two temporary handlers on $self: a text
# handler that collects the link text into $text, and a_end_handler for
# end tags.  All other tags are ignored.
sub a_start_handler {
  my( $tag, $self, $attr ) = @_;

  # we only act on <a tags
  return if $tag ne "a";

  # Anchors without an href (e.g. <a name="...">) are not links.
  return unless defined( $href = $attr->{href} );

  # Forget the previous link's text; otherwise a link with no text of its
  # own (say, one that only wraps an image) would be stored with it.
  $text = undef;

  # Append rather than assign: markup inside the link splits its text into
  # several text events, and every piece belongs to the link text.
  $self->handler(
    text => sub {
      my $chunk = shift;
      $chunk =~ s/\n//g;
      $text .= $chunk;
    },
    "dtext"
  );
  $self->handler( end => \&a_end_handler, "tagname,self" );

  return;
}

# End-tag callback, registered with argspec "tagname,self" while a link is
# open.  On </a> it stores [ $href, $text ] in @linklist, provided the text
# is defined and contains at least one non-whitespace character, and then
# removes the temporary end and text handlers from the parser.  End tags
# other than </a> are ignored.
sub a_end_handler {
  my( $tag, $self ) = @_;
  return unless $tag eq "a";

  if( defined $text && $text =~ /\S/ ){
    push @linklist, [ $href, $text ];
  }

  # The link is closed: stop listening until the next <a href=...>.
  $self->handler( end => undef );
  return $self->handler( text => undef );
}



More information about the Sofia-pm mailing list