Index: trunk/t/building.t =================================================================== --- trunk/t/building.t (revision 18) +++ trunk/t/building.t (revision 128) @@ -1,3 +1,4 @@ +#!perl -Tw # -*-Perl-*- # Time-stamp: "2003-09-15 01:45:47 ADT" @@ -4,32 +5,32 @@ #Test that we can build and compare trees -use Test; -BEGIN { plan tests => 22 } +use Test::More tests=>40; +use strict; -use HTML::Element 1.53; +BEGIN { + use_ok( "HTML::Element", 1.53 ); +} -print "#Using HTML::Element version v$HTML::Element::VERSION\n"; -print "#Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n"; -print "#Using HTML::Entities version v", $HTML::Entities::VERSION || "?", "\n"; -print "#Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n"; -print "# Running under perl version $] for $^O", - (chr(65) eq 'A') ? "\n" : " in a non-ASCII world\n"; -print "# Win32::BuildNumber ", &Win32::BuildNumber(), "\n" - if defined(&Win32::BuildNumber) and defined &Win32::BuildNumber(); -print "# MacPerl verison $MacPerl::Version\n" - if defined $MacPerl::Version; -printf - "# Current time local: %s\n# Current time GMT: %s\n", - scalar(localtime($^T)), scalar(gmtime($^T)); +#; diag "Using HTML::Element version v&HTML::Element::Version()\n"; +#; diag "Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n"; +#; diag "Using HTML::Entities version v", $HTML::Entities::VERSION || "?", "\n"; +#; diag "Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n"; +#; diag "Running under perl version $] for $^O", +#; (chr(65) eq 'A') ? "\n" : " in a non-ASCII world\n"; +#; diag " Win32::BuildNumber ", &Win32::BuildNumber(), "\n" +#; if defined(&Win32::BuildNumber) and defined &Win32::BuildNumber(); +#; diag "MacPerl verison $MacPerl::Version\n" +#; if defined $MacPerl::Version; +#; diag sprintf +#; "Current time local: %s\nCurrent time GMT: %s\n", +#; scalar(localtime($^T)), scalar(gmtime($^T)); +### test subroutine by comparing to variable that it is supposed to return. This test may be questionable??? +is($HTML::Element::VERSION, &HTML::Element::Version(), "Version Subroutine") ; -my $t1; -{ - use strict; - my $lol; - $t1 = HTML::Element->new_from_lol( - $lol = +FIRST_BLOCK: { + my $lol = ['html', ['head', [ 'title', 'I like stuff!' ], @@ -38,70 +39,118 @@ {'lang', 'en-JP'}, 'stuff', ['p', 'um, p < 4!', {'class' => 'par123'}], - ['div', {foo => 'bar'}, '123'], # at 0.1.2 + ['div', {foo => 'bar'}, ' 1 2 3 '], # at 0.1.2 + ['hr'], ] ] - ) ; + my $t1 = HTML::Element->new_from_lol( $lol ); + isa_ok( $t1, 'HTML::Element' ); - ok $t1->content_list, 2; + ### added to test ->is_empty() and ->look_up() + my $hr = $t1->find('hr') ; + isa_ok( $hr, 'HTML::Element' ); + ok($hr->is_empty(), "testing is_empty method on

tag") ; + my $lookuptag = $hr->look_up("_tag", "body") ; + is('', $lookuptag->starttag(), "verify hr->look_up found body tag") ; + my %attrs = $lookuptag->all_attr() ; + my @attrs1 = sort keys %attrs ; + my @attrs2 = sort $lookuptag->all_attr_names() ; + is_deeply( \@attrs1, \@attrs2, "is_deeply attrs") ; +# print join(":", keys %attrs) ; +# print "##########" ; +# print join(":", @attrs) ; + + # Test scalar context + my $count = $t1->content_list; + is( $count, 2, "Works in scalar" ); + + # Test list context + my @list = $t1->content_list; + is( scalar @list, 2, "Should get two items back" ); + isa_ok( $list[0], 'HTML::Element' ); + isa_ok( $list[1], 'HTML::Element' ); + my $div = $t1->find_by_attribute('foo','bar'); - ok $div; + isa_ok( $div, 'HTML::Element' ); - ok $div->address, '0.1.2'; - ok $div eq $t1->address('0.1.2'); # using address to get the node - ok $div->same_as($div); - ok $t1->same_as($t1); - ok not($div->same_as($t1)); + ### tests of various output formats +# print "# ", $div->as_text(), "\n" ; +# print "# ", $div->as_trimmed_text(), "\n" ; +# print "# ", $div->as_Lisp_form(), "\n" ; + is( $div->as_text()," 1 2 3 ", "Dump element in text format"); + is( $div->as_trimmed_text(),"1 2 3", "Dump element in trimmed text format"); + is( $div->as_text_trimmed(),"1 2 3", "Dump element in trimmed text format"); + is( $div->as_Lisp_form(),"(\"_tag\" \"div\" \"foo\" \"bar\" \"_content\" (\n \" 1 2 3 \"))\n", "Dump element as Lisp form"); + is( $div->address, '0.1.2' ); + is( $div, $t1->address('0.1.2'), 'using address to get the node' ); + ok( $div->same_as($div) ); + ok( $t1->same_as($t1) ); + ok( not($div->same_as($t1)) ); + my $t2 = HTML::Element->new_from_lol($lol); - ok $t2->same_as($t1); + isa_ok( $t2, 'HTML::Element' ); + ok( $t2->same_as($t1) ); $t2->address('0.1.2')->attr('snap', 123); - ok not($t2->same_as($t1)); + ok( not($t2->same_as($t1)) ); my $body = $t1->find_by_tag_name('body'); - ok $body->tag eq 'body'; - { - my $cl = join '~', $body->content_list; - my @detached = $body->detach_content; - ok $cl eq join '~', @detached; - $body->push_content(@detached); - ok $cl eq join '~', $body->content_list; - } + isa_ok( $body, 'HTML::Element' ); + is( $body->tag, 'body' ); + my $cl = join '~', $body->content_list; + my @detached = $body->detach_content; + is( $cl, join '~', @detached ); + $body->push_content(@detached); + is( $cl, join '~', $body->content_list ); + $t2->delete; + $t1->delete; } -$t1->delete if $t1; Test2: # for normalization { - local($^W) = 0; - $t1 = HTML::Element->new_from_lol(['p', 'stuff', ['hr'], 'thing']); + my $t1 = HTML::Element->new_from_lol(['p', 'stuff', ['hr'], 'thing']); my @start = $t1->content_list; - ok @start eq 3; + is( scalar(@start), 3 ); my $lr = $t1->content; - splice @$lr,1,0, undef; - push @$lr, undef; - unshift @$lr, undef; + # $lr is ['stuff', HTML::Element('hr'), 'thing'] + is( $lr->[0], 'stuff' ); + isa_ok( $lr->[1], 'HTML::Element' ); + is( $lr->[2], 'thing' ); + # insert some undefs + splice @$lr,1,0, undef; # insert an undef between [0] and [1] + push @$lr, undef; # append an undef to the end + unshift @$lr, undef; # prepend an undef to the front + # $lr is [undef, 'stuff', undef, H::E('hr'), 'thing', undef] - #print "Content list:", join(',', map defined($_) ? $_ : '', @$lr), "\n"; + Unnormalized: { + #print "Content list:", join(',', map defined($_) ? $_ : '', @$lr), "\n"; + my $cl_count = $t1->content_list; + my @cl = $t1->content_list; + is( $cl_count, 6 ); + is( scalar(@cl), $cl_count ); # also == 6 + { no warnings; # content_list contains undefs + isnt( join('~', @start), join('~', $t1->content_list) ); + } + } - ok $t1->content_list eq 6; - ok join('~', @start) ne join('~', $t1->content_list); - $t1->normalize_content; - #print "Content list:", join(',', map defined($_) ? $_ : '', @$lr), "\n"; - ok $t1->content_list eq 3; - ok join('~', @start) eq join('~', $t1->content_list); + Normalized: { + $t1->normalize_content; + #print "Content list:", join(',', map defined($_) ? $_ : '', @$lr), "\n"; + my @cl = $t1->content_list; + eq_array( \@start, \@cl ); + } - - ok ! defined $t1->attr('foo'); + ok( not defined( $t1->attr('foo') ) ); $t1->attr('foo', 'bar'); - ok 'bar' eq $t1->attr('foo'); - ok scalar grep 'bar', $t1->all_external_attr(); + is( $t1->attr('foo'), 'bar' ); + ok( scalar( grep( 'bar', $t1->all_external_attr() ) ) ); $t1->attr('foo', ''); - ok scalar grep 'bar', $t1->all_external_attr(); + ok( scalar( grep( 'bar', $t1->all_external_attr() ) ) ); $t1->attr('foo', undef); # should delete it - ok not grep 'bar', $t1->all_external_attr(); + ok( not grep( 'bar', $t1->all_external_attr() ) ); $t1->delete; } Index: trunk/t/assubs.t =================================================================== --- trunk/t/assubs.t (revision 0) +++ trunk/t/assubs.t (revision 128) @@ -0,0 +1,46 @@ +#!perl -Tw + +use strict ; +use Test::More; + +use HTML::AsSubs; +use HTML::Tagset; + +plan tests => scalar @HTML::AsSubs::TAGS + 3 ; + +### verify all subroutines in HTML::AsSubs ; +map { + + my $h = eval "HTML::AsSubs::$_(\"$_\")"; + + my $string = ($HTML::Tagset::optionalEndTag{$_} || $HTML::Tagset::emptyElement{$_}) + ? "<$_>$_\n" + : "<$_>$_<\/$_>\n" ; + + is ($h->as_HTML ,"$string", "Test of tag: $_") ; + + +} (@HTML::AsSubs::TAGS) ; + +### verify passing href to tag. +{ + my $string="test\n" ; + my $h = HTML::AsSubs::a({ href => "http://cpan.org" }, "test"); + is ($h->as_HTML, "$string", "Test of tag properties") ; +} + +### Improve coverage by passing undef as first parm to _elem via wrapper function. +{ + my $string="test\n" ; + my $h = HTML::AsSubs::a( undef, "test"); + is ($h->as_HTML, "$string", "undef test") ; +} + +### Improve coverage by passing no parameters to _elem via wrapper function. +{ + my $string="\n" ; + my $h = HTML::AsSubs::a(); + is ($h->as_HTML, "$string", "empty tag test") ; +} + + Property changes on: trunk/t/assubs.t ___________________________________________________________________ Name: svn:executable + * Index: trunk/t/pod.t =================================================================== --- trunk/t/pod.t (revision 0) +++ trunk/t/pod.t (revision 128) @@ -0,0 +1,6 @@ +#!perl -Tw +use Test::More; +use strict ; +eval "use Test::Pod 1.00"; +plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; +all_pod_files_ok(); Index: trunk/t/construct_tree.t =================================================================== --- trunk/t/construct_tree.t (revision 0) +++ trunk/t/construct_tree.t (revision 128) @@ -0,0 +1,130 @@ +#!perl -Tw +use Test::More tests => (3 + 7 * 8); +#initial tests + number of tests in test_new_obj() * number of times called + +use strict; + +BEGIN { + use_ok( 'HTML::Tree' ); +} + +my $obj = new HTML::Tree; + +isa_ok($obj, "HTML::TreeBuilder"); + +our $TestInput = "t/oldparse.html"; + +my $HTML ; +{ + local $/ = undef ; + open(INFILE, $TestInput) || die "Can't open $TestInput: $!"; + $HTML= ; + close(INFILE) ; +} + +# setup some parts of the HTML for the list tests. + +# die "$TestInput does not have at least 2 characters!" +# if length($HTML) <= 2; +# my $HTMLPart1 = substr( $HTML, 0, int( length($HTML) / 2 ) ); +# my $HTMLPart2 = substr( $HTML, int( length($HTML) / 2 ) ); + +# The logic here is to try to split the HTML in the middle of a tag. +# The above commented-out code is also an option. + +my $split_at = 4; +die "$TestInput does not have at least " . ($split_at + 1) . " characters!" + if length($HTML) <= $split_at; +my $HTMLPart1 = substr( $HTML, 0, 4 ); +my $HTMLPart2 = substr( $HTML, 4 ); + +is($HTMLPart1 . $HTMLPart2, $HTML, "split \$HTML correctly"); + + +# Filehandle Test +{ + open(INFILE, $TestInput) || die "Can't open $TestInput: $!"; + my $file_obj = HTML::Tree->new_from_file( *INFILE ); + test_new_obj($file_obj, "new_from_file Filehandle" ) ; + close(INFILE); +} + + +# Scalar Tests +{ + my $content_obj = HTML::Tree->new_from_content($HTML); + test_new_obj($content_obj, "new_from_content Scalar") ; +} + +{ + my $file_obj = HTML::Tree->new_from_file( $TestInput); + test_new_obj($file_obj, "new_from_file Scalar" ) ; +} + +{ + my $parse_content_obj = HTML::Tree->new; + $parse_content_obj->parse_content( $HTML); + test_new_obj($parse_content_obj, "new(); parse_content Scalar" ); +} + + +# Scalar REF Tests +{ + my $content_obj = HTML::Tree->new_from_content($HTML); + test_new_obj($content_obj, "new_from_content Scalar REF") ; +} + +# None for new_from_file +# Filehandle test instead. (see above) + +{ + my $parse_content_obj = HTML::Tree->new; + $parse_content_obj->parse_content( $HTML); + test_new_obj($parse_content_obj, "new(); parse_content Scalar REF" ); +} + + +# List Tests (Scalar and Scalar REF) +{ + my $content_obj = HTML::Tree->new_from_content(\$HTMLPart1, $HTMLPart2); + test_new_obj($content_obj, "new_from_content List") ; +} + +# None for new_from_file. +# Does not support lists. + +{ + my $parse_content_obj = HTML::Tree->new; + $parse_content_obj->parse_content( \$HTMLPart1, $HTMLPart2 ); + test_new_obj($parse_content_obj, "new(); parse_content List"); +} + + +sub test_new_obj { + + my $obj = shift ; + my $test_description = shift; + + isa_ok($obj, "HTML::TreeBuilder", $test_description); + + my $html; + ok ($html = $obj->as_HTML(undef, ' '), "Get html as string." ); + + # This is a very simple test just to ensure that we get something + # sensible back. + like( $html, qr//i, " found OK." ); + like( $html, qr/www\.sn\.no/, "found www.sn.no link" ); + + TODO: { + local $TODO = < 4 } +use Test::More tests => 2; use strict; -BEGIN { ok 1 } -use HTML::TreeBuilder; -BEGIN { ok 1 } -use HTML::Element; -BEGIN { ok 1 } +BEGIN { + use_ok( 'HTML::TreeBuilder' ); +} +BEGIN { + use_ok( 'HTML::Element' ); +} print "#Using HTML::TreeBuilder version v$HTML::TreeBuilder::VERSION\n"; print "#Using HTML::Element version v$HTML::Element::VERSION\n"; @@ -26,6 +25,4 @@ printf "# Current time local: %s\n# Current time GMT: %s\n", scalar(localtime($^T)), scalar(gmtime($^T)); - -ok 1; print "# byebye from ", __FILE__, "\n"; Index: trunk/t/clonei.t =================================================================== --- trunk/t/clonei.t (revision 18) +++ trunk/t/clonei.t (revision 128) @@ -1,36 +1,21 @@ - +#!perl -Tw # -*-Perl-*- # Time-stamp: "2003-09-15 01:45:39 ADT" -BEGIN {print "1..1\n";} -use HTML::TreeBuilder; +use strict; +use Test::More tests => 4; +BEGIN {use_ok ( "HTML::TreeBuilder");} -print "#Using HTML::TreeBuilder version v$HTML::TreeBuilder::VERSION\n"; -print "#Using HTML::Element version v$HTML::Element::VERSION\n"; -print "#Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n"; -print "#Using HTML::Entities version v", $HTML::Entities::VERSION || "?", "\n"; -print "#Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n"; -print "# Running under perl version $] for $^O", - (chr(65) eq 'A') ? "\n" : " in a non-ASCII world\n"; -print "# Win32::BuildNumber ", &Win32::BuildNumber(), "\n" - if defined(&Win32::BuildNumber) and defined &Win32::BuildNumber(); -print "# MacPerl verison $MacPerl::Version\n" - if defined $MacPerl::Version; -printf - "# Current time local: %s\n# Current time GMT: %s\n", - scalar(localtime($^T)), scalar(gmtime($^T)); +my $t = HTML::TreeBuilder->new; +$t->parse('stuff lalal'); +$t->eof; +my $c = $t->clone(); -{ - use strict; - my $t = HTML::TreeBuilder->new; - $t->parse('stuff lalal'); - $t->eof; - my $c = $t->clone(); - $c->delete(); - if( $t->find_by_attribute('name', 'foo') ) { - print "ok 1\n"; - } else { - print "not ok 1\n"; - } - $t->delete(); -} +#these are correct tests. Of what, I'm not sure. +ok($c->same_as($t), "\$c is the same as \$t, according to HTML::Element"); +ok($t->same_as($c), "\$t is the same as \$c, according to HTML::Element"); + +$c->delete(); +ok ($t->find_by_attribute('name', 'foo'), "My name is foo after delete" ); + +$t->delete(); Index: trunk/t/oldparse.t =================================================================== --- trunk/t/oldparse.t (revision 18) +++ trunk/t/oldparse.t (revision 128) @@ -1,93 +1,62 @@ +#!perl -Tw +use Test::More tests => 17; +use strict; -# -*-Perl-*- -# Time-stamp: "2003-09-15 01:45:31 ADT" -BEGIN { print "1..1\n"; } +BEGIN { + use_ok( 'HTML::Parse' ); +} -use HTML::Parse; - -print "#Using HTML::TreeBuilder version v$HTML::TreeBuilder::VERSION\n"; -print "#Using HTML::Element version v$HTML::Element::VERSION\n"; -print "#Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n"; -print "#Using HTML::Entities version v", $HTML::Entities::VERSION || "?", "\n"; -print "#Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n"; -print "# Running under perl version $] for $^O", - (chr(65) eq 'A') ? "\n" : " in a non-ASCII world\n"; -print "# Win32::BuildNumber ", &Win32::BuildNumber(), "\n" - if defined(&Win32::BuildNumber) and defined &Win32::BuildNumber(); -print "# MacPerl verison $MacPerl::Version\n" - if defined $MacPerl::Version; -printf - "# Current time local: %s\n# Current time GMT: %s\n", - scalar(localtime($^T)), scalar(gmtime($^T)); - - # This is a very simple test. It basically just ensures that the -# HTML::Parse module is parsed ok by perl. +# HTML::Parse module is parsed ok by perl and that it will interact +# nicely with the rest of our modules -$HTML = <<'EOT'; +our $TestInput = "t/oldparse.html"; -Test page - +my $HTML ; +{ + local $/ = undef ; + open("INFILE", "$TestInput") || die "$!" ; + $HTML= ; + close(INFILE) ; +} -

Header

+my $own_builder = new HTML::TreeBuilder; +isa_ok( $own_builder, 'HTML::TreeBuilder' ); -This is a link to -Schibsted Nett in Norway. +my $obj_h = parse_html $HTML, $own_builder; +isa_ok( $obj_h, "HTML::TreeBuilder", "existing TreeBuilder handled OK." ); -

Sofie Amundsen var p� vei hjem fra skolen. Det f�rste stykket -hadde hun g�tt sammen med Jorunn. De hadde snakket om roboter. -Jorunn hadde ment at menneskets hjerne var som en komplisert -datamaskin. Sofie var ikke helt sikker p� om hun var enig. Et -menneske måtte da v�re noe mer enn en maskin? +my $h = parse_html $HTML; +isa_ok( $h, "HTML::TreeBuilder" ); +# This ensures that the output from $h->dump goes to STDOUT +my $html; +ok ($html = $h->as_HTML(undef, ' '), "Get html as string." ); - <-- this one did not terminate the comment - because "--" on the previous line -more comment +my $h2 = parse_htmlfile( "t/oldparse.html" ); +isa_ok( $h2, "HTML::TreeBuilder" ); ---> +ok ($html = $h2->as_HTML(undef, ' '), "Get html as string." ); -

- -
Name -
Aas Gisle -
Koster Martijn -
- -EOT - - -$h = parse_html $HTML; - -# This ensures that the output from $h->dump goes to STDOUT - -$html = $h->as_HTML(undef, ' '); - -{ - my $h = $html; - $h =~ s/^/\# /mg; - print "# HTML: $h#\n"; -} - # This is a very simple test just to ensure that we get something # sensible back. -if( $html =~ //i && $html =~ /www\.sn\.no/ - && $html !~ /comment/ && $html =~ /Gisle/ -) { - print "ok 1\n\n"; -} else { - print "not ok 1\n\n"; -} +like( $html, qr//i, "parse_htmlfile: found OK." ); +like( $html, qr/www\.sn\.no/, "parse_htmlfile: found www.sn.no link" ); +unlike( $html, qr/comment/, "parse_htmlfile: found comment" ); +like( $html, qr/Gisle/, "parse_htmlfile: found Gisle" ); -$h->delete; - - -exit; Index: trunk/t/oldparse.html =================================================================== --- trunk/t/oldparse.html (revision 0) +++ trunk/t/oldparse.html (revision 128) @@ -0,0 +1,37 @@ + +Test page + + +

Name -
Aas	Gisle -
Koster	Martijn -

Header

+ +This is a link to +Schibsted Nett in Norway. + +

Sofie Amundsen var p� vei hjem fra skolen. Det f�rste stykket +hadde hun g�tt sammen med Jorunn. De hadde snakket om roboter. +Jorunn hadde ment at menneskets hjerne var som en komplisert +datamaskin. Sofie var ikke helt sikker p� om hun var enig. Et +menneske måtte da v�re noe mer enn en maskin? + + + <-- this one did not terminate the nested-comment + because "--" on the previous line + +more comment + +--> + + + +

+ +
Name +
Aas Gisle +
Koster Martijn +
Index: trunk/t/parse.t =================================================================== --- trunk/t/parse.t (revision 18) +++ trunk/t/parse.t (revision 128) @@ -1,4 +1,4 @@ - +#!perl -Tw # -*-Perl-*- # Time-stamp: "2003-09-15 01:45:14 ADT" Index: trunk/t/parsefile.t =================================================================== --- trunk/t/parsefile.t (revision 18) +++ trunk/t/parsefile.t (revision 128) @@ -1,7 +1,7 @@ - +#!perl -Tw # -*-Perl-*- # Time-stamp: "2003-09-15 01:44:58 ADT" -use Test; +use Test::More; BEGIN { plan tests => 3 } use HTML::TreeBuilder; use strict; @@ -53,8 +53,8 @@ } # Just make a few samples to check that we got what we expected -ok //i; -ok //i; -ok /this is a simple/; +like($_, qr//i, "Matches Head"); +like($_, qr//i, "Matches isindex"); +like($_, qr/this is a simple/, "Matches simple text"); # /foo\s*a=b/ || $bad++; # too version-dependent Index: trunk/t/split.t =================================================================== --- trunk/t/split.t (revision 18) +++ trunk/t/split.t (revision 128) @@ -1,4 +1,4 @@ - +#!perl -Tw # -*-Perl-*- # Time-stamp: "2003-09-15 01:48:48 ADT" # @@ -9,6 +9,8 @@ # Now we use a shorter document, because we don't have all day on # this. +my ($HTML, $notests); +BEGIN { $HTML = <<'EOT'; Tittel @@ -23,94 +25,66 @@ some entities (å) EOT -$| = 1; +$notests = length($HTML); # A test for each char in the test doc +$notests *= 2; # done twice +$notests += 3; # plus more for the the rest of the tests +} +use strict; -$notests = length($HTML); -print "1..$notests\n"; +use Test::More tests=>$notests; # Tests -use HTML::TreeBuilder; +BEGIN { + use_ok( 'HTML::TreeBuilder'); +} -print "#Using HTML::TreeBuilder version v$HTML::TreeBuilder::VERSION\n"; -print "#Using HTML::Element version v$HTML::Element::VERSION\n"; -print "#Using HTML::Parser version v", $HTML::Parser::VERSION || "?", "\n"; -print "#Using HTML::Entities version v", $HTML::Entities::VERSION || "?", "\n"; -print "#Using HTML::Tagset version v", $HTML::Tagset::VERSION || "?", "\n"; -print "# Running under perl version $] for $^O", - (chr(65) eq 'A') ? "\n" : " in a non-ASCII world\n"; -print "# Win32::BuildNumber ", &Win32::BuildNumber(), "\n" - if defined(&Win32::BuildNumber) and defined &Win32::BuildNumber(); -print "# MacPerl verison $MacPerl::Version\n" - if defined $MacPerl::Version; -printf - "# Current time local: %s\n# Current time GMT: %s\n", - scalar(localtime($^T)), scalar(gmtime($^T)); - - -$h = new HTML::TreeBuilder; +my $h = new HTML::TreeBuilder; +isa_ok ( $h, "HTML::TreeBuilder"); $h->parse($HTML)->eof; -$html = $h->as_HTML; +my $html = $h->as_HTML; $h->delete; -{ - my $h = $html; - $h =~ s/^/# /mg; - print "# Parsing: $h#\n"; -} - # Each test here tries to parse the doc when we split it in two. -for $pos (1 .. length($HTML) - 1) { - $first = substr($HTML, 0, $pos); - $last = substr($HTML, $pos); - die "This is bad" unless $HTML eq ($first . $last); +for my $pos (0 .. length($HTML) - 1) { + my $first = substr($HTML, 0, $pos); + my $last = substr($HTML, $pos); + is ($first.$last, $HTML, "File split okay"); + my $h1; eval { - $h = new HTML::TreeBuilder; - $h->parse($first); - $h->parse($last); - $h->eof; + $h1 = new HTML::TreeBuilder; + $h1->parse($first); + $h1->parse($last); + $h1->eof; }; if ($@) { - print "Died when splitting at position $pos:\n"; - $before = 10; - $before = $pos if $pos < $before; - print "�", substr($HTML, $pos - $before, $before); - print "�\n�"; - print substr($HTML, $pos, 10); - print "�\n"; - print "not ok $pos\n"; - $h->delete; - next; + print "Died when splitting at position $pos:\n"; + my $before = 10; + $before = $pos if $pos < $before; + print "�", substr($HTML, $pos - $before, $before); + print "�\n�"; + print substr($HTML, $pos, 10); + print "�\n"; + print "not ok $pos\n"; + $h1->delete; + next; } - $new_html = $h->as_HTML; - if ($new_html ne $html) { - print "\n\nSomething is different when splitting at position $pos:\n"; - $before = 10; + my $new_html = $h1->as_HTML; + my $before = 10; $before = $pos if $pos < $before; - print "�", substr($HTML, $pos - $before, $before); - print "�\n�"; - print substr($HTML, $pos, 10); - print "�\n"; - print "\n$html$new_html\n"; - print "not ok $pos"; - - } else { - print "ok $pos\n"; - } - $h->delete; + is($new_html, $html, "Still Parsing as the same after split at $pos") or + diag("Something is different when splitting at position $pos:\n", + "�", substr($HTML, $pos - $before, $before), "�\n�", + substr($HTML, $pos, 10), "�\n", "\n$html$new_html\n", + ); + $h1->delete; } # Also try what happens when we feed the document one-char at a time -print "#\n#\nNow parsing document once char at a time...\n"; -$h = new HTML::TreeBuilder; +# print "#\n#\nNow parsing document once char at a time...\n"; +my $perChar = new HTML::TreeBuilder; while ($HTML =~ /(.)/sg) { - $h->parse($1); + $perChar->parse($1); } -$h->eof; -$new_html = $h->as_HTML; -if ($new_html ne $html) { - print "Also different when parsed one char at a time\n"; - print "\n$html$new_html\n"; - $BAD = 1; -} - -print join '', $BAD ? "not " : '', "ok $notests\n"; - +$perChar->eof; +my $new_html = $perChar->as_HTML; +is ($new_html, $html, "Testing per Char parsing"); +$perChar->delete; Index: trunk/TODO =================================================================== --- trunk/TODO (revision 18) +++ trunk/TODO (revision 128) @@ -28,4 +28,5 @@ + ======================================================================== Index: trunk/lib/HTML/Tree/AboutObjects.pod =================================================================== --- trunk/lib/HTML/Tree/AboutObjects.pod (revision 18) +++ trunk/lib/HTML/Tree/AboutObjects.pod (revision 128) @@ -430,6 +430,8 @@ in and of itself -- but in the appropriate context, it's understood to I<mean> the credit card account you're accessing. +=back + This is exactly the relationship between objects and object values, and from this analogy, several facts about object values are a bit more explicable: Index: trunk/lib/HTML/TreeBuilder.pm =================================================================== --- trunk/lib/HTML/TreeBuilder.pm (revision 18) +++ trunk/lib/HTML/TreeBuilder.pm (revision 128) @@ -101,7 +101,7 @@ return $new; } -# TODO: document? +# TODO: document more fully? sub parse_content { # from any number of scalars my $tree = shift; my $retval; @@ -1568,6 +1568,11 @@ once you've finished feeding all the chunks to parse(...), and before you actually start doing anything else with the tree in C<$root>. +=item C<< $root->parse_content(...) >> + +Basically a happly alias for C<< $root->parse(...); $root->eof >>. +Takes the exact same arguments as C<< $root->parse() >>. + =item $root->delete() [An important method inherited from L<HTML::Element|HTML::Element>, which Index: trunk/lib/HTML/Element.pm =================================================================== --- trunk/lib/HTML/Element.pm (revision 18) +++ trunk/lib/HTML/Element.pm (revision 128) @@ -83,7 +83,7 @@ "I like potatoes" Incidentally, diagramming with indenting works much better for very -large trees, and is easier for a program to generate. The $tree->dump +large trees, and is easier for a program to generate. The C<<$tree->dump>> method uses indentation just that way. However you diagram the tree, it's stored the same in memory -- it's a @@ -126,7 +126,7 @@ child's I<third> child", you're more likely to have to scan the contents of a tree, looking for whatever nodes, or kinds of nodes, you want to do something with. The most straightforward way to look over a tree -is to "traverse" it; an HTML::Element method ($h->traverse) is +is to "traverse" it; an HTML::Element method (C<<$h->traverse>>) is provided for this purpose; and several other HTML::Element methods are based on it. @@ -250,11 +250,11 @@ If setting a new value, the old value of that attribute is returned. -If methods are provided for accessing an attribute (like $h->tag for -"_tag", $h->content_list, etc. below), use those instead of calling -attr $h->attr, whether for reading or setting. +If methods are provided for accessing an attribute (like C<< $h->tag >> for +"_tag", C<< $h->content_list >>, etc. below), use those instead of calling +attr C<< $h->attr >>, whether for reading or setting. -Note that setting an attribute to undef (as opposed to "", the empty +Note that setting an attribute to C<undef> (as opposed to "", the empty string) actually deletes the attribute. =cut Index: trunk/lib/HTML/AsSubs.pm =================================================================== --- trunk/lib/HTML/AsSubs.pm (revision 18) +++ trunk/lib/HTML/AsSubs.pm (revision 128) @@ -104,16 +104,29 @@ frame frameset noframe ); -my @code; for (@TAGS) { - push(@code, "sub $_ { _elem('$_', \@_); }\n"); - push(@EXPORT, $_); + my $code; + $code = "sub $_ { _elem('$_', \@_); }\n" ; + push(@EXPORT, $_); + +=head1 Generated functions +Here we use a loop and generate functions as strings. Then eval to 'define' the function. 1 subroutine per tag. +=cut + eval $code; + if ($@) { + die $@; + } } -eval join('', @code); -if ($@) { - die $@; -} +=head1 Private Functions +=cut + +=head2 _elem() + +The _elem() function is wrapped by all the html 'tag' functions. It takes a tag-name, optional hashref of attributes and a list of content as parameters. + +=cut + sub _elem { my $tag = shift; Index: trunk/MANIFEST =================================================================== --- trunk/MANIFEST (revision 18) +++ trunk/MANIFEST (revision 128) @@ -17,7 +17,10 @@ t/00system.t t/building.t t/clonei.t +t/construct_tree.t +t/oldparse.html t/oldparse.t t/parse.t t/parsefile.t t/split.t +META.yml Index: trunk/Makefile.PL =================================================================== --- trunk/Makefile.PL (revision 18) +++ trunk/Makefile.PL (revision 128) @@ -18,11 +18,14 @@ package MY; +# This could probably be tossed in favor of the ExtUtils::MakeMaker default sub libscan { # Determine things that should *not* be installed my($self, $path) = @_; return '' if $path =~ m/~/; + return '' if $path =~ /(?:RCS|CVS|SCCS|\.svn)/; $path; + } __END__ Property changes on: trunk ___________________________________________________________________ Name: svn:ignore + .project