summaryrefslogtreecommitdiffstatsabout
-rw-r--r--TODO6
-rw-r--r--XML/Changes136
-rw-r--r--XML/LICENSE8
-rw-r--r--XML/MANIFEST14
-rw-r--r--XML/META.yml27
-rw-r--r--XML/Makefile.PL12
-rw-r--r--XML/README17
-rw-r--r--XML/TODO10
-rw-r--r--XML/Writer.pm625
-rwxr-xr-xXML/examples/check-print.pl35
-rwxr-xr-xXML/examples/data-mode-sample.pl21
-rwxr-xr-xXML/examples/directory-as-atom.pl148
-rwxr-xr-xXML/examples/double-escaping-example.pl17
-rwxr-xr-xXML/examples/namespace-prefixes.pl52
-rwxr-xr-xXML/examples/simple-xml.pl19
-rwxr-xr-xXML/examples/writing-unicode.pl18
-rwxr-xr-xXML/examples/xml-writer-string.pl19
-rw-r--r--XML/t/01_main.t1997
-rw-r--r--XML/t/pod-coverage.t8
-rw-r--r--XML/t/pod.t8
-rw-r--r--XML/test.pl546
-rwxr-xr-xgen-index.pl90
-rw-r--r--index.html62
-rw-r--r--release-howto44
-rw-r--r--style.css18
25 files changed, 3221 insertions, 736 deletions
diff --git a/XML/Writer.pm b/XML/Writer.pm
index 7f966b9..7247063 100644
--- a/XML/Writer.pm
+++ b/XML/Writer.pm
@@ -1,9 +1,10 @@
########################################################################
# Writer.pm - write an XML document.
# Copyright (c) 1999 by Megginson Technologies.
-# No warranty. Commercial and non-commercial use freely permitted.
-#
-# $Id: Writer.pm,v 0.4 2000/04/05 02:23:34 david Exp $
+# Copyright (c) 2003 Ed Avis <ed@membled.com>
+# Copyright (c) 2004-2010 Joseph Walton <joe@kafsemo.org>
+# Redistribution and use in source and compiled forms, with or without
+# modification, are permitted under any circumstances. No warranty.
########################################################################
package XML::Writer;
@@ -13,9 +14,8 @@ require 5.004;
use strict;
use vars qw($VERSION);
use Carp;
-use IO;
-
-$VERSION = "0.4";
+use IO::Handle;
+$VERSION = "0.612";
@@ -34,32 +34,42 @@ $VERSION = "0.4";
sub new {
my ($class, %params) = (@_);
- # If the user wants namespaces,
- # intercept the request here; it will
- # come back to this constructor
- # from within XML::Writer::Namespaces::new()
+ # If the user wants namespaces,
+ # intercept the request here; it will
+ # come back to this constructor
+ # from within XML::Writer::Namespaces::new()
if ($params{NAMESPACES}) {
delete $params{NAMESPACES};
return new XML::Writer::Namespaces(%params);
}
- # Set up $self and basic parameters
+ # Set up $self and basic parameters
my $self;
my $output;
my $unsafe = $params{UNSAFE};
my $newlines = $params{NEWLINES};
my $dataMode = $params{DATA_MODE};
- my $dataIndent = $params{DATA_INDENT};
+ my $dataIndent;
- # If the NEWLINES parameter is specified,
- # set the $nl variable appropriately
+ # If the NEWLINES parameter is specified,
+ # set the $nl variable appropriately
my $nl = '';
if ($newlines) {
$nl = "\n";
}
+ my $outputEncoding = $params{ENCODING} || "";
+ my ($checkUnencodedRepertoire, $escapeEncoding);
+ if (lc($outputEncoding) eq 'us-ascii') {
+ $checkUnencodedRepertoire = \&_croakUnlessASCII;
+ $escapeEncoding = \&_escapeASCII;
+ } else {
+ my $doNothing = sub {};
+ $checkUnencodedRepertoire = $doNothing;
+ $escapeEncoding = $doNothing;
+ }
- # Parse variables
+ # Parse variables
my @elementStack = ();
my $elementLevel = 0;
my %seen = ();
@@ -68,6 +78,7 @@ sub new {
my @hasDataStack = ();
my $hasElement = 0;
my @hasElementStack = ();
+ my $hasHeading = 0; # Does this document have anything before the first element?
#
# Private method to show attributes.
@@ -78,13 +89,17 @@ sub new {
while ($atts->[$i]) {
my $aname = $atts->[$i++];
my $value = _escapeLiteral($atts->[$i++]);
+ $value =~ s/\x0a/\&#10\;/g;
+ $value =~ s/\x0d/\&#13\;/g;
+ $value =~ s/\x09/\&#9\;/g;
+ &{$escapeEncoding}($value);
$output->print(" $aname=\"$value\"");
}
};
- # Method implementations: the SAFE_
- # versions perform error checking
- # and then call the regular ones.
+ # Method implementations: the SAFE_
+ # versions perform error checking
+ # and then call the regular ones.
my $end = sub {
$output->print("\n");
};
@@ -107,7 +122,13 @@ sub new {
if ($standalone && $standalone ne 'no') {
$standalone = 'yes';
}
- $encoding = "UTF-8" unless $encoding;
+
+ # Only include an encoding if one has been explicitly supplied,
+ # either here or on construction. Allow the empty string
+ # to suppress it.
+ if (!defined($encoding)) {
+ $encoding = $outputEncoding;
+ }
$output->print("<?xml version=\"1.0\"");
if ($encoding) {
$output->print(" encoding=\"$encoding\"");
@@ -116,6 +137,7 @@ sub new {
$output->print(" standalone=\"$standalone\"");
}
$output->print("?>\n");
+ $hasHeading = 1;
};
my $SAFE_xmlDecl = sub {
@@ -137,18 +159,21 @@ sub new {
}
if ($elementLevel == 0) {
$output->print("\n");
+ $hasHeading = 1;
}
};
my $SAFE_pi = sub {
my ($name, $data) = (@_);
$seen{ANYTHING} = 1;
- if ($name =~ /xml/i) {
+ if (($name =~ /^xml/i) && ($name !~ /^xml-stylesheet$/i)) {
carp("Processing instruction target begins with 'xml'");
}
- if ($name =~ /\?\>/ || $data =~ /\?\>/) {
+ if ($name =~ /\?\>/ || (defined($data) && $data =~ /\?\>/)) {
croak("Processing instruction may not contain '?>'");
+ } elsif ($name =~ /\s/) {
+ croak("Processing instruction name may not contain whitespace");
} else {
&{$pi};
}
@@ -156,9 +181,16 @@ sub new {
my $comment = sub {
my $data = $_[0];
+ if ($dataMode && $elementLevel) {
+ $output->print("\n");
+ $output->print($dataIndent x $elementLevel);
+ }
$output->print("<!-- $data -->");
- if ($elementLevel == 0) {
+ if ($dataMode && $elementLevel) {
+ $hasElement = 1;
+ } elsif ($elementLevel == 0) {
$output->print("\n");
+ $hasHeading = 1;
}
};
@@ -171,6 +203,7 @@ sub new {
if ($data =~ /-->/) {
croak("Comment may not contain '-->'");
} else {
+ &{$checkUnencodedRepertoire}($data);
$seen{ANYTHING} = 1;
&{$comment};
}
@@ -180,11 +213,15 @@ sub new {
my ($name, $publicId, $systemId) = (@_);
$output->print("<!DOCTYPE $name");
if ($publicId) {
+ unless ($systemId) {
+ croak("A DOCTYPE declaration with a public ID must also have a system ID");
+ }
$output->print(" PUBLIC \"$publicId\" \"$systemId\"");
} elsif ($systemId) {
$output->print(" SYSTEM \"$systemId\"");
}
$output->print(">\n");
+ $hasHeading = 1;
};
my $SAFE_doctype = sub {
@@ -202,9 +239,9 @@ sub new {
my $startTag = sub {
my $name = $_[0];
- if ($dataMode) {
+ if ($dataMode && ($hasHeading || $elementLevel)) {
$output->print("\n");
- $output->print(" " x ($elementLevel * $dataIndent));
+ $output->print($dataIndent x $elementLevel);
}
$elementLevel++;
push @elementStack, $name;
@@ -223,14 +260,15 @@ sub new {
my $SAFE_startTag = sub {
my $name = $_[0];
+ &{$checkUnencodedRepertoire}($name);
_checkAttributes(\@_);
if ($seen{ELEMENT} && $elementLevel == 0) {
croak("Attempt to insert start tag after close of document element");
} elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) {
croak("Document element is \"$name\", but DOCTYPE is \""
- . $seen{DOCTYPE}
- . "\"");
+ . $seen{DOCTYPE}
+ . "\"");
} elsif ($dataMode && $hasData) {
croak("Mixed content not allowed in data mode: element $name");
} else {
@@ -242,9 +280,9 @@ sub new {
my $emptyTag = sub {
my $name = $_[0];
- if ($dataMode) {
+ if ($dataMode && ($hasHeading || $elementLevel)) {
$output->print("\n");
- $output->print(" " x ($elementLevel * $dataIndent));
+ $output->print($dataIndent x $elementLevel);
}
$output->print("<$name");
&{$showAttributes}(\@_);
@@ -257,14 +295,15 @@ sub new {
my $SAFE_emptyTag = sub {
my $name = $_[0];
+ &{$checkUnencodedRepertoire}($name);
_checkAttributes(\@_);
if ($seen{ELEMENT} && $elementLevel == 0) {
croak("Attempt to insert empty tag after close of document element");
} elsif ($elementLevel == 0 && $seen{DOCTYPE} && $name ne $seen{DOCTYPE}) {
croak("Document element is \"$name\", but DOCTYPE is \""
- . $seen{DOCTYPE}
- . "\"");
+ . $seen{DOCTYPE}
+ . "\"");
} elsif ($dataMode && $hasData) {
croak("Mixed content not allowed in data mode: element $name");
} else {
@@ -281,7 +320,7 @@ sub new {
$elementLevel--;
if ($dataMode && $hasElement) {
$output->print("\n");
- $output->print(" " x ($elementLevel * $dataIndent));
+ $output->print($dataIndent x $elementLevel);
}
$output->print("</$name$nl>");
if ($dataMode) {
@@ -309,6 +348,7 @@ sub new {
$data =~ s/\</\&lt\;/g;
$data =~ s/\>/\&gt\;/g;
}
+ &{$escapeEncoding}($data);
$output->print($data);
$hasData = 1;
};
@@ -319,36 +359,72 @@ sub new {
} elsif ($dataMode && $hasElement) {
croak("Mixed content not allowed in data mode: characters");
} else {
+ _croakUnlessDefinedCharacters($_[0]);
&{$characters};
}
};
-
- # Assign the correct closures based on
- # the UNSAFE parameter
+ my $raw = sub {
+ $output->print($_[0]);
+ # Don't set $hasData or any other information: we know nothing
+ # about what was just written.
+ #
+ };
+
+ my $SAFE_raw = sub {
+ croak('raw() is only available when UNSAFE is set');
+ };
+
+ my $cdata = sub {
+ my $data = $_[0];
+ $data =~ s/\]\]>/\]\]\]\]><!\[CDATA\[>/g;
+ $output->print("<![CDATA[$data]]>");
+ $hasData = 1;
+ };
+
+ my $SAFE_cdata = sub {
+ if ($elementLevel < 1) {
+ croak("Attempt to insert characters outside of document element");
+ } elsif ($dataMode && $hasElement) {
+ croak("Mixed content not allowed in data mode: characters");
+ } else {
+ _croakUnlessDefinedCharacters($_[0]);
+ &{$checkUnencodedRepertoire}($_[0]);
+ &{$cdata};
+ }
+ };
+
+ # Assign the correct closures based on
+ # the UNSAFE parameter
if ($unsafe) {
$self = {'END' => $end,
- 'XMLDECL' => $xmlDecl,
- 'PI' => $pi,
- 'COMMENT' => $comment,
- 'DOCTYPE' => $doctype,
- 'STARTTAG' => $startTag,
- 'EMPTYTAG' => $emptyTag,
- 'ENDTAG' => $endTag,
- 'CHARACTERS' => $characters};
+ 'XMLDECL' => $xmlDecl,
+ 'PI' => $pi,
+ 'COMMENT' => $comment,
+ 'DOCTYPE' => $doctype,
+ 'STARTTAG' => $startTag,
+ 'EMPTYTAG' => $emptyTag,
+ 'ENDTAG' => $endTag,
+ 'CHARACTERS' => $characters,
+ 'RAW' => $raw,
+ 'CDATA' => $cdata
+ };
} else {
$self = {'END' => $SAFE_end,
- 'XMLDECL' => $SAFE_xmlDecl,
- 'PI' => $SAFE_pi,
- 'COMMENT' => $SAFE_comment,
- 'DOCTYPE' => $SAFE_doctype,
- 'STARTTAG' => $SAFE_startTag,
- 'EMPTYTAG' => $SAFE_emptyTag,
- 'ENDTAG' => $SAFE_endTag,
- 'CHARACTERS' => $SAFE_characters};
+ 'XMLDECL' => $SAFE_xmlDecl,
+ 'PI' => $SAFE_pi,
+ 'COMMENT' => $SAFE_comment,
+ 'DOCTYPE' => $SAFE_doctype,
+ 'STARTTAG' => $SAFE_startTag,
+ 'EMPTYTAG' => $SAFE_emptyTag,
+ 'ENDTAG' => $SAFE_endTag,
+ 'CHARACTERS' => $SAFE_characters,
+ 'RAW' => $SAFE_raw, # This will intentionally fail
+ 'CDATA' => $SAFE_cdata
+ };
}
- # Query methods
+ # Query methods
$self->{'IN_ELEMENT'} = sub {
my ($ancestor) = (@_);
return $elementStack[$#elementStack] eq $ancestor;
@@ -369,24 +445,45 @@ sub new {
$self->{'ANCESTOR'} = sub {
my ($n) = (@_);
- return $elementStack[$#elementStack-$n];
+ if ($n < scalar(@elementStack)) {
+ return $elementStack[$#elementStack-$n];
+ } else {
+ return undef;
+ }
};
- # Set and get the output destination.
+ # Set and get the output destination.
$self->{'GETOUTPUT'} = sub {
- return $output;
+ if (ref($output) ne 'XML::Writer::_PrintChecker') {
+ return $output;
+ } else {
+ return $output->{HANDLE};
+ }
};
$self->{'SETOUTPUT'} = sub {
my $newOutput = $_[0];
- # If there is no OUTPUT parameter,
- # use standard output
- unless ($newOutput) {
- $newOutput = new IO::Handle();
- $newOutput->fdopen(fileno(STDOUT), "w") ||
- croak("Cannot write to standard output");
+
+ if (ref($newOutput) eq 'SCALAR') {
+ $output = new XML::Writer::_String($newOutput);
+ } else {
+ # If there is no OUTPUT parameter,
+ # use standard output
+ $output = $newOutput || \*STDOUT;
+ if ($outputEncoding) {
+ if (lc($outputEncoding) eq 'utf-8') {
+ binmode($output, ':encoding(utf-8)');
+ } elsif (lc($outputEncoding) eq 'us-ascii') {
+ binmode($output, ':encoding(us-ascii)');
+ } else {
+ die 'The only supported encodings are utf-8 and us-ascii';
+ }
+ }
+ }
+
+ if ($params{CHECK_PRINT}) {
+ $output = XML::Writer::_PrintChecker->new($output);
}
- $output = $newOutput;
};
$self->{'SETDATAMODE'} = sub {
@@ -398,17 +495,28 @@ sub new {
};
$self->{'SETDATAINDENT'} = sub {
- $dataIndent = $_[0];
+ if ($_[0] =~ /^\s*$/) {
+ $dataIndent = $_[0];
+ } else {
+ $dataIndent = ' ' x $_[0];
+ }
};
$self->{'GETDATAINDENT'} = sub {
- return $dataIndent;
+ if ($dataIndent =~ /^ *$/) {
+ return length($dataIndent);
+ } else {
+ return $dataIndent;
+ }
};
- # Set the output.
+ # Set the indent.
+ &{$self->{'SETDATAINDENT'}}($params{'DATA_INDENT'} || '');
+
+ # Set the output.
&{$self->{'SETOUTPUT'}}($params{'OUTPUT'});
- # Return the blessed object.
+ # Return the blessed object.
return bless $self, $class;
}
@@ -486,13 +594,23 @@ sub endTag {
# Write a simple data element.
#
sub dataElement {
- my ($self, $name, $data, %atts) = (@_);
- $self->startTag($name, %atts);
+ my ($self, $name, $data, @atts) = (@_);
+ $self->startTag($name, @atts);
$self->characters($data);
$self->endTag($name);
}
#
+# Write a simple CDATA element.
+#
+sub cdataElement {
+ my ($self, $name, $data, %atts) = (@_);
+ $self->startTag($name, %atts);
+ $self->cdata($data);
+ $self->endTag($name);
+}
+
+#
# Write character data.
#
sub characters {
@@ -501,6 +619,22 @@ sub characters {
}
#
+# Write raw, unquoted, completely unchecked character data.
+#
+sub raw {
+ my $self = shift;
+ &{$self->{RAW}};
+}
+
+#
+# Write CDATA.
+#
+sub cdata {
+ my $self = shift;
+ &{$self->{CDATA}};
+}
+
+#
# Query the current element.
#
sub in_element {
@@ -605,21 +739,23 @@ sub removePrefix {
########################################################################
#
-# Private: check for duplicate attributes.
+# Private: check for duplicate attributes and bad characters.
# Note - this starts at $_[1], because $_[0] is assumed to be an
# element name.
#
sub _checkAttributes {
my %anames;
my $i = 1;
- while ($_[$i]) {
- my $name = $_[$i];
- $i += 2;
+ while ($_[0]->[$i]) {
+ my $name = $_[0]->[$i];
+ $i += 1;
if ($anames{$name}) {
croak("Two attributes named \"$name\"");
} else {
$anames{$name} = 1;
}
+ _croakUnlessDefinedCharacters($_[0]->[$i]);
+ $i += 1;
}
}
@@ -637,6 +773,23 @@ sub _escapeLiteral {
return $data;
}
+sub _escapeASCII($) {
+ $_[0] =~ s/([^\x00-\x7F])/sprintf('&#x%X;', ord($1))/ge;
+}
+
+sub _croakUnlessASCII($) {
+ if ($_[0] =~ /[^\x00-\x7F]/) {
+ croak('Non-ASCII characters are not permitted in this part of a US-ASCII document');
+ }
+}
+
+# Enforce XML 1.0, section 2.2's definition of "Char" (only reject low ASCII,
+# so as not to require Unicode support from perl)
+sub _croakUnlessDefinedCharacters($) {
+ if ($_[0] =~ /([\x00-\x08\x0B-\x0C\x0E-\x1F])/) {
+ croak(sprintf('Code point \u%04X is not a valid character in XML', ord($1)));
+ }
+}
########################################################################
@@ -658,48 +811,52 @@ sub new {
my $unsafe = $params{UNSAFE};
- # Snarf the prefix map, if any, and
- # note the default prefix.
+ # Snarf the prefix map, if any, and
+ # note the default prefix.
my %prefixMap = ();
if ($params{PREFIX_MAP}) {
%prefixMap = (%{$params{PREFIX_MAP}});
delete $params{PREFIX_MAP};
}
- my $defaultPrefix = $prefixMap{''};
- delete $prefixMap{''};
+ $prefixMap{'http://www.w3.org/XML/1998/namespace'} = 'xml';
- # Generate the reverse map for URIs
- my %uriMap = ();
+ # Generate the reverse map for URIs
+ my $uriMap = {};
my $key;
foreach $key (keys(%prefixMap)) {
- $uriMap{$prefixMap{$key}} = $key;
+ $uriMap->{$prefixMap{$key}} = $key;
}
- # Create an instance of the parent.
+ my $defaultPrefix = $uriMap->{''};
+ delete $prefixMap{$defaultPrefix} if ($defaultPrefix);
+
+ # Create an instance of the parent.
my $self = new XML::Writer(%params);
- # Snarf the parent's methods that we're
- # going to override.
+ # Snarf the parent's methods that we're
+ # going to override.
my $OLD_startTag = $self->{STARTTAG};
my $OLD_emptyTag = $self->{EMPTYTAG};
my $OLD_endTag = $self->{ENDTAG};
- # State variables
+ # State variables
+ my @stack;
my $prefixCounter = 1;
- my @nsDecls = ();
- my $nsDecls = {};
- my @nsDefaultDecl = ();
+ my $nsDecls = {'http://www.w3.org/XML/1998/namespace' => 'xml'};
my $nsDefaultDecl = undef;
- my @nsCopyFlag = ();
my $nsCopyFlag = 0;
+ my @forcedNSDecls = ();
+
+ if ($params{FORCED_NS_DECLS}) {
+ @forcedNSDecls = @{$params{FORCED_NS_DECLS}};
+ delete $params{FORCED_NS_DECLS};
+ }
#
# Push the current declaration state.
#
my $pushState = sub {
- push @nsDecls, $nsDecls;
- push @nsDefaultDecl, $nsDefaultDecl;
- push @nsCopyFlag, $nsCopyFlag;
+ push @stack, [$nsDecls, $nsDefaultDecl, $nsCopyFlag, $uriMap];
$nsCopyFlag = 0;
};
@@ -708,20 +865,26 @@ sub new {
# Pop the current declaration state.
#
my $popState = sub {
- $nsDecls = pop @nsDecls;
- $nsDefaultDecl = pop @nsDefaultDecl;
- $nsCopyFlag = pop @nsCopyFlag;
+ ($nsDecls, $nsDefaultDecl, $nsCopyFlag, $uriMap) = @{pop @stack};
};
#
# Generate a new prefix.
#
my $genPrefix = sub {
- my $prefix;
- do {
+ my $uri = $_[0];
+ my $prefixCounter = 1;
+ my $prefix = $prefixMap{$uri};
+ my %clashMap = %{$uriMap};
+ while( my ($u, $p) = each(%prefixMap)) {
+ $clashMap{$p} = $u;
+ }
+
+ while (!defined($prefix) || ($clashMap{$prefix} && $clashMap{$prefix} ne $uri)) {
$prefix = "__NS$prefixCounter";
$prefixCounter++;
- } while ($uriMap{$prefix});
+ }
+
return $prefix;
};
@@ -731,42 +894,41 @@ sub new {
my $processName = sub {
my ($nameref, $atts, $attFlag) = (@_);
my ($uri, $local) = @{$$nameref};
- my $prefix = $prefixMap{$uri};
-
- # Is this an element name that matches
- # the default NS?
- if (!$attFlag && ($uri eq $defaultPrefix)) {
- unless ($nsDefaultDecl) {
- push @{$atts}, 'xmlns';
- push @{$atts}, $uri;
- $nsDefaultDecl = 1;
+ my $prefix = $nsDecls->{$uri};
+
+ # Is this an element name that matches
+ # the default NS?
+ if (!$attFlag && $defaultPrefix && ($uri eq $defaultPrefix)) {
+ unless ($nsDefaultDecl && ($nsDefaultDecl eq $uri)) {
+ push @{$atts}, 'xmlns';
+ push @{$atts}, $uri;
+ $nsDefaultDecl = $uri;
}
$$nameref = $local;
+
+ if (defined($uriMap->{''})) {
+ delete ($nsDecls->{$uriMap->{''}});
+ }
+
+ $nsDecls->{$uri} = '';
+ unless ($nsCopyFlag) {
+ $uriMap = {%{$uriMap}};
+ $nsDecls = {%{$nsDecls}};
+ $nsCopyFlag = 1;
+ }
+ $uriMap->{''} = $uri;
- # Is there a straight-forward prefix?
+ # Is there a straight-forward prefix?
} elsif ($prefix) {
- unless ($nsDecls->{$uri}) {
- # Copy on write (FIXME: duplicated)
- unless ($nsCopyFlag) {
- $nsCopyFlag = 1;
- my %decls = (%{$nsDecls});
- $nsDecls = \%decls;
- }
- $nsDecls->{$uri} = $prefix;
- push @{$atts}, "xmlns:$prefix";
- push @{$atts}, $uri;
- }
$$nameref = "$prefix:$local";
-
} else {
- $prefix = &{$genPrefix}();
- $prefixMap{$uri} = $prefix;
- $uriMap{$prefix} = $uri;
+ $prefix = &{$genPrefix}($uri);
unless ($nsCopyFlag) {
- $nsCopyFlag = 1;
- my %decls = (%{$nsDecls});
- $nsDecls = \%decls;
+ $uriMap = {%{$uriMap}};
+ $nsDecls = {%{$nsDecls}};
+ $nsCopyFlag = 1;
}
+ $uriMap->{$prefix} = $uri;
$nsDecls->{$uri} = $prefix;
push @{$atts}, "xmlns:$prefix";
push @{$atts}, $uri;
@@ -780,17 +942,43 @@ sub new {
#
my $nsProcess = sub {
if (ref($_[0]->[0]) eq 'ARRAY') {
- &{$processName}(\$_[0]->[0], $_[0], 0);
+ my $x = \@{$_[0]->[0]};
+ &{$processName}(\$x, $_[0], 0);
+ splice(@{$_[0]}, 0, 1, $x);
}
my $i = 1;
while ($_[0]->[$i]) {
if (ref($_[0]->[$i]) eq 'ARRAY') {
- &{$processName}(\$_[0]->[$i], $_[0], 1);
+ my $x = \@{$_[0]->[$i]};
+ &{$processName}(\$x, $_[0], 1);
+ splice(@{$_[0]}, $i, 1, $x);
}
$i += 2;
}
+
+ # We do this if any declarations are forced, due either to
+ # constructor arguments or to a call during processing.
+ if (@forcedNSDecls) {
+ foreach (@forcedNSDecls) {
+ my @dummy = ($_, 'dummy');
+ my $d2 = \@dummy;
+ if ($defaultPrefix && ($_ eq $defaultPrefix)) {
+ &{$processName}(\$d2, $_[0], 0);
+ } else {
+ &{$processName}(\$d2, $_[0], 1);
+ }
+ }
+ @forcedNSDecls = ();
+ }
+ };
+
+
+ # Indicate that a namespace should be declared by the next open element
+ $self->{FORCENSDECL} = sub {
+ push @forcedNSDecls, $_[0];
};
+
#
# Start tag, with NS processing
#
@@ -824,7 +1012,17 @@ sub new {
#
$self->{ENDTAG} = sub {
my $name = $_[0];
- &{$nsProcess}(\@_);
+ if (ref($_[0]) eq 'ARRAY') {
+ my $pfx = $nsDecls->{$_[0]->[0]};
+ if ($pfx) {
+ $_[0] = $pfx . ':' . $_[0]->[1];
+ } else {
+ $_[0] = $_[0]->[1];
+ }
+ } else {
+ $_[0] = $_[0];
+ }
+# &{$nsProcess}(\@_);
&{$OLD_endTag};
&{$popState}();
};
@@ -837,8 +1035,8 @@ sub new {
my $OLD_pi = $self->{PI};
$self->{PI} = sub {
my $target = $_[0];
- if ($target =~ /:/) {
- croak "PI target '$target' contains a colon.";
+ if (index($target, ':') >= 0) {
+ croak "PI target '$target' contains a colon.";
}
&{$OLD_pi};
}
@@ -852,8 +1050,10 @@ sub new {
my ($uri, $prefix) = (@_);
if ($prefix) {
$prefixMap{$uri} = $prefix;
- $uriMap{$prefix} = $uri;
} else {
+ if (defined($defaultPrefix)) {
+ delete($prefixMap{$defaultPrefix});
+ }
$defaultPrefix = $uri;
}
};
@@ -864,7 +1064,7 @@ sub new {
#
$self->{REMOVEPREFIX} = sub {
my ($uri) = (@_);
- if ($defaultPrefix eq $uri) {
+ if ($defaultPrefix && ($defaultPrefix eq $uri)) {
$defaultPrefix = undef;
}
delete $prefixMap{$uri};
@@ -904,38 +1104,83 @@ sub _checkNSNames {
my $i = 1;
my $name = $names->[0];
- # Check the element name.
+ # Check the element name.
if (ref($name) eq 'ARRAY') {
- if ($name->[1] =~ /:/) {
+ if (index($name->[1], ':') >= 0) {
croak("Local part of element name '" .
- $name->[1] .
- "' contains a colon.");
+ $name->[1] .
+ "' contains a colon.");
}
- } elsif ($name =~ /:/) {
+ } elsif (index($name, ':') >= 0) {
croak("Element name '$name' contains a colon.");
}
- # Check the attribute names.
+ # Check the attribute names.
while ($names->[$i]) {
my $name = $names->[$i];
if (ref($name) eq 'ARRAY') {
my $local = $name->[1];
- if ($local =~ /:/) {
- croak "Local part of attribute name '$local' contains a colon.";
+ if (index($local, ':') >= 0) {
+ croak "Local part of attribute name '$local' contains a colon.";
}
} else {
- if ($name =~ /^(xmlns|.*:)/) {
- if ($name =~ /^xmlns/) {
- croak "Attribute name '$name' begins with 'xmlns'";
- } elsif ($name =~ /:/) {
- croak "Attribute name '$name' contains ':'";
- }
+ if ($name =~ /^xmlns/) {
+ croak "Attribute name '$name' begins with 'xmlns'";
+ } elsif (index($name, ':') >= 0) {
+ croak "Attribute name '$name' contains ':'";
}
}
$i += 2;
}
}
+sub forceNSDecl
+{
+ my $self = shift;
+ return &{$self->{FORCENSDECL}};
+}
+
+
+package XML::Writer::_String;
+
+# Internal class, behaving sufficiently like an IO::Handle,
+# that stores written output in a string
+#
+# Heavily inspired by Simon Oliver's XML::Writer::String
+
+sub new
+{
+ my $class = shift;
+ my $scalar_ref = shift;
+ return bless($scalar_ref, $class);
+}
+
+sub print
+{
+ ${(shift)} .= join('', @_);
+ return 1;
+}
+
+
+package XML::Writer::_PrintChecker;
+
+use Carp;
+
+sub new
+{
+ my $class = shift;
+ return bless({HANDLE => shift}, $class);
+}
+
+sub print
+{
+ my $self = shift;
+ if ($self->{HANDLE}->print(shift)) {
+ return 1;
+ } else {
+ croak "Failed to write output: $!";
+ }
+}
1;
__END__
@@ -951,7 +1196,7 @@ XML::Writer - Perl extension for writing XML documents.
=head1 SYNOPSIS
use XML::Writer;
- use IO;
+ use IO::File;
my $output = new IO::File(">output.xml");
@@ -1005,8 +1250,10 @@ Arguments are an anonymous hash array of parameters:
=item OUTPUT
An object blessed into IO::Handle or one of its subclasses (such as
-IO::File); if this parameter is not present, the module will write to
-standard output.
+IO::File), or a reference to a string; if this parameter is not present,
+the module will write to standard output. If a string reference is passed,
+it will capture the generated XML (as a string; to get bytes use the
+C<Encode> module).
=item NAMESPACES
@@ -1043,12 +1290,20 @@ generate prefixes of the form "__NS1", "__NS2", etc.
To set the default namespace, use '' for the prefix.
+=item FORCED_NS_DECLS
+
+An array reference; if this parameter is present, the document element
+will contain declarations for all the given namespace URIs.
+Declaring namespaces in advance is particularly useful when a large
+number of elements from a namespace are siblings, but don't share a direct
+ancestor from the same namespace.
+
=item NEWLINES
A true or false value; if this parameter is present and its value is
true, then the module will insert an extra newline before the closing
delimiter of start, end, and empty tags to guarantee that the document
-does not end up as a single, long line. If the paramter is not
+does not end up as a single, long line. If the parameter is not
present, the module will not insert the newlines.
=item UNSAFE
@@ -1069,9 +1324,22 @@ elements as content.
=item DATA_INDENT
-A numeric value; if this parameter is present, it represents the
+A numeric value or white space; if this parameter is present, it represents the
indent step for elements in data mode (it will be ignored when not in
-data mode).
+data mode). If it is white space it will be repeated for each level of
+indentation.
+
+=item ENCODING
+
+A character encoding; currently this must be one of 'utf-8' or 'us-ascii'.
+If present, it will be used for the underlying character encoding and as the
+default in the XML declaration.
+
+=item CHECK_PRINT
+
+A true or false value; if this parameter is present and its value is
+true, all prints to the underlying output will be checked for success. Failures
+will cause a croak rather than being ignored.
=back
@@ -1087,9 +1355,10 @@ closed:
Add an XML declaration to the beginning of an XML document. The
version will always be "1.0". If you provide a non-null encoding or
-standalone argument, its value will appear in the declaration (and
+standalone argument, its value will appear in the declaration (any
non-null value for standalone except 'no' will automatically be
-converted to 'yes').
+converted to 'yes'). If not given here, the encoding will be taken from the
+ENCODING argument. Pass the empty string to suppress this behaviour.
$writer->xmlDecl("UTF-8");
@@ -1098,7 +1367,7 @@ converted to 'yes').
Add a DOCTYPE declaration to an XML document. The declaration must
appear before the beginning of the root element. If you provide a
publicId, you must provide a systemId as well, but you may provide
-just a system ID.
+just a system ID by passing 'undef' for the publicId.
$writer->doctype("html");
@@ -1107,7 +1376,7 @@ just a system ID.
Add a comment to an XML document. If the comment appears outside the
document element (either before the first start tag or after the last
end tag), the module will add a carriage return after it to improve
-readability:
+readability. In data mode, comments will be treated as empty tags:
$writer->comment("This is a comment");
@@ -1185,6 +1454,29 @@ You may invoke this method only within the document element
In data mode, you must not use this method to add whitespace between
elements.
+=item raw($data)
+
+Print data completely unquoted and unchecked to the XML document. For
+example C<raw('<')> will print a literal < character. This
+necessarily bypasses all well-formedness checking, and is therefore
+only available in unsafe mode.
+
+This can sometimes be useful for printing entities which are defined
+for your XML format but the module doesn't know about, for example
+&nbsp; for XHTML.
+
+=item cdata($data)
+
+As C<characters()> but writes the data quoted in a CDATA section, that
+is, between <![CDATA[ and ]]>. If the data to be written itself
+contains ]]>, it will be written as several consecutive CDATA
+sections.
+
+=item cdataElement($name, $data [, $aname1 => $value1, ...])
+
+As C<dataElement()> but the element content is written as one or more
+CDATA sections (see C<cdata()>).
+
=item setOutput($output)
Set the current output destination, as in the OUTPUT parameter for the
@@ -1234,7 +1526,7 @@ Return a true value if the most recent open element matches $name:
=item within_element($name)
-Return a true value if any open elemnet matches $name:
+Return a true value if any open element matches $name:
if ($writer->within_element('body')) {
$writer->startTag('h1');
@@ -1262,8 +1554,7 @@ element.
=head2 Additional Namespace Support
-WARNING: you must not use these methods while you are writing a
-document, or the results will be unpredictable.
+As of 0.510, these methods may be used while writing a document.
=over 4
@@ -1279,8 +1570,10 @@ To set the default namespace, omit the $prefix parameter or set it to
Remove a preferred mapping between a Namespace URI and a prefix.
-To set the default namespace, omit the $prefix parameter or set it to
-''.
+=item forceNSDecl($uri)
+
+Indicate that a namespace declaration for this URI should be included
+with the next element to be started.
=back
@@ -1365,7 +1658,19 @@ providing an UNSAFE parameter:
=head1 AUTHOR
-David Megginson, david@megginson.com
+David Megginson E<lt>david@megginson.comE<gt>
+
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (c) 1999 by Megginson Technologies.
+
+Copyright (c) 2003 Ed Avis E<lt>ed@membled.comE<gt>
+
+Copyright (c) 2004-2010 Joseph Walton E<lt>joe@kafsemo.orgE<gt>
+
+Redistribution and use in source and compiled forms, with or without
+modification, are permitted under any circumstances. No warranty.
=head1 SEE ALSO