This required moving files around in the repository and shifting from a master.adoc structure to _topic_map.yml, etc. README and Makefile modified slightly to reflect new build process
1031 lines
No EOL
56 KiB
HTML
1031 lines
No EOL
56 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="en-US">
|
|
<head>
|
|
<meta charset="utf-8">
|
|
<meta content="IE=edge" http-equiv="X-UA-Compatible">
|
|
<meta content="width=device-width, initial-scale=1.0" name="viewport">
|
|
<title>Defensive Coding Guide | Defensive Coding Guide | Specific Programming Tasks | Serialization and Deserialization</title>
|
|
|
|
<!-- Bootstrap -->
|
|
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css">
|
|
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap-theme.min.css">
|
|
|
|
<!-- Overpass Font -->
|
|
<link rel="stylesheet" href="https://overpass-30e2.kxcdn.com/overpass.css">
|
|
|
|
<link href="../../../master/_stylesheets/asciibinder.css" rel="stylesheet" />
|
|
|
|
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
|
|
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
|
|
<!--[if lt IE 9]>
|
|
<script src="https://oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
|
|
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
|
|
<![endif]-->
|
|
|
|
<link href="../../../master/_images/favicon32x32.png" rel="shortcut icon" type="image/png">
|
|
<!--[if IE]><link rel="shortcut icon" href="../../../master/_images/favicon.ico"><![endif]-->
|
|
<meta content="AsciiBinder" name="application-name">
|
|
</head>
|
|
<body>
|
|
<div class="navbar navbar-default" role="navigation">
|
|
<div class="container-fluid">
|
|
<div class="navbar-header">
|
|
<a class="navbar-brand" href="https://docs.fedoraproject.org/"><img alt="Fedora Documentation" src="../../../master/_images/fedora.svg"></a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="container">
|
|
<p class="toggle-nav visible-xs pull-left">
|
|
<button class="btn btn-default btn-sm" type="button" data-toggle="offcanvas">Toggle nav</button>
|
|
</p>
|
|
<ol class="breadcrumb">
|
|
<li class="sitename">
|
|
<a href="../../../index.html">Home</a>
|
|
</li>
|
|
<li class="hidden-xs active">
|
|
<a href="../../en-US/index.html">Defensive Coding Guide </a>
|
|
</li>
|
|
<li class="hidden-xs active">
|
|
<a href="../../en-US/index.html">Defensive Coding Guide</a>
|
|
</li>
|
|
<li class="hidden-xs active"><a href="../../en-US/tasks/Tasks-Library_Design.html">Specific Programming Tasks</a></li>
|
|
<li class="hidden-xs active">
|
|
Serialization and Deserialization
|
|
</li>
|
|
</ol>
|
|
<div class="row row-offcanvas row-offcanvas-left">
|
|
<div class="col-xs-8 col-sm-3 col-md-3 sidebar sidebar-offcanvas">
|
|
<ul class="nav nav-sidebar">
|
|
<li class="nav-header">
|
|
<a class="" href="#" data-toggle="collapse" data-target="#topicGroup0">
|
|
<span id="tgSpan0" class="fa fa-angle-down"></span>Defensive Coding Guide
|
|
</a>
|
|
<ul id="topicGroup0" class="collapse in list-unstyled">
|
|
<li><a class="" href="../../en-US/index.html">Book Information</a></li>
|
|
<li class="nav-header">
|
|
<a class="" href="#" data-toggle="collapse" data-target="#topicSubGroup-0-1">
|
|
<span id="sgSpan-0-1" class="fa fa-caret-right"></span> Programming Languages
|
|
</a>
|
|
<ul id="topicSubGroup-0-1" class="nav-tertiary list-unstyled collapse">
|
|
<li><a class="" href="../../en-US/programming-languages/C.html">The C Programming Language</a></li>
|
|
<li><a class="" href="../../en-US/programming-languages/CXX.html">The C++ Programming Language</a></li>
|
|
<li><a class="" href="../../en-US/programming-languages/Java.html">The Java Programming Language</a></li>
|
|
<li><a class="" href="../../en-US/programming-languages/Python.html">The Python Programming Language</a></li>
|
|
<li><a class="" href="../../en-US/programming-languages/Shell.html">Shell Programming and bash</a></li>
|
|
<li><a class="" href="../../en-US/programming-languages/Go.html">The Go Programming Language</a></li>
|
|
<li><a class="" href="../../en-US/programming-languages/Vala.html">The Vala Programming Language</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="nav-header">
|
|
<a class="" href="#" data-toggle="collapse" data-target="#topicSubGroup-0-2">
|
|
<span id="sgSpan-0-2" class="fa fa-caret-down"></span> Specific Programming Tasks
|
|
</a>
|
|
<ul id="topicSubGroup-0-2" class="nav-tertiary list-unstyled collapse in">
|
|
<li><a class="" href="../../en-US/tasks/Tasks-Library_Design.html">Library Design</a></li>
|
|
<li><a class="" href="../../en-US/tasks/Tasks-Descriptors.html">File Descriptor Management</a></li>
|
|
<li><a class="" href="../../en-US/tasks/Tasks-File_System.html">File System Manipulation</a></li>
|
|
<li><a class="" href="../../en-US/tasks/Tasks-Temporary_Files.html">Temporary Files</a></li>
|
|
<li><a class="" href="../../en-US/tasks/Tasks-Processes.html">Processes</a></li>
|
|
<li><a class=" active" href="../../en-US/tasks/Tasks-Serialization.html">Serialization and Deserialization</a></li>
|
|
<li><a class="" href="../../en-US/tasks/Tasks-Cryptography.html">Cryptography</a></li>
|
|
<li><a class="" href="../../en-US/tasks/Tasks-Packaging.html">RPM Packaging</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="nav-header">
|
|
<a class="" href="#" data-toggle="collapse" data-target="#topicSubGroup-0-3">
|
|
<span id="sgSpan-0-3" class="fa fa-caret-right"></span> Implementing Security Features
|
|
</a>
|
|
<ul id="topicSubGroup-0-3" class="nav-tertiary list-unstyled collapse">
|
|
<li><a class="" href="../../en-US/features/Features-Authentication.html">Authentication and Authorization</a></li>
|
|
<li><a class="" href="../../en-US/features/Features-TLS.html">Transport Layer Security (TLS)</a></li>
|
|
<li><a class="" href="../../en-US/features/Features-HSM.html">Hardware Security Modules and Smart Cards</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a class="" href="../../en-US/Revision_History.html">Revision History</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="col-xs-12 col-sm-9 col-md-9 main">
|
|
<div class="page-header">
|
|
<h2>Serialization and Deserialization</h2>
|
|
</div>
|
|
<div id="preamble">
|
|
<div class="sectionbody">
|
|
<div class="paragraph">
|
|
<p>Protocol decoders and file format parsers are often the
|
|
most-exposed part of an application because they are exposed with
|
|
little or no user interaction and before any authentication and
|
|
security checks are made. They are also difficult to write
|
|
robustly in languages which are not memory-safe.</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="sect-Defensive_Coding-Tasks-Serialization-Decoders"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-Decoders"></a>Recommendations for Manually-written Decoders</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph">
|
|
<p>For C and C++, the advice in <a href="../../en-US/programming-languages/C.html#sect-Defensive_Coding-C-Pointers">Recommendations for Pointers and Array Handling</a> applies. In
|
|
addition, avoid non-character pointers directly into input
|
|
buffers. Pointer misalignment causes crashes on some
|
|
architectures.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>When reading variable-sized objects, do not allocate large
|
|
amounts of data solely based on the value of a size field. If
|
|
possible, grow the data structure as more data is read from the
|
|
source, and stop when no data is available. This helps to avoid
|
|
denial-of-service attacks where small amounts of input data
|
|
result in enormous memory allocations during decoding.
|
|
Alternatively, you can impose reasonable bounds on memory
|
|
allocations, but some protocols do not permit this.</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="protocol-design"><a class="anchor" href="#protocol-design"></a>Protocol Design</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph">
|
|
<p>Binary formats with explicit length fields are more difficult to
|
|
parse robustly than those where the length of dynamically-sized
|
|
elements is derived from sentinel values. A protocol which does
|
|
not use length fields and can be written in printable ASCII
|
|
characters simplifies testing and debugging. However, binary
|
|
protocols with length fields may be more efficient to parse.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>In new datagram-oriented protocols, unique numbers such as
|
|
sequence numbers or identifiers for fragment reassembly (see
|
|
<a href="#sect-Defensive_Coding-Tasks-Serialization-Fragmentation">Fragmentation</a>)
|
|
should be at least 64 bits large, and really should not be
|
|
smaller than 32 bits in size. Protocols should not permit
|
|
fragments with overlapping contents.</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="sect-Defensive_Coding-Tasks-Serialization-Fragmentation"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-Fragmentation"></a>Fragmentation</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph">
|
|
<p>Some serialization formats use frames or protocol data units
|
|
(PDUs) on lower levels which are smaller than the PDUs on higher
|
|
levels. With such an architecture, higher-level PDUs may have
|
|
to be <strong>fragmented</strong> into smaller frames during
|
|
serialization, and frames may need
|
|
<strong>reassembly</strong> into large PDUs during
|
|
deserialization.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Serialization formats may use conceptually similar structures
|
|
for completely different purposes, for example storing multiple
|
|
layers and color channels in a single image file.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>When fragmenting PDUs, establish a reasonable lower bound for
|
|
the size of individual fragments (as large as possible—limits as
|
|
low as one or even zero can add substantial overhead). Avoid
|
|
fragmentation if at all possible, and try to obtain the maximum
|
|
acceptable fragment length from a trusted data source.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>When implementing reassembly, consider the following aspects.</p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p>Avoid allocating a significant amount of resources without
|
|
proper authentication. Allocate memory for the unfragmented
|
|
PDU as more and more fragments are encountered, and not
|
|
based on the initially advertised unfragmented PDU size,
|
|
unless there is a sufficiently low limit on the unfragmented
|
|
PDU size, so that over-allocation cannot lead to performance
|
|
problems.</p>
|
|
</li>
|
|
<li>
|
|
<p>Reassembly queues on top of datagram-oriented transports
|
|
should be bounded, both in the combined size of the arrived
|
|
partial PDUs waiting for reassembly, and the total number of
|
|
partially reassembled fragments. The latter limit helps to
|
|
reduce the risk of accidental reassembly of unrelated
|
|
fragments, as can happen with small fragment IDs (see
|
|
<a href="#sect-Defensive_Coding-Tasks-Serialization-Fragmentation-ID">Fragment IDs</a>).
|
|
It also guards to some extent against deliberate injection of fragments,
|
|
by guessing fragment IDs.</p>
|
|
</li>
|
|
<li>
|
|
<p>Carefully keep track of which bytes in the unfragmented PDU
|
|
have been covered by fragments so far. If message
|
|
reordering is a concern, the most straightforward data
|
|
structure for this is an array of bits, with one bit for
|
|
every byte (or other atomic unit) in the unfragmented PDU.
|
|
Complete reassembly can be determined by increasing a
|
|
counter of set bits in the bit array as the bit array is
|
|
updated, taking overlapping fragments into consideration.</p>
|
|
</li>
|
|
<li>
|
|
<p>Reject overlapping fragments (that is, multiple fragments
|
|
which provide data at the same offset of the PDU being
|
|
fragmented), unless the protocol explicitly requires
|
|
accepting overlapping fragments. The bit array used for
|
|
tracking already arrived bytes can be used for this purpose.</p>
|
|
</li>
|
|
<li>
|
|
<p>Check for conflicting values of unfragmented PDU lengths (if
|
|
this length information is part of every fragment) and
|
|
reject fragments which are inconsistent.</p>
|
|
</li>
|
|
<li>
|
|
<p>Validate fragment lengths and offsets of individual
|
|
fragments against the unfragmented PDU length (if they are
|
|
present). Check that the last byte in the fragment does not
|
|
lie after the end of the unfragmented PDU. Avoid integer
|
|
overflows in these computations (see <a href="../../en-US/programming-languages/C.html#sect-Defensive_Coding-C-Arithmetic">Recommendations for Integer Arithmetic</a>).</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-Fragmentation-ID"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-Fragmentation-ID"></a>Fragment IDs</h3>
|
|
<div class="paragraph">
|
|
<p>If the underlying transport is datagram-oriented (so that PDUs
|
|
can be reordered, duplicated or be lost, like with UDP),
|
|
fragment reassembly needs to take into account endpoint
|
|
addresses of the communication channel, and there has to be
|
|
some sort of fragment ID which identifies the individual
|
|
fragments as part of a larger PDU. In addition, the
|
|
fragmentation protocol will typically involve fragment offsets
|
|
and fragment lengths, as mentioned above.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>If the transport may be subject to blind PDU injection (again,
|
|
like UDP), the fragment ID must be generated randomly. If the
|
|
fragment ID is 64 bits or larger (strongly recommended), it can
|
|
be generated in a completely random fashion for most traffic
|
|
volumes. If it is less than 64 bits large (so that accidental
|
|
collisions can happen if a lot of PDUs are transmitted), the
|
|
fragment ID should be incremented sequentially from a starting
|
|
value. The starting value should be derived using an HMAC-like
|
|
construction from the endpoint addresses, using a long-lived
|
|
random key. This construction ensures that despite the
|
|
limited range of the ID, accidental collisions are as unlikely
|
|
as possible. (This will not work reliably with really short
|
|
fragment IDs, such as the 16 bit IDs used by the Internet
|
|
Protocol.)</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="sect-Defensive_Coding-Tasks-Serialization-Library"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-Library"></a>Library Support for Deserialization</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph">
|
|
<p>For some languages, generic libraries are available which allow
|
|
serializing and deserializing user-defined objects. The
|
|
deserialization part comes in one of two flavors, depending on
|
|
the library. The first kind uses type information in the data
|
|
stream to control which objects are instantiated. The second
|
|
kind uses type definitions supplied by the programmer. The
|
|
first one allows arbitrary object instantiation, the second one
|
|
generally does not.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>The following serialization frameworks are in the first category,
|
|
are known to be unsafe, and must not be used for untrusted data:</p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p>Python’s <strong class="package">pickle</strong> and <strong class="package">cPickle</strong>
|
|
modules, and wrappers such as <strong class="package">shelve</strong></p>
|
|
</li>
|
|
<li>
|
|
<p>Perl’s <strong class="package">Storable</strong> package</p>
|
|
</li>
|
|
<li>
|
|
<p>Java serialization (<code>java.io.ObjectInputStream</code>),
|
|
even if encoded in other formats (as with
|
|
<code>java.beans.XMLDecoder</code>)</p>
|
|
</li>
|
|
<li>
|
|
<p>PHP serialization (<code>unserialize</code>)</p>
|
|
</li>
|
|
<li>
|
|
<p>Most implementations of YAML</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>When using a type-directed deserialization format where the
|
|
types of the deserialized objects are specified by the
|
|
programmer, make sure that the objects which can be instantiated
|
|
cannot perform any destructive actions in their destructors,
|
|
even when the data members have been manipulated.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>In general, JSON decoders do not suffer from this problem. But
|
|
you must not use the <code>eval</code> function to parse
|
|
JSON objects in JavaScript; even with the regular expression
|
|
filter from RFC 4627, there are still information leaks
|
|
remaining. JSON-based formats can still turn out risky if they
|
|
serve as an encoding form for any of the serialization
|
|
frameworks listed above.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>For serialization in C and C++ projects, the Protocol Buffers serialization
|
|
(<strong class="package">protobuf</strong>) provides type-safe automated serialization
|
|
by relying on code generation. It is positioned as similar to, but simpler and
|
|
more efficient than, XML serialization.</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="sect-Defensive_Coding-Tasks-Serialization-XML"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML"></a>XML Serialization</h2>
|
|
<div class="sectionbody">
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-XML-External"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-External"></a>External References</h3>
|
|
<div class="paragraph">
|
|
<p>XML documents can contain external references. They can occur
|
|
in various places.</p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p>In the DTD declaration in the header of an XML document:</p>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="xml"><span style="color:#34b">&lt;!DOCTYPE html PUBLIC
|
|
"-//W3C//DTD XHTML 1.0 Transitional//EN"
|
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"&gt;</span></code></pre>
|
|
</div>
|
|
</div>
|
|
</li>
|
|
<li>
|
|
<p>In a namespace declaration:</p>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="xml"><span style="color:#070;font-weight:bold">&lt;xsd:schema</span> <span style="color:#b48">xmlns:xsd</span>=<span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">http://www.w3.org/2001/XMLSchema</span><span style="color:#710">"</span></span><span style="color:#070;font-weight:bold">&gt;</span></code></pre>
|
|
</div>
|
|
</div>
|
|
</li>
|
|
<li>
|
|
<p>In an entity definition:</p>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="xml"><span style="color:#34b">&lt;!ENTITY sys SYSTEM "http://www.example.com/ent.xml"&gt;</span>
|
|
<span style="color:#34b">&lt;!ENTITY pub PUBLIC "-//Example//Public Entity//EN"
|
|
"http://www.example.com/pub-ent.xml"&gt;</span></code></pre>
|
|
</div>
|
|
</div>
|
|
</li>
|
|
<li>
|
|
<p>In a notation:</p>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="xml"><span style="color:#34b">&lt;!NOTATION not SYSTEM "../not.xml"&gt;</span></code></pre>
|
|
</div>
|
|
</div>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Originally, these external references were intended as unique
|
|
identifiers, but by many XML implementations, they are used
|
|
for locating the data for the referenced element. This causes
|
|
unwanted network traffic, and may disclose file system
|
|
contents or otherwise unreachable network resources, so this
|
|
functionality should be disabled.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Depending on the XML library, external references might be
|
|
processed not just when parsing XML, but also when generating
|
|
it.</p>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-XML-Entities"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-Entities"></a>Entity Expansion</h3>
|
|
<div class="paragraph">
|
|
<p>When external DTD processing is disabled, an internal DTD
|
|
subset can still contain entity definitions. Entity
|
|
declarations can reference other entities. Some XML libraries
|
|
expand entities automatically, and this processing cannot be
|
|
switched off in some places (such as attribute values or
|
|
content models). Without limits on the entity nesting level,
|
|
this expansion results in data which can grow exponentially in
|
|
length with the size of the input. (If there is a limit on the
|
|
nesting level, the growth is still polynomial, unless further
|
|
limits are imposed.)</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Consequently, the processing of internal DTD subsets should be
|
|
disabled if possible, and only trusted DTDs should be
|
|
processed. If a particular XML application does not permit
|
|
such restrictions, then application-specific limits are called
|
|
for.</p>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-XML-XInclude"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-XInclude"></a>XInclude Processing</h3>
|
|
<div class="paragraph">
|
|
<p>XInclude processing can reference file and network resources
|
|
and include them into the document, much like external entity
|
|
references. When parsing untrusted XML documents, XInclude
|
|
processing should be turned off.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>XInclude processing is also fairly complex and may pull in
|
|
support for the XPointer and XPath specifications,
|
|
considerably increasing the amount of code required for XML
|
|
processing.</p>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-XML-Validation"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-Validation"></a>Algorithmic Complexity of XML Validation</h3>
|
|
<div class="paragraph">
|
|
<p>DTD-based XML validation uses regular expressions for content
|
|
models. The XML specification requires that content models
|
|
are deterministic, which means that efficient validation is
|
|
possible. However, some implementations do not enforce
|
|
determinism, and require exponential (or just polynomial)
|
|
amount of space or time for validating some DTD/document
|
|
combinations.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>XML schemas and RELAX NG (via the <code>xsd:</code>
|
|
prefix) directly support textual regular expressions which are
|
|
not required to be deterministic.</p>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-XML-Expat"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-Expat"></a>Using Expat for XML parsing</h3>
|
|
<div class="paragraph">
|
|
<p>By default, Expat does not try to resolve external IDs, so no
|
|
steps are required to block them. However, internal entity
|
|
declarations are processed. Installing a callback which stops
|
|
parsing as soon as such entities are encountered disables
|
|
them, see <a href="#ex-Defensive_Coding-Tasks-Serialization-XML-Expat-EntityDeclHandler">Disabling XML entity processing with Expat</a>.
|
|
Expat does not perform any validation, so there are no
|
|
problems related to that.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-Expat-EntityDeclHandler" class="exampleblock">
|
|
<div class="title">Example 1. Disabling XML entity processing with Expat</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="c"><span style="color:#777">// Stop the parser when an entity declaration is encountered.</span>
|
|
<span style="color:#088;font-weight:bold">static</span> <span style="color:#339;font-weight:bold">void</span>
|
|
EntityDeclHandler(<span style="color:#339;font-weight:bold">void</span> *userData,
|
|
<span style="color:#080;font-weight:bold">const</span> XML_Char *entityName, <span style="color:#339;font-weight:bold">int</span> is_parameter_entity,
|
|
<span style="color:#080;font-weight:bold">const</span> XML_Char *value, <span style="color:#339;font-weight:bold">int</span> value_length,
|
|
<span style="color:#080;font-weight:bold">const</span> XML_Char *base, <span style="color:#080;font-weight:bold">const</span> XML_Char *systemId,
|
|
<span style="color:#080;font-weight:bold">const</span> XML_Char *publicId, <span style="color:#080;font-weight:bold">const</span> XML_Char *notationName)
|
|
{
|
|
XML_StopParser((XML_Parser)userData, XML_FALSE);
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>This handler must be installed when the
|
|
<code>XML_Parser</code> object is created (<a href="#ex-Defensive_Coding-Tasks-Serialization-XML-Expat-Create">Creating an Expat XML parser</a>).</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-Expat-Create" class="exampleblock">
|
|
<div class="title">Example 2. Creating an Expat XML parser</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="c">XML_Parser parser = XML_ParserCreate(<span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">UTF-8</span><span style="color:#710">"</span></span>);
|
|
<span style="color:#080;font-weight:bold">if</span> (parser == NULL) {
|
|
fprintf(stderr, <span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">XML_ParserCreate failed</span><span style="color:#b0b">\n</span><span style="color:#710">"</span></span>);
|
|
close(fd);
|
|
exit(<span style="color:#00D">1</span>);
|
|
}
|
|
<span style="color:#777">// EntityDeclHandler needs a reference to the parser to stop</span>
|
|
<span style="color:#777">// parsing.</span>
|
|
XML_SetUserData(parser, parser);
|
|
<span style="color:#777">// Disable entity processing, to inhibit entity expansion.</span>
|
|
XML_SetEntityDeclHandler(parser, EntityDeclHandler);</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>It is also possible to reject internal DTD subsets altogether,
|
|
using a suitable
|
|
<code>XML_StartDoctypeDeclHandler</code> handler
|
|
installed with <code>XML_SetDoctypeDeclHandler</code>.</p>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-Qt"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-Qt"></a>Using Qt for XML Parsing</h3>
|
|
<div class="paragraph">
|
|
<p>The XML component of Qt, QtXml, does not resolve external IDs
|
|
by default, so it is not required to prevent such resolution.
|
|
Internal entities are processed, though. To change that, a
|
|
custom <code>QXmlDeclHandler</code> and
|
|
<code>QXmlSimpleReader</code> subclasses are needed. It
|
|
is not possible to use the
|
|
<code>QDomDocument::setContent(const QByteArray
|
|
&)</code> convenience methods.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p><a href="#ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityHandler">A QtXml entity handler which blocks entity processing</a>
|
|
shows an entity handler which always returns errors, causing
|
|
parsing to stop when encountering entity declarations.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityHandler" class="exampleblock">
|
|
<div class="title">Example 3. A QtXml entity handler which blocks entity processing</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="cpp"><span style="color:#339;font-weight:bold">class</span> <span style="color:#B06;font-weight:bold">NoEntityHandler</span> : <span style="color:#088;font-weight:bold">public</span> QXmlDeclHandler {
|
|
<span style="color:#088;font-weight:bold">public</span>:
|
|
bool attributeDecl(<span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&,
|
|
<span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&);
|
|
bool internalEntityDecl(<span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&);
|
|
bool externalEntityDecl(<span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&,
|
|
<span style="color:#080;font-weight:bold">const</span> QString&);
|
|
QString errorString() <span style="color:#080;font-weight:bold">const</span>;
|
|
};
|
|
|
|
bool
|
|
NoEntityHandler::attributeDecl
|
|
(<span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&,
|
|
<span style="color:#080;font-weight:bold">const</span> QString&)
|
|
{
|
|
<span style="color:#080;font-weight:bold">return</span> <span style="color:#069">false</span>;
|
|
}
|
|
|
|
bool
|
|
NoEntityHandler::internalEntityDecl(<span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&)
|
|
{
|
|
<span style="color:#080;font-weight:bold">return</span> <span style="color:#069">false</span>;
|
|
}
|
|
|
|
bool
|
|
NoEntityHandler::externalEntityDecl(<span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span> QString&, <span style="color:#080;font-weight:bold">const</span>
|
|
QString&)
|
|
{
|
|
<span style="color:#080;font-weight:bold">return</span> <span style="color:#069">false</span>;
|
|
}
|
|
|
|
QString
|
|
NoEntityHandler::errorString() <span style="color:#080;font-weight:bold">const</span>
|
|
{
|
|
<span style="color:#080;font-weight:bold">return</span> <span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">XML declaration not permitted</span><span style="color:#710">"</span></span>;
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>This handler is used in the custom
|
|
<code>QXmlReader</code> subclass in <a href="#ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityReader">A QtXml XML reader which blocks entity processing</a>.
|
|
Some parts of QtXml will call the
|
|
<code>setDeclHandler(QXmlDeclHandler *)</code> method.
|
|
Consequently, we prevent overriding our custom handler by
|
|
providing a definition of this method which does nothing. In
|
|
the constructor, we activate namespace processing; this part
|
|
may need adjusting.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityReader" class="exampleblock">
|
|
<div class="title">Example 4. A QtXml XML reader which blocks entity processing</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="cpp"><span style="color:#339;font-weight:bold">class</span> <span style="color:#B06;font-weight:bold">NoEntityReader</span> : <span style="color:#088;font-weight:bold">public</span> QXmlSimpleReader {
|
|
NoEntityHandler handler;
|
|
<span style="color:#088;font-weight:bold">public</span>:
|
|
NoEntityReader();
|
|
<span style="color:#339;font-weight:bold">void</span> setDeclHandler(QXmlDeclHandler *);
|
|
};
|
|
|
|
NoEntityReader::NoEntityReader()
|
|
{
|
|
QXmlSimpleReader::setDeclHandler(&handler);
|
|
setFeature(<span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">http://xml.org/sax/features/namespaces</span><span style="color:#710">"</span></span>, <span style="color:#069">true</span>);
|
|
setFeature(<span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">http://xml.org/sax/features/namespace-prefixes</span><span style="color:#710">"</span></span>, <span style="color:#069">false</span>);
|
|
}
|
|
|
|
<span style="color:#339;font-weight:bold">void</span>
|
|
NoEntityReader::setDeclHandler(QXmlDeclHandler *)
|
|
{
|
|
<span style="color:#777">// Ignore the handler which was passed in.</span>
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Our <code>NoEntityReader</code> class can be used with
|
|
one of the overloaded
|
|
<code>QDomDocument::setContent</code> methods.
|
|
<a href="#ex-Defensive_Coding-Tasks-Serialization-XML-Qt-QDomDocument">Parsing an XML document with QDomDocument, without entity expansion</a>
|
|
shows how the <code>buffer</code> object (of type
|
|
<code>QByteArray</code>) is wrapped as a
|
|
<code>QXmlInputSource</code>. After calling the
|
|
<code>setContent</code> method, you should check the
|
|
return value and report any error.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-QDomDocument" class="exampleblock">
|
|
<div class="title">Example 5. Parsing an XML document with QDomDocument, without entity expansion</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="cpp">NoEntityReader reader;
|
|
QBuffer buffer(&data);
|
|
buffer.open(QIODevice::ReadOnly);
|
|
QXmlInputSource source(&buffer);
|
|
QDomDocument doc;
|
|
QString errorMsg;
|
|
<span style="color:#339;font-weight:bold">int</span> errorLine;
|
|
<span style="color:#339;font-weight:bold">int</span> errorColumn;
|
|
bool okay = doc.setContent
|
|
(&source, &reader, &errorMsg, &errorLine, &errorColumn);</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse"></a>Using OpenJDK for XML Parsing and Validation</h3>
|
|
<div class="paragraph">
|
|
<p>OpenJDK contains facilities for DOM-based, SAX-based, and
|
|
StAX-based document parsing. Documents can be validated
|
|
against DTDs or XML schemas.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>The approach taken to deal with entity expansion differs from
|
|
the general recommendation in <a href="#sect-Defensive_Coding-Tasks-Serialization-XML-Entities">Entity Expansion</a>.
|
|
We enable the feature flag
|
|
<code>javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING</code>,
|
|
which enforces heuristic restrictions on the number of entity
|
|
expansions. Note that this flag alone does not prevent
|
|
resolution of external references (system IDs or public IDs),
|
|
so it is slightly misnamed.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>In the following sections, we use helper classes to prevent
|
|
external ID resolution.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoEntityResolver" class="exampleblock">
|
|
<div class="title">Example 6. Helper class to prevent DTD external entity resolution in OpenJDK</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="java"><span style="color:#339;font-weight:bold">class</span> <span style="color:#B06;font-weight:bold">NoEntityResolver</span> <span style="color:#088;font-weight:bold">implements</span> EntityResolver {
|
|
<span style="color:#007">@Override</span>
|
|
<span style="color:#088;font-weight:bold">public</span> InputSource resolveEntity(<span style="color:#0a8;font-weight:bold">String</span> publicId, <span style="color:#0a8;font-weight:bold">String</span> systemId)
|
|
<span style="color:#088;font-weight:bold">throws</span> SAXException, <span style="color:#C00;font-weight:bold">IOException</span> {
|
|
<span style="color:#777">// Throwing an exception stops validation.</span>
|
|
<span style="color:#080;font-weight:bold">throw</span> <span style="color:#080;font-weight:bold">new</span> <span style="color:#C00;font-weight:bold">IOException</span>(<span style="color:#0a8;font-weight:bold">String</span>.format(
|
|
<span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">attempt to resolve </span><span style="color:#b0b">\"</span><span style="color:#D20">%s</span><span style="color:#b0b">\"</span><span style="color:#D20"> </span><span style="color:#b0b">\"</span><span style="color:#D20">%s</span><span style="color:#b0b">\"</span><span style="color:#710">"</span></span>, publicId, systemId));
|
|
}
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoResourceResolver" class="exampleblock">
|
|
<div class="title">Example 7. Helper class to prevent schema resolution in OpenJDK</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="java"><span style="color:#339;font-weight:bold">class</span> <span style="color:#B06;font-weight:bold">NoResourceResolver</span> <span style="color:#088;font-weight:bold">implements</span> LSResourceResolver {
|
|
<span style="color:#007">@Override</span>
|
|
<span style="color:#088;font-weight:bold">public</span> LSInput resolveResource(<span style="color:#0a8;font-weight:bold">String</span> type, <span style="color:#0a8;font-weight:bold">String</span> namespaceURI,
|
|
<span style="color:#0a8;font-weight:bold">String</span> publicId, <span style="color:#0a8;font-weight:bold">String</span> systemId, <span style="color:#0a8;font-weight:bold">String</span> baseURI) {
|
|
<span style="color:#777">// Throwing an exception stops validation.</span>
|
|
<span style="color:#080;font-weight:bold">throw</span> <span style="color:#080;font-weight:bold">new</span> <span style="color:#C00;font-weight:bold">RuntimeException</span>(<span style="color:#0a8;font-weight:bold">String</span>.format(
|
|
<span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">resolution attempt: type=%s namespace=%s </span><span style="color:#710">"</span></span> +
|
|
<span style="background-color:hsla(0,100%,50%,0.05)"><span style="color:#710">"</span><span style="color:#D20">publicId=%s systemId=%s baseURI=%s</span><span style="color:#710">"</span></span>,
|
|
type, namespaceURI, publicId, systemId, baseURI));
|
|
}
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p><a href="#ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-Imports">Java imports for OpenJDK XML parsing</a>
|
|
shows the imports used by the examples.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-Imports" class="exampleblock">
|
|
<div class="title">Example 8. Java imports for OpenJDK XML parsing</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="java"><span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.XMLConstants</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.parsers.DocumentBuilder</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.parsers.DocumentBuilderFactory</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.parsers.ParserConfigurationException</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.parsers.SAXParser</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.parsers.SAXParserFactory</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.transform.dom.DOMSource</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.transform.sax.SAXSource</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.validation.Schema</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.validation.SchemaFactory</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">javax.xml.validation.Validator</span>;
|
|
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.w3c.dom.Document</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.w3c.dom.ls.LSInput</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.w3c.dom.ls.LSResourceResolver</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.xml.sax.EntityResolver</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.xml.sax.ErrorHandler</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.xml.sax.InputSource</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.xml.sax.SAXException</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.xml.sax.SAXParseException</span>;
|
|
<span style="color:#080;font-weight:bold">import</span> <span style="color:#B44;font-weight:bold">org.xml.sax.XMLReader</span>;</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM"></a>DOM-based XML parsing and DTD validation in OpenJDK</h4>
|
|
<div class="paragraph">
|
|
<p>This approach produces a
|
|
<code>org.w3c.dom.Document</code> object from an input
|
|
stream. <a href="#ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM">DOM-based XML parsing in OpenJDK</a>
|
|
uses the data from the <code>java.io.InputStream</code>
|
|
instance in the <code>inputStream</code> variable.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM" class="exampleblock">
|
|
<div class="title">Example 9. DOM-based XML parsing in OpenJDK</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="java"><span style="color:#0a8;font-weight:bold">DocumentBuilderFactory</span> factory = <span style="color:#0a8;font-weight:bold">DocumentBuilderFactory</span>.newInstance();
|
|
<span style="color:#777">// Impose restrictions on the complexity of the DTD.</span>
|
|
factory.setFeature(<span style="color:#0a8;font-weight:bold">XMLConstants</span>.FEATURE_SECURE_PROCESSING, <span style="color:#069">true</span>);
|
|
|
|
<span style="color:#777">// Turn on validation.</span>
|
|
<span style="color:#777">// This step can be omitted if validation is not desired.</span>
|
|
factory.setValidating(<span style="color:#069">true</span>);
|
|
|
|
<span style="color:#777">// Parse the document.</span>
|
|
<span style="color:#0a8;font-weight:bold">DocumentBuilder</span> builder = factory.newDocumentBuilder();
|
|
builder.setEntityResolver(<span style="color:#080;font-weight:bold">new</span> NoEntityResolver());
|
|
builder.setErrorHandler(<span style="color:#080;font-weight:bold">new</span> Errors());
|
|
<span style="color:#0a8;font-weight:bold">Document</span> document = builder.parse(inputStream);</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>External entity references are prohibited using the
|
|
<code>NoEntityResolver</code> class in
|
|
<a href="#ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoEntityResolver">Helper class to prevent DTD external entity resolution in OpenJDK</a>.
|
|
Because external DTD references are prohibited, DTD validation
|
|
(if enabled) will only happen against the internal DTD subset
|
|
embedded in the XML document.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>To validate the document against an external DTD, use a
|
|
<code>javax.xml.transform.Transformer</code> class to
|
|
add the DTD reference to the document, and an entity
|
|
resolver which whitelists this external reference.</p>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-SAX"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-SAX"></a>XML Schema Validation in OpenJDK</h4>
|
|
<div class="paragraph">
|
|
<p><a href="#ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_SAX">SAX-based validation against an XML schema in OpenJDK</a>
|
|
shows how to validate a document against an XML Schema,
|
|
using a SAX-based approach. The XML data is read from an
|
|
<code>java.io.InputStream</code> in the
|
|
<code>inputStream</code> variable.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_SAX" class="exampleblock">
|
|
<div class="title">Example 10. SAX-based validation against an XML schema in OpenJDK</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="java"><span style="color:#0a8;font-weight:bold">SchemaFactory</span> factory = <span style="color:#0a8;font-weight:bold">SchemaFactory</span>.newInstance(
|
|
<span style="color:#0a8;font-weight:bold">XMLConstants</span>.W3C_XML_SCHEMA_NS_URI);
|
|
|
|
<span style="color:#777">// This enables restrictions on the schema and document</span>
|
|
<span style="color:#777">// complexity.</span>
|
|
factory.setFeature(<span style="color:#0a8;font-weight:bold">XMLConstants</span>.FEATURE_SECURE_PROCESSING, <span style="color:#069">true</span>);
|
|
|
|
<span style="color:#777">// This prevents resource resolution by the schema itself.</span>
|
|
<span style="color:#777">// If the schema is trusted and references additional files,</span>
|
|
<span style="color:#777">// this line must be omitted, otherwise loading these files</span>
|
|
<span style="color:#777">// will fail.</span>
|
|
factory.setResourceResolver(<span style="color:#080;font-weight:bold">new</span> NoResourceResolver());
|
|
|
|
<span style="color:#0a8;font-weight:bold">Schema</span> schema = factory.newSchema(schemaFile);
|
|
<span style="color:#0a8;font-weight:bold">Validator</span> validator = schema.newValidator();
|
|
|
|
<span style="color:#777">// This prevents external resource resolution.</span>
|
|
validator.setResourceResolver(<span style="color:#080;font-weight:bold">new</span> NoResourceResolver());
|
|
|
|
validator.validate(<span style="color:#080;font-weight:bold">new</span> <span style="color:#0a8;font-weight:bold">SAXSource</span>(<span style="color:#080;font-weight:bold">new</span> InputSource(inputStream)));</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>The <code>NoResourceResolver</code> class is defined
|
|
in <a href="#ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoResourceResolver">Helper class to prevent schema resolution in OpenJDK</a>.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>If you need to validate a document against an XML schema,
|
|
use the code in <a href="#ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM">DOM-based XML parsing in OpenJDK</a>
|
|
to create the document, but do not enable validation at this
|
|
point. Then use
|
|
<a href="#ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_DOM">Validation of a DOM document against an XML schema in OpenJDK</a>
|
|
to perform the schema-based validation on the
|
|
<code>org.w3c.dom.Document</code> instance
|
|
<code>document</code>.</p>
|
|
</div>
|
|
<div id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_DOM" class="exampleblock">
|
|
<div class="title">Example 11. Validation of a DOM document against an XML schema in OpenJDK</div>
|
|
<div class="content">
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="CodeRay highlight"><code data-lang="java"><span style="color:#0a8;font-weight:bold">SchemaFactory</span> factory = <span style="color:#0a8;font-weight:bold">SchemaFactory</span>.newInstance(
|
|
<span style="color:#0a8;font-weight:bold">XMLConstants</span>.W3C_XML_SCHEMA_NS_URI);
|
|
|
|
<span style="color:#777">// This enables restrictions on schema complexity.</span>
|
|
factory.setFeature(<span style="color:#0a8;font-weight:bold">XMLConstants</span>.FEATURE_SECURE_PROCESSING, <span style="color:#069">true</span>);
|
|
|
|
<span style="color:#777">// The following line prevents resource resolution</span>
|
|
<span style="color:#777">// by the schema itself.</span>
|
|
factory.setResourceResolver(<span style="color:#080;font-weight:bold">new</span> NoResourceResolver());
|
|
|
|
<span style="color:#0a8;font-weight:bold">Schema</span> schema = factory.newSchema(schemaFile);
|
|
|
|
<span style="color:#0a8;font-weight:bold">Validator</span> validator = schema.newValidator();
|
|
|
|
<span style="color:#777">// This prevents external resource resolution.</span>
|
|
validator.setResourceResolver(<span style="color:#080;font-weight:bold">new</span> NoResourceResolver());
|
|
validator.validate(<span style="color:#080;font-weight:bold">new</span> <span style="color:#0a8;font-weight:bold">DOMSource</span>(document));</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-Other"><a class="anchor" href="#sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-Other"></a>Other XML Parsers in OpenJDK</h4>
|
|
<div class="paragraph">
|
|
<p>OpenJDK contains additional XML parsing and processing
|
|
facilities. Some of them are insecure.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>The class <code>java.beans.XMLDecoder</code> acts as a
|
|
bridge between the Java object serialization format and XML.
|
|
It is close to impossible to securely deserialize Java
|
|
objects in this format from untrusted inputs, so its use is
|
|
not recommended, as with the Java object serialization
|
|
format itself. See <a href="#sect-Defensive_Coding-Tasks-Serialization-Library">Library Support for Deserialization</a>.</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect1">
|
|
<h2 id="protocol-encoders"><a class="anchor" href="#protocol-encoders"></a>Protocol Encoders</h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph">
|
|
<p>For protocol encoders, you should write bytes to a buffer which
|
|
grows as needed, using an exponential sizing policy. Explicit
|
|
lengths can be patched in later, once they are known.
|
|
Allocating the required number of bytes upfront typically
|
|
requires separate code to compute the final size, which must be
|
|
kept in sync with the actual encoding step, or vulnerabilities
|
|
may result. In multi-threaded code, parts of the object being
|
|
serialized might change, so that the computed size is out of
|
|
date.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>You should avoid copying data directly from a received packet
|
|
during encoding, disregarding the format. Propagating malformed
|
|
data could enable attacks on other recipients of that data.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>When using C or C++ and copying whole data structures directly
|
|
into the output, make sure that you do not leak information in
|
|
padding bytes between fields or at the end of the
|
|
<code>struct</code>.</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div id="bottom" class="text-muted py-3" >
|
|
<div class="foot">
|
|
<div class="container">
|
|
<div class="row footerlinks">
|
|
<div class="col-sm-3 col-xs-6 widget">
|
|
<h3 class="widget-title">About</h3>
|
|
<div class="widget-body">
|
|
<dl>
|
|
<dd><a href="https://fedoraproject.org/wiki/Overview">About Fedora</a></dd>
|
|
<dd><a href="https://getfedora.org/en/sponsors">Sponsors</a></dd>
|
|
<dd><a href="https://fedoramagazine.org">Fedora Magazine</a></dd>
|
|
<dd><a href="https://fedoraproject.org/wiki/Legal:Main#Legal">Legal</a></dd>
|
|
</dl>
|
|
<ul class="list-inline">
|
|
<li>
|
|
<a href="https://www.facebook.com/TheFedoraProject" class="btn-social btn-outline" aria-label="Fedora on Facebook"><i class="fa fa-fw fa-facebook" aria-hidden="true"></i></a>
|
|
</li>
|
|
<li>
|
|
<a href="https://plus.google.com/112917221531140868607" class="btn-social btn-outline" aria-label="Fedora on Google+"><i class="fa fa-fw fa-google-plus" aria-hidden="true"></i></a>
|
|
</li>
|
|
<li>
|
|
<a href="https://twitter.com/fedora" class="btn-social btn-outline" aria-label="Fedora on Twitter"><i class="fa fa-fw fa-twitter" aria-hidden="true"></i></a>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="col-sm-3 col-xs-6 widget">
|
|
<h3 class="widget-title uppercase">Download</h3>
|
|
<div class="widget-body">
|
|
<dl>
|
|
<dd><a href="https://getfedora.org/en/workstation/download">Get Fedora Workstation</a></dd>
|
|
<dd><a href="https://getfedora.org/en/server/download">Get Fedora Server</a></dd>
|
|
<dd><a href="https://getfedora.org/en/atomic/download">Get Fedora Atomic</a></dd>
|
|
<dd><a href="https://spins.fedoraproject.org">Fedora Spins</a></dd>
|
|
<dd><a href="https://labs.fedoraproject.org">Fedora Labs</a></dd>
|
|
<dd><a href="https://arm.fedoraproject.org">Fedora ARM<span class="sup">®</span></a></dd>
|
|
<dd><a href="https://alt.fedoraproject.org/">Alternative Downloads</a></dd>
|
|
|
|
</dl>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="col-sm-3 col-xs-6 widget">
|
|
<h3 class="widget-title">Support</h3>
|
|
<div class="widget-body">
|
|
<dl>
|
|
<dd><a href="https://fedoraproject.org/wiki/Communicating_and_getting_help">Get Help</a></dd>
|
|
<dd><a href="https://ask.fedoraproject.org/">Ask Fedora</a></dd>
|
|
<dd><a href="https://fedoraproject.org/wiki/Common_F27_bugs">Common Bugs</a></dd>
|
|
<dd><a href="https://developer.fedoraproject.org/">Fedora Developer Portal</a></dd>
|
|
<dd><a href="https://docs.fedoraproject.org/f27/install-guide/index.html">Installation Guide</a></dd>
|
|
</dl>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="col-sm-3 col-xs-6 widget">
|
|
<h3 class="widget-title">Join</h3>
|
|
<div class="widget-body">
|
|
<dl>
|
|
<dd><a href="https://fedoraproject.org/wiki/Join">Join Fedora</a></dd>
|
|
<dd><a href="http://fedoraplanet.org">Planet Fedora</a></dd>
|
|
<dd><a href="https://fedoraproject.org/wiki/SIGs">Fedora SIGs</a></dd>
|
|
<dd><a href="https://admin.fedoraproject.org/accounts/">Fedora Account System</a></dd>
|
|
<dd><a href="https://fedoracommunity.org/">Fedora Community</a></dd>
|
|
</dl>
|
|
</div>
|
|
</div>
|
|
</div> <!-- /row of widgets -->
|
|
|
|
<div class="row">
|
|
<div class="col-md-2">
|
|
<div class="widget-body">
|
|
<a href="https://www.redhat.com/"><img class="rh-logo" src="../../../master/_images/redhat-logo.png" alt="Red Hat Logo" /></a>
|
|
</div>
|
|
</div>
|
|
<div class="col-md-7">
|
|
<div class="widget-body">
|
|
<p class="sponsor">Fedora is sponsored by Red Hat.</p>
|
|
<p class="sponsor"><a href="https://www.redhat.com/en/technologies/linux-platforms/articles/relationship-between-fedora-and-rhel">Learn more about the relationship between Red Hat and Fedora »</a></p>
|
|
<p class="copy">© 2017 Red Hat, Inc. and others. Please send any comments or corrections to the <a href="https://pagure.io/fedora-docs/docs-fp-o">documentation team</a>.</p>
|
|
</div>
|
|
</div>
|
|
</div> <!-- /row of widgets -->
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<!-- jQuery (necessary for Bootstrap's JavaScript plugins) -->
|
|
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
|
|
<!-- Latest compiled and minified JavaScript -->
|
|
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js"></script>
|
|
<script src="../../../master/_javascripts/bootstrap-offcanvas.js" type="text/javascript"></script>
|
|
<script type="text/javascript">
|
|
/*<![CDATA[*/
|
|
$(document).ready(function() {
  // Keeps the navigation arrow icons in sync with the Bootstrap collapse
  // state of topic groups and topic sub-groups. The same class toggle runs
  // on both "show" and "hide", so each pair of handlers is registered once
  // with a space-separated event list.
  var COLLAPSE_EVENTS = 'show.bs.collapse hide.bs.collapse';

  // Topic groups: flip the angle icon between "right" and "down".
  $("[id^='topicGroup']").on(COLLAPSE_EVENTS, function (event) {
    var targetId = $(event.target).attr('id');

    // Events whose target is a sub-group must not flip the group icon.
    // A target without an id cannot be the group itself (groups are
    // selected by id prefix), so it is ignored as well; previously this
    // case raised a TypeError from calling .match() on undefined.
    if (typeof targetId !== 'string' || /^topicSubGroup/.test(targetId)) {
      return;
    }

    $(this).parent().find("[id^='tgSpan']").toggleClass("fa-angle-right fa-angle-down");
  });

  // Topic sub-groups: flip the caret icon between "right" and "down".
  $("[id^='topicSubGroup']").on(COLLAPSE_EVENTS, function () {
    $(this).parent().find("[id^='sgSpan']").toggleClass("fa-caret-right fa-caret-down");
  });
});
|
|
/*]]>*/
|
|
</script>
|
|
</body>
|
|
</html> |