Converted to AsciiDoc

2017-12-01 17:09:28 +01:00 · 2017-12-01 17:09:28 +01:00 · e836ecfd7c
commit e836ecfd7c
parent ada1aca49b
45 changed files with 0 additions and 21077 deletions
--- a/docbook-schema/README
+++ b/docbook-schema/README
@ -1,3 +0,0 @@
 This directory contains Relax NG Compact schema files, for use with
 Emacs.  These files were downloaded from http://www.docbook.org/rng/4.5/
 on 2012-07-16.
--- a/docbook-schema/calstblx.rnc
+++ b/docbook-schema/calstblx.rnc
@ -1,164 +0,0 @@
 # ......................................................................
 # DocBook CALS Table Model V4.5 ........................................
 # File calstblx.mod ....................................................
 # Copyright 1992-2002 HaL Computer Systems, Inc.,
 # O'Reilly & Associates, Inc., ArborText, Inc., Fujitsu Software
 # Corporation, Norman Walsh, Sun Microsystems, Inc., and the
 # Organization for the Advancement of Structured Information
 # Standards (OASIS).
 # 
 # This DTD is based on the CALS Table Model
 # PUBLIC "-//USA-DOD//DTD Table Model 951010//EN"
 # 
 # $Id: calstblx.dtd 6340 2006-10-03 13:23:24Z nwalsh $
 # 
 # Permission to use, copy, modify and distribute the DocBook DTD
 # and its accompanying documentation for any purpose and without fee
 # is hereby granted in perpetuity, provided that the above copyright
 # notice and this paragraph appear in all copies.  The copyright
 # holders make no representation about the suitability of the DTD for
 # any purpose.  It is provided "as is" without expressed or implied
 # warranty.
 # 
 # If you modify the DocBook DTD in any way, except for declaring and
 # referencing additional sets of general entities and declaring
 # additional notations, label your DTD as a variant of DocBook.  See
 # the maintenance documentation for more information.
 # 
 # Please direct all questions, bug reports, or suggestions for
 # changes to the docbook@lists.oasis-open.org mailing list. For more
 # information, see http://www.oasis-open.org/docbook/.
 # ......................................................................
 # This module contains the definitions for the CALS Table Model
 # converted to XML. It has been modified slightly for use in the
 # combined HTML/CALS models supported by DocBook V4.5.
 # These definitions are not directly related to the table model, but are
 # used in the default CALS table model and are usually defined elsewhere
 # (and prior to the inclusion of this table module) in a CALS DTD.
 # no if zero(s),
 # yes if any other digits value
 yesorno = string
 titles = title?
 # default for use in entry content
 # The parameter entities as defined below provide the CALS table model
 # as published (as part of the Example DTD) in MIL-HDBK-28001.
 # 
 # The following declarations provide the CALS-compliant default definitions
 # for these entities.  However, these entities can and should be redefined
 # (by giving the appropriate parameter entity declaration(s) prior to the
 # reference to this Table Model declaration set entity) to fit the needs
 # of the current application.
 tbl.table-titles.mdl = titles
 tbl.table-main.mdl = tgroup+ | graphic+
 tbl.tgroup.mdl = colspec*, spanspec*, thead?, tfoot?, tbody
 tbl.tgroup.att = attribute tgroupstyle { text }?
 tbl.row.mdl = (entry | entrytbl)+
 tbl.entrytbl.mdl = colspec*, spanspec*, thead?, tbody
 # =====  Element and attribute declarations follow. =====
 # doc:A formal table in a document.
 table = element table { table.attlist, tbl.table.mdl }
 table.attlist &=
  attribute frame { tbl.frame.attval }?,
  attribute colsep { yesorno }?,
  attribute rowsep { yesorno }?,
  tbl.table.att,
  bodyatt,
  secur
 # doc:A wrapper for the main content of a table, or part of a table.
 tgroup = element tgroup { tgroup.attlist, tbl.tgroup.mdl }
 tgroup.attlist &=
  attribute cols { text },
  tbl.tgroup.att,
  attribute colsep { yesorno }?,
  attribute rowsep { yesorno }?,
  attribute align { "left" | "right" | "center" | "justify" | "char" }?,
  attribute char { text }?,
  attribute charoff { text }?,
  secur
 # doc:Specifications for a column in a table.
 colspec = element colspec { colspec.attlist, empty }
 colspec.attlist &=
  attribute colnum { text }?,
  attribute colname { text }?,
  attribute colwidth { text }?,
  attribute colsep { yesorno }?,
  attribute rowsep { yesorno }?,
  attribute align { "left" | "right" | "center" | "justify" | "char" }?,
  attribute char { text }?,
  attribute charoff { text }?
 # doc:Formatting information for a spanned column in a table.
 spanspec = element spanspec { spanspec.attlist, empty }
 spanspec.attlist &=
  attribute namest { text },
  attribute nameend { text },
  attribute spanname { text },
  attribute colsep { yesorno }?,
  attribute rowsep { yesorno }?,
  attribute align { "left" | "right" | "center" | "justify" | "char" }?,
  attribute char { text }?,
  attribute charoff { text }?
 # doc:A table header consisting of one or more rows.
 thead = element thead { thead.attlist, tbl.hdft.mdl }
 thead.attlist &=
  attribute valign { "top" | "middle" | "bottom" }?,
  secur
 # doc:A table footer consisting of one or more rows.
 tfoot = element tfoot { tfoot.attlist, tbl.hdft.mdl }
 tfoot.attlist &=
  attribute valign { "top" | "middle" | "bottom" }?,
  secur
 # doc:A wrapper for the rows of a table or informal table.
 tbody = element tbody { tbody.attlist, tbl.tbody.mdl }
 tbody.attlist &=
  attribute valign { "top" | "middle" | "bottom" }?,
  secur
 # doc:A row in a table.
 row = element row { row.attlist, tbl.row.mdl }
 row.attlist &=
  attribute rowsep { yesorno }?,
  attribute valign { "top" | "middle" | "bottom" }?,
  secur
 # doc:A subtable appearing in place of an Entry in a table.
 entrytbl = element entrytbl { entrytbl.attlist, tbl.entrytbl.mdl }
 entrytbl.attlist &=
  attribute cols { text },
  tbl.tgroup.att,
  attribute colname { text }?,
  attribute spanname { text }?,
  attribute namest { text }?,
  attribute nameend { text }?,
  attribute colsep { yesorno }?,
  attribute rowsep { yesorno }?,
  attribute align { "left" | "right" | "center" | "justify" | "char" }?,
  attribute char { text }?,
  attribute charoff { text }?,
  secur
 # doc:A cell in a table.
 entry = element entry { entry.attlist, tbl.entry.mdl* }
 entry.attlist &=
  attribute colname { text }?,
  attribute namest { text }?,
  attribute nameend { text }?,
  attribute spanname { text }?,
  attribute morerows { text }?,
  attribute colsep { yesorno }?,
  attribute rowsep { yesorno }?,
  attribute align { "left" | "right" | "center" | "justify" | "char" }?,
  attribute char { text }?,
  attribute charoff { text }?,
  attribute rotate { yesorno }?,
  attribute valign { "top" | "middle" | "bottom" }?,
  secur
 # End of DocBook CALS Table Model V4.5 .................................
 # ......................................................................
--- a/docbook-schema/dbhierx.rnc
+++ b/docbook-schema/dbhierx.rnc
--- a/docbook-schema/dbnotnx.rnc
+++ b/docbook-schema/dbnotnx.rnc
@ -1,85 +0,0 @@
 # ......................................................................
 # DocBook notations module V4.5 ........................................
 # File dbnotnx.mod .....................................................
 # Copyright 1992-2004 HaL Computer Systems, Inc.,
 # O'Reilly & Associates, Inc., ArborText, Inc., Fujitsu Software
 # Corporation, Norman Walsh, Sun Microsystems, Inc., and the
 # Organization for the Advancement of Structured Information
 # Standards (OASIS).
 # 
 # $Id: dbnotnx.mod 6340 2006-10-03 13:23:24Z nwalsh $
 # 
 # Permission to use, copy, modify and distribute the DocBook DTD
 # and its accompanying documentation for any purpose and without fee
 # is hereby granted in perpetuity, provided that the above copyright
 # notice and this paragraph appear in all copies.  The copyright
 # holders make no representation about the suitability of the DTD for
 # any purpose.  It is provided "as is" without expressed or implied
 # warranty.
 # 
 # If you modify the DocBook DTD in any way, except for declaring and
 # referencing additional sets of general entities and declaring
 # additional notations, label your DTD as a variant of DocBook.  See
 # the maintenance documentation for more information.
 # 
 # Please direct all questions, bug reports, or suggestions for
 # changes to the docbook@lists.oasis-open.org mailing list. For more
 # information, see http://www.oasis-open.org/docbook/.
 # ......................................................................
 # This module contains the notation declarations used by DocBook.
 # 
 # In DTD driver files referring to this module, please use an entity
 # declaration that uses the public identifier shown below:
 # 
 # <!ENTITY % dbnotn PUBLIC
 # "-//OASIS//ENTITIES DocBook Notations V4.5//EN"
 # "dbnotnx.mod">
 # %dbnotn;
 # 
 # See the documentation for detailed information on the parameter
 # entity and module scheme used in DocBook, customizing DocBook and
 # planning for interchange, and changes made since the last release
 # of DocBook.
 local.notation.class = notAllowed
 notation.class =
  "BMP"
  | "CGM-CHAR"
  | "CGM-BINARY"
  | "CGM-CLEAR"
  | "DITROFF"
  | "DVI"
  | "EPS"
  | "EQN"
  | "FAX"
  | "GIF"
  | "GIF87a"
  | "GIF89a"
  | "JPG"
  | "JPEG"
  | "IGES"
  | "PCX"
  | "PIC"
  | "PNG"
  | "PS"
  | "SGML"
  | "TBL"
  | "TEX"
  | "TIFF"
  | "WMF"
  | "WPG"
  | "SVG"
  | "PDF"
  | "SWF"
  | "linespecific"
  | local.notation.class
 # WordPerfect Graphic format
 # End of DocBook notations module V4.5 .................................
 # ......................................................................
--- a/docbook-schema/dbpoolx.rnc
+++ b/docbook-schema/dbpoolx.rnc
--- a/docbook-schema/docbook.rnc
+++ b/docbook-schema/docbook.rnc
@ -1,499 +0,0 @@
 # ......................................................................
 # DocBook XML DTD V4.5 .................................................
 # File docbookx.dtd ....................................................
 # Copyright 1992-2006 HaL Computer Systems, Inc.,
 # O'Reilly & Associates, Inc., ArborText, Inc., Fujitsu Software
 # Corporation, Norman Walsh, Sun Microsystems, Inc., and the
 # Organization for the Advancement of Structured Information
 # Standards (OASIS).
 # 
 # See also http://docbook.org/specs/
 # 
 # $Id: docbookx.dtd 6340 2006-10-03 13:23:24Z nwalsh $
 # 
 # Permission to use, copy, modify and distribute the DocBook XML DTD
 # and its accompanying documentation for any purpose and without fee
 # is hereby granted in perpetuity, provided that the above copyright
 # notice and this paragraph appear in all copies.  The copyright
 # holders make no representation about the suitability of the DTD for
 # any purpose.  It is provided "as is" without expressed or implied
 # warranty.
 # 
 # If you modify the DocBook DTD in any way, except for declaring and
 # referencing additional sets of general entities and declaring
 # additional notations, label your DTD as a variant of DocBook.  See
 # the maintenance documentation for more information.
 # 
 # Please direct all questions, bug reports, or suggestions for
 # changes to the docbook@lists.oasis-open.org mailing list. For more
 # information, see http://www.oasis-open.org/docbook/.
 # ......................................................................
 # This is the driver file for V4.5 of the DocBook DTD.
 # Please use the following formal public identifier to identify it:
 # 
 # "-//OASIS//DTD DocBook XML V4.5//EN"
 # 
 # For example, if your document's top-level element is Book, and
 # you are using DocBook directly, use the FPI in the DOCTYPE
 # declaration:
 # 
 # <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 #                "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd"
 #                [...]>
 # 
 # Or, if you have a higher-level driver file that customizes DocBook,
 # use the FPI in the parameter entity declaration:
 # 
 # <!ENTITY % DocBookDTD PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
 #            "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
 # %DocBookDTD;
 # 
 # See the documentation for detailed information on the parameter
 # entity and module scheme used in DocBook, customizing DocBook and
 # planning for interchange, and changes made since the last release
 # of DocBook.
 # ......................................................................
 # Enable SGML features .................................................
 # ......................................................................
 # Notation declarations ................................................
 include "dbnotnx.rnc"
 # ......................................................................
 # ISO character entity sets ............................................
 # euro sign, U+20AC NEW
 # ......................................................................
 # DTD modules ..........................................................
 # Information pool ..............
 include "dbpoolx.rnc"
 # Redeclaration placeholder .....
 # Document hierarchy ............
 include "dbhierx.rnc"
 start =
  articleinfo
  | honorific
  | legalnotice
  | procedure
  | simplelist
  | keycode
  | refsynopsisdiv
  | article
  | phrase
  | destructorsynopsis
  | itemizedlist
  | audioobject
  | link
  | sect1info
  | xref
  | glossaryinfo
  | varname
  | keywordset
  | informalequation
  | toc
  | pagenums
  | lot
  | shortcut
  | glosslist
  | option
  | bibliosource
  | variablelist
  | filename
  | pob
  | colgroup
  | foreignphrase
  | group
  | substeps
  | conftitle
  | textobject
  | menuchoice
  | colspec
  | contractsponsor
  | tocback
  | contractnum
  | constant
  | dedication
  | inlineequation
  | bibliographyinfo
  | country
  | glossseealso
  | bridgehead
  | mousebutton
  | surname
  | stepalternatives
  | tertiaryie
  | mediaobject
  | msgentry
  | fax
  | initializer
  | table
  | task
  | setinfo
  | videodata
  | bibliodiv
  | issuenum
  | phone
  | state
  | refsynopsisdivinfo
  | member
  | glossentry
  | term
  | msgtext
  | tr
  | errortype
  | confdates
  | inlinegraphic
  | th
  | segmentedlist
  | remark
  | preface
  | structname
  | publisher
  | td
  | oointerface
  | refsection
  | type
  | taskrelated
  | msgrel
  | artpagenums
  | bibliomixed
  | revnumber
  | firstterm
  | seeie
  | spanspec
  | toclevel5
  | trademark
  | toclevel4
  | toclevel3
  | toclevel2
  | indexentry
  | toclevel1
  | colophon
  | methodparam
  | sidebarinfo
  | productnumber
  | funcprototype
  | inlinemediaobject
  | refclass
  | lotentry
  | paramdef
  | classsynopsisinfo
  | qandaset
  | footnote
  | msglevel
  | keysym
  | citation
  | simplemsgentry
  | othercredit
  | subjectset
  | keycap
  | orderedlist
  | refmiscinfo
  | blockinfo
  | programlistingco
  | abbrev
  | sidebar
  | informalfigure
  | tip
  | primaryie
  | appendixinfo
  | partintro
  | glossdiv
  | confgroup
  | segtitle
  | taskprerequisites
  | street
  | tbody
  | caption
  | markup
  | setindex
  | msgsub
  | subscript
  | orgname
  | fieldsynopsis
  | refname
  | void
  | sect5
  | sect4
  | sect3
  | chapter
  | sect2
  | sect1
  | modifier
  | col
  | orgdiv
  | city
  | bibliolist
  | funcparams
  | application
  | \token
  | imageobject
  | literal
  | funcsynopsis
  | olink
  | package
  | collab
  | seealsoie
  | primary
  | glossterm
  | termdef
  | area
  | ackno
  | function
  | collabname
  | lineannotation
  | guisubmenu
  | msgexplan
  | errorname
  | property
  | synopfragmentref
  | refentryinfo
  | entry
  | manvolnum
  | synopsis
  | emphasis
  | appendix
  | bookinfo
  | contrib
  | otheraddr
  | copyright
  | methodname
  | email
  | ooclass
  | videoobject
  | abstract
  | firstname
  | revremark
  | glossdef
  | guibutton
  | informalexample
  | screen
  | errorcode
  | command
  | seriesvolnums
  | refpurpose
  | parameter
  | equation
  | tfoot
  | code
  | jobtitle
  | sgmltag
  | screenco
  | holder
  | isbn
  | corpcredit
  | biblioset
  | part
  | symbol
  | row
  | bibliomisc
  | imagedata
  | secondary
  | classname
  | callout
  | screenshot
  | bibliomset
  | indexterm
  | refsect3
  | tocchap
  | para
  | refsect2
  | refsect1
  | date
  | refdescriptor
  | wordasword
  | epigraph
  | audiodata
  | hardware
  | confsponsor
  | authorgroup
  | warning
  | authorinitials
  | medialabel
  | varlistentry
  | authorblurb
  | itermset
  | refsect3info
  | informaltable
  | guimenuitem
  | postcode
  | subjectterm
  | refnamediv
  | note
  | figure
  | envar
  | listitem
  | methodsynopsis
  | affiliation
  | funcsynopsisinfo
  | structfield
  | blockquote
  | keyword
  | chapterinfo
  | tertiary
  | year
  | subtitle
  | personblurb
  | refentry
  | citebiblioid
  | seglistitem
  | bibliography
  | msg
  | constructorsynopsis
  | refsect2info
  | volumenum
  | database
  | funcdef
  | uri
  | graphicco
  | biblioid
  | msgmain
  | printhistory
  | glosssee
  | beginpage
  | glossary
  | set
  | highlights
  | objectinfo
  | tocpart
  | guiicon
  | revhistory
  | seg
  | see
  | msgorig
  | areaspec
  | partinfo
  | index
  | sectioninfo
  | refsectioninfo
  | optional
  | confnum
  | replaceable
  | refsect1info
  | corpauthor
  | step
  | anchor
  | arg
  | mathphrase
  | setindexinfo
  | keycombo
  | address
  | cmdsynopsis
  | computeroutput
  | literallayout
  | qandaentry
  | sect5info
  | bibliocoverage
  | coref
  | editor
  | superscript
  | personname
  | pubsnumber
  | graphic
  | simplesect
  | accel
  | secondaryie
  | biblioref
  | publishername
  | bibliorelation
  | prefaceinfo
  | revision
  | screeninfo
  | sbr
  | example
  | citetitle
  | issn
  | invpartnumber
  | indexdiv
  | sect4info
  | corpname
  | lineage
  | ooexception
  | reference
  | revdescription
  | title
  | edition
  | co
  | msgaud
  | guimenu
  | shortaffil
  | titleabbrev
  | msginfo
  | refmeta
  | qandadiv
  | mediaobjectco
  | seealso
  | exceptionname
  | answer
  | programlisting
  | tgroup
  | refentrytitle
  | book
  | errortext
  | varargs
  | sect3info
  | citerefentry
  | tasksummary
  | quote
  | othername
  | prompt
  | entrytbl
  | interfacename
  | acronym
  | modespec
  | msgset
  | thead
  | textdata
  | userinput
  | attribution
  | footnoteref
  | action
  | tocentry
  | tocfront
  | author
  | imageobjectco
  | alt
  | question
  | ulink
  | subject
  | pubdate
  | returnvalue
  | label
  | caution
  | section
  | systemitem
  | referenceinfo
  | sect2info
  | calloutlist
  | classsynopsis
  | productname
  | simpara
  | synopfragment
  | important
  | interface
  | releaseinfo
  | formalpara
  | areaset
  | biblioentry
  | indexinfo
  | guilabel
 # ......................................................................
 # Other general entities ...............................................
 # End of DocBook XML DTD V4.5 ..........................................
 # ......................................................................
--- a/docbook-schema/htmltblx.rnc
+++ b/docbook-schema/htmltblx.rnc
@ -1,225 +0,0 @@
 # ......................................................................
 # DocBook XML HTML Table Module V4.5 ...................................
 # File htmltblx.mod ....................................................
 # Copyright 2003-2006 ArborText, Inc., Norman Walsh, Sun Microsystems,
 # Inc., and the Organization for the Advancement of Structured Information
 # Standards (OASIS).
 # 
 # $Id: htmltblx.mod 6340 2006-10-03 13:23:24Z nwalsh $
 # 
 # Permission to use, copy, modify and distribute the DocBook XML DTD
 # and its accompanying documentation for any purpose and without fee
 # is hereby granted in perpetuity, provided that the above copyright
 # notice and this paragraph appear in all copies.  The copyright
 # holders make no representation about the suitability of the DTD for
 # any purpose.  It is provided "as is" without expressed or implied
 # warranty.
 # 
 # If you modify the DocBook XML DTD in any way, except for declaring and
 # referencing additional sets of general entities and declaring
 # additional notations, label your DTD as a variant of DocBook.  See
 # the maintenance documentation for more information.
 # 
 # Please direct all questions, bug reports, or suggestions for
 # changes to the docbook@lists.oasis-open.org mailing list. For more
 # information, see http://www.oasis-open.org/docbook/.
 # ......................................................................
 # This module contains the definitions for elements that are
 # isomorphic to the HTML elements. One could argue we should
 # instead have based ourselves on the XHTML Table Module, but the
 # HTML one is more like what browsers are likely to accept today
 # and users are likely to use.
 # 
 # This module has been developed for use with the DocBook V4.5
 # "union table model" in which elements and attlists common to both
 # models are defined (as the union) in the CALS table module by
 # setting various parameter entities appropriately in this file.
 # 
 # In DTD driver files referring to this module, please use an entity
 # declaration that uses the public identifier shown below:
 # 
 # <!ENTITY % htmltbl PUBLIC
 # "-//OASIS//ELEMENTS DocBook XML HTML Tables V4.5//EN"
 # "htmltblx.mod">
 # %htmltbl;
 # 
 # See the documentation for detailed information on the parameter
 # entity and module scheme used in DocBook, customizing DocBook and
 # planning for interchange, and changes made since the last release
 # of DocBook.
 # ======================= XHTML Tables =======================================
 namespace a = "http://relaxng.org/ns/compatibility/annotations/1.0"
 html.coreattrs =
  common.attrib,
  attribute class { text }?,
  attribute style { text }?,
  attribute title { text }?
 # Does not contain lang or dir because they are in %common.attribs
 i18n = attribute xml:lang { xsd:NMTOKEN }?
 events =
  attribute onclick { text }?,
  attribute ondblclick { text }?,
  attribute onmousedown { text }?,
  attribute onmouseup { text }?,
  attribute onmouseover { text }?,
  attribute onmousemove { text }?,
  attribute onmouseout { text }?,
  attribute onkeypress { text }?,
  attribute onkeydown { text }?,
  attribute onkeyup { text }?
 attrs = html.coreattrs, i18n, events
 cellhalign =
  attribute align { "left" | "center" | "right" | "justify" | "char" }?,
  attribute char { text }?,
  attribute charoff { text }?
 cellvalign =
  attribute valign { "top" | "middle" | "bottom" | "baseline" }?
 # doc:A group of columns in an HTML table.
 colgroup = element colgroup { colgroup.attlist, col* }
 # doc:Specifications for a column in an HTML table.
 col = element col { col.attlist, empty }
 # doc:A row in an HTML table.
 tr = element tr { tr.attlist, (th | td)+ }
 # doc:A table header entry in an HTML table.
 th =
  element th {
    th.attlist, (para.char.mix | tabentry.mix | table | informaltable)*
  }
 # doc:A table entry in an HTML table.
 td =
  element td {
    td.attlist, (para.char.mix | tabentry.mix | table | informaltable)*
  }
 colgroup.attlist &=
  attrs,
  [ a:defaultValue = "1" ] attribute span { text }?,
  attribute width { text }?,
  cellhalign,
  cellvalign
 col.attlist &=
  attrs,
  [ a:defaultValue = "1" ] attribute span { text }?,
  attribute width { text }?,
  cellhalign,
  cellvalign
 tr.attlist &=
  attrs,
  cellhalign,
  cellvalign,
  attribute bgcolor { text }?
 th.attlist &=
  attrs,
  attribute abbr { text }?,
  attribute axis { text }?,
  attribute headers { xsd:IDREFS }?,
  attribute scope { "row" | "col" | "rowgroup" | "colgroup" }?,
  [ a:defaultValue = "1" ] attribute rowspan { text }?,
  [ a:defaultValue = "1" ] attribute colspan { text }?,
  cellhalign,
  cellvalign,
  attribute nowrap { "nowrap" }?,
  attribute bgcolor { text }?,
  attribute width { text }?,
  attribute height { text }?
 td.attlist &=
  attrs,
  attribute abbr { text }?,
  attribute axis { text }?,
  attribute headers { xsd:IDREFS }?,
  attribute scope { "row" | "col" | "rowgroup" | "colgroup" }?,
  [ a:defaultValue = "1" ] attribute rowspan { text }?,
  [ a:defaultValue = "1" ] attribute colspan { text }?,
  cellhalign,
  cellvalign,
  attribute nowrap { "nowrap" }?,
  attribute bgcolor { text }?,
  attribute width { text }?,
  attribute height { text }?
 # ======================================================
 # Set up to read in the CALS model configured to
 # merge with the XHTML table model
 # ======================================================
 tables.role.attrib = role.attrib
 # Add label and role attributes to table and informaltable
 bodyatt =
  attribute floatstyle { text }?,
  attribute rowheader { "firstcol" | "norowheader" }?,
  label.attrib
 # Add common attributes to Table, TGroup, TBody, THead, TFoot, Row, 
 # EntryTbl, and Entry (and InformalTable element).
 secur =
  common.attrib,
  attribute class { text }?,
  attribute style { text }?,
  attribute title { text }?,
  i18n,
  events,
  tables.role.attrib
 common.table.attribs = bodyatt, secur
 # Content model for Table (that also allows HTML tables)
 tbl.table.mdl =
  (blockinfo?,
   formalobject.title.content,
   ndxterm.class*,
   textobject*,
   (graphic+ | mediaobject+ | tgroup+))
  | (caption, (col* | colgroup*), thead?, tfoot?, (tbody+ | tr+))
 informal.tbl.table.mdl =
  (textobject*, (graphic+ | mediaobject+ | tgroup+))
  | ((col* | colgroup*), thead?, tfoot?, (tbody+ | tr+))
 # Attributes for Table (including HTML ones)
 # N.B. rules = (none | groups | rows | cols | all) but it can't be spec'd
 # that way because 'all' already occurs in a different enumeration in
 # CALS tables (frame).
 tbl.table.att =
  attribute tabstyle { text }?,
  attribute tocentry { yesorno.attvals }?,
  attribute shortentry { yesorno.attvals }?,
  attribute orient { "port" | "land" }?,
  attribute pgwide { yesorno.attvals }?,
  attribute summary { text }?,
  attribute width { text }?,
  attribute border { text }?,
  attribute rules { text }?,
  attribute cellspacing { text }?,
  attribute cellpadding { text }?,
  attribute align { "left" | "center" | "right" }?,
  attribute bgcolor { text }?
 tbl.frame.attval =
  "void"
  | "above"
  | "below"
  | "hsides"
  | "lhs"
  | "rhs"
  | "vsides"
  | "box"
  | "border"
  | "top"
  | "bottom"
  | "topbot"
  | "all"
  | "sides"
  | "none"
 # Allow either objects or inlines; beware of REs between elements.
 tbl.entry.mdl = para.char.mix | tabentry.mix
 # thead, tfoot, and tbody are defined in both table models,
 # so we set up parameter entities to define union models for them
 tbl.hdft.mdl = tr+ | (colspec*, row+)
 tbl.tbody.mdl = tr+ | row+
 # End of DocBook XML HTML Table Module V4.5 ............................
 # ......................................................................
--- a/en-US/Author_Group.xml
+++ b/en-US/Author_Group.xml
@ -1,35 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE authorgroup PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 <!ENTITY % BOOK_ENTITIES SYSTEM "Defensive_Coding.ent">
 %BOOK_ENTITIES;
 ]>
 <authorgroup>
        <author>
                <firstname>Florian</firstname>
                <surname>Weimer</surname>
                <affiliation>
                        <orgname>Red Hat</orgname>
                        <orgdiv>Product Security Team</orgdiv>
                </affiliation>
                <email>fweimer@redhat.com</email>
        </author>
        <author>
                <firstname>Nikos</firstname>
                <surname>Mavrogiannopoulos</surname>
                <affiliation>
                        <orgname>Red Hat</orgname>
                        <orgdiv>Crypto Team</orgdiv>
                </affiliation>
                <email>nmav@redhat.com</email>
        </author>
        <author>
                <firstname>Robert</firstname>
                <surname>Relyea</surname>
                <affiliation>
                        <orgname>Red Hat</orgname>
                        <orgdiv>Crypto Team</orgdiv>
                </affiliation>
                <email>rrelyea@redhat.com</email>
        </author>
 </authorgroup>
--- a/en-US/Book_Info.xml
+++ b/en-US/Book_Info.xml
@ -1,29 +0,0 @@
 <?xml version='1.0' encoding='UTF-8' ?>
 <!DOCTYPE bookinfo PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <bookinfo id="book-Defensive_Coding">
 	<title>Defensive Coding</title>
 	<subtitle>A Guide to Improving Software Security</subtitle>
 	<edition>1</edition>
 	<pubsnumber>1</pubsnumber>
 	<productname>Unofficial fork</productname>
 	<productnumber></productnumber>
 	<abstract>
 	  <para>
 	    This document provides guidelines for improving software
 	    security through secure coding.  It covers common
 	    programming languages and libraries, and focuses on
 	    concrete recommendations.
 	  </para>
 	</abstract>
 	<corpauthor>
 		<inlinemediaobject>
 			<imageobject>
 				<imagedata fileref="Common_Content/images/title_logo.svg" format="SVG" />
 			</imageobject>
 		</inlinemediaobject>
 	</corpauthor>
 	<xi:include href="Common_Content/Legal_Notice.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
        <xi:include href="Author_Group.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
 </bookinfo>
--- a/en-US/C-Allocators.xml
+++ b/en-US/C-Allocators.xml
@ -1,209 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-C-Allocators">
  <title>Memory Allocators</title>
  <section>
    <title><function>malloc</function> and Related Functions</title>
    <para>
      The C library interfaces for memory allocation are provided by
      <function>malloc</function>, <function>free</function> and
      <function>realloc</function>, and the
      <function>calloc</function> function.  In addition to these
      generic functions, there are derived functions such as
      <function>strdup</function> which perform allocation using
      <function>malloc</function> internally, but do not return
      untyped heap memory (which could be used for any object).
    </para>
    <para>
      The C compiler knows about these functions and can use their
      expected behavior for optimizations.  For instance, the compiler
      assumes that an existing pointer (or a pointer derived from an
      existing pointer by arithmetic) will not point into the memory
      area returned by <function>malloc</function>.
    </para>
    <para>
      If the allocation fails, <function>realloc</function> does not
      free the old pointer.  Therefore, the idiom <literal>ptr =
      realloc(ptr, size);</literal> is wrong because the memory
      pointed to by <literal>ptr</literal> leaks in case of an error.
    </para>
    <section id="sect-Defensive_Coding-C-Use-After-Free">
      <title>Use-after-free errors</title>
      <para>
 	After <function>free</function>, the pointer is invalid.
 	Further pointer dereferences are not allowed (and are usually
 	detected by <application>valgrind</application>).  Less obvious
 	is that any <emphasis>use</emphasis> of the old pointer value is
 	not allowed, either.  In particular, comparisons with any other
 	pointer (or the null pointer) are undefined according to the C
 	standard.
      </para>
      <para>
 	The same rules apply to <function>realloc</function> if the
 	memory area cannot be enlarged in-place.  For instance, the
 	compiler may assume that a comparison between the old and new
 	pointer will always return false, so it is impossible to detect
 	movement this way.
      </para>
    </section>
    <section>
      <title>Handling Memory Allocation Errors</title>
      <para>
 	Recovering from out-of-memory errors is often difficult or even
 	impossible.  In these cases, <function>malloc</function> and
 	other allocation functions return a null pointer.  Dereferencing
 	this pointer leads to a crash.  Such dereferences can even be
 	exploitable for code execution if the dereference is combined
 	with an array subscript.
      </para>
      <para>
 	In general, if you cannot check all allocation calls and
 	handle failure, you should abort the program on allocation
 	failure, and not rely on the null pointer dereference to
 	terminate the process.  See
 	<xref
 	linkend="sect-Defensive_Coding-Tasks-Serialization-Decoders"/>
 	for related memory allocation concerns.
      </para>
    </section>
  </section>
  <section id="sect-Defensive_Coding-C-Allocators-alloca">
    <title><function>alloca</function> and Other Forms of Stack-based
    Allocation</title>
    <para>
      Allocation on the stack is risky because stack overflow checking
      is implicit.  There is a guard page at the end of the memory
      area reserved for the stack.  If the program attempts to read
      from or write to this guard page, a <literal>SIGSEGV</literal>
      signal is generated and the program typically terminates.
    </para>
    <para>
      This is sufficient for detecting typical stack overflow
      situations such as unbounded recursion, but it fails when the
      stack grows in increments larger than the size of the guard
      page.  In this case, it is possible that the stack pointer ends
      up pointing into a memory area which has been allocated for a
      different purpose.  Such misbehavior can be exploitable.
    </para>
    <para>
      A common source for large stack growth are calls to
      <function>alloca</function> and related functions such as
      <function>strdupa</function>.  These functions should be avoided
      because of the lack of error checking.  (They can be used safely
      if the allocated size is less than the page size (typically,
      4096 bytes), but this case is relatively rare.)  Additionally,
      relying on <function>alloca</function> makes it more difficult
      to reorganize the code because it is not allowed to use the
      pointer after the function calling <function>alloca</function>
      has returned, even if this function has been inlined into its
      caller.
    </para>
    <para>
      Similar concerns apply to <emphasis>variable-length
      arrays</emphasis> (VLAs), a feature of the C99 standard which
      started as a GNU extension.  For large objects exceeding the
      page size, there is no error checking, either.
    </para>
    <para>
      In both cases, negative or very large sizes can trigger a
      stack-pointer wraparound, and the stack pointer can end up
      pointing into caller stack frames, which is fatal and can be
      exploitable.
    </para>
    <para>
      If you want to use <function>alloca</function> or VLAs for
      performance reasons, consider using a small on-stack array (less
      than the page size, large enough to fulfill most requests).  If
      the requested size is small enough, use the on-stack array.
      Otherwise, call <function>malloc</function>.  When exiting the
      function, check if <function>malloc</function> had been called,
      and free the buffer as needed.
    </para>
  </section>
  <section id="sect-Defensive_Coding-C-Allocators-Arrays">
    <title>Array Allocation</title>
    <para>
      When allocating arrays, it is important to check for overflows.
      The <function>calloc</function> function performs such checks.
    </para>
    <para>
      If <function>malloc</function> or <function>realloc</function>
      is used, the size check must be written manually.  For instance,
      to allocate an array of <literal>n</literal> elements of type
      <literal>T</literal>, check that the requested size is not
      greater than <literal>((size_t) -1) / sizeof(T)</literal>.  See
      <xref linkend="sect-Defensive_Coding-C-Arithmetic"/>.
    </para>
  </section>
  <section id="sect-Defensive_Coding-C-Allocators-Custom">
    <title>Custom Memory Allocators</title>
    <para>
      Custom memory allocators come in two forms: replacements for
      <function>malloc</function>, and completely different interfaces
      for memory management.  Both approaches can reduce the
      effectiveness of <application>valgrind</application> and similar
      tools, and the heap corruption detection provided by GNU libc, so
      they should be avoided.
    </para>
    <para>
      Memory allocators are difficult to write and contain many
      performance and security pitfalls.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  When computing array sizes or rounding up allocation
 	  requests (to the next allocation granularity, or for
 	  alignment purposes), checks for arithmetic overflow are
 	  required.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Size computations for array allocations need overflow
 	  checking.  See <xref
 	  linkend="sect-Defensive_Coding-C-Allocators-Arrays"/>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  It can be difficult to beat well-tuned general-purpose
 	  allocators.  In micro benchmarks, pool allocators can show
 	  huge wins, and size-specific pools can reduce internal
 	  fragmentation.  But often, utilization of individual pools
 	  is poor, and external fragmentation increases the overall
 	  memory usage.
 	</para>
      </listitem>
    </itemizedlist>
  </section>
  <section>
    <title>Conservative Garbage Collection</title>
    <para>
      Garbage collection can be an alternative to explicit memory
      management using <function>malloc</function> and
      <function>free</function>.  The Boehm-Demers-Weiser allocator
      can be used from C programs, with minimal type annotations.
      Performance is competitive with <function>malloc</function> on
      64-bit architectures, especially for multi-threaded programs.
      The stop-the-world pauses may be problematic for some real-time
      applications, though.
    </para>
    <para>
      However, using a conservative garbage collector may reduce
      opportunities for code reuse because once one library in a
      program uses garbage collection, the whole process memory needs
      to be subject to it, so that no pointers are missed.  The
      Boehm-Demers-Weiser collector also reserves certain signals for
      internal use, so it is not fully transparent to the rest of the
      program.
    </para>
  </section>
 </section>
--- a/en-US/C-Language.xml
+++ b/en-US/C-Language.xml
@ -1,221 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-C-Language">
  <title>The Core Language</title>
  <para>
    C provides no memory safety.  Most recommendations in this section
    deal with this aspect of the language.
  </para>
  <section id="sect-Defensive_Coding-C-Undefined">
    <title>Undefined Behavior</title>
    <para>
      Some C constructs are defined to be undefined by the C standard.
      This does not only mean that the standard does not describe
      what happens when the construct is executed.  It also allows
      optimizing compilers such as GCC to assume that this particular
      construct is never reached.  In some cases, this has caused
      GCC to optimize security checks away.  (This is not a flaw in GCC
      or the C language.  But C certainly has some areas which are more
      difficult to use than others.)
    </para>
    <para>
      Common sources of undefined behavior are:
    </para>
    <itemizedlist>
      <listitem><para>out-of-bounds array accesses</para></listitem>
      <listitem><para>null pointer dereferences</para></listitem>
      <listitem><para>overflow in signed integer arithmetic</para></listitem>
    </itemizedlist>
  </section>
  <section id="sect-Defensive_Coding-C-Pointers">
    <title>Recommendations for Pointers and Array Handling</title>
    <para>
      Always keep track of the size of the array you are working with.
      Often, code is more obviously correct when you keep a pointer
      past the last element of the array, and calculate the number of
      remaining elements by subtracting the current position from
      that pointer.  The alternative, updating a separate variable
      every time when the position is advanced, is usually less
      obviously correct.
    </para>
    <para>
      <xref linkend="ex-Defensive_Coding-C-Pointers-remaining"/>
      shows how to extract Pascal-style strings from a character
      buffer.  The two pointers kept for length checks are
      <varname>inend</varname> and <varname>outend</varname>.
      <varname>inp</varname> and <varname>outp</varname> are the
      respective positions.
      The number of input bytes is checked using the expression
      <literal>len > (size_t)(inend - inp)</literal>.
      The cast silences a compiler warning;
      <varname>inend</varname> is never smaller than
      <varname>inp</varname>.
    </para>
    <example id="ex-Defensive_Coding-C-Pointers-remaining">
      <title>Array processing in C</title>
      <xi:include href="snippets/C-Pointers-remaining.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      It is important that the length checks always have the form
      <literal>len > (size_t)(inend - inp)</literal>, where
      <varname>len</varname> is a variable of type
      <type>size_t</type> which denotes the <emphasis>total</emphasis>
      number of bytes which are about to be read or written next.  In
      general, it is not safe to fold multiple such checks into one,
      as in <literal>len1 + len2 > (size_t)(inend - inp)</literal>,
      because the expression on the left can overflow or wrap around
      (see <xref linkend="sect-Defensive_Coding-C-Arithmetic"/>), and it
      no longer reflects the number of bytes to be processed.
    </para>
  </section>
  <section id="sect-Defensive_Coding-C-Arithmetic">
    <title>Recommendations for Integer Arithmetic</title>
    <para>
      Overflow in signed integer arithmetic is undefined.  This means
      that it is not possible to check for overflow after it happened,
      see <xref linkend="ex-Defensive_Coding-C-Arithmetic-bad"/>.
    </para>
    <example id="ex-Defensive_Coding-C-Arithmetic-bad">
      <title>Incorrect overflow detection in C</title>
      <xi:include href="snippets/C-Arithmetic-add.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      The following approaches can be used to check for overflow,
      without actually causing it.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Use a wider type to perform the calculation, check that the
 	  result is within bounds, and convert the result to the
 	  original type.  All intermediate results must be checked in
 	  this way.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Perform the calculation in the corresponding unsigned type
 	  and use bit fiddling to detect the overflow.
 	  <xref linkend="ex-Defensive_Coding-C-Arithmetic-add_unsigned"/>
 	  shows how to perform an overflow check for unsigned integer
 	  addition.  For three or more terms, all the intermediate
 	  additions have to be checked in this way.
 	</para>
      </listitem>
    </itemizedlist>
    <example id="ex-Defensive_Coding-C-Arithmetic-add_unsigned">
      <title>Overflow checking for unsigned addition</title>
      <xi:include href="snippets/C-Arithmetic-add_unsigned.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <itemizedlist>
      <listitem>
 	<para>
 	  Compute bounds for acceptable input values which are known
 	  to avoid overflow, and reject other values.  This is the
 	  preferred way for overflow checking on multiplications,
 	  see <xref linkend="ex-Defensive_Coding-C-Arithmetic-mult"/>.
 	  <!-- This approach can result in bogus compiler warnings
 	       with signed types:
 	       http://gcc.gnu.org/bugzilla/post_bug.cgi -->
 	</para>
      </listitem>
    </itemizedlist>
    <example id="ex-Defensive_Coding-C-Arithmetic-mult">
      <title>Overflow checking for unsigned multiplication</title>
      <xi:include href="snippets/C-Arithmetic-mult.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      Basic arithmetic operations are commutative, so for bounds checks,
      there are two different but mathematically equivalent
      expressions.  Sometimes, one of the expressions results in
      better code because parts of it can be reduced to a constant.
      This applies to overflow checks for multiplication <literal>a *
      b</literal> involving a constant <literal>a</literal>, where the
      expression is reduced to <literal>b &gt; C</literal> for some
      constant <literal>C</literal> determined at compile time.  The
      other expression, <literal>b &amp;&amp; a > ((unsigned)-1) /
      b</literal>, is more difficult to optimize at compile time.
    </para>
    <para>
      When a value is converted to a signed integer, GCC always
      chooses the result based on 2's complement arithmetic.  This GCC
      extension (which is also implemented by other compilers) helps a
      lot when implementing overflow checks.
    </para>
    <para>
      Sometimes, it is necessary to compare unsigned and signed
      integer variables.  This results in a compiler warning,
      <emphasis>comparison between signed and unsigned integer
      expressions</emphasis>, because the comparison often gives
      unexpected results for negative values.  When adding a cast,
      make sure that negative values are covered properly.  If the
      bound is unsigned and the checked quantity is signed, you should
      cast the checked quantity to an unsigned type at least as wide
      as either operand type.  As a result, negative values will fail
      the bounds check.  (You can still check for negative values
      separately for clarity, and the compiler will optimize away this
      redundant check.)
    </para>
    <para>
      Legacy code should be compiled with the <option>-fwrapv</option>
      GCC option.  As a result, GCC will provide 2's complement
      semantics for integer arithmetic, including defined behavior on
      integer overflow.
    </para>
  </section>
  <section id="sect-Defensive_Coding-C-Globals">
    <title>Global Variables</title>
    <para>
      Global variables should be avoided because they usually lead to
      thread safety hazards.  In any case, they should be declared
      <literal>static</literal>, so that access is restricted to a
      single translation unit.
    </para>
    <para>
      Global constants are not a problem, but declaring them can be
      tricky.  <xref linkend="ex-Defensive_Coding-C-Globals-String_Array"/>
      shows how to declare a constant array of constant strings.
      The second <literal>const</literal> is needed to make the
      array constant, and not just the strings.  It must be placed
      after the <literal>*</literal>, and not before it.
    </para>
    <example id="ex-Defensive_Coding-C-Globals-String_Array">
      <title>Declaring a constant array of constant strings</title>
      <xi:include href="snippets/C-Globals-String_Array.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      Sometimes, static variables local to functions are used as a
      replacement for proper memory management.  Unlike non-static
      local variables, it is possible to return a pointer to static
      local variables to the caller.  But such variables, although
      well-hidden, are effectively global (just as static variables at
      file scope). It is difficult to add thread safety afterwards if
      such interfaces are used.  Merely dropping the
      <literal>static</literal> keyword in such cases leads to
      undefined behavior.
    </para>
    <para>
      Another source for static local variables is a desire to reduce
      stack space usage on embedded platforms, where the stack may
      span only a few hundred bytes.  If this is the only reason why
      the <literal>static</literal> keyword is used, it can just be
      dropped, unless the object is very large (larger than
      128 kilobytes on 32 bit platforms).  In the latter case, it is
      recommended to allocate the object using
      <literal>malloc</literal>, to obtain proper array checking, for
      the same reasons outlined in <xref
      linkend="sect-Defensive_Coding-C-Allocators-alloca"/>.
    </para>
  </section>
 </section>
--- a/en-US/C-Libc.xml
+++ b/en-US/C-Libc.xml
@ -1,352 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-C-Libc">
  <title>The C Standard Library</title>
  <para>
    Parts of the C standard library (and the UNIX and GNU extensions)
    are difficult to use, so you should avoid them.
  </para>
  <para>
    Please check the applicable documentation before using the
    recommended replacements.  Many of these functions allocate
    buffers using <function>malloc</function> which your code must
    deallocate explicitly using <function>free</function>.
  </para>
  <section id="sect-Defensive_Coding-C-Absolutely-Banned">
    <title>Absolutely Banned Interfaces</title>
    <para>
      The functions listed below must not be used because they are
      almost always unsafe.  Use the indicated replacements instead.
    </para>
    <itemizedlist>
      <listitem><para><function>gets</function>
      ⟶ <function>fgets</function></para></listitem>
      <listitem><para><function>getwd</function>
      ⟶ <function>getcwd</function>
      or <function>get_current_dir_name</function></para></listitem>
      <listitem>
 	<para>
 	  <function>readdir_r</function> ⟶ <function>readdir</function>
 	  <!-- It is quite complicated to allocate a properly-sized
 	       buffer for use with readdir_r, and readdir provides
 	       sufficient thread safety guarantees. -->
 	  <!-- ??? Add File_System cross-reference -->
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>realpath</function> (with a non-NULL second parameter)
 	  ⟶ <function>realpath</function> with NULL as the second parameter,
 	  or <function>canonicalize_file_name</function>
 	  <!-- It is complicated to allocate a properly-sized buffer
 	       for use with realpath. -->
 	  <!-- ??? Add File_System cross-reference -->
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      The constants listed below must not be used, either.  Instead,
      code must allocate memory dynamically and use interfaces with
      length checking.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  <literal>NAME_MAX</literal> (limit not actually enforced by
 	  the kernel)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <literal>PATH_MAX</literal> (limit not actually enforced by
 	  the kernel)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <literal>_PC_NAME_MAX</literal> (This limit, returned by the
 	  <function>pathconf</function> function, is not enforced by
 	  the kernel.)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <literal>_PC_PATH_MAX</literal> (This limit, returned by the
 	  <function>pathconf</function> function, is not enforced by
 	  the kernel.)
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      The following structure members must not be used.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  <literal>f_namemax</literal> in <literal>struct
 	  statvfs</literal> (limit not actually enforced by the kernel,
 	  see <literal>_PC_NAME_MAX</literal> above)
 	</para>
      </listitem>
    </itemizedlist>
  </section>
  <section id="sect-Defensive_Coding-C-Avoid">
    <title>Functions to Avoid</title>
    <para>
      The following string manipulation functions can be used securely
      in principle, but their use should be avoided because they are
      difficult to use correctly.  Calls to these functions can be
      replaced with <function>asprintf</function> or
      <function>vasprintf</function>.  (For non-GNU targets, these
      functions are available from Gnulib.)  In some cases, the
      <function>snprintf</function> function might be a suitable
      replacement, see <xref
      linkend="sect-Defensive_Coding-C-String-Functions-Length"/>.
    </para>
    <itemizedlist>
      <listitem><para><function>sprintf</function></para></listitem>
      <listitem><para><function>strcat</function></para></listitem>
      <listitem><para><function>strcpy</function></para></listitem>
      <listitem><para><function>vsprintf</function></para></listitem>
    </itemizedlist>
    <para>
      Use the indicated replacements for the functions below.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  <function>alloca</function> ⟶
 	  <function>malloc</function> and <function>free</function>
 	  (see <xref linkend="sect-Defensive_Coding-C-Allocators-alloca"/>)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>putenv</function> ⟶
 	  explicit <varname>envp</varname> argument in process creation
 	  (see <xref linkend="sect-Defensive_Coding-Tasks-Processes-environ"/>)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>setenv</function> ⟶
 	  explicit <varname>envp</varname> argument in process creation
 	  (see <xref linkend="sect-Defensive_Coding-Tasks-Processes-environ"/>)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>strdupa</function> ⟶
 	  <function>strdup</function> and <function>free</function>
 	  (see <xref linkend="sect-Defensive_Coding-C-Allocators-alloca"/>)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>strndupa</function> ⟶
 	  <function>strndup</function> and <function>free</function>
 	  (see <xref linkend="sect-Defensive_Coding-C-Allocators-alloca"/>)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>system</function> ⟶
 	  <function>posix_spawn</function>
 	  or <function>fork</function>/<function>execve</function>
 	  (see <xref linkend="sect-Defensive_Coding-Tasks-Processes-execve"/>)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>unsetenv</function> ⟶
 	  explicit <varname>envp</varname> argument in process creation
 	  (see <xref linkend="sect-Defensive_Coding-Tasks-Processes-environ"/>)
 	</para>
      </listitem>
    </itemizedlist>
  </section>
  <section id="sect-Defensive_Coding-C-String-Functions-Length">
    <title>String Functions with Explicit Length Arguments</title>
    <para>
      The C run-time library provides string manipulation functions
      which do not just look for NUL characters for string termination,
      but also honor explicit lengths provided by the caller.
      However, these functions evolved over a long period of time, and
      the lengths mean different things depending on the function.
    </para>
    <section id="sect-Defensive_Coding-C-Libc-snprintf">
      <title><literal>snprintf</literal></title>
      <para>
 	The <function>snprintf</function> function provides a way to
 	construct a string in a statically-sized buffer.  (If the buffer
 	is allocated on the heap, consider using
 	<function>asprintf</function> instead.)
      </para>
      <informalexample>
 	<xi:include href="snippets/C-String-Functions-snprintf.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	The second argument to the <function>snprintf</function> call
 	should always be the size of the buffer in the first argument
 	(which should be a character array).  Elaborate pointer and
 	length arithmetic can introduce errors and nullify the
 	security benefits of <function>snprintf</function>.
      </para>
      <para>
 	In particular, <literal>snprintf</literal> is not well-suited
 	to constructing a string iteratively, by appending to an
 	existing buffer.  <function>snprintf</function> returns one of
 	two values, <literal>-1</literal> on errors, or the number of
 	characters which <emphasis>would have been written to the
 	buffer if the buffer were large enough</emphasis>.  This means
 	that adding the result of <function>snprintf</function> to the
 	buffer pointer to skip over the characters just written is
 	incorrect and risky.  However, as long as the length argument
 	is not zero, the buffer will remain null-terminated. <xref
 	linkend="ex-Defensive_Coding-C-String-Functions-snprintf-incremental"/>
 	works because <literal>end - current &gt; 0</literal> is a loop
 	invariant.  After the loop, the result string is in the
 	<varname>buf</varname> variable.
      </para>
      <example id="ex-Defensive_Coding-C-String-Functions-snprintf-incremental">
 	<title>Repeatedly writing to a buffer using <function>snprintf</function></title>
 	<xi:include href="snippets/C-String-Functions-snprintf-incremental.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	If you want to avoid the call to <function>strlen</function>
 	for performance reasons, you have to check for a negative
 	return value from <function>snprintf</function> and also check
 	if the return value is equal to the specified buffer length or
 	larger.  Only if neither condition applies, you may advance
 	the pointer to the start of the write buffer by the number
 	returned by <function>snprintf</function>.  However, this
 	optimization is rarely worthwhile.
      </para>
      <para>
 	Note that it is not permitted to use the same buffer both as
 	the destination and as a source argument.
      </para>
    </section>
    <section id="sect-Defensive_Coding-C-Libc-vsnprintf">
      <title><literal>vsnprintf</literal> and Format Strings</title>
      <para>
 	If you use <function>vsnprintf</function> (or
 	<function>vasprintf</function> or even
 	<function>snprintf</function>) with a format string which is
 	not a constant, but a function argument, it is important to
 	annotate the function with a <literal>format</literal>
 	function attribute, so that GCC can warn about misuse of your
 	function (see <xref
 	linkend="ex-Defensive_Coding-C-String-Functions-format-Attribute"/>).
      </para>
      <example id="ex-Defensive_Coding-C-String-Functions-format-Attribute">
 	<title>The <literal>format</literal> function attribute</title>
 	<xi:include href="snippets/C-String-Functions-format.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
    </section>
    <section id="sect-Defensive_Coding-C-Libc-strncpy">
      <title><function>strncpy</function></title>
      <para>
 	The <function>strncpy</function> function does not ensure that
 	the target buffer is null-terminated.  A common idiom for
 	ensuring NUL termination is:
      </para>
      <informalexample>
 	<xi:include href="snippets/C-String-Functions-strncpy.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	Another approach uses the <function>strncat</function>
 	function for this purpose:
      </para>
      <informalexample>
 	<xi:include href="snippets/C-String-Functions-strncat-as-strncpy.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
    </section>
    <section id="sect-Defensive_Coding-C-Libc-strncat">
      <title><function>strncat</function></title>
      <para>
 	The length argument of the <function>strncat</function>
 	function specifies the maximum number of characters copied
 	from the source buffer, excluding the terminating NUL
 	character.  This means that the required number of bytes in
 	the destination buffer is the length of the original string,
 	plus the length argument in the <function>strncat</function>
 	call, plus one.  Consequently, this function is rarely
 	appropriate for performing a length-checked string operation,
 	with the notable exception of the <function>strcpy</function>
 	emulation described in <xref
 	linkend="sect-Defensive_Coding-C-Libc-strncpy"/>.
      </para>
      <para>
 	To implement a length-checked string append, you can use an
 	approach similar to <xref
 	linkend="ex-Defensive_Coding-C-String-Functions-snprintf-incremental"/>:
      </para>
      <informalexample>
 	<xi:include href="snippets/C-String-Functions-strncat-emulation.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	In many cases, including this one, the string concatenation
 	can be avoided by combining everything into a single format
 	string:
      </para>
      <informalexample>
 	<xi:include href="snippets/C-String-Functions-strncat-merged.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	But you must not dynamically construct format strings
 	to avoid concatenation because this would prevent GCC from
 	type-checking the argument lists.
      </para>
      <para>
 	It is not possible to use format strings like
 	<literal>"%s%s"</literal> to implement concatenation, unless
 	you use separate buffers.  <function>snprintf</function> does
 	not support overlapping source and target strings.
      </para>
    </section>
    <section>
      <title><function>strlcpy</function> and
      <function>strlcat</function></title>
      <para>
 	Some systems support <function>strlcpy</function> and
 	<function>strlcat</function> functions which behave this way,
 	but these functions are not part of GNU libc.
 	<function>strlcpy</function> is often replaced with
 	<function>snprintf</function> with a <literal>"%s"</literal>
 	format string.  See <xref
 	linkend="sect-Defensive_Coding-C-Libc-snprintf"/> for a caveat
 	related to the <function>snprintf</function> return value.
      </para>
      <para>
 	To emulate <function>strlcat</function>, use the approach
 	described in <xref
 	linkend="sect-Defensive_Coding-C-Libc-strncat"/>.
      </para>
    </section>
    <section>
      <title>ISO C11 Annex K *<function>_s</function> functions</title>
      <para>
 	ISO C11 adds another set of length-checking functions, but GNU
 	libc currently does not implement them.
      </para>
    </section>
    <section>
      <title>Other <function>strn</function>* and
      <function>stpn</function>* functions</title>
      <para>
 	GNU libc contains additional functions with different variants
 	of length checking.  Consult the documentation before using
 	them to find out what the length actually means.
      </para>
    </section>
  </section>
 </section>
--- a/en-US/C-Other.xml
+++ b/en-US/C-Other.xml
@ -1,70 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-C-Other">
  <title>Other C-related Topics</title>
  <section id="sect-Defensive_Coding-C-Wrapper-Functions">
    <title>Wrapper Functions</title>
    <para>
      Some libraries provide wrappers for standard library functions.
      Common cases include allocation functions such as
      <function>xmalloc</function> which abort the process on
      allocation failure (instead of returning a
      <literal>NULL</literal> pointer), or alternatives to relatively
      recent library additions such as <function>snprintf</function>
      (along with implementations for systems which lack them).
    </para>
    <para>
      In general, such wrappers are a bad idea, particularly if they
      are not implemented as inline functions or preprocessor macros.
      The compiler lacks knowledge of such wrappers outside the
      translation unit which defines them, which means that some
      optimizations and security checks are not performed.  Adding
      <literal>__attribute__</literal> annotations to function
      declarations can remedy this to some extent, but these
      annotations have to be maintained carefully for feature parity
      with the standard implementation.
    </para>
    <para>
      At the minimum, you should apply these attributes:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  If you wrap a function which accepts a GCC-recognized format
 	  string (for example, a <function>printf</function>-style
 	  function used for logging), you should add a suitable
 	  <literal>format</literal> attribute, as in <xref
 	  linkend="ex-Defensive_Coding-C-String-Functions-format-Attribute"/>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  If you wrap a function which carries a
 	  <literal>warn_unused_result</literal> attribute and you
 	  propagate its return value, your wrapper should be declared
 	  with <literal>warn_unused_result</literal> as well.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Duplicating the buffer length checks based on the
 	  <function>__builtin_object_size</function> GCC builtin is
 	  desirable if the wrapper processes arrays.  (This
 	  functionality is used by the
 	  <literal>-D_FORTIFY_SOURCE=2</literal> checks to guard
 	  against static buffer overflows.)  However, designing
 	  appropriate interfaces and implementing the checks may not
 	  be entirely straightforward.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      For other attributes (such as <literal>malloc</literal>),
      careful analysis and comparison with the compiler documentation
      is required to check if propagating the attribute is
      appropriate.  Incorrectly applied attributes can result in
      undesired behavioral changes in the compiled code.
    </para>
  </section>
 </section>
--- a/en-US/C.xml
+++ b/en-US/C.xml
@ -1,12 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-C">
 	<title>The C Programming Language</title>
    <xi:include href="C-Language.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="C-Libc.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="C-Allocators.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="C-Other.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
 </chapter>
--- a/en-US/CXX-Language.xml
+++ b/en-US/CXX-Language.xml
@ -1,188 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-CXX-Language">
  <title>The Core Language</title>
  <para>
    C++ includes a large subset of the C language.  As far as the C
    subset is used, the recommendations in <xref
    linkend="chap-Defensive_Coding-C"/> apply.
  </para>
  <section>
    <title>Array Allocation with <literal>operator new[]</literal></title>
    <para>
      For very large values of <literal>n</literal>, an expression
      like <literal>new T[n]</literal> can return a pointer to a heap
      region which is too small.  In other words, not all array
      elements are actually backed with heap memory reserved to the
      array.  Current GCC versions generate code that performs a
      computation of the form <literal>sizeof(T) * size_t(n) +
      cookie_size</literal>, where <literal>cookie_size</literal> is
      currently at most 8.  This computation can overflow, and GCC
      versions prior to 4.8 generated code which did not detect this.
      (Fedora 18 was the first release which fixed this in GCC.)
    </para>
    <para>
      The <literal>std::vector</literal> template can be used instead
      of an explicit array allocation.  (The GCC implementation detects
      overflow internally.)
    </para>
    <para>
      If there is no alternative to <literal>operator new[]</literal>
      and the sources will be compiled with older GCC versions, code
      which allocates arrays with a variable length must check for
      overflow manually.  For the <literal>new T[n]</literal> example,
      the size check could be <literal>n &lt; 0 || (n > 0 &amp;&amp; n &gt;
      (size_t(-1) - 8) / sizeof(T))</literal>.  (See <xref
      linkend="sect-Defensive_Coding-C-Arithmetic"/>.)  If there are
      additional dimensions (which must be constants according to the
      C++ standard), these should be included as factors in the
      divisor.
    </para>
    <para>
      These countermeasures prevent out-of-bounds writes and potential
      code execution.  Very large memory allocations can still lead to
      a denial of service.  <xref
      linkend="sect-Defensive_Coding-Tasks-Serialization-Decoders"/>
      contains suggestions for mitigating this problem when processing
      untrusted data.
    </para>
    <para>
      See <xref linkend="sect-Defensive_Coding-C-Allocators-Arrays"/>
      for array allocation advice for C-style memory allocation.
    </para>
  </section>
  <section>
    <title>Overloading</title>
    <para>
      Do not overload functions with versions that have different
      security characteristics.  For instance, do not implement a
      function <function>strcat</function> which works on
      <type>std::string</type> arguments.  Similarly, do not name
      methods after such functions.
    </para>
  </section>
  <section>
    <title>ABI compatibility and preparing for security updates</title>
    <para>
      A stable binary interface (ABI) is vastly preferred for security
      updates.  Without a stable ABI, all reverse dependencies need
      recompiling, which can be a lot of work and could even be
      impossible in some cases.  Ideally, a security update only
      updates a single dynamic shared object, and is picked up
      automatically after restarting affected processes.
    </para>
    <para>
      Outside of extremely performance-critical code, you should
      ensure that a wide range of changes is possible without breaking
      ABI.  Some very basic guidelines are:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Avoid inline functions.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Use the pointer-to-implementation idiom.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Try to avoid templates.  Use them if the increased type
 	  safety provides a benefit to the programmer.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Move security-critical code out of templated code, so that
 	  it can be patched in a central place if necessary.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      The KDE project publishes a document with more extensive
      guidelines on ABI-preserving changes to C++ code, <ulink
      url="http://techbase.kde.org/Policies/Binary_Compatibility_Issues_With_C++">Policies/Binary
      Compatibility Issues With C++</ulink>
      (<emphasis>d-pointer</emphasis> refers to the
      pointer-to-implementation idiom).
    </para>
  </section>
  <section id="sect-Defensive_Coding-CXX-Language-CXX11">
    <title>C++0X and C++11 Support</title>
    <para>
      GCC offers different language compatibility modes:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  <option>-std=c++98</option> for the original 1998 C++
 	  standard
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <option>-std=c++03</option> for the 1998 standard with the
 	  changes from the TR1 technical report
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <option>-std=c++11</option> for the 2011 C++ standard.  This
 	  option should not be used.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <option>-std=c++0x</option> for several different versions
 	  of C++11 support in development, depending on the GCC
 	  version.  This option should not be used.
 	  <!-- There were two incompatibilities before GCC 4.7.2
 	       (std::list and std::pair), but linking C++98 and C++11
 	       code is still unsupported, although it currently has
 	       some chance of working by accident. -->
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      For each of these flags, there are variants which also enable
      GNU extensions (mostly language features also found in C99 or
      C11): <option>-std=gnu++98</option>,
      <option>-std=gnu++03</option>, <option>-std=gnu++11</option>.
      Again, <option>-std=gnu++11</option> should not be used.
    </para>
    <para>
      If you enable C++11 support, the ABI of the standard C++ library
      <literal>libstdc++</literal> will change in subtle ways.
      Currently, no C++ libraries are compiled in C++11 mode, so if
      you compile your code in C++11 mode, it will be incompatible
      with the rest of the system.  Unfortunately, this is also the
      case if you do not use any C++11 features.  Currently, there is
      no safe way to enable C++11 mode (except for freestanding
      applications).
    </para>
    <para>
      The meaning of C++0X mode changed from GCC release to GCC
      release.  Earlier versions were still ABI-compatible with C++98
      mode, but in the most recent versions, switching to C++0X mode
      activates C++11 support, with its compatibility problems.
    </para>
    <para>
      Some C++11 features (or approximations thereof) are available
      with TR1 support, that is, with <option>-std=c++03</option> or
      <option>-std=gnu++03</option> and in the
      <literal>&lt;tr1/*&gt;</literal> header files.  This includes
      <literal>std::tr1::shared_ptr</literal> (from
      <literal>&lt;tr1/memory&gt;</literal>) and
      <literal>std::tr1::function</literal> (from
      <literal>&lt;tr1/functional&gt;</literal>).  For other C++11
      features, the Boost C++ library contains replacements.
    </para>
  </section>
 </section>
--- a/en-US/CXX-Std.xml
+++ b/en-US/CXX-Std.xml
@ -1,202 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-CXX-Std">
  <title>The C++ Standard Library</title>
  <para>
    The C++ standard library includes most of its C counterpart
    by reference, see <xref linkend="sect-Defensive_Coding-C-Libc"/>.
  </para>
  <section id="sect-Defensive_Coding-CXX-Std-Functions">
    <title>Functions That Are Difficult to Use</title>
    <para>
      This section collects functions and function templates which are
      part of the standard library and are difficult to use.
    </para>
    <section id="sect-Defensive_Coding-CXX-Std-Functions-Unpaired_Iterators">
      <title>Unpaired Iterators</title>
      <para>
 	Functions which use output operators or iterators which do not
 	come in pairs (denoting ranges) cannot perform iterator range
 	checking.
 	(See <xref linkend="sect-Defensive_Coding-CXX-Std-Iterators"/>)
 	Function templates which involve output iterators are
 	particularly dangerous:
      </para>
      <itemizedlist>
 	<listitem><para><function>std::copy</function></para></listitem>
 	<listitem><para><function>std::copy_backward</function></para></listitem>
 	<listitem><para><function>std::copy_if</function></para></listitem>
 	<listitem><para><function>std::move</function> (three-argument variant)</para></listitem>
 	<listitem><para><function>std::move_backward</function></para></listitem>
 	<listitem><para><function>std::partition_copy</function></para></listitem>
 	<listitem><para><function>std::remove_copy</function></para></listitem>
 	<listitem><para><function>std::remove_copy_if</function></para></listitem>
 	<listitem><para><function>std::replace_copy</function></para></listitem>
 	<listitem><para><function>std::replace_copy_if</function></para></listitem>
 	<listitem><para><function>std::swap_ranges</function></para></listitem>
 	<listitem><para><function>std::transform</function></para></listitem>
      </itemizedlist>
      <para>
 	In addition, <function>std::copy_n</function>,
 	<function>std::fill_n</function> and
 	<function>std::generate_n</function> do not perform iterator
 	checking, either, but there is an explicit count which has to be
 	supplied by the caller, as opposed to an implicit length
 	indicator in the form of a pair of forward iterators.
      </para>
      <para>
 	These output-iterator-expecting functions should only be used
 	with unlimited-range output iterators, such as iterators
 	obtained with the <function>std::back_inserter</function>
 	function.
      </para>
      <para>
 	Other functions use single input or forward iterators, which can
 	read beyond the end of the input range if the caller is not careful:
      </para>
      <itemizedlist>
 	<listitem><para><function>std::equal</function></para></listitem>
 	<listitem><para><function>std::is_permutation</function></para></listitem>
 	<listitem><para><function>std::mismatch</function></para></listitem>
      </itemizedlist>
    </section>
  </section>
  <section id="sect-Defensive_Coding-CXX-Std-String">
    <title>String Handling with <literal>std::string</literal></title>
    <para>
      The <literal>std::string</literal> class provides a convenient
      way to handle strings.  Unlike C strings,
      <literal>std::string</literal> objects have an explicit length
      (and can contain embedded NUL characters), and storage for their
      characters is managed automatically.  This section discusses
      <literal>std::string</literal>, but these observations also
      apply to other instances of the
      <literal>std::basic_string</literal> template.
    </para>
    <para>
      The pointer returned by the <function>data()</function> member
      function does not necessarily point to a NUL-terminated string.
      To obtain a C-compatible string pointer, use
      <function>c_str()</function> instead, which adds the NUL
      terminator.
    </para>
    <para>
      The pointers returned by the <function>data()</function> and
      <function>c_str()</function> functions and iterators are only
      valid until certain events happen.  It is required that the
      exact <literal>std::string</literal> object still exists (even
      if it was initially created as a copy of another string object).
      Pointers and iterators are also invalidated when non-const
      member functions are called, or functions with a non-const
      reference parameter.  The behavior of the GCC implementation
      deviates from that required by the C++ standard if multiple
      threads are present.  In general, only the first call to a
      non-const member function after a structural modification of the
      string (such as appending a character) is invalidating, but this
      also applies to member functions such as the non-const version of
      <function>begin()</function>, in violation of the C++ standard.
    </para>
    <para>
      Particular care is necessary when invoking the
      <function>c_str()</function> member function on a temporary
      object.  This is convenient for calling C functions, but the
      pointer will turn invalid as soon as the temporary object is
      destroyed, which generally happens when the outermost expression
      enclosing the expression on which <function>c_str()</function>
      is called completes evaluation.  Passing the result of
      <function>c_str()</function> to a function which does not store
      or otherwise leak that pointer is safe, though.
    </para>
    <para>
      Like with <literal>std::vector</literal> and
      <literal>std::array</literal>, subscripting with
      <literal>operator[]</literal> does not perform bounds checks.
      Use the <function>at(size_type)</function> member function
      instead.  See <xref
      linkend="sect-Defensive_Coding-CXX-Std-Subscript"/>.
      Furthermore, accessing the terminating NUL character using
      <literal>operator[]</literal> is not possible.  (In some
      implementations, the <literal>c_str()</literal> member function
      writes the NUL character on demand.)
    </para>
    <para>
      Never write to the pointers returned by
      <function>data()</function> or <function>c_str()</function>
      after casting away <literal>const</literal>.  If you need a
      C-style writable string, use a
      <literal>std::vector&lt;char&gt;</literal> object and its
      <function>data()</function> member function.  In this case, you
      have to explicitly add the terminating NUL character.
    </para>
    <para>
      GCC's implementation of <literal>std::string</literal> is
      currently based on reference counting.  It is expected that a
      future version will remove the reference counting, due to
      performance and conformance issues.  As a result, code that
      implicitly assumes sharing by holding on to pointers or iterators
      for too long will break, resulting in run-time crashes or worse.
      On the other hand, non-const iterator-returning functions will
      no longer give other threads an opportunity for invalidating
      existing iterators and pointers because iterator invalidation
      does not depend on sharing of the internal character array
      object anymore.
    </para>
  </section>
  <section id="sect-Defensive_Coding-CXX-Std-Subscript">
    <title>Containers and <literal>operator[]</literal></title>
    <para>
      Many sequence containers similar to <literal>std::vector</literal>
      provide both <literal>operator[](size_type)</literal> and a
      member function <literal>at(size_type)</literal>.  This applies
      to <literal>std::vector</literal> itself,
      <literal>std::array</literal>, <literal>std::string</literal>
      and other instances of <literal>std::basic_string</literal>.
    </para>
    <para>
      <literal>operator[](size_type)</literal> is not required by the
      standard to perform bounds checking (and the implementation in
      GCC does not).  In contrast, <literal>at(size_type)</literal>
      must perform such a check.  Therefore, in code which is not
      performance-critical, you should prefer
      <literal>at(size_type)</literal> over
      <literal>operator[](size_type)</literal>, even though it is
      slightly more verbose.
    </para>
    <para>
      The <literal>front()</literal> and <literal>back()</literal>
      member functions are undefined if a vector object is empty.  You
      can use <literal>vec.at(0)</literal> and
      <literal>vec.at(vec.size() - 1)</literal> as checked
      replacements.  For an empty vector, <literal>data()</literal> is
      defined; it returns an arbitrary pointer, but not necessarily
      the NULL pointer.
    </para>
  </section>
  <section id="sect-Defensive_Coding-CXX-Std-Iterators">
    <title>Iterators</title>
    <para>
      Iterators do not perform any bounds checking.  Therefore, all
      functions that work on iterators should accept them in pairs,
      denoting a range, and make sure that iterators are not moved
      outside that range.  For forward iterators and bidirectional
      iterators, you need to check for equality before moving the
      first or last iterator in the range.  For random-access
      iterators, you need to compute the difference before adding or
      subtracting an offset.  It is not possible to perform the
      operation and check for an invalid iterator afterwards.
    </para>
    <para>
      Output iterators cannot be compared for equality.  Therefore, it
      is impossible to write code that detects that it has been
      supplied an output area that is too small, and their use should
      be avoided.
    </para>
    <para>
      These issues make some of the standard library functions
      difficult to use correctly, see <xref
      linkend="sect-Defensive_Coding-CXX-Std-Functions-Unpaired_Iterators"/>.
    </para>
  </section>
 </section>
--- a/en-US/CXX.xml
+++ b/en-US/CXX.xml
@ -1,10 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-CXX">
  <title>The C++ Programming Language</title>
  <xi:include href="CXX-Language.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
  <xi:include href="CXX-Std.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
 </chapter>
--- a/en-US/Defensive_Coding.ent
+++ b/en-US/Defensive_Coding.ent
@ -1,2 +0,0 @@
 <!ENTITY YEAR "2012-2017">
 <!ENTITY HOLDER "Red Hat, Inc">
--- a/en-US/Defensive_Coding.xml
+++ b/en-US/Defensive_Coding.xml
@ -1,33 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
 <book>
  <xi:include href="Book_Info.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
  <part>
    <title>Programming Languages</title>
    <xi:include href="C.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="CXX.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Java.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Python.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Shell.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Go.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Vala.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
  </part>
  <part>
    <title>Specific Programming Tasks</title>
    <xi:include href="Tasks-Library_Design.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Tasks-Descriptors.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Tasks-File_System.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Tasks-Temporary_Files.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Tasks-Processes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Tasks-Serialization.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Tasks-Cryptography.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Tasks-Packaging.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
  </part>
  <part>
    <title>Implementing Security Features</title>
    <xi:include href="Features-Authentication.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Features-TLS.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
    <xi:include href="Features-HSM.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
  </part>
    <xi:include href="Revision_History.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
 </book>
--- a/en-US/Features-Authentication.xml
+++ b/en-US/Features-Authentication.xml
@ -1,189 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Authentication">
  <title>Authentication and Authorization</title>
  <section id="sect-Defensive_Coding-Authentication-Server">
    <title>Authenticating Servers</title>
    <para>
      When connecting to a server, a client has to make sure that it
      is actually talking to the server it expects.  There are two
      different aspects, securing the network path, and making sure
      that the expected user runs the process on the target host.
      There are several ways to ensure that:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  The server uses a TLS certificate which is valid according
 	  to the web browser public key infrastructure, and the client
 	  verifies the certificate and the host name.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The server uses a TLS certificate which is expected by the
 	  client (perhaps it is stored in a configuration file read by
 	  the client). In this case, no host name checking is
 	  required.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  On Linux, UNIX domain sockets (of the
 	  <literal>PF_UNIX</literal> protocol family, sometimes called
 	  <literal>PF_LOCAL</literal>) are restricted by file system
 	  permissions.  If the server socket path is not
 	  world-writable, the server identity cannot be spoofed by
 	  local users.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Port numbers less than 1024 (<emphasis>trusted
 	  ports</emphasis>) can only be used by
 	  <literal>root</literal>, so if a UDP or TCP server is
 	  running on the local host and it uses a trusted port, its
 	  identity is assured.  (Not all operating systems enforce the
 	  trusted ports concept, and the network might not be trusted,
 	  so it is only useful on the local system.)
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      TLS (<xref linkend="chap-Defensive_Coding-TLS"/>) is the
      recommended way for securing connections over untrusted
      networks.
    </para>
    <para>
      If the server port number is 1024 or higher, a local user can
      impersonate the process by binding to this socket, perhaps after
      crashing the real server by exploiting a denial-of-service
      vulnerability.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Authentication-Host_based">
    <title>Host-based Authentication</title>
    <para>
      Host-based authentication uses access control lists (ACLs) to
      accept or deny requests from clients. This authentication
      method comes in two flavors: IP-based (or, more generally,
      address-based) and name-based (with the name coming from DNS or
      <filename>/etc/hosts</filename>). IP-based ACLs often use
      prefix notation to extend access to entire subnets.  Name-based
      ACLs sometimes use wildcards for adding groups of hosts (from
      entire DNS subtrees).  (In the SSH context, host-based
      authentication means something completely different and is not
      covered in this section.)
    </para>
    <para>
      Host-based authentication trusts the network and may not offer
      sufficient granularity, so it has to be considered a weak form
      of authentication.  On the other hand, IP-based authentication
      can be made extremely robust and can be applied very early in
      input processing, so it offers an opportunity for significantly
      reducing the number of potential attackers for many services.
    </para>
    <para>
      The names returned by <function>gethostbyaddr</function> and
      <function>getnameinfo</function> functions cannot be trusted.
      (DNS PTR records can be set to arbitrary values, not just names
      belonging to the address owner.)  If these names are used for ACL
      matching, a forward lookup using
      <function>gethostbyname</function> or
      <function>getaddrinfo</function> has to be performed.  The name
      is only valid if the original address is found among the results
      of the forward lookup (<emphasis>double-reverse
      lookup</emphasis>).
    </para>
    <para>
      An empty ACL should deny all access (deny-by-default).  If empty
      ACLs permit all access, configuring any access list must switch
      to deny-by-default for all unconfigured protocols, in both
      name-based and address-based variants.
    </para>
    <para>
      Similarly, if an address or name is not matched by the list, it
      should be denied.  However, many implementations behave
      differently, so the actual behavior must be documented properly.
    </para>
    <para>
      IPv6 addresses can embed IPv4 addresses.  There is no
      universally correct way to deal with this ambiguity.  The
      behavior of the ACL implementation should be documented.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Authentication-UNIX_Domain">
    <title>UNIX Domain Socket Authentication</title>
    <para>
      UNIX domain sockets (with address family
      <literal>AF_UNIX</literal> or <literal>AF_LOCAL</literal>) are
      restricted to the local host and offer a special authentication
      mechanism: credentials passing.
    </para>
    <para>
      Nowadays, most systems support the
      <literal>SO_PEERCRED</literal> (Linux) or
      <literal>LOCAL_PEERCRED</literal> (FreeBSD) socket options, or
      the <function>getpeereid</function> function (other BSDs, OS X).
      These interfaces provide direct access to the (effective) user
      ID on the other end of a domain socket connection, without
      cooperation from the other end.
    </para>
    <para>
      Historically, credentials passing was implemented using
      ancillary data in the <function>sendmsg</function> and
      <function>recvmsg</function> functions.  On some systems, only
      credentials data that the peer has explicitly sent can be
      received, and the kernel checks the data for correctness on the
      sending side.  This means that both peers need to deal with
      ancillary data.  Compared to that, the modern interfaces are
      easier to use.  Both sets of interfaces vary considerably among
      UNIX-like systems, unfortunately.
    </para>
    <para>
      If you want to authenticate based on supplementary groups, you
      should obtain the user ID using one of these methods, and look
      up the list of supplementary groups using
      <function>getpwuid</function> (or
      <function>getpwuid_r</function>) and
      <function>getgrouplist</function>.  Using the PID and
      information from <filename>/proc/PID/status</filename> is prone
      to race conditions and insecure.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Authentication-Netlink">
    <title><literal>AF_NETLINK</literal> Authentication of Origin</title>
    <!-- ??? kernel change may make this obsolete:
         https://bugzilla.redhat.com/show_bug.cgi?id=851968 -->
    <para>
      Netlink messages are used as a high-performance data transfer
      mechanism between the kernel and the user space.  Traditionally,
      they are used to exchange information related to the network
      stack, such as routing table entries.
    </para>
    <para>
      When processing Netlink messages from the kernel, it is
      important to check that these messages actually originate from
      the kernel, by checking that the port ID (or PID) field
      <literal>nl_pid</literal> in the <literal>sockaddr_nl</literal>
      structure is <literal>0</literal>.  (This structure can be
      obtained using <function>recvfrom</function> or
      <function>recvmsg</function>, it is different from the
      <literal>nlmsghdr</literal> structure.)  The kernel does not
      prevent other processes from sending unicast Netlink messages,
      but the <literal>nl_pid</literal> field in the sender's socket
      address will be non-zero in such cases.
    </para>
    <para>
      Applications should not use <literal>AF_NETLINK</literal>
      sockets as an IPC mechanism among processes, but prefer UNIX
      domain sockets for this task.
    </para>
  </section>
 </chapter>
--- a/en-US/Features-HSM.xml
+++ b/en-US/Features-HSM.xml
@ -1,180 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-HSM">
  <title>Hardware Security Modules and Smart Cards</title>
  <para>
      Hardware Security Modules (HSMs) are specialized hardware intended
      to protect private keys on server systems. They store internally
      the private keys (e.g., RSA keys), and provide access to operations
      with the keys without exposing the keys. That access is provided using
      a standardized API, which across Fedora is PKCS#11.
  </para>
  <para>
      Smart cards are small cards with a microprocessor, often combined with a
      USB reader resembling a USB stick. They are very similar in nature to
      HSMs as they can also be used to protect private keys and are almost
      universally accessed via the PKCS#11 API. The main distinguishers from HSMs
      are their inferior performance and, often, the available hardware protection mechanisms.
  </para>
  <para>
      Typically a smart card or HSM relies on a shared library to provide functionality.
      This shared library follows the PKCS#11 API and thus is often referred to as 
      a PKCS#11 module. In Fedora the <literal>opensc</literal>
      shared module (<literal>opensc-pkcs11.so</literal>) can be used for the majority
      of smart cards available in the market. By convention these modules are located
      at <literal>/usr/lib64/pkcs11</literal>. They can be used directly, or via
      a higher level library.
  </para>
  <para>
      All the major crypto libraries (NSS, GnuTLS and OpenSSL in Fedora) support
      hardware security modules and smart cards, by providing wrappers over the
      PKCS#11 API. However, the level of support varies, as well as the ease of
      use of such modules and their integration into the overall library API.
  </para>
  <itemizedlist>
    <listitem>
    <para>
      The PKCS#11 API does provide an API to access HSMs or smart cards, but
      does not provide any method of discovering which HSMs or smart cards are
      available in the system. In Fedora, modules are registered via <ulink url="https://p11-glue.freedesktop.org/doc/p11-kit/pkcs11-conf.html">p11-kit
      configuration files</ulink>, stored at <literal>/etc/pkcs11/modules/</literal>. For applications using
      <literal>engine_pkcs11</literal> or GnuTLS the registered modules are
      available without further configuration. Other applications will have to load
      the <literal>p11-kit-proxy.so</literal> module.
    </para>
    </listitem>
    <listitem>
    <para>
      Most crypto libraries support the <ulink url="https://tools.ietf.org/html/rfc7512">PKCS#11 URLs scheme</ulink>
      to identify objects stored in an HSM, however that support is not yet universal.
      Some support transparent usage of PKCS#11 objects, e.g., specifying
      a PKCS#11 object instead of a file, while others require the use of
      specialized APIs for such objects.
    </para>
    </listitem>
    <listitem>
    <para>
       Objects stored in an HSM or smart card can be protected with a PIN. As such,
       libraries typically require setting a PIN handling function for accessing private keys,
       or the PIN can be passed along with a PKCS#11 URL and the pin-value parameter.
    </para>
    </listitem>
    <listitem>
    <para>
 	Obtaining a Hardware Security Module, or including it in continuous integration
 	testing, is not always feasible. For testing purposes smart cards supported by the OpenSC
 	project can be used, as well as software modules like <literal>softhsm</literal> which
 	provides a tool to set up a software HSM, and a PKCS#11 library.
    </para>
    </listitem>
    <listitem>
    <para>
 	The PKCS#11 API requires applications that use fork to reinitialize the used PKCS#11
 	modules. This is an uncommon requirement, which has led to several bugs across
 	applications in Fedora which used PKCS#11 directly. To make things more complicated,
 	software PKCS#11 modules like <literal>softhsm</literal> do not require this re-initialization
 	leading to applications working against software modules but failing with hardware
 	modules or smart cards. The wrapper PKCS#11 APIs provided by NSS, GNUTLS and
 	engine_pkcs11 (OpenSSL) handle the reinitialization after fork requirement transparently.
    </para>
    </listitem>
  </itemizedlist>
  <section id="sect-Defensive_Coding-HSM-OpenSSL">
    <title>OpenSSL HSM Support</title>
    <para>
 	OpenSSL does not have native support for PKCS#11. It can
 	provide PKCS#11 support through the OpenSC project's
 	<literal>pkcs11</literal> engine (formerly known as <literal>engine_pkcs11</literal>).
 	As such, software intended to use HSMs must utilize that engine.
    </para>
    <para>
        Engine <literal>pkcs11</literal> supports loading stored objects via PKCS#11 URLs.
        If no PKCS#11 module is specified the engine will use the system-wide registered
        modules via <literal>p11-kit-proxy.so</literal>. 
    </para>
    <para>
      The following example demonstrates the initialization of the pkcs11 engine
      and its usage to sign data.
    </para>
    <example id="ex-Defensive_Coding-HSM-OpenSSL">
 	<title>Signing data with HSM and OpenSSL</title>
 	<xi:include href="snippets/Features-HSM-OpenSSL.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
  </section>
  <section id="sect-Defensive_Coding-HSM-GNUTLS">
    <title>GNUTLS HSM Support</title>
    <para>
 	GNUTLS supports PKCS#11 natively. Most of the API functions
 	accepting certificate files can also accept PKCS#11 URLs, thus
 	requiring minor or no modifications to applications in order
 	to support HSMs. In most cases applications must be modified
 	to install a PIN callback function.
    </para>
    <para>
      The following example demonstrates the initialization of PKCS#11 support
      in GNUTLS and its usage to sign data.
    </para>
    <example id="ex-Defensive_Coding-HSM-GNUTLS">
 	<title>Signing data with HSM and GnuTLS</title>
 	<xi:include href="snippets/Features-HSM-GNUTLS.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      The PIN callback function can be either set globally as in
      the example above or locally by utilizing functions such as <literal>gnutls_privkey_set_pin_function</literal>.
      An example PIN callback function is shown below.
    </para>
    <example id="ex-Defensive_Coding-HSM-GNUTLS-PIN">
 	<title>An example PIN callback with GNUTLS</title>
 	<xi:include href="snippets/Features-HSM-GNUTLS-PIN.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
  </section>
  <section id="sect-Defensive_Coding-HSM-NSS">
    <title>NSS HSM Support</title>
    <para>
 	NSS supports PKCS#11 natively. In fact all NSS crypto operations,
        including built-in operations, go through PKCS #11 modules. NSS provides
        its own software PKCS #11 module called softoken. NSS automatically
        loads any PKCS #11 module specified in its module database, which can
        be manipulated with the modutil command. NSS uses the PKCS #11 module
        that contains the requested keys to do the crypto operations. As long as
        the application opens an NSS database, properly sets a PIN callback, and
        runs with native NSS, it should be able to use HSMs that provide PKCS #11
        modules. Modules can also be loaded programmatically, though this is less common.
    </para>
    <!-- loaded programatically = loaded automatically? -->
    <para>
      The following example demonstrates a typical NSS application for signing.
    </para>
    <example id="ex-Defensive_Coding-HSM-NSS">
 	<title>Signing data with HSM and NSS</title>
 	<xi:include href="snippets/Features-HSM-NSS.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      To use the example above with an HSM or smart card you will need to do the following.
    </para>
    <informalexample>
      <programlisting language="Bash">
 # add your HSM or token library to an NSS database (in the sample code the database is
 # located in the current directory '.')
 $ modutil -add "My HSM" -libfile ${path_to_pkcs11_file} -dbdir .
 # Find the token name on your HSM
 $ modutil -list -dbdir .
 # find the cert on your token
 $ certutil -L -h ${token_name} -d .
 # pass the cert to your signing program
 $ NSS_Sign_Example "${token_name}:${cert_name}"
      </programlisting>
    </informalexample>
    <example id="ex-Defensive_Coding-HSM-NSS-PIN">
 	<title>An example PIN callback with NSS</title>
 	<xi:include href="snippets/Features-HSM-NSS-PIN.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
  </section>
 </chapter>
--- a/en-US/Features-TLS.xml
+++ b/en-US/Features-TLS.xml
@ -1,941 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-TLS">
  <title>Transport Layer Security (TLS)</title>
  <para>
    Transport Layer Security (TLS, formerly Secure Sockets
    Layer/SSL) is the recommended way to protect integrity and
    confidentiality while data is transferred over an untrusted
    network connection, and to identify the endpoint. In this
    chapter, we describe the available libraries in Fedora as well
    as known pitfalls, and safe ways to write applications with them.
  </para>
  <para>
      When using any library, in addition to this guide, it is recommended to consult the
      library's documentation.
  </para>
  <itemizedlist>
    <listitem><para><ulink url="https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS">NSS documentation</ulink></para></listitem>
    <listitem><para><ulink url="http://www.gnutls.org/manual/">GNUTLS documentation</ulink></para></listitem>
    <listitem><para><ulink url="https://www.openssl.org/docs/">OpenSSL documentation</ulink></para></listitem>
    <listitem><para><ulink url="https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html">OpenJDK documentation</ulink></para></listitem>
  </itemizedlist>
  <section id="sect-Defensive_Coding-TLS-Pitfalls">
    <title>Common Pitfalls</title>
    <para>
      TLS implementations are difficult to use, and most of them lack
      a clean API design.  The following sections contain
      implementation-specific advice, and some generic pitfalls are
      mentioned below.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Most TLS implementations have questionable default TLS
 	  cipher suites.  Most of them enable anonymous Diffie-Hellman
 	  key exchange (but we generally want servers to authenticate
 	  themselves).  Many do not disable ciphers which are subject
 	  to brute-force attacks because of restricted key lengths.
 	  Some even disable all variants of AES in the default
 	  configuration.
 	</para>
 	<para>
 	  When overriding the cipher suite defaults, it is recommended
 	  to disable all cipher suites which are not present on a
 	  whitelist, instead of simply enabling a list of cipher
 	  suites.  This way, if an algorithm is disabled by default in
 	  the TLS implementation in a future security update, the
 	  application will not re-enable it.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The name which is used in certificate validation must match
 	  the name provided by the user or configuration file.  No host
 	  name canonicalization or IP address lookup must be performed.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The TLS handshake has very poor performance if the TCP Nagle
 	  algorithm is active.  You should switch on the
 	  <literal>TCP_NODELAY</literal> socket option (at least for the
 	  duration of the handshake), or use the Linux-specific
 	  <literal>TCP_CORK</literal> option.
 	</para>
 	<example id="ex-Defensive_Coding-TLS-Nagle">
 	  <title>Deactivating the TCP Nagle algorithm</title>
 	  <xi:include href="snippets/Features-TLS-Nagle.xml"
 		      xmlns:xi="http://www.w3.org/2001/XInclude" />
 	</example>
      </listitem>
      <listitem>
 	<para>
 	  Implementing proper session resumption decreases handshake
 	  overhead considerably.  This is important if the upper-layer
 	  protocol uses short-lived connections (like most applications
 	  of HTTPS).
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Both client and server should work towards an orderly
 	  connection shutdown, that is, send
 	  <literal>close_notify</literal> alerts and respond to them.
 	  This is especially important if the upper-layer protocol
 	  does not provide means to detect connection truncation (like
 	  some uses of HTTP).
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  When implementing a server using event-driven programming,
 	  it is important to handle the TLS handshake properly because
 	  it includes multiple network round-trips which can block
 	  when an ordinary TCP <function>accept</function> would not.
 	  Otherwise, a client which fails to complete the TLS
 	  handshake for some reason will prevent the server from
 	  handling input from other clients.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Unlike regular file descriptors, TLS connections cannot be
 	  passed between processes.  Some TLS implementations add
 	  additional restrictions, and TLS connections generally
 	  cannot be used across <function>fork</function> function
 	  calls (see <xref
 	  linkend="sect-Defensive_Coding-Tasks-Processes-Fork-Parallel"/>).
 	</para>
      </listitem>
    </itemizedlist>
    <section id="sect-Defensive_Coding-TLS-OpenSSL">
      <title>OpenSSL Pitfalls</title>
      <para>
 	Some OpenSSL functions use <emphasis>tri-state return
 	values</emphasis>.  Correct error checking is extremely
 	important.  Several functions return <literal>int</literal>
 	values with the following meaning:
      </para>
      <itemizedlist>
 	<listitem>
 	  <para>
 	    The value <literal>1</literal> indicates success (for
 	    example, a successful signature verification).
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    The value <literal>0</literal> indicates semantic
 	    failure (for example, a signature verification which was
 	    unsuccessful because the signing certificate was
 	    self-signed).
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    The value <literal>-1</literal> indicates a low-level
 	    error in the system, such as failure to allocate memory
 	    using <function>malloc</function>.
 	  </para>
 	</listitem>
      </itemizedlist>
      <para>
 	Treating such tri-state return values as booleans can lead
 	to security vulnerabilities.  Note that some OpenSSL
 	functions return boolean results or yet another set of
 	status indicators.  Each function needs to be checked
 	individually.
      </para>
      <para>
 	Recovering precise error information is difficult.
 	<xref linkend="ex-Defensive_Coding-TLS-OpenSSL-Errors"/>
 	shows how to obtain a more precise error code after a function
 	call on an <literal>SSL</literal> object has failed.  However,
 	there are still cases where no detailed error information is
 	available (e.g., if <function>SSL_shutdown</function> fails
 	due to a connection teardown by the other end).
      </para>
      <example id="ex-Defensive_Coding-TLS-OpenSSL-Errors">
 	<title>Obtaining OpenSSL error codes</title>
 	<xi:include href="snippets/Features-TLS-OpenSSL-Errors.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	The <function>OPENSSL_config</function> function is
 	documented to never fail.  In reality, it can terminate the
 	entire process if there is a failure accessing the
 	configuration file.  An error message is written to standard
 	error, but it might not be visible if the function is
 	called from a daemon process.
      </para>
      <para>
 	OpenSSL contains two separate ASN.1 DER decoders.  One set
 	of decoders operates on BIO handles (the input/output stream
 	abstraction provided by OpenSSL); their decoder function
 	names start with <literal>d2i_</literal> and end in
 	<literal>_fp</literal> or <literal>_bio</literal> (e.g.,
 	<function>d2i_X509_fp</function> or
 	<function>d2i_X509_bio</function>).  These decoders must not
 	be used for parsing data from untrusted sources; instead,
 	the variants without the <literal>_fp</literal> and
 	<literal>_bio</literal> suffixes (e.g.,
 	<function>d2i_X509</function>) shall be used.  The BIO
 	variants have received considerably less testing and are not
 	very robust.
      </para>
      <para>
 	For the same reason, the OpenSSL command line tools (such as
 	<command>openssl x509</command>) are generally less
 	robust than the actual library code.  They use the BIO
 	functions internally, and not the more robust variants.
      </para>
      <para>
 	The command line tools do not always indicate failure in the
 	exit status of the <application>openssl</application> process.
 	For instance, a verification failure in <command>openssl
 	verify</command> results in an exit status of zero.
      </para>
      <para>
 	OpenSSL command-line commands, such as <command>openssl
 	genrsa</command>, do not ensure that physical entropy is used
 	for key generation—they obtain entropy from
 	<filename>/dev/urandom</filename> and other sources, but not
 	from <filename>/dev/random</filename>.  This can result in
 	weak keys if the system lacks a proper entropy source (e.g., a
 	virtual machine with solid state storage).  Depending on local
 	policies, keys generated by these OpenSSL tools should not be
 	used in high-value, critical functions.
      </para>
      <para>
 	The OpenSSL server and client applications (<command>openssl
 	s_client</command> and <command>openssl s_server</command>)
 	are debugging tools and should <emphasis>never</emphasis> be
 	used as generic clients.  For instance, the
 	<application>s_client</application> tool reacts in a
 	surprising way to lines starting with <literal>R</literal> and
 	<literal>Q</literal>.
      </para>
      <para>
 	OpenSSL allows application code to access private key
 	material over documented interfaces.  This can significantly
 	increase the part of the code base which has to undergo
 	security certification.
      </para>
    </section>
    <section id="sect-Defensive_Coding-TLS-Pitfalls-GNUTLS">
      <title>GNUTLS Pitfalls</title>
      <para>
 	Older versions of GNUTLS had several peculiarities described
 	in previous versions of this guide; as of GNUTLS 3.3.10, these 
 	issues are no longer applicable.
      </para>
    </section>
    <section id="sect-Defensive_Coding-TLS-Pitfalls-OpenJDK">
      <title>OpenJDK Pitfalls</title>
      <para>
 	The Java cryptographic framework is highly modular.  As a
 	result, when you request an object implementing some
 	cryptographic functionality, you cannot be completely sure
 	that you end up with the well-tested, reviewed implementation
 	in OpenJDK.
      </para>
      <para>
 	OpenJDK (in the source code as published by Oracle) and other
 	implementations of the Java platform require that the system
 	administrator has installed so-called <emphasis>unlimited
 	strength jurisdiction policy files</emphasis>.  Without this
 	step, it is not possible to use the secure algorithms which
 	offer sufficient cryptographic strength.  Most downstream
 	redistributors of OpenJDK remove this requirement.
      </para>
      <para>
 	Some versions of OpenJDK use <filename>/dev/random</filename>
 	as the randomness source for nonces and other random data
 	which is needed for TLS operation, but does not actually
 	require physical randomness.  As a result, TLS applications
 	can block, waiting for more bits to become available in
 	<filename>/dev/random</filename>.
      </para>
    </section>
    <section id="sect-Defensive_Coding-TLS-Pitfalls-NSS">
      <title>NSS Pitfalls</title>
      <para>
 	NSS was not designed to be used by other libraries which can
 	be linked into applications without modifying them.  There is
 	a lot of global state.  There does not seem to be a way to
 	perform required NSS initialization without race conditions.
      </para>
      <para>
 	If the NSPR descriptor is in an unexpected state, the
 	<function>SSL_ForceHandshake</function> function can succeed,
 	but no TLS handshake takes place, the peer is not
 	authenticated, and subsequent data is exchanged in the clear.
      </para>
      <para>
 	NSS disables itself if it detects that the process underwent a
 	<function>fork</function> after the library has been
 	initialized.  This behavior is required by the PKCS#11 API
 	specification.
      </para>
    </section>
  </section>
  <section id="sect-Defensive_Coding-TLS-Client">
    <title>TLS Clients</title>
    <para>
      Secure use of TLS in a client generally involves all of the
      following steps.  (Individual instructions for specific TLS
      implementations follow in the next sections.)
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  The client must configure the TLS library to use a set of
 	  trusted root certificates.  These certificates are provided
 	  by the system in various formats and files. These are documented in the <literal>update-ca-trust</literal>
 	  man page in Fedora. Portable applications should not hard-code
 	  any paths; they should rely on APIs which set the default
 	  for the system trust store.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The client selects sufficiently strong cryptographic
 	  primitives and disables insecure ones (such as no-op
 	  encryption). Compression support and SSL version 3 or lower must be
 	  disabled (including the SSLv2-compatible handshake).
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The client initiates the TLS connection.  The Server Name
 	  Indication extension should be used if supported by the
 	  TLS implementation.  Before switching to the encrypted
 	  connection state, the contents of all input and output
 	  buffers must be discarded.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The client needs to validate the peer certificate provided
 	  by the server, that is, the client must check that there
 	  is a cryptographically protected chain from a trusted root
 	  certificate to the peer certificate.  (Depending on the
 	  TLS implementation, a TLS handshake can succeed even if
 	  the certificate cannot be validated.)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The client must check that the configured or user-provided
 	  server name matches the peer certificate provided by the
 	  server.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      It is safe to provide users detailed diagnostics on
      certificate validation failures.  Other causes of handshake
      failures and, generally speaking, any details on other errors
      reported by the TLS implementation (particularly exception
      tracebacks), must not be divulged in ways that make them
      accessible to potential attackers.  Otherwise, it is possible
      to create decryption oracles.
    </para>
      <important>
 	<para>
 	  Depending on the application, revocation checking (against
 	  certificate revocation lists or via OCSP) and session
 	  resumption are important aspects of a production-quality
 	  client.  These aspects are not yet covered.
 	</para>
      </important>
    <section>
      <title>Implementing TLS Clients With OpenSSL</title>
      <para>
 	In the following code, the error handling is only exploratory.
 	Proper error handling is required for production use,
 	especially in libraries.
 	<!-- FIXME: Cross-reference event-driven I/O section when it
 	     exists and mention that this is really quite complex to
 	     implement.  -->
      </para>
      <para>
 	The OpenSSL library needs explicit initialization (see <xref
 	linkend="ex-Defensive_Coding-TLS-OpenSSL-Init"/>).
      </para>
      <example id="ex-Defensive_Coding-TLS-OpenSSL-Init">
 	<title>OpenSSL library initialization</title>
 	<xi:include href="snippets/Features-TLS-Client-OpenSSL-Init.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	After that, a context object has to be created, which acts as
 	a factory for connection objects (<xref
 	linkend="ex-Defensive_Coding-TLS-Client-OpenSSL-CTX"/>).  We
 	use an explicit cipher list so that we do not pick up any
 	strange ciphers when OpenSSL is upgraded.  The actual version
 	requested in the client hello depends on additional
 	restrictions in the OpenSSL library.  If possible, you should
 	follow the example code and use the default list of trusted
 	root certificate authorities provided by the system because
 	you would have to maintain your own set otherwise, which can
 	be cumbersome.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-OpenSSL-CTX">
 	<title>OpenSSL client context creation</title>
 	<xi:include href="snippets/Features-TLS-Client-OpenSSL-CTX.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	A single context object can be used to create multiple
 	connection objects.  It is safe to use the same
 	<literal>SSL_CTX</literal> object for creating connections
 	concurrently from multiple threads, provided that the
 	<literal>SSL_CTX</literal> object is not modified (e.g.,
 	callbacks must not be changed).
      </para>
      <para>
 	After creating the TCP socket and disabling the Nagle
 	algorithm (per <xref
 	linkend="ex-Defensive_Coding-TLS-Nagle"/>), the actual
 	connection object needs to be created, as shown in <xref
 	linkend="ex-Defensive_Coding-TLS-Client-OpenSSL-Connect"/>.  If
 	the handshake started by <function>SSL_connect</function>
 	fails, the <function>ssl_print_error_and_exit</function>
 	function from <xref
 	linkend="ex-Defensive_Coding-TLS-OpenSSL-Errors"/> is called.
      </para>
      <para>
 	The <function>certificate_validity_override</function>
 	function provides an opportunity to override the validity of
 	the certificate in case the OpenSSL check fails.  If such
 	functionality is not required, the call can be removed;
 	otherwise, the application developer has to implement it.
      </para>
      <para>
 	The host name passed to the functions
 	<function>SSL_set_tlsext_host_name</function> and
 	<function>X509_check_host</function> must be the name that was
 	passed to <function>getaddrinfo</function> or a similar name
 	resolution function.  No host name canonicalization must be
 	performed.  The <function>X509_check_host</function> function
 	used in the final step for host name matching is currently
 	only implemented in OpenSSL 1.1, which is not released yet.
 	In case host name matching fails, the function
 	<function>certificate_host_name_override</function> is called.
 	This function should check a user-specific certificate store, to
 	allow a connection even if the host name does not match the
 	certificate.  This function has to be provided by the
 	application developer.  Note that the override must be keyed
 	by both the certificate <emphasis>and</emphasis> the host
 	name.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-OpenSSL-Connect">
 	<title>Creating a client connection using OpenSSL</title>
 	<xi:include href="snippets/Features-TLS-Client-OpenSSL-Connect.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	The connection object can be used for sending and receiving
 	data, as in <xref
 	linkend="ex-Defensive_Coding-TLS-OpenSSL-Connection-Use"/>.
 	It is also possible to create a <literal>BIO</literal> object
 	and use the <literal>SSL</literal> object as the underlying
 	transport, using <function>BIO_set_ssl</function>.
      </para>
      <example id="ex-Defensive_Coding-TLS-OpenSSL-Connection-Use">
 	<title>Using an OpenSSL connection to send and receive data</title>
 	<xi:include href="snippets/Features-TLS-Client-OpenSSL-Connection-Use.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	When it is time to close the connection, the
 	<function>SSL_shutdown</function> function needs to be called
 	twice for an orderly, synchronous connection termination
 	(<xref
 	linkend="ex-Defensive_Coding-TLS-OpenSSL-Connection-Close"/>).
 	This exchanges <literal>close_notify</literal> alerts with the
 	server.  The additional logic is required to deal with an
 	unexpected <literal>close_notify</literal> from the server.
 	Note that it is necessary to explicitly close the underlying
 	socket after the connection object has been freed.
      </para>
      <example id="ex-Defensive_Coding-TLS-OpenSSL-Connection-Close">
 	<title>Closing an OpenSSL connection in an orderly fashion</title>
 	<xi:include href="snippets/Features-TLS-OpenSSL-Connection-Close.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	<xref linkend="ex-Defensive_Coding-TLS-OpenSSL-Context-Close"/> shows how
 	to deallocate the context object when it is no longer needed
 	because no further TLS connections will be established.
      </para>
      <example id="ex-Defensive_Coding-TLS-OpenSSL-Context-Close">
 	<title>Deallocating an OpenSSL context object</title>
 	<xi:include href="snippets/Features-TLS-OpenSSL-Context-Close.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
    </section>
    <section id="sect-Defensive_Coding-TLS-Client-GNUTLS">
      <title>Implementing TLS Clients With GNUTLS</title>
      <para>
 	This section describes how to implement a TLS client with full
 	certificate validation (but without certificate revocation
 	checking).  Note that the error handling is only
 	exploratory and needs to be replaced before production use.
      </para>
      <para>
 	Before setting up TLS connections, a credentials object has
 	to be allocated and initialized with the set of trusted root
 	CAs (<xref
 	linkend="ex-Defensive_Coding-TLS-Client-GNUTLS-Credentials"/>).
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-GNUTLS-Credentials">
 	<title>Initializing a GNUTLS credentials structure</title>
 	<xi:include href="snippets/Features-TLS-Client-GNUTLS-Credentials.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	After the last TLS connection has been closed, this credentials
 	object should be freed:
      </para>
      <informalexample>
 	<xi:include href="snippets/Features-TLS-GNUTLS-Credentials-Close.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	During its lifetime, the credentials object can be used to
 	initialize TLS session objects from multiple threads, provided
 	that it is not changed.
      </para>
      <para>
 	Once the TCP connection has been established, the Nagle
 	algorithm should be disabled (see <xref
 	linkend="ex-Defensive_Coding-TLS-Nagle"/>).  After that, the
 	socket can be associated with a new GNUTLS session object.
 	The previously allocated credentials object provides the set
 	of root CAs.  Then the TLS handshake must be initiated. 
 	This is shown in <xref
 	linkend="ex-Defensive_Coding-TLS-Client-GNUTLS-Connect"/>.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-GNUTLS-Connect">
 	<title>Establishing a TLS client connection using GNUTLS</title>
 	<xi:include href="snippets/Features-TLS-Client-GNUTLS-Connect.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	After the handshake has been completed, the server certificate
 	needs to be verified against the server's hostname (<xref
 	linkend="ex-Defensive_Coding-TLS-Client-GNUTLS-Verify"/>).  In
 	the example, the user-defined
 	<function>certificate_validity_override</function> function is
 	called if the verification fails, so that a separate,
 	user-specific trust store can be checked.  This function call
 	can be omitted if the functionality is not needed.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-GNUTLS-Verify">
 	<title>Verifying a server certificate using GNUTLS</title>
 	<xi:include href="snippets/Features-TLS-Client-GNUTLS-Verify.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	An established TLS session can be used for sending and
 	receiving data, as in <xref
 	linkend="ex-Defensive_Coding-TLS-GNUTLS-Use"/>.
      </para>
      <example id="ex-Defensive_Coding-TLS-GNUTLS-Use">
 	<title>Using a GNUTLS session</title>
 	<xi:include href="snippets/Features-TLS-GNUTLS-Use.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	In order to shut down a connection in an orderly manner, you
 	should call the <function>gnutls_bye</function> function.
 	Finally, the session object can be deallocated using
 	<function>gnutls_deinit</function> (see <xref
 	linkend="ex-Defensive_Coding-TLS-GNUTLS-Disconnect"/>).
      </para>
      <example id="ex-Defensive_Coding-TLS-GNUTLS-Disconnect">
 	<title>Closing a GNUTLS session in an orderly fashion</title>
 	<xi:include href="snippets/Features-TLS-GNUTLS-Disconnect.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
    </section>
    <section id="sect-Defensive_Coding-TLS-Client-OpenJDK">
      <title>Implementing TLS Clients With OpenJDK</title>
      <para>
 	The examples below use the following cryptography-related
 	classes:
      </para>
      <informalexample>
 	<xi:include href="snippets/Features-TLS-Client-OpenJDK-Import.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	If compatibility with OpenJDK 6 is required, it is necessary
 	to use the internal class
 	<literal>sun.security.util.HostnameChecker</literal>.  (The
 	public OpenJDK API does not provide any support for dissecting
 	the subject distinguished name of an X.509 certificate, so a
 	custom-written DER parser is needed—or we have to use an
 	internal class, which we do below.)  In OpenJDK 7, the
 	<function>setEndpointIdentificationAlgorithm</function> method
 	was added to the
 	<literal>javax.net.ssl.SSLParameters</literal> class,
 	providing an official way to implement host name checking.
      </para>
      <para>
 	TLS connections are established using an
 	<literal>SSLContext</literal> instance.  With a properly
 	configured OpenJDK installation, the
 	<literal>SunJSSE</literal> provider uses the system-wide set
 	of trusted root certificate authorities, so no further
 	configuration is necessary.  For backwards compatibility with
 	OpenJDK&nbsp;6, the <literal>TLSv1</literal> provider has to
 	be supported as a fall-back option.  This is shown in <xref
 	linkend="ex-Defensive_Coding-TLS-Client-OpenJDK-Context"/>.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-OpenJDK-Context">
 	<title>Setting up an <literal>SSLContext</literal> for OpenJDK TLS
 	clients</title>
 	<xi:include href="snippets/Features-TLS-Client-OpenJDK-Context.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	In addition to the context, a TLS parameter object will be
 	needed which adjusts the cipher suites and protocols (<xref
 	linkend="ex-Defensive_Coding-TLS-OpenJDK-Parameters"/>).  Like
 	the context, these parameters can be reused for multiple TLS
 	connections.
      </para>
      <example id="ex-Defensive_Coding-TLS-OpenJDK-Parameters">
 	<title>Setting up <literal>SSLParameters</literal> for TLS use
 	with OpenJDK</title>
 	<xi:include href="snippets/Features-TLS-OpenJDK-Parameters.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	As initialized above, the parameter object does not yet
 	require host name checking.  This has to be enabled
 	separately, and this is only supported by OpenJDK 7 and later:
      </para>
      <informalexample>
 	<xi:include href="snippets/Features-TLS-Client-OpenJDK-Hostname.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	All application protocols can use the
 	<literal>"HTTPS"</literal> algorithm.  (The algorithms have
 	minor differences with regard to wildcard handling, which
 	should not matter in practice.)
      </para>
      <para>
 	<xref linkend="ex-Defensive_Coding-TLS-Client-OpenJDK-Connect"/>
 	shows how to establish the connection.  Before the handshake
 	is initialized, the protocol and cipher configuration has to
 	be performed, by applying the parameter object
 	<literal>params</literal>.  (After this point, changes to
 	<literal>params</literal> will not affect this TLS socket.)
 	As mentioned initially, host name checking requires using an
 	internal API on OpenJDK 6.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-OpenJDK-Connect">
 	<title>Establishing a TLS connection with OpenJDK</title>
 	<xi:include href="snippets/Features-TLS-Client-OpenJDK-Connect.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	Starting with OpenJDK 7, the last lines can be omitted,
 	provided that host name verification has been enabled by
 	calling the
 	<function>setEndpointIdentificationAlgorithm</function> method
 	on the <literal>params</literal> object (before it was applied
 	to the socket).
      </para>
      <para>
 	The TLS socket can be used as a regular socket, as shown in
 	<xref linkend="ex-Defensive_Coding-TLS-Client-OpenJDK-Use"/>.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-OpenJDK-Use">
 	<title>Using a TLS client socket in OpenJDK</title>
 	<xi:include href="snippets/Features-TLS-Client-OpenJDK-Use.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <section>
 	<title>Overriding server certificate validation with OpenJDK 6</title>
 	<para>
 	  Overriding certificate validation requires a custom trust
 	  manager.  With OpenJDK 6, the trust manager lacks
 	  information about the TLS session, and to which server the
 	  connection is made.  Certificate overrides have to be tied
 	  to specific servers (host names).  Consequently, different
 	  <literal>TrustManager</literal> and
 	  <literal>SSLContext</literal> objects have to be used for
 	  different servers.
 	</para>
 	<para>
 	  In the trust manager shown in <xref
 	  linkend="ex-Defensive_Coding-TLS-Client-MyTrustManager"/>,
 	  the server certificate is identified by its SHA-256 hash.
 	</para>
 	<example id="ex-Defensive_Coding-TLS-Client-MyTrustManager">
 	  <title>A custom trust manager for OpenJDK TLS clients</title>
 	  <xi:include href="snippets/Features-TLS-Client-OpenJDK-MyTrustManager.xml"
 		      xmlns:xi="http://www.w3.org/2001/XInclude" />
 	</example>
 	<para>
 	  This trust manager has to be passed to the
 	  <literal>init</literal> method of the
 	  <literal>SSLContext</literal> object, as shown in <xref
 	  linkend="ex-Defensive_Coding-TLS-Client-Context_For_Cert"/>.
 	</para>
 	<example id="ex-Defensive_Coding-TLS-Client-Context_For_Cert">
 	  <title>Using a custom TLS trust manager with OpenJDK</title>
 	  <xi:include href="snippets/Features-TLS-Client-OpenJDK-Context_For_Cert.xml"
 		      xmlns:xi="http://www.w3.org/2001/XInclude" />
 	</example>
 	<para>
 	  When certificate overrides are in place, host name
 	  verification should not be performed because there is no
 	  security requirement that the host name in the certificate
 	  matches the host name used to establish the connection (and
 	  it often will not).  However, without host name
 	  verification, it is not possible to perform transparent
 	  fallback to certificate validation using the system
 	  certificate store.
 	</para>
 	<para>
 	  The approach described above works with OpenJDK 6 and later
 	  versions.  Starting with OpenJDK 7, it is possible to use a
 	  custom subclass of the
 	  <literal>javax.net.ssl.X509ExtendedTrustManager</literal>
 	  class.  The OpenJDK TLS implementation will call the new
 	  methods, passing along TLS session information.  This can be
 	  used to implement certificate overrides as a fallback (if
 	  certificate or host name verification fails), and a trust
 	  manager object can be used for multiple servers because the
 	  server address is available to the trust manager.
 	</para>
      </section>
    </section>
    <section id="sect-Defensive_Coding-TLS-Client-NSS">
      <title>Implementing TLS Clients With NSS</title>
      <para>
 	The following code shows how to implement a simple TLS client
 	using NSS.  These instructions apply to NSS version 3.14 and
 	later.  Versions before 3.14 need different initialization
 	code.
 	</para>
 	<para>
 	  Keep in mind that the error handling needs to be improved
 	  before the code can be used in production.
      </para>
      <para>
 	Using NSS needs several header files, as shown in 
 	<xref linkend="ex-Defensive_Coding-TLS-NSS-Includes"/>.
      </para>
      <example id="ex-Defensive_Coding-TLS-NSS-Includes">
 	<title>Include files for NSS</title>
 	<xi:include href="snippets/Features-TLS-NSS-Includes.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	Initializing the NSS library is shown in <xref
 	linkend="ex-Defensive_Coding-TLS-NSS-Init"/>.  This
 	initialization procedure overrides global state.  We only call
 	<function>NSS_SetDomesticPolicy</function> if there are no
 	strong ciphers available, assuming that it has already been
 	called otherwise.  This avoids overriding the process-wide
 	cipher suite policy unnecessarily.
      </para>
      <para>
 	The simplest way to configure the trusted root certificates
 	involves loading the <filename>libnssckbi.so</filename> NSS
 	module with a call to the
 	<function>SECMOD_LoadUserModule</function> function.  The root
 	certificates are compiled into this module.  (The PEM module
 	for NSS, <filename>libnsspem.so</filename>, offers a way to
 	load trusted CA certificates from a file.)
      </para>
      <example id="ex-Defensive_Coding-TLS-NSS-Init">
 	<title>Initializing the NSS library</title>
 	<xi:include href="snippets/Features-TLS-NSS-Init.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	Some of the effects of the initialization can be reverted with
 	the following function calls:
      </para>
      <informalexample id="ex-Defensive_Coding-TLS-NSS-Close">
 	<xi:include href="snippets/Features-TLS-NSS-Close.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	After NSS has been initialized, the TLS connection can be
 	created (<xref
 	linkend="ex-Defensive_Coding-TLS-Client-NSS-Connect"/>).  The
 	internal <function>PR_ImportTCPSocket</function> function is
 	used to turn the POSIX file descriptor
 	<literal>sockfd</literal> into an NSPR file descriptor.  (This
 	function is de-facto part of the NSS public ABI, so it will
 	not go away.)  Creating the TLS-capable file descriptor
 	requires a <emphasis>model</emphasis> descriptor, which is
 	configured with the desired set of protocols.  The model
 	descriptor is not needed anymore after TLS support has been
 	activated for the existing connection descriptor.
      </para>
      <para>
 	The call to <function>SSL_BadCertHook</function> can be
 	omitted if no mechanism to override certificate verification
 	is needed.  The <literal>bad_certificate</literal> function
 	must check both the host name specified for the connection and
 	the certificate before granting the override.
      </para>
      <para>
 	Triggering the actual handshake requires three function calls,
 	<function>SSL_ResetHandshake</function>,
 	<function>SSL_SetURL</function>, and
 	<function>SSL_ForceHandshake</function>.  (If
 	<function>SSL_ResetHandshake</function> is omitted,
 	<function>SSL_ForceHandshake</function> will succeed, but the
 	data will not be encrypted.)  During the handshake, the
 	certificate is verified and matched against the host name.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-NSS-Connect">
 	<title>Creating a TLS connection with NSS</title>
 	<xi:include href="snippets/Features-TLS-Client-NSS-Connect.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	After the connection has been established, <xref
 	linkend="ex-Defensive_Coding-TLS-NSS-Use"/> shows how to use
 	the NSPR descriptor to communicate with the server.
      </para>
      <example id="ex-Defensive_Coding-TLS-NSS-Use">
 	<title>Using NSS for sending and receiving data</title>
 	<xi:include href="snippets/Features-TLS-NSS-Use.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	<xref linkend="ex-Defensive_Coding-TLS-Client-NSS-Close"/>
 	shows how to close the connection.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-NSS-Close">
 	<title>Closing NSS client connections</title>
 	<xi:include href="snippets/Features-TLS-Client-NSS-Close.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
    </section>
    <section id="sect-Defensive_Coding-TLS-Client-Python">
      <title>Implementing TLS Clients With Python</title>
      <para>
 	The Python distribution provides a TLS implementation in the
 	<literal>ssl</literal> module (actually a wrapper around
 	OpenSSL).  The exported interface is somewhat restricted, so
 	that the client code shown below does not fully implement the
 	recommendations in <xref
 	linkend="sect-Defensive_Coding-TLS-OpenSSL"/>.
      </para>
      <important>
 	<para>
 	  Currently, most Python functions which accept
 	  <literal>https://</literal> URLs or otherwise implement
 	  HTTPS support do not perform certificate validation at all.
 	  (For example, this is true for the <literal>httplib</literal>
 	  and <literal>xmlrpclib</literal> modules.)  If you use
 	  HTTPS, you should not use the built-in HTTP clients.  The
 	  <literal>Curl</literal> class in the <literal>curl</literal>
 	  module, as provided by the <literal>python-pycurl</literal>
 	  package, implements proper certificate validation.
 	</para>
      </important>
      <para>
 	The <literal>ssl</literal> module currently does not perform
 	host name checking on the server certificate.  <xref
 	linkend="ex-Defensive_Coding-TLS-Client-Python-check_host_name"/>
 	shows how to implement certificate matching, using the parsed
 	certificate returned by <function>getpeercert</function>.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-Python-check_host_name">
 	<title>Implementing TLS host name checking in Python (without
 	wildcard support)</title>
 	<xi:include href="snippets/Features-TLS-Client-Python-check_host_name.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	To turn a regular, connected TCP socket into a TLS-enabled
 	socket, use the <function>ssl.wrap_socket</function> function.
 	The function call in <xref
 	linkend="ex-Defensive_Coding-TLS-Client-Python-Connect"/>
 	provides additional arguments to override questionable
 	defaults in OpenSSL and in the Python module.
      </para>
      <itemizedlist>
 	<listitem>
 	  <para>
 	    <literal>ciphers="HIGH:-aNULL:-eNULL:-PSK:RC4-SHA:RC4-MD5"</literal>
 	    selects relatively strong cipher suites with
 	    certificate-based authentication.  (The call to the
 	    <function>check_host_name</function> function provides 
 	    additional protection against anonymous cipher suites.)
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    <literal>ssl_version=ssl.PROTOCOL_TLSv1</literal> disables
 	    SSL 2.0 support.  By default, the <literal>ssl</literal>
 	    module sends an SSL 2.0 client hello, which is rejected by
 	    some servers.  Ideally, we would request OpenSSL to
 	    negotiate the most recent TLS version supported by the
 	    server and the client, but the Python module does not
 	    allow this.
 	  </para>
 	</listitem>
 	<listitem>
          <para>
 	    <literal>cert_reqs=ssl.CERT_REQUIRED</literal> turns on
 	    certificate validation.
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    <literal>ca_certs='/etc/ssl/certs/ca-bundle.crt'</literal>
 	    initializes the certificate store with a set of trusted
 	    root CAs.  Unfortunately, it is necessary to hard-code
 	    this path into applications because the default path in
 	    OpenSSL is not available through the Python
 	    <literal>ssl</literal> module.
 	  </para>
 	</listitem>
      </itemizedlist>
      <para>
 	The <literal>ssl</literal> module (and OpenSSL) perform
 	certificate validation, but the certificate must be compared
 	manually against the host name, by calling the
 	<function>check_host_name</function> defined above.
      </para>
      <example id="ex-Defensive_Coding-TLS-Client-Python-Connect">
 	<title>Establishing a TLS client connection with Python</title>
 	<xi:include href="snippets/Features-TLS-Client-Python-Connect.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	After the connection has been established, the TLS socket can
 	be used like a regular socket:
      </para>
      <informalexample>
 	<xi:include href="snippets/Features-TLS-Python-Use.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
      <para>
 	Closing the TLS socket is straightforward as well:
      </para>
      <informalexample>
 	<xi:include href="snippets/Features-TLS-Python-Close.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </informalexample>
    </section>
  </section>
 </chapter>
--- a/en-US/Go.xml
+++ b/en-US/Go.xml
@ -1,110 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Go">
 <title>The Go Programming Language</title>
 <para>
  This chapter contains language-specific recommendations for Go.
 </para>
 <section id="chap-Defensive_Coding-Go-Memory_Safety">
  <title>Memory Safety</title>
  <para>
    Go provides memory safety, but only if the program is not executed
    in parallel (that is, <envar>GOMAXPROCS</envar> is not larger than
    <literal>1</literal>).  The reason is that interface values and
    slices consist of multiple words which are not updated atomically.
    Another thread of execution can observe an inconsistent pairing
    between type information and stored value (for interfaces) or
    pointer and length (for slices), and such inconsistency can lead
    to a memory safety violation.
  </para>
  <para>
    Code which does not run in parallel and does not use the
    <literal>unsafe</literal> package (or other packages which expose
    unsafe constructs) is memory-safe.  For example, invalid casts and
    out-of-range subscripting cause panics at run time.
  </para>
  <para>
    Keep in mind that finalization can introduce parallelism because
    finalizers are executed concurrently, potentially interleaved with
    the rest of the program.
  </para>
 </section>
 <section id="chap-Defensive_Coding-Go-Error_Handling">
  <title>Error Handling</title>
  <para>
    Only a few common operations (such as pointer dereference, integer
    division, array subscripting) trigger exceptions in Go, called
    <emphasis>panics</emphasis>.  Most interfaces in the standard
    library use a separate return value of type
    <literal>error</literal> to signal error.
  </para>
  <para>
    Not checking error return values can lead to incorrect operation
    and data loss (especially in the case of writes, using interfaces
    such as <literal>io.Writer</literal>).
  </para>
  <para>
    The correct way to check error return values depends on the
    function or method being called.  In the majority of cases, the
    first step after calling a function should be an error check
    against the <literal>nil</literal> value, handling any encountered
    error.  See <xref
    linkend="ex-Defensive_Coding-Go-Error_Handling-Regular"/> for
    details.
  </para>
  <example id="ex-Defensive_Coding-Go-Error_Handling-Regular">
    <title>Regular error handling in Go</title>
    <xi:include href="snippets/Go-Error_Handling-Regular.xml"
 		xmlns:xi="http://www.w3.org/2001/XInclude" />
  </example>
  <para>
    However, with <literal>io.Reader</literal>,
    <literal>io.ReaderAt</literal> and related interfaces, it is
    necessary to check for a non-zero number of read bytes first, as
    shown in <xref
    linkend="ex-Defensive_Coding-Go-Error_Handling-IO"/>.  If this
    pattern is not followed, data loss may occur.  This is due to the
    fact that the <literal>io.Reader</literal> interface permits
    returning both data and an error at the same time.
  </para>
  <example id="ex-Defensive_Coding-Go-Error_Handling-IO">
    <title>Read error handling in Go</title>
    <xi:include href="snippets/Go-Error_Handling-IO.xml"
 		xmlns:xi="http://www.w3.org/2001/XInclude" />
  </example>
 </section>
 <section id="chap-Defensive_Coding-Go-Garbage_Collector">
  <title>Garbage Collector</title>
  <para>
    Older Go releases (before Go 1.3) use a conservative garbage
    collector without blacklisting.  This means that data blobs can
    cause retention of unrelated data structures because the data is
    conservatively interpreted as pointers.  This phenomenon can be
    triggered accidentally on 32-bit architectures and is more likely
    to occur if the heap grows larger.  On 64-bit architectures, it
    may be possible to trigger it deliberately—it is unlikely to occur
    spontaneously.
  </para>
 </section>
 <section id="chap-Defensive_Coding-Go-Marshaling">
  <title>Marshaling and Unmarshaling</title>
  <para>
    Several packages in the <literal>encoding</literal> hierarchy
    provide support for serialization and deserialization.  The usual
    caveats apply (see
    <xref linkend="chap-Defensive_Coding-Tasks-Serialization"/>).
  </para>
  <para>
    As an additional precaution, the <function>Unmarshal</function>
    and <function>Decode</function> functions should only be used with
    fresh values in the <literal>interface{}</literal> argument.  This
    is due to the way defaults for missing values are implemented:
    During deserialization, missing values do not result in an error,
    but the original value is preserved.  Using a fresh value (with
    suitable default values if necessary) ensures that data from a
    previous deserialization operation does not leak into the current
    one.  This is especially relevant when structs are deserialized.
  </para>
 </section>
 </chapter>
--- a/en-US/Java-Language.xml
+++ b/en-US/Java-Language.xml
@ -1,291 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-Java-Language">
  <title>The Core Language</title>
  <para>
    Implementations of the Java programming language provide strong
    memory safety, even in the presence of data races in concurrent
    code.  This prevents a large range of security vulnerabilities
    from occurring, unless certain low-level features are used; see
    <xref linkend="sect-Defensive_Coding-Java-LowLevel"/>.
  </para>
  <section id="sect-Defensive_Coding-Java-Language-ReadArray">
    <title>Increasing Robustness when Reading Arrays</title>
    <para>
      External data formats often include arrays, and the data is
      stored as an integer indicating the number of array elements,
      followed by this number of elements in the file or protocol data
      unit.  The specified length can be much larger than what is
      actually available in the data source.
    </para>
    <para>
      To avoid allocating extremely large amounts of data, you can
      allocate a small array initially and grow it as you read more
      data, implementing an exponential growth policy.  See the
      <function>readBytes(InputStream, int)</function> function in
      <xref linkend="ex-Defensive_Coding-Java-Language-ReadArray"/>.
    </para>
    <example id="ex-Defensive_Coding-Java-Language-ReadArray">
      <title>Incrementally reading a byte array</title>
      <xi:include href="snippets/Java-Language-ReadArray.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      When reading data into arrays, hash maps or hash sets, use the
      default constructor and do not specify a size hint.  You can
      simply add the elements to the collection as you read them.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-Language-Resources">
    <title>Resource Management</title>
    <para>
      Unlike C++, Java does not offer destructors which can deallocate
      resources in a predictable fashion.  All resource management has
      to be manual, at the usage site.  (Finalizers are generally not
      usable for resource management, especially in high-performance
      code; see <xref
      linkend="sect-Defensive_Coding-Java-Language-Finalizers"/>.)
    </para>
    <para>
      The first option is the
      <literal>try</literal>-<literal>finally</literal> construct, as
      shown in <xref linkend="ex-Defensive_Coding-Java-Language-Finally"/>.
      The code in the <literal>finally</literal> block should be as short as
      possible and should not throw any exceptions.
    </para>
    <example id="ex-Defensive_Coding-Java-Language-Finally">
      <title>Resource management with a
      <literal>try</literal>-<literal>finally</literal> block</title>
      <xi:include href="snippets/Java-Finally.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      Note that the resource allocation happens
      <emphasis>outside</emphasis> the <literal>try</literal> block,
      and that there is no <literal>null</literal> check in the
      <literal>finally</literal> block.  (Both are common artifacts
      stemming from IDE code templates.)
    </para>
    <para>
      If the resource object is created freshly and implements the
      <literal>java.lang.AutoCloseable</literal> interface, the code
      in <xref
      linkend="ex-Defensive_Coding-Java-Language-TryWithResource"/> can be
      used instead.  The Java compiler will automatically insert the
      <function>close()</function> method call in a synthetic
      <literal>finally</literal> block.
    </para>
    <example id="ex-Defensive_Coding-Java-Language-TryWithResource">
      <title>Resource management using the
      <literal>try</literal>-with-resource construct</title>
      <xi:include href="snippets/Java-TryWithResource.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      To be compatible with the <literal>try</literal>-with-resource
      construct, new classes should name the resource deallocation
      method <function>close()</function>, and implement the
      <literal>AutoCloseable</literal> interface (the latter breaking
      backwards compatibility with Java 6).  However, using the
      <literal>try</literal>-with-resource construct with objects that
      are not freshly allocated is at best awkward, and an explicit
      <literal>finally</literal> block is usually the better approach.
    </para>
    <para>
      In general, it is best to design the programming interface in
      such a way that resource deallocation methods like
      <function>close()</function> cannot throw any (checked or
      unchecked) exceptions, but this should not be a reason to ignore
      any actual error conditions.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-Language-Finalizers">
    <title>Finalizers</title>
    <para>
      Finalizers can be used as a last-resort approach to free resources
      which would otherwise leak.  Finalization is unpredictable,
      costly, and there can be a considerable delay between the last
      reference to an object going away and the execution of the
      finalizer.  Generally, manual resource management is required;
      see <xref linkend="sect-Defensive_Coding-Java-Language-Resources"/>.
    </para>
    <para>
      Finalizers should be very short and should only deallocate
      native or other external resources held directly by the object
      being finalized.  In general, they must use synchronization:
      Finalization necessarily happens on a separate thread because it is
      inherently concurrent.  There can be multiple finalization
      threads, and despite each object being finalized at most once,
      the finalizer must not assume that it has exclusive access to
      the object being finalized (in the <literal>this</literal>
      pointer).
    </para>
    <para>
      Finalizers should not deallocate resources held by other
      objects, especially if those objects have finalizers of their
      own.  In particular, it is a very bad idea to define a finalizer
      just to invoke the resource deallocation method of another object,
      or overwrite some pointer fields.
    </para>
    <para>
      Finalizers are not guaranteed to run at all.  For instance, the
      virtual machine (or the machine underneath) might crash,
      preventing their execution.
    </para>
    <para>
      Objects with finalizers are garbage-collected much later than
      objects without them, so using finalizers to zero out key
      material (to reduce its undecrypted lifetime in memory) may have
      the opposite effect, keeping objects around for much longer and
      preventing them from being overwritten in the normal course of
      program execution.
    </para>
    <para>
      For the same reason, code which allocates objects with
      finalizers at a high rate will eventually fail (likely with a
      <literal>java.lang.OutOfMemoryError</literal> exception) because
      the virtual machine has finite resources for keeping track of
      objects pending finalization.  To deal with that, it may be
      necessary to recycle objects with finalizers.
    </para>
    <para>
      The remarks in this section apply to finalizers which are
      implemented by overriding the <function>finalize()</function>
      method, and to custom finalization using reference queues.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-Language-Exceptions">
    <title>Recovering from Exceptions and Errors</title>
    <para>
      Java exceptions come in three kinds, all ultimately deriving
      from <literal>java.lang.Throwable</literal>:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  <emphasis>Run-time exceptions</emphasis> do not have to be
 	  declared explicitly and can be explicitly thrown from any
 	  code, by calling code which throws them, or by triggering an
 	  error condition at run time, like division by zero, or an
 	  attempt at an out-of-bounds array access.  These exceptions
 	  derive from the
 	  <literal>java.lang.RuntimeException</literal> class (perhaps
 	  indirectly).
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <emphasis>Checked exceptions</emphasis> have to be declared
 	  explicitly by functions that throw or propagate them.  They
 	  are similar to run-time exceptions in other regards, except
 	  that there is no language construct to throw them (except
 	  the <literal>throw</literal> statement itself).  Checked
 	  exceptions are only present at the Java language level and
 	  are only enforced at compile time.  At run time, the virtual
 	  machine does not know about them and permits throwing
 	  exceptions from any code.  Checked exceptions must derive
 	  (perhaps indirectly) from the
 	  <literal>java.lang.Exception</literal> class, but not from 
 	  <literal>java.lang.RuntimeException</literal>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <emphasis>Errors</emphasis> are exceptions which typically
 	  reflect serious error conditions.  They can be thrown at any
 	  point in the program, and do not have to be declared (unlike
 	  checked exceptions).  In general, it is not possible to
 	  recover from such errors; more on that below, in <xref
 	  linkend="sect-Defensive_Coding-Java-Language-Exceptions-Errors"/>.
 	  Error classes derive (perhaps indirectly) from
 	  <literal>java.lang.Error</literal>, or from
 	  <literal>java.lang.Throwable</literal>, but not from
 	  <literal>java.lang.Exception</literal>.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      The general expectation is that run-time errors are avoided by
      careful programming (e.g., not dividing by zero).  Checked
      exceptions are expected to be caught as they happen (e.g., when
      an input file is unexpectedly missing).  Errors are impossible
      to predict and can happen at any point and reflect that
      something went wrong beyond all expectations.
    </para>
    <section id="sect-Defensive_Coding-Java-Language-Exceptions-Errors">
      <title>The Difficulty of Catching Errors</title>
      <para>
 	Errors (that is, exceptions which do not (indirectly) derive
 	from <literal>java.lang.Exception</literal>), have the
 	peculiar property that catching them is problematic.  There
 	are several reasons for this:
      </para>
      <itemizedlist>
 	<listitem>
 	  <para>
 	    The error reflects a failed consistency check, for example,
 	    <literal>java.lang.AssertionError</literal>.
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    The error can happen at any point, resulting in
 	    inconsistencies due to half-updated objects.  Examples are
 	    <literal>java.lang.ThreadDeath</literal>,
 	    <literal>java.lang.OutOfMemoryError</literal> and
 	    <literal>java.lang.StackOverflowError</literal>.
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    The error indicates that the virtual machine failed to provide
 	    some semantic guarantees made by the Java programming language.
 	    <literal>java.lang.ExceptionInInitializerError</literal>
 	    is an example—it can leave behind a half-initialized
 	    class.
 	  </para>
 	</listitem>
      </itemizedlist>
      <para>
 	In general, if an error is thrown, the virtual machine should
 	be restarted as soon as possible because it is in an
 	inconsistent state.  Continuing running as before can have
 	unexpected consequences.  However, there are legitimate
 	reasons for catching errors because not doing so leads to even
 	greater problems.
      </para>
      <para>
 	Code should be written in a way that avoids triggering errors.
 	See <xref linkend="sect-Defensive_Coding-Java-Language-ReadArray"/>
 	for an example.
      </para>
      <para>
 	It is usually necessary to log errors.  Otherwise, no trace of
 	the problem might be left anywhere, making it very difficult
 	to diagnose related failures.  Consequently, if you catch
 	<literal>java.lang.Exception</literal> to log and suppress all
 	unexpected exceptions (for example, in a request dispatching
 	loop), you should consider switching to
 	<literal>java.lang.Throwable</literal> instead, to also cover
 	errors.
      </para>
      <para>
 	The other reason mainly applies to such request dispatching
 	loops: If you do not catch errors, the loop stops looping,
 	resulting in a denial of service.
      </para>
      <para>
 	However, if possible, catching errors should be coupled with a
 	way to signal the requirement of a virtual machine restart.
      </para>
    </section>
  </section>
 </section>
--- a/en-US/Java-LowLevel.xml
+++ b/en-US/Java-LowLevel.xml
@ -1,157 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-Java-LowLevel">
  <title>Low-level Features of the Virtual Machine</title>
  <section id="sect-Defensive_Coding-Java-Reflection">
    <title><literal>Reflection and Private Parts</literal></title>
    <para>
      The <function>setAccessible(boolean)</function> method of the
      <literal>java.lang.reflect.AccessibleObject</literal> class
      allows a program to disable language-defined access rules for
      specific constructors, methods, or fields.  Once the access
      checks are disabled, any code can use the
      <literal>java.lang.reflect.Constructor</literal>,
      <literal>java.lang.reflect.Method</literal>, or
      <literal>java.lang.reflect.Field</literal> object to access the
      underlying Java entity, without further permission checks.  This
      breaks encapsulation and can undermine the stability of the
      virtual machine.  (In contrast, without using the
      <function>setAccessible(boolean)</function> method, this should
      not happen because all the language-defined checks still apply.)
    </para>
    <para>
      This feature should be avoided if possible.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-JNI">
    <title>Java Native Interface (JNI)</title>
    <para>
      The Java Native Interface allows calling from Java code
      functions specifically written for this purpose, usually in C or
      C++.
    </para>
    <para>
      The transition between the Java world and the C world is not
      fully type-checked, and the C code can easily break the Java
      virtual machine semantics.  Therefore, extra care is needed when
      using this functionality.
    </para>
    <para>
      To provide a moderate amount of type safety, it is recommended
      to recreate the class-specific header file using
      <application>javah</application> during the build process,
      include it in the implementation, and use the
      <option>-Wmissing-declarations</option> option.
    </para>
    <para>
      Ideally, the required data is directly passed to static JNI
      methods and returned from them, and the code on the C side does
      not have to deal with accessing Java fields (or even methods).
    </para>
    <para>
      When using <function>GetPrimitiveArrayCritical</function> or
      <function>GetStringCritical</function>, make sure that you only
      perform very little processing between the get and release
      operations.  Do not access the file system or the network, and
      do not perform locking, because that might introduce blocking.
      When processing large strings or arrays, consider splitting the
      computation into multiple sub-chunks, so that you do not prevent
      the JVM from reaching a safepoint for extended periods of time.
    </para>
    <para>
      If necessary, you can use the Java <literal>long</literal> type
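      to store a C pointer in a field of a Java class.  On the C side,
      when casting between the <literal>jlong</literal> value and the
      pointer, use an intermediate cast to <literal>intptr_t</literal>
      or <literal>uintptr_t</literal> to avoid compiler warnings about
      casts between integers and pointers of different sizes.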
    </para>
    <para>
      You should not try to perform pointer arithmetic on the Java
      side (that is, you should treat pointer-carrying
      <literal>long</literal> values as opaque).  When passing a slice
      of an array to the native code, follow the Java convention and
      pass it as the base array, the integer offset of the start of
      the slice, and the integer length of the slice.  On the native
      side, check the offset/length combination against the actual
      array length, and use the offset to compute the pointer to the
      beginning of the slice.
    </para>
    <example id="ex-Defensive_Coding-Java-JNI-Pointers">
      <title>Array length checking in JNI code</title>
      <xi:include href="snippets/Java-JNI-Pointers.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      In any case, classes referring to native resources must be
      declared <literal>final</literal>, and must not be serializable
      or cloneable.  Initialization and mutation of the state used by
      the native side must be controlled carefully.  Otherwise, it
      might be possible to create an object with inconsistent native
      state which results in a crash (or worse) when used (or perhaps
      only finalized) later.  If you need both Java inheritance and
      native resources, you should consider moving the native state to
      a separate class, and only keep a reference to objects of that
      class.  This way, cloning and serialization issues can be
      avoided in most cases.
    </para>
    <para>
      If there are native resources associated with an object, the
      class should have an explicit resource deallocation method
      (<xref
      linkend="sect-Defensive_Coding-Java-Language-Resources"/>) and a
      finalizer (<xref
      linkend="sect-Defensive_Coding-Java-Language-Finalizers"/>) as a
      last resort.  The need for finalization means that a minimum
      amount of synchronization is needed.  Code on the native side
      should check that the object is not in a closed/freed state.
    </para>
    <para>
      Many JNI functions create local references.  By default, these
      persist until the JNI-implemented method returns.  If you create
      many such references (e.g., in a loop), you may have to free
      them using <function>DeleteLocalRef</function>, or start using
      <function>PushLocalFrame</function> and
      <function>PopLocalFrame</function>.  Global references must be
      deallocated with <function>DeleteGlobalRef</function>, otherwise
      there will be a memory leak, just as with
      <function>malloc</function> and <function>free</function>.
    </para>
    <para>
      When throwing exceptions using <function>Throw</function> or
      <function>ThrowNew</function>, be aware that these functions
      return regularly.  You have to return control manually to the
      JVM.
    </para>
    <para>
      Technically, the <literal>JNIEnv</literal> pointer is not
      necessarily constant during the lifetime of your JNI module.
      Storing it in a global variable is therefore incorrect.
      Particularly if you are dealing with callbacks, you may have to
      store the pointer in a thread-local variable (defined with
      <literal>__thread</literal>).  It is, however, best to avoid the
      complexity of calling back into Java code.
    </para>
    <para>
      Keep in mind that C/C++ and Java are different languages,
      despite very similar syntax for expressions.  The Java memory
      model is much more strict than the C or C++ memory models, and
      native code needs more synchronization, usually using JVM
      facilities or POSIX threads mutexes.  Integer overflow in Java
      is defined, but in C/C++ it is not (for the
      <literal>jint</literal> and <literal>jlong</literal> types).
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-MiscUnsafe">
    <title><literal>sun.misc.Unsafe</literal></title>
    <para>
      The <literal>sun.misc.Unsafe</literal> class is unportable and
      contains many functions explicitly designed to break Java memory
      safety (for performance and debugging).  If possible, avoid
      using this class.
    </para>
  </section>
 </section>
--- a/en-US/Java-SecurityManager.xml
+++ b/en-US/Java-SecurityManager.xml
@ -1,292 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <section id="sect-Defensive_Coding-Java-SecurityManager">
  <title>Interacting with the Security Manager</title>
  <para>
    The Java platform is largely implemented in the Java language
    itself.  Therefore, within the same JVM, code runs which is part
    of the Java installation and which is trusted, but there might
    also be code which comes from untrusted sources and is restricted
    by the Java sandbox (to varying degrees).  The <emphasis>security
    manager</emphasis> draws a line between fully trusted, partially
    trusted and untrusted code.
  </para>
  <para>
    The type safety and accessibility checks provided by the Java
    language and JVM would be sufficient to implement a sandbox.
    However, only some Java APIs employ such a capabilities-based
    approach.  (The Java SE library contains many public classes with
    public constructors which can break any security policy, such as
    <literal>java.io.FileOutputStream</literal>.)  Instead, critical
    functionality is protected by <emphasis>stack
    inspection</emphasis>: At a security check, the stack is walked
    from top (most-nested) to bottom.  The security check fails if a
    stack frame for a method is encountered whose class lacks the
    permission which the security check requires.
  </para>
  <para>
    This simple approach would not allow untrusted code (which lacks
    certain permissions) to call into trusted code while the latter
    retains trust.  Such trust transitions are desirable because they
    enable Java as an implementation language for most parts of the
    Java platform, including security-relevant code.  Therefore, there
    is a mechanism to mark certain stack frames as trusted (<xref
    linkend="sect-Defensive_Coding-Java-SecurityManager-Privileged"/>).
  </para>
  <para>
    In theory, it is possible to run a Java virtual machine with a
    security manager that acts very differently from this approach,
    but a lot of code expects behavior very close to the platform
    default (including many classes which are part of the OpenJDK
    implementation).
  </para>
  <section id="sect-Defensive_Coding-Java-SecurityManager-Compatible">
    <title>Security Manager Compatibility</title>
    <para>
      A lot of code can run without any additional permissions at all,
      with few changes.  The following guidelines should help to
      increase compatibility with a restrictive security manager.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  When retrieving system properties using
 	  <function>System.getProperty(String)</function> or similar
 	  methods, catch <literal>SecurityException</literal>
 	  exceptions and treat the property as unset.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Avoid unnecessary file system or network access.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Avoid explicit class loading.  Access to a suitable class
 	  loader might not be available when executing as untrusted
 	  code.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      If the functionality you are implementing absolutely requires
      privileged access and this functionality has to be used from
      untrusted code (hopefully in a restricted and secure manner),
      see <xref
      linkend="sect-Defensive_Coding-Java-SecurityManager-Privileged"/>.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-SecurityManager-Activate">
    <title>Activating the Security Manager</title>
    <para>
      The usual command to launch a Java application,
      <command>java</command>, does not activate the security manager.
      Therefore, the virtual machine does not enforce any sandboxing
      restrictions, even if explicitly requested by the code (for
      example, as described in <xref
      linkend="sect-Defensive_Coding-Java-SecurityManager-Unprivileged"/>).
    </para>
    <para>
      The <option>-Djava.security.manager</option> option activates
      the security manager, with the fairly restrictive default
      policy.  With a very permissive policy, most Java code will run
      unchanged.  Assuming the policy in <xref
      linkend="ex-Defensive_Coding-Java-SecurityManager-GrantAll"/>
      has been saved in a file <filename>grant-all.policy</filename>,
      this policy can be activated using the option
      <option>-Djava.security.policy=grant-all.policy</option> (in
      addition to the <option>-Djava.security.manager</option>
      option).
    </para>
    <example id="ex-Defensive_Coding-Java-SecurityManager-GrantAll">
      <title>Most permissive OpenJDK policy file</title>
 <programlisting>
 grant {
      permission java.security.AllPermission;
 };
 </programlisting>
    </example>
    <para>
      With this most permissive policy, the security manager is still
      active, and explicit requests to drop privileges will be
      honored.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-SecurityManager-Unprivileged">
    <title>Reducing Trust in Code</title>
    <para>
      <xref linkend="ex-Defensive_Coding-Java-SecurityManager-Unprivileged"/>
      shows how to run a piece of code with reduced privileges.
    </para>
    <example id="ex-Defensive_Coding-Java-SecurityManager-Unprivileged">
      <title>Using the security manager to run code with reduced
      privileges</title>
      <xi:include href="snippets/Java-SecurityManager-Unprivileged.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      The example above does not add any additional permissions to the
      <literal>permissions</literal> object.  If such permissions are
      necessary, code like the following (which grants read permission
      on all files in the current directory) can be used:
    </para>
    <informalexample>
      <xi:include href="snippets/Java-SecurityManager-CurrentDirectory.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </informalexample>
    <important>
      <para>
 	Calls to the
 	<function>java.security.AccessController.doPrivileged()</function>
 	methods do not enforce any additional restriction if no
 	security manager has been set.  Except for a few special
 	exceptions, the restrictions no longer apply if the
 	<function>doPrivileged()</function> method has returned, even to
 	objects created by the code which ran with reduced privileges.
 	(This applies to object finalization in particular.)
      </para>
      <para>
 	The example code above does not prevent the called code from
 	calling the
 	<function>java.security.AccessController.doPrivileged()</function>
 	methods.  This mechanism should be considered an additional
 	safety net, but it still can be used to prevent unexpected
 	behavior of trusted code.  As long as the executed code is not
 	dynamic and came with the original application or library, the
 	sandbox is fairly effective.
      </para>
      <para>
 	The <literal>context</literal> argument in <xref
 	linkend="ex-Defensive_Coding-Java-SecurityManager-Unprivileged"/>
 	is extremely important—otherwise, this code would increase
 	privileges instead of reducing them.
      </para>
    </important>
    <para>
      For activating the security manager, see <xref
      linkend="sect-Defensive_Coding-Java-SecurityManager-Activate"/>.
      Unfortunately, this affects the virtual machine as a whole, so
      it is not possible to do this from a library.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Java-SecurityManager-Privileged">
    <title>Re-gaining Privileges</title>
    <para>
      Ordinarily, when trusted code is called from untrusted code, it
      loses its privileges (because of the untrusted stack frames
      visible to stack inspection).  The
      <function>java.security.AccessController.doPrivileged()</function>
      family of methods provides a controlled backdoor from untrusted
      to trusted code.
    </para>
    <important>
      <para>
 	By design, this feature can undermine the Java security model
 	and the sandbox.  It has to be used very carefully.  Most
 	sandbox vulnerabilities can be traced back to its misuse.
       </para>
    </important>
    <para>
      In essence, the <function>doPrivileged()</function> methods
      cause the stack inspection to end at their call site.  Untrusted
      code further down the call stack becomes invisible to security
      checks.
    </para>
    <para>
      The following operations are common and safe to perform with
      elevated privileges.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Reading custom system properties with fixed names,
 	  especially if the value is not propagated to untrusted code.
 	  (File system paths including installation paths, host names
 	  and user names are sometimes considered private information
 	  and need to be protected.)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Reading from the file system at fixed paths, either
 	  determined at compile time or by a system property.  Again,
 	  leaking the file contents to the caller can be problematic.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Accessing network resources under a fixed address, name or
 	  URL, derived from a system property or configuration file,
 	  information leaks notwithstanding.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      <xref linkend="ex-Defensive_Coding-Java-SecurityManager-Privileged"/>
      shows how to request additional privileges.
    </para>
    <example id="ex-Defensive_Coding-Java-SecurityManager-Privileged">
      <title>Using the security manager to run code with increased
      privileges</title>
      <xi:include href="snippets/Java-SecurityManager-Privileged.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
    <para>
      Obviously, this only works if the class containing the call to
      <function>doPrivileged()</function> is marked trusted (usually
      because it is loaded from a trusted class loader).
    </para>
    <para>
      When writing code that runs with elevated privileges, make sure
      that you follow the rules below.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Make the privileged code as small as possible.  Perform as
 	  many computations as possible before and after the
 	  privileged code section, even if it means that you have to
 	  define a new class to pass the data around.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Make sure that you either control the inputs to the
 	  privileged code, or that the inputs are harmless and cannot
 	  affect security properties of the privileged code.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Data that is returned from or written by the privileged code
 	  must either be restricted (that is, it cannot be accessed by
 	  untrusted code), or must be harmless.  Otherwise, privacy
 	  leaks or information disclosures which affect security
 	  properties can be the result.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      If the code calls back into untrusted code at a later stage (or
      performs other actions under control from the untrusted caller),
      you must obtain the original security context and restore it
      before performing the callback, as in <xref
      linkend="ex-Defensive_Coding-Java-SecurityManager-Callback"/>.
      (In this example, it would be much better to move the callback
      invocation out of the privileged code section, of course.)
    </para>
    <example id="ex-Defensive_Coding-Java-SecurityManager-Callback">
      <title>Restoring privileges when invoking callbacks</title>
      <xi:include href="snippets/Java-SecurityManager-Callback.xml"
 		  xmlns:xi="http://www.w3.org/2001/XInclude" />
    </example>
  </section>
 </section>
--- a/en-US/Java.xml
+++ b/en-US/Java.xml
@ -1,11 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Java">
  <title>The Java Programming Language</title>
  <xi:include href="Java-Language.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
  <xi:include href="Java-LowLevel.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
  <xi:include href="Java-SecurityManager.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
 </chapter>
--- a/en-US/Python.xml
+++ b/en-US/Python.xml
@ -1,74 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Python">
  <title>The Python Programming Language</title>
  <para>
    Python provides memory safety by default, so low-level security
    vulnerabilities are rare and typically require fixing the Python
    interpreter or standard library itself.
  </para>
  <para>
    Other sections with Python-specific advice include:
  </para>
  <itemizedlist>
    <listitem>
      <para>
 	<xref linkend="chap-Defensive_Coding-Tasks-Temporary_Files"/>
      </para>
    </listitem>
    <listitem>
      <para>
 	<xref linkend="sect-Defensive_Coding-Tasks-Processes-Creation"/>
      </para>
    </listitem>
    <listitem>
      <para>
 	<xref linkend="chap-Defensive_Coding-Tasks-Serialization"/>, in
 	particular <xref linkend="sect-Defensive_Coding-Tasks-Serialization-Library"/>
      </para>
    </listitem>
    <listitem>
      <para>
 	<xref linkend="sect-Defensive_Coding-Tasks-Cryptography-Randomness"/>
      </para>
    </listitem>
  </itemizedlist>
  <section>
    <title>Dangerous Standard Library Features</title>
    <para>
      Some areas of the standard library, notably the
      <literal>ctypes</literal> module, do not provide memory safety
      guarantees comparable to the rest of Python.  If such
      functionality is used, the advice in <xref
      linkend="sect-Defensive_Coding-C-Language"/> should be followed.
    </para>
  </section>
  <section>
    <title>Run-time Compilation and Code Generation</title>
    <para>
      The following Python functions and statements related to code
      execution should be avoided:
    </para>
    <itemizedlist>
      <listitem><para><function>compile</function></para></listitem>
      <listitem><para><function>eval</function></para></listitem>
      <listitem><para><literal>exec</literal></para></listitem>
      <listitem><para><function>execfile</function></para></listitem>
    </itemizedlist>
    <para>
      If you need to parse integers or floating point values, use the
      <function>int</function> and <function>float</function>
      functions instead of <function>eval</function>.  Sandboxing
      untrusted Python code does not work reliably.
    </para>
  </section>
  <section>
    <title>Sandboxing</title>
    <para>
      The <literal>rexec</literal> Python module cannot safely sandbox
      untrusted code and should not be used.  The standard CPython
      implementation is not suitable for sandboxing.
    </para>
  </section>
 </chapter>
--- a/en-US/Revision_History.xml
+++ b/en-US/Revision_History.xml
@ -1,96 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 <!ENTITY % BOOK_ENTITIES SYSTEM "Defensive_Coding.ent">
 %BOOK_ENTITIES;
 ]>
 <appendix id="appe-Defensive_Coding-Revision_History">
  <title>Revision History</title>
  <simpara>
    <revhistory>
      <revision>
        <revnumber>1.3-1</revnumber>
 	<date>Mon Oct 13 2014</date>
        <author>
          <firstname>Florian</firstname>
          <surname>Weimer</surname>
          <email>fweimer@redhat.com</email>
        </author>
        <revdescription>
          <simplelist>
 	    <member>Go: Mention default value handling in deserialization</member>
 	    <member>Shell: New chapter</member>
 	  </simplelist>
 	</revdescription>
      </revision>
      <revision>
        <revnumber>1.2-1</revnumber>
        <date>Wed Jul 16 2014</date>
        <author>
          <firstname>Florian</firstname>
          <surname>Weimer</surname>
          <email>fweimer@redhat.com</email>
        </author>
        <revdescription>
          <simplelist>
            <member>C: Corrected the <function>strncat</function> example</member>
 	    <member>C: Mention mixed signed/unsigned comparisons</member>
 	    <member>C: Unsigned overflow checking example</member>
 	    <member>C++: <literal>operator new[]</literal> has been fixed in GCC</member>
 	    <member>C++: Additional material on <literal>std::string</literal>, iterators</member>
 	    <member>OpenSSL: Mention <command>openssl genrsa</command> entropy issue</member>
 	    <member>Packaging: X.509 key generation</member>
 	    <member>Go, Vala: Add short chapters</member>
 	    <member>Serialization: Notes on fragmentation and reassembly</member>
          </simplelist>
        </revdescription>
      </revision>
      <revision>
        <revnumber>1.1-1</revnumber>
        <date>Tue Aug 27 2013</date>
        <author>
          <firstname>Eric</firstname>
          <surname>Christensen</surname>
          <email>sparks@redhat.com</email>
        </author>
        <revdescription>
          <simplelist>
            <member>Add a chapter which covers some Java topics.</member>
 	    <member>Deserialization: Warn about Java's java.beans.XMLDecoder.</member>
 	    <member>C: Correct the advice on array allocation
 	    (<ulink url="https://bugzilla.redhat.com/show_bug.cgi?id=995595">bug 995595</ulink>).</member>
 	    <member>C: Add material on global variables.</member>
          </simplelist>
        </revdescription>
      </revision>
      <revision>
        <revnumber>1.0-1</revnumber>
        <date>Thu May 09 2013</date>
        <author>
          <firstname>Eric</firstname>
          <surname>Christensen</surname>
          <email>sparks@redhat.com</email>
        </author>
        <revdescription>
          <simplelist>
            <member>Added more C and C++ examples.</member>
 	    <member>TLS Client NSS: Rely on NSS 3.14 cipher suite defaults.</member>
          </simplelist>
        </revdescription>
      </revision>
      <revision>
 	<revnumber>0-1</revnumber>
 	<date>Thu Mar 7 2013</date>
 	<author>
 	  <firstname>Eric</firstname>
 	  <surname>Christensen</surname>
 	  <email>sparks@redhat.com</email>
 	</author>
 	<revdescription>
 	  <simplelist>
 	    <member>Initial publication.</member>
 	  </simplelist>
 	</revdescription>
      </revision>
    </revhistory>
  </simpara>
 </appendix>
--- a/en-US/Shell.xml
+++ b/en-US/Shell.xml
@ -1,454 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Shell">
 <title>Shell Programming and <application>bash</application></title>
 <para>
  This chapter contains advice about shell programming, specifically
  in <application>bash</application>.  Most of the advice will apply
  to scripts written for other shells because extensions such as
  integer or array variables have been implemented there as well, with
  comparable syntax.
 </para>
 <section id="sect-Defensive_Coding-Shell-Alternatives">
  <title>Consider Alternatives</title>
  <para>
    Once a shell script is so complex that advice in this chapter
    applies, it is time to step back and consider the question: Is
    there a more suitable implementation language available?
  </para>
  <para>
    For example, Python with its <literal>subprocess</literal> module
    can be used to write scripts which are almost as concise as shell
    scripts when it comes to invoking external programs, and Python
    offers richer data structures, with less arcane syntax and more
    consistent behavior.
  </para>
 </section>
 <section id="sect-Defensive_Coding-Shell-Language">
 <title>Shell Language Features</title>
 <para>
  The following sections cover subtleties concerning the shell
  programming languages.  They have been written with the
  <application>bash</application> shell in mind, but some of these
  features apply to other shells as well.
 </para>
 <para>
  Some of the features described may seem like implementation defects,
  but these features have been replicated across multiple independent
  implementations, so they now have to be considered part of the shell
  programming language.
 </para>
 <section id="sect-Defensive_Coding-Shell-Parameter_Expansion">
  <title>Parameter Expansion</title>
  <para>
    The mechanism by which named shell variables and parameters are
    expanded is called <emphasis>parameter expansion</emphasis>.  The
    most basic syntax is
    “<literal>$</literal><emphasis>variable</emphasis>” or
    “<literal>${</literal><emphasis>variable</emphasis><literal>}</literal>”.
  </para>
  <para>
    In almost all cases, a parameter expansion should be enclosed in
    double quotation marks <literal>"</literal>…<literal>"</literal>.
  </para>
  <informalexample>
    <programlisting language="Bash">
 external-program "$arg1" "$arg2"
    </programlisting>
  </informalexample>
  <para>
    If the double quotation marks are omitted, the value of the
    variable will be split according to the current value of the
    <envar>IFS</envar> variable.  This may allow the injection of
    additional options which are then processed by
    <literal>external-program</literal>.
  </para>
  <para>
    Parameter expansion can use special syntax for specific features,
    such as substituting defaults or performing string or array
    operations.  These constructs should not be used because they can
    trigger arithmetic evaluation, which can result in code execution.
    See <xref linkend="sect-Defensive_Coding-Shell-Arithmetic"/>.
  </para>
 </section>
 <section id="sect-Defensive_Coding-Shell-Double_Expansion">
  <title>Double Expansion</title>
  <para>
    <emphasis>Double expansion</emphasis> occurs when, during the
    expansion of a shell variable, not just the variable is expanded,
    replacing it by its value, but the <emphasis>value</emphasis> of
     the variable itself is expanded as well.  This can trigger
    arbitrary code execution, unless the value of the variable is
    verified against a restrictive pattern.
  </para>
  <para>
    The evaluation process is in fact recursive, so a self-referential
    expression can cause an out-of-memory condition and a shell crash.
  </para>
  <para>
     Double expansion may seem like a defect, but it is implemented
    by many shells, and has to be considered an integral part of the
    shell programming language.  However, it does make writing robust
    shell scripts difficult.
  </para>
  <para>
    Double expansion can be requested explicitly with the
    <literal>eval</literal> built-in command, or by invoking a
    subshell with “<literal>bash -c</literal>”.  These constructs
    should not be used.
  </para>
  <para>
    The following sections give examples of places where implicit
    double expansion occurs.
  </para>
  <section id="sect-Defensive_Coding-Shell-Arithmetic">
    <title>Arithmetic Evaluation</title>
    <para>
      <emphasis>Arithmetic evaluation</emphasis> is a process by which
      the shell computes the integer value of an expression specified
      as a string.  It is highly problematic for two reasons: It
      triggers double expansion (see <xref
      linkend="sect-Defensive_Coding-Shell-Double_Expansion"/>), and the
      language of arithmetic expressions is not self-contained.  Some
      constructs in arithmetic expressions (notably array subscripts)
      provide a trapdoor from the restricted language of arithmetic
      expressions to the full shell language, thus paving the way
      towards arbitrary code execution.  Due to double expansion,
      input which is (indirectly) referenced from an arithmetic
      expression can trigger execution of arbitrary code, which is
      potentially harmful.
    </para>
    <para>
       Arithmetic evaluation is triggered by the following constructs:
    </para>
     <!-- The list was constructed by looking at the bash sources and
          searching for the string "expand_". -->
    <itemizedlist>
      <listitem>
 	<para>
 	  The <emphasis>expression</emphasis> in
 	  “<literal>$((</literal><emphasis>expression</emphasis><literal>))</literal>”
 	  is evaluated.  This construct is called <emphasis>arithmetic
 	  expansion</emphasis>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  “<literal>$[</literal><emphasis>expression</emphasis><literal>]</literal>”
 	  is a deprecated syntax with the same effect.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The arguments to the <literal>let</literal> shell built-in
 	  are evaluated.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  “<literal>((</literal><emphasis>expression</emphasis><literal>))</literal>”
 	  is an alternative syntax for “<literal>let
 	  </literal><emphasis>expression</emphasis>”.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Conditional expressions surrounded by
 	  “<literal>[[</literal>…<literal>]]</literal>” can trigger
 	  arithmetic evaluation if certain operators such as
 	  <literal>-eq</literal> are used.  (The
 	  <literal>test</literal> built-in does not perform arithmetic
 	  evaluation, even with integer operators such as
 	  <literal>-eq</literal>.)
 	</para>
 	<para>
 	  The conditional expression
 	  “<literal>[[ $</literal><emphasis>variable</emphasis><literal> =~ </literal><emphasis>regexp</emphasis><literal> ]]</literal>”
 	  can be used for input validation, assuming that
 	  <emphasis>regexp</emphasis> is a constant regular
 	  expression.
 	  See <xref linkend="sect-Defensive_Coding-Shell-Input_Validation"/>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Certain parameter expansions, for example
 	  “<literal>${</literal><emphasis>variable</emphasis><literal>[</literal><emphasis>expression</emphasis><literal>]}</literal>”
 	  (array indexing) or
 	  “<literal>${</literal><emphasis>variable</emphasis><literal>:</literal><emphasis>expression</emphasis><literal>}</literal>”
 	  (string slicing), trigger arithmetic evaluation of
 	  <emphasis>expression</emphasis>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Assignment to array elements using
 	  “<emphasis>array_variable</emphasis><literal>[</literal><emphasis>subscript</emphasis><literal>]=</literal><emphasis>expression</emphasis>”
 	  triggers evaluation of <emphasis>subscript</emphasis>, but
 	  not <emphasis>expression</emphasis>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The expressions in the arithmetic <literal>for</literal>
 	  command,
 	  “<literal>for ((</literal><emphasis>expression1</emphasis><literal>; </literal><emphasis>expression2</emphasis><literal>; </literal><emphasis>expression3</emphasis><literal>)); do </literal><emphasis>commands</emphasis><literal>; done</literal>”
 	  are evaluated.  This does not apply to the regular
 	  for command,
 	  “<literal>for </literal><emphasis>variable</emphasis><literal> in </literal><emphasis>list</emphasis><literal>; do </literal><emphasis>commands</emphasis><literal>; done</literal>”.
 	</para>
      </listitem>
    </itemizedlist>
    <important>
      <para>
 	Depending on the <application>bash</application> version, the
 	above list may be incomplete.
      </para>
      <para>
 	If faced with a situation where using such shell features
 	appears necessary, see <xref
 	linkend="sect-Defensive_Coding-Shell-Alternatives"/>.
      </para>
    </important>
    <para>
      If it is impossible to avoid shell arithmetic on untrusted
      inputs, refer to <xref
      linkend="sect-Defensive_Coding-Shell-Input_Validation"/>.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Shell-Types">
    <title>Type declarations</title>
    <para>
      <application>bash</application> supports explicit type
      declarations for shell variables:
    </para>
    <informalexample>
      <programlisting language="Bash">
 	declare -i integer_variable
 	declare -a array_variable
 	declare -A assoc_array_variable
 	typeset -i integer_variable
 	typeset -a array_variable
 	typeset -A assoc_array_variable
 	local -i integer_variable
 	local -a array_variable
 	local -A assoc_array_variable
 	readonly -i integer_variable
 	readonly -a array_variable
 	readonly -A assoc_array_variable
      </programlisting>
    </informalexample>
    <para>
      Variables can also be declared as arrays by assigning them an
      array expression, as in:
    </para>
    <informalexample>
      <programlisting language="Bash">
 array_variable=(1 2 3 4)
      </programlisting>
    </informalexample>
    <para>
      Some built-ins (such as <literal>mapfile</literal>) can
      implicitly create array variables.
    </para>
    <para>
      Such type declarations should not be used because assignment to
      such variables (independent of the concrete syntax used for the
      assignment) triggers arithmetic expansion (and thus double
      expansion) of the right-hand side of the assignment operation.
      See <xref linkend="sect-Defensive_Coding-Shell-Arithmetic"/>.
    </para>
    <para>
      Shell scripts which use integer or array variables should be
       rewritten in another, more suitable language.  See <xref
      linkend="sect-Defensive_Coding-Shell-Alternatives"/>.
    </para>
  </section>
 </section>
 <section id="sect-Defensive_Coding-Shell-Obscure">
  <title>Other Obscurities</title>
  <para>
    Obscure shell language features should not be used.  Examples are:
  </para>
  <itemizedlist>
    <listitem>
      <para>
 	Exported functions (<literal>export -f</literal> or
 	<literal>declare -f</literal>).
      </para>
    </listitem>
    <listitem>
      <para>
 	Function names which are not valid variable names, such as
 	“<literal>module::function</literal>”.
      </para>
    </listitem>
    <listitem>
      <para>
 	The possibility to override built-ins or external commands
 	with shell functions.
      </para>
    </listitem>
    <listitem>
      <para>
 	Changing the value of the <envar>IFS</envar> variable to
 	tokenize strings.
      </para>
    </listitem>
  </itemizedlist>
 </section>
 </section>
 <section id="sect-Defensive_Coding-Shell-Invoke">
 <title>Invoking External Commands</title>
 <para>
  When passing shell variables as single command line arguments,
  they should always be surrounded by double quotes.  See
  <xref linkend="sect-Defensive_Coding-Shell-Parameter_Expansion"/>.
 </para>
 <para>
  Care is required when passing untrusted values as positional
  parameters to external commands.  If the value starts with a hyphen
  “<literal>-</literal>”, it may be interpreted by the external
  command as an option.  Depending on the external program, a
  “<literal>--</literal>” argument stops option processing and treats
  all following arguments as positional parameters.  (Double quotes
  are completely invisible to the command being invoked, so they do
  not prevent variable values from being interpreted as options.)
 </para>
 <para>
  Cleaning the environment before invoking child processes is
   difficult to implement in a script.  <application>bash</application>
  keeps a hidden list of environment variables which do not correspond
  to shell variables, and unsetting them from within a
  <application>bash</application> script is not possible.  To reset
  the environment, a script can re-run itself under the “<literal>env
  -i</literal>” command with an additional parameter which indicates
  the environment has been cleared and suppresses a further
  self-execution.  Alternatively, individual commands can be executed
  with “<literal>env -i</literal>”.
 </para>
 <important>
  <para>
    Complete isolation from its original execution environment
    (which is required when the script is executed after a trust
    transition, e.g., triggered by the SUID mechanism) is impossible
    to achieve from within the shell script itself.  Instead, the
    invoking process has to clear the process environment (except for
     a few trusted variables) before running the shell script.
  </para>
 </important>
 <para>
  Checking for failures in executed external commands is recommended.
  If no elaborate error recovery is needed, invoking “<literal>set
  -e</literal>” may be sufficient.  This causes the script to stop on
  the first failed command.  However, failures in pipes
  (“<literal>command1 | command2</literal>”) are only detected for the
   last command in the pipe; errors in previous commands are ignored.
  This can be changed by invoking “<literal>set -o pipefail</literal>”.
  Due to architectural limitations, only the process that spawned
  the entire pipe can check for failures in individual commands;
  it is not possible for a process to tell if the process feeding
  data (or the process consuming data) exited normally or with 
  an error.
 </para>
 <para>
  See <xref linkend="sect-Defensive_Coding-Tasks-Processes-Creation"/>
  for additional details on creating child processes.
 </para>
 </section>
 <section id="sect-Defensive_Coding-Shell-Temporary_Files">
  <title>Temporary Files</title>
  <para>
    Temporary files should be created with the
    <literal>mktemp</literal> command, and temporary directories with
    “<literal>mktemp -d</literal>”.
  </para>
  <para>
    To clean up temporary files and directories, write a clean-up
    shell function and register it as a trap handler, as shown in
    <xref linkend="ex-Defensive_Coding-Tasks-Temporary_Files"/>.
    Using a separate function avoids issues with proper quoting of
    variables.
  </para>
  <example id="ex-Defensive_Coding-Tasks-Temporary_Files">
    <title>Creating and Cleaning up Temporary Files</title>
 <informalexample>
   <programlisting language="Bash">
 tmpfile="$(mktemp)"
 cleanup () {
    rm -f -- "$tmpfile"
 }
 trap cleanup 0
   </programlisting>
 </informalexample>
  </example>
 </section>
 <section id="sect-Defensive_Coding-Shell-Input_Validation">
  <title>Performing Input Validation</title>
  <para>
    In some cases, input validation cannot be avoided.  For example,
    if arithmetic evaluation is absolutely required, it is imperative
    to check that input values are, in fact, integers.  See <xref
    linkend="sect-Defensive_Coding-Shell-Arithmetic"/>.
  </para>
  <para>
    <xref linkend="ex-Defensive_Coding-Shell-Input_Validation"/>
    shows a construct which can be used to check if a string
    “<literal>$value</literal>” is an integer.  This construct is
    specific to <application>bash</application> and not portable to
    POSIX shells.
  </para>
  <example id="ex-Defensive_Coding-Shell-Input_Validation">
    <title>Input validation in <application>bash</application></title>
    <xi:include href="snippets/Shell-Input_Validation.xml"
 		xmlns:xi="http://www.w3.org/2001/XInclude" />
  </example>
  <para>
    Using <literal>case</literal> statements for input validation is
    also possible and supported by other (POSIX) shells, but the
    pattern language is more restrictive, and it can be difficult to
    write suitable patterns.
  </para>
  <para>
    The <literal>expr</literal> external command can give misleading
    results (e.g., if the value being checked contains operators
    itself) and should not be used.
  </para>
 </section>
 <section id="sect-Defensive_Coding-Shell-Edit_Guard">
  <title>Guarding Shell Scripts Against Changes</title>
  <para>
    <application>bash</application> only reads a shell script up to
     the point needed for executing the next command.  This means
     that if the script is overwritten while it is running, execution can
    jump to a random part of the script, depending on what is modified
    in the script and how the file offsets change as a result.  (This
    behavior is needed to support self-extracting shell archives whose
    script part is followed by a stream of bytes which does not follow
    the shell language syntax.)
  </para>
  <para>
    Therefore, long-running scripts should be guarded against
    concurrent modification by putting as much of the program logic
    into a <literal>main</literal> function, and invoking the
    <literal>main</literal> function at the end of the script, using
    this syntax:
  </para>
  <informalexample>
    <programlisting language="Bash">
 main "$@" ; exit $?
    </programlisting>
  </informalexample>
  <para>
    This construct ensures that <application>bash</application> will
    stop execution after the <literal>main</literal> function, instead
    of opening the script file and trying to read more commands.
  </para>
 </section>
 </chapter>
--- a/en-US/Tasks-Cryptography.xml
+++ b/en-US/Tasks-Cryptography.xml
@ -1,143 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Tasks-Cryptography">
  <title>Cryptography</title>
  <section>
    <title>Primitives</title>
    <para>
      Choosing from the following cryptographic primitives is
      recommended:
    </para>
    <itemizedlist>
      <listitem><para>RSA with 2048 bit keys and OAEP or PSS
        padding</para></listitem>
      <listitem><para>AES-128 in CBC mode</para></listitem>
      <listitem><para>AES-128 in GCM mode</para></listitem>
      <listitem><para>AES-256 in CBC mode</para></listitem>
      <listitem><para>AES-256 in GCM mode</para></listitem>
      <listitem><para>SHA-256</para></listitem>
      <listitem><para>HMAC-SHA-256</para></listitem>
      <listitem><para>HMAC-SHA-1</para></listitem>
    </itemizedlist>
    <para>
      Other cryptographic algorithms can be used if they are required
      for interoperability with existing software:
    </para>
    <itemizedlist>
      <listitem><para>RSA with key sizes larger than 1024
        and legacy padding</para></listitem>
      <listitem><para>AES-192</para></listitem>
      <listitem><para>3DES (triple DES, with two or three 56 bit keys),
        but strongly discouraged</para></listitem>
      <listitem><para>RC4 (but very, very strongly discouraged)</para></listitem>
      <listitem><para>SHA-1</para></listitem>
      <listitem><para>HMAC-MD5</para></listitem>
    </itemizedlist>
    <important>
      <title>Important</title>
      <para>
 	These primitives are difficult to use in a secure way.  Custom
 	implementation of security protocols should be avoided.  For
 	protecting confidentiality and integrity of network
 	transmissions, TLS should be used (<xref
 	linkend="chap-Defensive_Coding-TLS"/>).
      </para>
      <para>
 	In particular, when using AES in CBC mode, it is necessary to
 	add integrity checking by other means, preferably using
 	HMAC-SHA-256 and <emphasis>after</emphasis> encryption (that
 	is, on the encrypted cipher text).  For AES in GCM mode,
 	correct construction of nonces is absolutely essential.
      </para>
    </important>
 <!-- TODO: More algorithms are available in the NIST documents
     linked from: http://wiki.brq.redhat.com/SecurityTechnologies/FIPS -->
  </section>
  <section>
    <title id="sect-Defensive_Coding-Tasks-Cryptography-Randomness">Randomness</title>
    <para>
      The following facilities can be used to generate unpredictable
      and non-repeating values.  When these functions are used without
      special safeguards, each individual random value should be at
      least 12 bytes long.
    </para>
    <itemizedlist>
      <listitem>
 	<para><function>PK11_GenerateRandom</function> in the NSS library
 	  (usable for high data rates)</para>
      </listitem>
      <listitem>
 	<para><function>RAND_bytes</function> in the OpenSSL library
 	  (usable for high data rates)</para>
      </listitem>
      <listitem>
 	<para><function>gnutls_rnd</function> in GNUTLS, with
 	<literal>GNUTLS_RND_RANDOM</literal> as the first argument
 	(usable for high data rates)</para>
      </listitem>
      <listitem>
 	<para><type>java.security.SecureRandom</type> in Java
 	  (usable for high data rates)</para>
      </listitem>
      <listitem>
 	<para><function>os.urandom</function> in Python</para>
      </listitem>
      <listitem>
 	<para>The <function>getrandom</function> system call since glibc 2.25</para>
      </listitem>
      <listitem>
 	<para>The <function>getentropy</function> call since glibc 2.25</para>
      </listitem>
      <listitem>
 	<para>Reading from the <filename>/dev/urandom</filename>
 	  character device</para>
      </listitem>
    </itemizedlist>
    <para>
      All these functions should be non-blocking, and they should not
      wait until physical randomness becomes available.  (Some
      cryptography providers for Java can cause
      <type>java.security.SecureRandom</type> to block, however.)
      Those functions which do not obtain all bits directly from
      <filename>/dev/urandom</filename> are suitable for high data
      rates because they do not deplete the system-wide entropy pool.
    </para>
    <important>
      <title>Difficult to use API</title>
      <para>
 	Both <function>RAND_bytes</function> and
 	<function>PK11_GenerateRandom</function> have three-state
 	return values (with conflicting meanings).  Careful error
 	checking is required.  Please review the documentation when
 	using these functions.
      </para>
    </important>
    <important>
      <title>Difficult to use API</title>
      <para>
 	The <function>getrandom</function> system call has three-state
 	return values, hence requires careful error checking.
      </para>
      <para>
 	It was introduced in Linux kernel 3.17, but before glibc 2.25 no API wrappers were
 	provided. As such one could only use it via the syscall interface
 	as <function>syscall(SYS_getrandom, (void*)dest, (size_t)size, (unsigned int)0)</function>.
 	For portable code targeting multiple kernel versions one has to check
 	for the function being available at run time, and switch to another
 	facility if the running kernel does not support this call.
      </para>
    </important>
    <para>
      Other sources of randomness should be considered predictable.
    </para>
    <para>
      Generating randomness for cryptographic keys in long-term use
      may need different steps and is best left to cryptographic
      libraries.
    </para>
  </section>
 </chapter>
--- a/en-US/Tasks-Descriptors.xml
+++ b/en-US/Tasks-Descriptors.xml
@ -1,267 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="sect-Defensive_Coding-Tasks-Descriptors">
  <title>File Descriptor Management</title>
  <para>
    File descriptors underlie all input/output mechanisms offered by
     the system.  They are used to implement the <literal>FILE
    *</literal>-based functions found in
    <literal>&lt;stdio.h&gt;</literal>, and all the file and network
    communication facilities provided by the Python and Java
     environments are eventually implemented in terms of them.
  </para>
  <para>
    File descriptors are small, non-negative integers in userspace,
    and are backed on the kernel side with complicated data structures
    which can sometimes grow very large.
  </para>
  <section>
    <title>Closing Descriptors</title>
    <para>
      If a descriptor is no longer used by a program and is not closed
      explicitly, its number cannot be reused (which is problematic in
      itself, see <xref
      linkend="sect-Defensive_Coding-Tasks-Descriptors-Limit"/>), and
      the kernel resources are not freed.  Therefore, it is important
      to close all descriptors at the earliest point in time
      possible, but not earlier.
    </para>
    <section>
      <title>Error Handling during Descriptor Close</title>
      <para>
 	The <function>close</function> system call is always
 	successful in the sense that the passed file descriptor is
 	never valid after the function has been called.  However,
 	<function>close</function> still can return an error, for
 	example if there was a file system failure.  But this error is
 	not very useful because the absence of an error does not mean
 	that all caches have been emptied and previous writes have
 	been made durable.  Programs which need such guarantees must
 	open files with <literal>O_SYNC</literal> or use
 	<literal>fsync</literal> or <literal>fdatasync</literal>, and
 	may also have to <literal>fsync</literal> the directory
 	containing the file.
      </para>
    </section>
    <section>
      <title>Closing Descriptors and Race Conditions</title>
      <para>
 	Unlike process IDs, which are recycled only gradually, the
 	kernel always allocates the lowest unused file descriptor when
 	a new descriptor is created.  This means that in a
 	multi-threaded program which constantly opens and closes file
 	descriptors, descriptors are reused very quickly.  Unless
 	descriptor closing and other operations on the same file
 	descriptor are synchronized (typically, using a mutex), there
 	will be race conditions and I/O operations will be applied to
 	the wrong file descriptor.
      </para>
      <para>
 	Sometimes, it is necessary to close a file descriptor
 	concurrently, while another thread might be about to use it in
 	a system call.  In order to support this, a program needs to
 	create a single special file descriptor, one on which all I/O
 	operations fail.  One way to achieve this is to use
 	<function>socketpair</function>, close one of the descriptors,
 	and call <literal>shutdown(fd, SHUT_RDWR)</literal> on the
 	other.
      </para>
      <para>
 	When a descriptor is closed concurrently, the program does not
 	call <function>close</function> on the descriptor.  Instead, the
 	program uses <function>dup2</function> to replace the
 	descriptor to be closed with the dummy descriptor created
 	earlier.  This way, the kernel will not reuse the descriptor,
 	but it will carry out all other steps associated with closing
 	a descriptor (for instance, if the descriptor refers to a
 	stream socket, the peer will be notified).
      </para>
      <para>
 	This is just a sketch, and many details are missing.
 	Additional data structures are needed to determine when it is
 	safe to really close the descriptor, and proper locking is
 	required for that.
      </para>
    </section>
    <section>
      <title>Lingering State after Close</title>
      <para>
 	By default, closing a stream socket returns immediately, and
 	the kernel will try to send the data in the background.  This
 	means that it is impossible to implement accurate accounting
 	of network-related resource utilization from userspace.
      </para>
      <para>
 	The <literal>SO_LINGER</literal> socket option alters the
 	behavior of <function>close</function>, so that it will return
 	only after the lingering data has been processed, either by
 	sending it to the peer successfully, or by discarding it after
 	the configured timeout.  However, there is no interface which
 	could perform this operation in the background, so a separate
 	userspace thread is needed for each <function>close</function>
 	call, causing scalability issues.
      </para>
      <para>
 	Currently, there is no application-level countermeasure which
 	applies universally.  Mitigation is possible with
 	<application>iptables</application> (the
 	<literal>connlimit</literal> match type in particular) and
 	specialized filtering devices for denial-of-service network
 	traffic.
      </para>
      <para>
 	These problems are not related to the
 	<literal>TIME_WAIT</literal> state commonly seen in
 	<application>netstat</application> output.  The kernel
 	automatically expires such sockets if necessary.
      </para>
    </section>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Descriptors-Child_Processes">
    <title>Preventing File Descriptor Leaks to Child Processes</title>
    <para>
      Child processes created with <function>fork</function> share
      the initial set of file descriptors with their parent
      process.  By default, file descriptors are also preserved if
      a new process image is created with <function>execve</function>
      (or any of the other functions such as <function>system</function>
      or <function>posix_spawn</function>).
    </para>
    <para>
      Usually, this behavior is not desirable.  There are two ways to
      turn it off, that is, to prevent new process images from
      inheriting the file descriptors in the parent process:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Set the close-on-exec flag on all newly created file
 	  descriptors.  Traditionally, this flag is controlled by the
 	  <literal>FD_CLOEXEC</literal> flag, using
 	  <literal>F_GETFD</literal> and <literal>F_SETFD</literal>
 	  operations of the <function>fcntl</function> function.
 	</para>
 	<para>
 	  However, in a multi-threaded process, there is a race
 	  condition: a subprocess could have been created between the
 	  time the descriptor was created and the
 	  <literal>FD_CLOEXEC</literal> was set.  Therefore, many system
 	  calls which create descriptors (such as
 	  <function>open</function> and <function>openat</function>)
 	  now accept the <function>O_CLOEXEC</function> flag
 	  (<function>SOCK_CLOEXEC</function> for
 	  <function>socket</function> and
 	  <function>socketpair</function>), which cause the
 	  <literal>FD_CLOEXEC</literal> flag to be set for the file
 	  descriptor in an atomic fashion.  In addition, a few new
 	  system calls were introduced, such as
 	  <function>pipe2</function> and <function>dup3</function>.
 	</para>
 	<para>
 	  The downside of this approach is that every descriptor needs
 	  to receive special treatment at the time of creation,
 	  otherwise it is not completely effective.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  After calling <function>fork</function>, but before creating
 	  a new process image with <function>execve</function>, all
 	  file descriptors which the child process will not need are
 	  closed.
 	</para>
 	<para>
 	  Traditionally, this was implemented as a loop over file
 	  descriptors ranging from <literal>3</literal> to
 	  <literal>255</literal> and later <literal>1023</literal>.
 	  But this is only an approximation because it is possible to
 	  create file descriptors outside this range easily (see <xref
 	  linkend="sect-Defensive_Coding-Tasks-Descriptors-Limit"/>).
 	  Another approach reads <filename>/proc/self/fd</filename>
 	  and closes the unexpected descriptors listed there, but this
 	  approach is much slower.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      At present, environments which care about file descriptor
      leakage implement the second approach.  OpenJDK 6 and 7
      are among them.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Descriptors-Limit">
    <title>Dealing with the <function>select</function> Limit</title>
    <para>
      By default, a user is allowed to open only 1024 files in a
      single process, but the system administrator can easily change
      this limit (which is necessary for busy network servers).
      However, there is another restriction which is more difficult to
      overcome.
    </para>
    <para>
      The <function>select</function> function only supports a
      maximum of <literal>FD_SETSIZE</literal> file descriptors
      (that is, the maximum permitted value for a file descriptor
      is <literal>FD_SETSIZE - 1</literal>, usually 1023.)  If a
      process opens many files, descriptors may exceed such
      limits.  It is impossible to query such descriptors using
      <function>select</function>.
    </para>
    <para>
      If a library which creates many file descriptors is used in
      the same process as a library which uses
      <function>select</function>, at least one of them needs to
      be changed.  <!-- ??? refer to event-driven programming -->
      Calls to <function>select</function> can be replaced with
      calls to <function>poll</function> or another event handling
      mechanism.  Replacing the <function>select</function> function
      is the recommended approach.
    </para>
    <para>
      Alternatively, the library with high descriptor usage can
      relocate descriptors above the <literal>FD_SETSIZE</literal>
      limit using the following procedure.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Create the file descriptor <literal>fd</literal> as
 	  usual, preferably with the <literal>O_CLOEXEC</literal>
 	  flag.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Before doing anything else with the descriptor
 	  <literal>fd</literal>, invoke:
 	</para>
 	<programlisting language="C">
 	  int newfd = fcntl(fd, F_DUPFD_CLOEXEC, (long)FD_SETSIZE);
 	</programlisting>
      </listitem>
      <listitem>
 	<para>
 	  Check that <literal>newfd</literal> result is
 	  non-negative, otherwise close <literal>fd</literal> and
 	  report an error, and return.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Close <literal>fd</literal> and continue to use
 	  <literal>newfd</literal>.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      The new descriptor has been allocated above the
       <literal>FD_SETSIZE</literal> limit.  Even though this algorithm
       is racy in the sense that the first <literal>FD_SETSIZE</literal>
       descriptors could fill up, a very high degree of
      physical parallelism is required before this becomes a problem.
    </para>
  </section>
 </chapter>
--- a/en-US/Tasks-File_System.xml
+++ b/en-US/Tasks-File_System.xml
@ -1,339 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Tasks-File_System">
  <title>File System Manipulation</title>
  <para>
    In this chapter, we discuss general file system manipulation, with
    a focus on accessing files and directories to which another,
    potentially untrusted user has write access.
  </para>
  <para>
    Temporary files are covered in their own chapter, <xref
    linkend="chap-Defensive_Coding-Tasks-Temporary_Files"/>.
  </para>
  <section id="sect-Defensive_Coding-Tasks-File_System-Unowned">
    <title>Working with Files and Directories Owned by Other Users</title>
    <para>
      Sometimes, it is necessary to operate on files and directories
      owned by other (potentially untrusted) users.  For example, a
      system administrator could remove the home directory of a user,
      or a package manager could update a file in a directory which is
      owned by an application-specific user.  This differs from
      accessing the file system as a specific user; see 
      <xref linkend="sect-Defensive_Coding-Tasks-File_System-Foreign"/>.
    </para>
    <para>
      Accessing files across trust boundaries faces several
      challenges, particularly if an entire directory tree is being
      traversed:
    </para>
    <orderedlist>
      <listitem>
 	<para>
 	  Another user might add file names to a writable directory at
 	  any time.  This can interfere with file creation and the
 	  order of names returned by <function>readdir</function>.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Merely opening and closing a file can have side effects.
 	  For instance, an automounter can be triggered, or a tape
 	  device rewound.  Opening a file on a local file system can
 	  block indefinitely, due to mandatory file locking, unless
 	  the <literal>O_NONBLOCK</literal> flag is specified.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Hard links and symbolic links can redirect the effect of
 	  file system operations in unexpected ways.  The
 	  <literal>O_NOFOLLOW</literal> and
 	  <literal>AT_SYMLINK_NOFOLLOW</literal> variants of system
 	  calls only affect the final path name component.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The structure of a directory tree can change.  For example,
 	  the parent directory of what used to be a subdirectory
 	  within the directory tree being processed could suddenly
 	  point outside that directory tree.
 	</para>
      </listitem>
    </orderedlist>
    <para>
      Files should always be created with the
      <literal>O_CREAT</literal> and <literal>O_EXCL</literal> flags,
      so that creating the file will fail if it already exists.  This
      guards against the unexpected appearance of file names, either
      due to creation of a new file, or hard-linking of an existing
      file.  In multi-threaded programs, rather than manipulating the
      umask, create the files with mode <literal>000</literal> if
      possible, and adjust it afterwards with
      <function>fchmod</function>.
    </para>
    <para>
      To avoid issues related to symbolic links and directory tree
      restructuring, the “<literal>at</literal>” variants of system
      calls have to be used (that is, functions like
      <function>openat</function>, <function>fchownat</function>,
      <function>fchmodat</function>, and
      <function>unlinkat</function>, together with
      <literal>O_NOFOLLOW</literal> or
      <literal>AT_SYMLINK_NOFOLLOW</literal>).  Path names passed to
      these functions must have just a single component (that is,
      without a slash).  When descending, the descriptors of parent
      directories must be kept open.  The missing
      <literal>opendirat</literal> function can be emulated with
      <literal>openat</literal> (with an
      <literal>O_DIRECTORY</literal> flag, to avoid opening special
      files with side effects), followed by
      <literal>fdopendir</literal>.
    </para>
    <para>
      If the “<literal>at</literal>” functions are not available, it
      is possible to emulate them by changing the current directory.
      (Obviously, this only works if the process is not multi-threaded.)
      <function>fchdir</function> has to be used to change the current
      directory, and the descriptors of the parent directories have to
      be kept open, just as with the “<literal>at</literal>”-based
      approach.  <literal>chdir("..")</literal> is unsafe because it
      might ascend outside the intended directory tree.
    </para>
    <para>
      This “<literal>at</literal>” function emulation is currently
      required when manipulating extended attributes.  In this case,
      the <function>lsetxattr</function> function can be used, with a
      relative path name consisting of a single component.  This also
      applies to SELinux contexts and the
      <function>lsetfilecon</function> function.
    </para>
    <para>
      Currently, it is not possible to avoid opening special files
      <emphasis>and</emphasis> changes to files with hard links if the
      directory containing them is owned by an untrusted user.
      (Device nodes can be hard-linked, just as regular files.)
      <function>fchmodat</function> and <function>fchownat</function>
      affect files whose link count is greater than one.  But opening
      the files, checking that the link count is one with
      <function>fstat</function>, and using
      <function>fchmod</function> and <function>fchown</function> on
      the file descriptor may have unwanted side effects, due to item
      2 above.  When creating a directory, it is therefore important
      to change its ownership and permissions only after it has been
      fully created.  Until that point, file names are stable, and no
      files with unexpected hard links can be introduced.
    </para>
    <para>
      Similarly, when just reading a directory owned by an untrusted
      user, it is currently impossible to reliably avoid opening
      special files.
    </para>
    <para>
      There is no workaround against the instability of the file list
      returned by <function>readdir</function>.  Concurrent
      modification of the directory can result in a list of files
      being returned which never actually existed on disk.
    </para>
    <para>
      Hard links and symbolic links can be safely deleted using
      <function>unlinkat</function> without further checks because
      deletion only affects the name within the directory tree being
      processed.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-File_System-Foreign">
    <title>Accessing the File System as a Different User</title>
    <para>
      This section deals with access to the file system as a specific
      user.  This is different from accessing files and directories owned by a
      different, potentially untrusted user; see <xref
      linkend="sect-Defensive_Coding-Tasks-File_System-Unowned"/>.
    </para>
    <para>
      One approach is to spawn a child process which runs under the
      target user and group IDs (both effective and real IDs).  Note
      that this child process can block indefinitely, even when
      processing regular files only.  For example, a special FUSE file
      system could cause the process to hang in uninterruptible sleep
      inside a <function>stat</function> system call.
    </para>
    <para>
      An existing process could change its user and group ID using
      <function>setfsuid</function> and <function>setfsgid</function>.
      (These functions are preferred over <function>seteuid</function>
      and <function>setegid</function> because they do not allow the
      impersonated user to send signals to the process.)  These
      functions are not thread safe.  In multi-threaded processes,
      these operations need to be performed in a single-threaded child
      process.  Unexpected blocking may occur as well.
    </para>
    <para>
      It is not recommended to try to reimplement the kernel
      permission checks in user space because the required checks are
      complex.  It is also very difficult to avoid race conditions
      during path name resolution.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-File_System-Limits">
    <title>File System Limits</title>
    <para>
      For historical reasons, there are preprocessor constants such as
      <literal>PATH_MAX</literal> and <literal>NAME_MAX</literal>.
      However, on most systems, the length of canonical path names
      (absolute path names with all symbolic links resolved, as
      returned by <function>realpath</function> or
      <function>canonicalize_file_name</function>) can exceed
      <literal>PATH_MAX</literal> bytes, and individual file name
      components can be longer than <literal>NAME_MAX</literal>.  This
      is also true of the <literal>_PC_PATH_MAX</literal> and
      <literal>_PC_NAME_MAX</literal> values returned by
      <function>pathconf</function>, and the
      <literal>f_namemax</literal> member of <literal>struct
      statvfs</literal>.  Therefore, these constants should not be
      used.  This is also the reason why
      <function>readdir_r</function> should never be used (instead,
      use <function>readdir</function>).
    </para>
    <para>
      You should not write code in a way that assumes that there is an
      upper limit on the number of subdirectories of a directory, the
      number of regular files in a directory, or the link count of an
      inode.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-File_System-Features">
    <title>File System Features</title>
    <para>
      Not all file systems support all features.  This makes it very
      difficult to write general-purpose tools for copying files.  For
      example, a copy operation intending to preserve file permissions
      will generally fail when copying to a FAT file system.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Some file systems are case-insensitive.  Most should be
 	  case-preserving, though.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Name length limits vary greatly, from eight to thousands of
 	  bytes.  Path length limits differ as well.  Most systems
 	  impose an upper bound on path names passed to the kernel,
 	  but using relative path names, it is possible to create and
 	  access files whose absolute path name is essentially of
 	  unbounded length.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Some file systems do not store names as fairly unrestricted
 	  byte sequences, as it has been traditionally the case on GNU
 	  systems.  This means that some byte sequences (outside the
 	  POSIX safe character set) are not valid names.  Conversely,
 	  names of existing files may not be representable as byte
 	  sequences, and the files are thus inaccessible on GNU
 	  systems.  Some file systems perform Unicode canonicalization
 	  on file names.  These file systems preserve case, but
 	  reading the name of a just-created file using
 	  <function>readdir</function> might still result in a
 	  different byte sequence.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Permissions and owners are not universally supported (and
 	  SUID/SGID bits may not be available).  For example, FAT file
 	  systems assign ownership based on a mount option, and
 	  generally mark all files as executable.  Any attempt to
 	  change permissions would result in an error.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Non-regular files (device nodes, FIFOs) are not generally
 	  available.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Only on some file systems, files can have holes, that is,
 	  not all of their contents are backed by disk storage.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  <function>ioctl</function> support (even fairly generic
 	  functionality such as <literal>FIEMAP</literal> for
 	  discovering physical file layout and holes) is
 	  file-system-specific.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Not all file systems support extended attributes, ACLs and
 	  SELinux metadata.  Size and naming restrictions on extended
 	  attributes vary.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Hard links may not be supported at all (FAT) or only within
 	  the same directory (AFS).  Symbolic links may not be
 	  available, either.  Reflinks (hard links with copy-on-write
 	  semantics) are still very rare.  Recent systems restrict
 	  creation of hard links to users which own the target file or
 	  have read/write access to it, but older systems do not.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Renaming (or moving) files using <function>rename</function>
 	  can fail (even when <function>stat</function> indicates that
 	  the source and target directories are located on the same
 	  file system).  This system call should work if the old and
 	  new paths are located in the same directory, though.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Locking semantics vary among file systems.  This affects
 	  advisory and mandatory locks.  For example, some network
 	  file systems do not allow deleting files which are opened by
 	  any process.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Resolution of time stamps varies from two seconds to
 	  nanoseconds.  Not all time stamps are available on all file
 	  systems.  File creation time (<emphasis>birth
 	  time</emphasis>) is not exposed over the
 	  <function>stat</function>/<function>fstat</function>
 	  interface, even if stored by the file system.
 	</para>
      </listitem>
    </itemizedlist>
  </section>
  <section id="sect-Defensive_Coding-Tasks-File_System-Free_Space">
    <title>Checking Free Space</title>
    <para>
      The <function>statvfs</function> and
      <function>fstatvfs</function> functions allow programs to
      examine the number of available blocks and inodes, through the
      members <literal>f_bfree</literal>, <literal>f_bavail</literal>,
      <literal>f_ffree</literal>, and <literal>f_favail</literal> of
      <literal>struct statvfs</literal>.  Some file systems return
      fictional values in the <literal>f_ffree</literal> and
      <literal>f_favail</literal> fields, so the only reliable way to
      discover if the file system still has space for a file is to try
      to create it.  The <literal>f_bfree</literal> field should be
      reasonably accurate, though.
    </para>
  </section>
 </chapter>
--- a/en-US/Tasks-Library_Design.xml
+++ b/en-US/Tasks-Library_Design.xml
@ -1,195 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Tasks-Library_Design">
  <title>Library Design</title>
  <para>
    Throughout this chapter, the term <emphasis>client code</emphasis>
    refers to applications and other libraries using the library.
  </para>
  <section>
    <title>State Management</title>
    <para>
    </para>
    <section>
      <title>Global State</title>
      <para>
 	Global state should be avoided.
      </para>
      <para>
 	If this is impossible, the global state must be protected with
 	a lock.  For C/C++, you can use the
 	<function>pthread_mutex_lock</function>
 	and <function>pthread_mutex_unlock</function>
 	functions without linking against <literal>-lpthread</literal>
 	because the system provides stubs for non-threaded processes.
      </para>
      <para>
 	For compatibility with <function>fork</function>, these locks
 	should be acquired and released in helpers registered with
 	<function>pthread_atfork</function>.  This function is not
 	available without <literal>-lpthread</literal>, so you need to
 	use <function>dlsym</function> or a weak symbol to obtain its
 	address.
      </para>
      <para>
 	If you need <function>fork</function> protection for other
 	reasons, you should store the process ID and compare it to the
 	value returned by <function>getpid</function> each time you
 	access the global state.  (<function>getpid</function> is not
 	implemented as a system call and is fast.)  If the value
 	changes, you know that you have to re-create the state object.
 	(This needs to be combined with locking, of course.)
      </para>
    </section>
    <section>
      <title>Handles</title>
      <para>
 	Library state should be kept behind a curtain.  Client code
 	should receive only a handle.  In C, the handle can be a
 	pointer to an incomplete <literal>struct</literal>.  In C++,
 	the handle can be a pointer to an abstract base class, or it
 	can be hidden using the pointer-to-implementation idiom.
      </para>
      <para>
 	The library should provide functions for creating and
 	destroying handles.  (In C++, it is possible to use virtual
 	destructors for the latter.)  Consistency between creation and
 	destruction of handles is strongly recommended: If the client
 	code created a handle, it is the responsibility of the client
 	code to destroy it.  (This is not always possible or
 	convenient, so sometimes, a transfer of ownership has to
 	happen.)
      </para>
      <para>
 	Using handles ensures that it is possible to change the way
 	the library represents state in a way that is transparent to
 	client code.  This is important to facilitate security updates
 	and many other code changes.
      </para>
      <para>
 	It is not always necessary to protect state behind a handle
 	with a lock.  This depends on the level of thread safety
 	the library provides.
      </para>
    </section>
  </section>
  <section>
    <title>Object Orientation</title>
    <para>
      Classes should be either designed as base classes, or it should
      be impossible to use them as base classes (like
      <literal>final</literal> classes in Java).  Classes which are
      not designed for inheritance and are used as base classes
      nevertheless create potential maintenance hazards because it is
      difficult to predict how client code will react when calls to
      virtual methods are added, reordered or removed.
    </para>
    <para>
      Virtual member functions can be used as callbacks.  See 
      <xref linkend="sect-Defensive_Coding-Tasks-Library_Design-Callbacks"/>
      for some of the challenges involved.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Library_Design-Callbacks">
    <title>Callbacks</title>
    <para>
      Higher-order code is difficult to analyze for humans and
      computers alike, so it should be avoided.  Often, an
      iterator-based interface (a library function which is called
      repeatedly by client code and returns a stream of events) leads
      to a better design which is easier to document and use.
    </para>
    <para>
      If callbacks are unavoidable, some guidelines for them follow.
    </para>
    <para>
      In modern C++ code, <literal>std::function</literal> objects
      should be used for callbacks.
    </para>
    <para>
      In older C++ code and in C code, all callbacks must have an
      additional closure parameter of type <literal>void *</literal>,
      the value of which can be specified by client code.  If
      possible, the value of the closure parameter should be provided
      by client code at the same time a specific callback is
      registered (or specified as a function argument).  If a single
      closure parameter is shared by multiple callbacks, flexibility
      is greatly reduced, and conflicts between different pieces of
      client code using the same library object could be unresolvable.
      In some cases, it makes sense to provide a de-registration
      callback which can be used to destroy the closure parameter when
      the callback is no longer used.
    </para>
    <para>
      Callbacks can throw exceptions or call
      <function>longjmp</function>.  If possible, all library objects
      should remain in a valid state.  (All further operations on them
      can fail, but it should be possible to deallocate them without
      causing resource leaks.)
    </para>
    <para>
      The presence of callbacks raises the question of whether functions
      provided by the library are <emphasis>reentrant</emphasis>.
      Unless a library was designed for such use, bad things will
      happen if a callback function uses functions in the same library
      (particularly if they are invoked on the same objects and
      manipulate the same state).  When the callback is invoked, the
      library can be in an inconsistent state.  Reentrant functions
      are more difficult to write than thread-safe functions (by
      definition, simple locking would immediately lead to deadlocks).
      It is also difficult to decide what to do when destruction of an
      object which is currently processing a callback is requested.
    </para>
  </section>
  <section>
    <title>Process Attributes</title>
    <para>
      Several attributes are global and affect all code in the
      process, not just the library that manipulates them.
    </para>
    <itemizedlist>
    <listitem><para>
      environment variables
      (see <xref linkend="sect-Defensive_Coding-Tasks-secure_getenv"/>)
    </para></listitem>
    <listitem><para>
      umask
    </para></listitem>
    <listitem><para>
      user IDs, group IDs and capabilities
    </para></listitem>
    <listitem><para>
      current working directory
    </para></listitem>
    <listitem><para>
      signal handlers, signal masks and signal delivery
    </para></listitem>
    <listitem><para>
      file locks (especially <function>fcntl</function> locks
      behave in surprising ways, not just in a multi-threaded
      environment)
    </para></listitem>
    </itemizedlist>
    <para>
      Library code should avoid manipulating these global process
      attributes.  It should not rely on environment variables, umask,
      the current working directory and signal masks because these
      attributes can be inherited from an untrusted source.
    </para>
    <para>
      In addition, there are obvious process-wide aspects such as the
      virtual memory layout, the set of open files and dynamic shared
      objects, but with the exception of shared objects, these can be
      manipulated in a relatively isolated way.
    </para>
  </section>
 </chapter>
--- a/en-US/Tasks-Locking.xml
+++ b/en-US/Tasks-Locking.xml
@ -1,5 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="sect-Defensive_Coding-Tasks-Locking">
 </chapter>
--- a/en-US/Tasks-Packaging.xml
+++ b/en-US/Tasks-Packaging.xml
@ -1,183 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Tasks-Packaging">
  <title>RPM Packaging</title>
  <para>
    This chapter deals with security-related concerns around RPM
    packaging.  It has to be read in conjunction with
    distribution-specific packaging guidelines.
  </para>
  <section id="sect-Defensive_Coding-Tasks-Packaging-Certificates">
    <title>Generating X.509 Self-signed Certificates during
    Installation</title>
    <para>
      Some applications need X.509 certificates for authentication
      purposes.  For example, a single private/public key pair could
      be used to define cluster membership, enabling authentication
      and encryption of all intra-cluster communication.  (Lack of
      certification from a CA matters less in such a context.)  For
      such use, generating the key pair at package installation time
      when preparing system images for use in the cluster is
      reasonable.  For other use cases, it is necessary to generate
      the key pair before the service is started for the first time,
      see <xref linkend="sect-Defensive_Coding-Tasks-Packaging-Certificates-Service"/>,
      and <ulink url="https://fedoraproject.org/wiki/Packaging:Initial_Service_Setup#Generating_Self-Signed_Certificates">Packaging:Initial Service Setup</ulink>.
    </para>
    <important>
      <para>
 	The way the key is generated may not be suitable for key
 	material of critical value.  (<command>openssl
 	genrsa</command> uses, but does not require, entropy from a
 	physical source of randomness, among other things.)  Such keys
 	should be stored in a hardware security module if possible,
 	and generated from random bits reserved for this purpose
 	derived from a non-deterministic physical source.
      </para>
    </important>
    <para>
      In the spec file, we define RPM variables which contain the
      names of the directory and files used to store the private key
      and the certificate, and the user name for the service:
    </para>
    <informalexample>
      <programlisting language="RPM Spec">
 # Name of the user owning the file with the private key
 %define tlsuser %{name}
 # Name of the directory which contains the key and certificate files
 %define tlsdir %{_sysconfdir}/%{name}
 %define tlskey %{tlsdir}/%{name}.key
 %define tlscert %{tlsdir}/%{name}.crt
      </programlisting>
    </informalexample>
    <para>
      These variables likely need adjustment based on the needs of the
      package.
    </para>
    <para>
      Typically, the file with the private key needs to be owned by
      the system user which needs to read it,
      <literal>%{tlsuser}</literal> (not <literal>root</literal>).  In
      order to avoid races, if the <emphasis>directory</emphasis>
      <literal>%{tlsdir}</literal> is <emphasis>owned by the service
      user</emphasis>, you should use the code in <xref
      linkend="ex-Defensive_Coding-Packaging-Certificates-Owned"/>.
      The invocation of <application>su</application> with the
      <option>-s /bin/bash</option> argument is necessary in case the
      login shell for the user has been disabled.
    </para>
    <example id="ex-Defensive_Coding-Packaging-Certificates-Owned">
      <title>Creating a key pair in a user-owned directory</title>
      <programlisting language="Bash">
 %post
 if [ $1 -eq 1 ] ; then
  if ! test -e %{tlskey} ; then
    su -s /bin/bash \
      -c "umask 077 &amp;&amp; openssl genrsa -out %{tlskey} 2048 2>/dev/null" \
      %{tlsuser}
  fi
  if ! test -e %{tlscert} ; then
    cn="Automatically generated certificate for the %{tlsuser} service"
    req_args="-key %{tlskey} -out %{tlscert} -days 7305 -subj \"/CN=$cn/\""
    su -s /bin/bash \
      -c "openssl req -new -x509 -extensions usr_cert $req_args" \
      %{tlsuser}
  fi
 fi
 %files
 %dir %attr(0755,%{tlsuser},%{tlsuser}) %{tlsdir}
 %ghost %attr(0600,%{tlsuser},%{tlsuser}) %config(noreplace) %{tlskey}
 %ghost %attr(0644,%{tlsuser},%{tlsuser}) %config(noreplace) %{tlscert}
      </programlisting>
    </example>
    <para>
      The files containing the key material are marked as ghost
      configuration files.  This ensures that they are tracked in the
      RPM database as associated with the package, but RPM will not
      create them when the package is installed and not verify their
      contents (the <literal>%ghost</literal> part), or delete the files
      when the package is uninstalled (the
      <literal>%config(noreplace)</literal> part).
    </para>
    <para>
      If the <emphasis>directory</emphasis>
      <literal>%{tlsdir}</literal> <emphasis>is owned by</emphasis>
      <literal>root</literal>, use the code in <xref
      linkend="ex-Defensive_Coding-Packaging-Certificates-Unowned"/>.
    </para>
    <example id="ex-Defensive_Coding-Packaging-Certificates-Unowned">
      <title>Creating a key pair in a <literal>root</literal>-owned directory</title>
      <programlisting language="Bash">
 %post
 if [ $1 -eq 1 ] ; then
  if ! test -e %{tlskey} ; then
    (umask 077 &amp;&amp; openssl genrsa -out %{tlskey} 2048 2>/dev/null)
    chown %{tlsuser} %{tlskey}
  fi
  if ! test -e %{tlscert} ; then
    cn="Automatically generated certificate for the %{tlsuser} service"
    openssl req -new -x509 -extensions usr_cert \
      -key %{tlskey} -out %{tlscert} -days 7305 -subj "/CN=$cn/"
  fi
 fi
 %files
 %dir %attr(0755,root,root) %{tlsdir}
 %ghost %attr(0600,%{tlsuser},%{tlsuser}) %config(noreplace) %{tlskey}
 %ghost %attr(0644,root,root) %config(noreplace) %{tlscert}
      </programlisting>
    </example>
    <para>
      In order for this to work, the package which generates the keys
      must require the <application>openssl</application> package.  If
      the user which owns the key file is generated by a different
      package, the package generating the certificate must specify a
      <literal>Requires(pre):</literal> on the package which creates
      the user.  This ensures that the user account will exist when it
      is needed for the <application>su</application> or
      <application>chown</application> invocation.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Packaging-Certificates-Service">
    <title>Generating X.509 Self-signed Certificates before Service
    Start</title>
    <para>
      An alternative way to automatically provide an X.509 key pair is
      to create it just before the service is started for the first
      time.  This ensures that installation images which are created
      from installed RPM packages receive different key material.
      Creating the key pair at package installation time (see <xref
      linkend="sect-Defensive_Coding-Tasks-Packaging-Certificates"/>)
      would put the key into the image, which may or may not make
      sense.    </para>
    <important>
      <para>
 	The caveats about the way the key is generated in <xref
 	linkend="sect-Defensive_Coding-Tasks-Packaging-Certificates"/>
 	apply to this procedure as well.
    </para>
    </important>
    <para>
      Generating key material before service start may happen very
      early during boot, when the kernel randomness pool has not yet
      been initialized.  Currently, the only way to check for the
      initialization is to look for the kernel message
      <literal>random: nonblocking pool is initialized</literal>, or
      ensure that the application used for generating the keys
      is utilizing the <function>getrandom</function> system call.
    </para>
    <para>
      In theory, it is also possible to use an application which reads from
      <filename>/dev/random</filename> while generating the key
      material (instead of <filename>/dev/urandom</filename>), but
      this can block not just during the boot process, but also much
      later at run time, and generally results in a poor user
      experience.
    </para>
    <para>
      The requirements for generating such keys are documented at
      <ulink url="https://fedoraproject.org/wiki/Packaging:Initial_Service_Setup#Generating_Self-Signed_Certificates">Packaging:Initial Service Setup</ulink>.
    </para>
  </section>
 </chapter>
--- a/en-US/Tasks-Processes.xml
+++ b/en-US/Tasks-Processes.xml
@ -1,483 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="sect-Defensive_Coding-Tasks-Processes">
  <title>Processes</title>
  <section id="sect-Defensive_Coding-Tasks-Processes-Creation">
    <title>Creating Safe Processes</title>
    <para>
      This section describes how to create new child processes in a
      safe manner.  In addition to the concerns addressed below, there
      is the possibility of file descriptor leaks, see <xref
      linkend="sect-Defensive_Coding-Tasks-Descriptors-Child_Processes"/>.
    </para>
    <section>
      <title>Obtaining the Program Path and the Command-line
      Template</title>
      <para>
 	The name and path to the program being invoked should be
 	hard-coded or controlled by a static configuration file stored
 	at a fixed location (at an absolute file system path).  The
 	same applies to the template for generating the command line.
      </para>
      <para>
 	The configured program name should be an absolute path.  If it
 	is a relative path, the contents of the <envar>PATH</envar>
 	must be obtained in a secure manner (see <xref
 	linkend="sect-Defensive_Coding-Tasks-secure_getenv"/>).
 	If the <envar>PATH</envar> variable is not set or untrusted,
 	the safe default <literal>/bin:/usr/bin</literal> must be
 	used.
      </para>
      <para>
 	If too much flexibility is provided here, it may allow
 	invocation of arbitrary programs without proper authorization.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Processes-execve">
      <title>Bypassing the Shell</title>
      <para>
 	Child processes should be created without involving the system
 	shell.
      </para>
      <para>
 	For C/C++, <function>system</function> should not be used.
 	The <function>posix_spawn</function> function can be used
 	instead, or a combination of <function>fork</function> and
 	<function>execve</function>.  (In some cases, it may be
 	preferable to use <function>vfork</function> or the
 	Linux-specific <function>clone</function> system call instead
 	of <function>fork</function>.)
      </para>
      <para>
 	In Python, the <literal>subprocess</literal> module bypasses
 	the shell by default (when the <literal>shell</literal>
 	keyword argument is not set to true).
 	<function>os.system</function> should not be used.
      </para>
      <para>
 	The Java class <type>java.lang.ProcessBuilder</type> can be
 	used to create subprocesses without interference from the
 	system shell.
      </para>
      <important>
 	<title>Portability notice</title>
 	<para>
 	  On Windows, there is no argument vector, only a single
 	  argument string.  Each application is responsible for parsing
 	  this string into an argument vector.  There is considerable
 	  variance among the quoting styles recognized by applications.
 	  Some of them expand shell wildcards, others do not.  Extensive
 	  application-specific testing is required to make this secure.
 	</para>
      </important>
      <para>
 	Note that some common applications (notably
 	<application>ssh</application>) unconditionally introduce the
 	use of a shell, even if invoked directly without a shell.  It is
 	difficult to use these applications in a secure manner.  In this
 	case, untrusted data should be supplied by other means.  For
 	example, standard input could be used, instead of the command
 	line.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Processes-environ">
      <title>Specifying the Process Environment</title>
      <para>
 	Child processes should be created with a minimal set of
 	environment variables.  This is absolutely essential if there
 	is a trust transition involved, either when the parent process
 	was created, or during the creation of the child process.
      </para>
      <para>
 	In C/C++, the environment should be constructed as an array of
 	strings and passed as the <varname>envp</varname> argument to
 	<function>posix_spawn</function> or <function>execve</function>.
 	The functions <function>setenv</function>,
 	<function>unsetenv</function> and <function>putenv</function>
 	should not be used.  They are not thread-safe and suffer from
 	memory leaks.
      </para>
      <para>
 	Python programs need to specify a <literal>dict</literal> for
 	the <varname>env</varname> argument of the
 	<function>subprocess.Popen</function> constructor.
 	The Java class <literal>java.lang.ProcessBuilder</literal>
 	provides an <function>environment()</function> method,
 	which returns a map that can be manipulated.
      </para>
      <para>
 	The following list provides guidelines for selecting the set
 	of environment variables passed to the child process.
      </para>
      <itemizedlist>
 	<listitem>
 	  <para>
 	    <envar>PATH</envar> should be initialized to
 	    <literal>/bin:/usr/bin</literal>.
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    <envar>USER</envar> and <envar>HOME</envar> can be inherited
 	    from the parent process environment, or they can be
 	    initialized from the <literal>pwent</literal> structure
 	    for the user. <!-- ??? refer to dropping privileges -->
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>The <envar>DISPLAY</envar> and <envar>XAUTHORITY</envar>
 	  variables should be passed to the subprocess if it is an X
 	  program.  Note that this will typically not work across trust
 	  boundaries because <envar>XAUTHORITY</envar> refers to a file
 	  with <literal>0600</literal> permissions.
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    The locale-related environment variables
 	    <envar>LANG</envar>, <envar>LANGUAGE</envar>,
 	    <envar>LC_ADDRESS</envar>, <envar>LC_ALL</envar>,
 	    <envar>LC_COLLATE</envar>, <envar>LC_CTYPE</envar>,
 	    <envar>LC_IDENTIFICATION</envar>,
 	    <envar>LC_MEASUREMENT</envar>, <envar>LC_MESSAGES</envar>,
 	    <envar>LC_MONETARY</envar>, <envar>LC_NAME</envar>,
 	    <envar>LC_NUMERIC</envar>, <envar>LC_PAPER</envar>,
 	    <envar>LC_TELEPHONE</envar> and <envar>LC_TIME</envar>
 	    can be passed to the subprocess if present.
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    The called process may need application-specific
 	    environment variables, for example for passing passwords.
 	    (See <xref
 	    linkend="sect-Defensive_Coding-Tasks-Processes-Command_Line_Visibility"/>.)
 	  </para>
 	</listitem>
 	<listitem>
 	  <para>
 	    All other environment variables should be dropped.  Names
 	    for new environment variables should not be accepted from
 	    untrusted sources.
 	  </para>
 	</listitem>
      </itemizedlist>
    </section>
    <section>
      <title>Robust Argument List Processing</title>
      <para>
 	When invoking a program, it is sometimes necessary to include
 	data from untrusted sources.  Such data should be checked
 	against embedded <literal>NUL</literal> characters because the
 	system APIs will silently truncate argument strings at the first
 	<literal>NUL</literal> character.
      </para>
      <para>
 	The following recommendations assume that the program being
 	invoked uses GNU-style option processing using
 	<function>getopt_long</function>.  This convention is widely
 	used, but it is just that, and individual programs might
 	interpret a command line in a different way.
      </para>
      <para>
 	If the untrusted data has to go into an option, use the
 	<literal>--option-name=VALUE</literal> syntax, placing the
 	option and its value into the same command line argument.
 	This avoids any potential confusion if the data starts with
 	<literal>-</literal>.
      </para>
      <para>
 	For positional arguments, terminate the option list with a
 	single <option>--</option> marker after the last option, and
 	include the data at the right position.  The
 	<option>--</option> marker terminates option processing, and
 	the data will not be treated as an option even if it starts
 	with a dash.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Processes-Command_Line_Visibility">
      <title>Passing Secrets to Subprocesses</title>
      <para>
 	The command line (the name of the program and its arguments) of
 	a running process is traditionally available to all local
 	users.  The called program can overwrite this information, but
 	only after it has run for a bit of time, during which the
 	information may have been read by other processes.  However,
 	on Linux, the process environment is restricted to the user
 	who runs the process.  Therefore, if you need a convenient way
 	to pass a password to a child process, use an environment
 	variable, and not a command line argument.  (See <xref
 	linkend="sect-Defensive_Coding-Tasks-Processes-environ"/>.)
      </para>
      <important>
 	<title>Portability notice</title>
 	<para>
 	  On some UNIX-like systems (notably Solaris), environment
 	  variables can be read by any system user, just like command
 	  lines.
 	</para>
      </important>
      <para>
 	If the environment-based approach cannot be used due to
 	portability concerns, the data can be passed on standard
 	input.  Some programs (notably <application>gpg</application>)
 	use special file descriptors whose numbers are specified on
 	the command line.  Temporary files are an option as well, but
 	they might give digital forensics access to sensitive data
 	(such as passphrases) because it is difficult to safely delete
 	them in all cases.
      </para>
    </section>
  </section>
  <section>
    <title>Handling Child Process Termination</title>
    <para>
      When child processes terminate, the parent process is signalled.
      A stub of the terminated process (a
      <emphasis>zombie</emphasis>, shown as
      <literal>&lt;defunct&gt;</literal> by
      <application>ps</application>) is kept around until the status
      information is collected (<emphasis>reaped</emphasis>) by the
      parent process.  Over the years, several interfaces for this
      have been invented:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  The parent process calls <function>wait</function>,
 	  <function>waitpid</function>, <function>waitid</function>,
 	  <function>wait3</function> or <function>wait4</function>,
 	  without specifying a process ID.  This will deliver any
 	  matching process ID.  This approach is typically used from
 	  within event loops.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The parent process calls <function>waitpid</function>,
 	  <function>waitid</function>, or <function>wait4</function>,
 	  with a specific process ID.  Only data for the specific
 	  process ID is returned.  This is typically used in code
 	  which spawns a single subprocess in a synchronous manner.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  The parent process installs a handler for the
 	  <literal>SIGCHLD</literal> signal, using
 	  <function>sigaction</function>, and specifies the
 	  <literal>SA_NOCLDWAIT</literal> flag.
 	  This approach could be used by event loops as well.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      None of these approaches can be used to wait for child process
      termination in a completely thread-safe manner.  The parent
      process might execute an event loop in another thread, which
      could pick up the termination signal.  This means that libraries
      typically cannot make free use of child processes (for example,
      to run problematic code with reduced privileges in a separate
      address space).
    </para>
    <para>
      At the moment, the parent process should explicitly wait for
      termination of the child process using
      <function>waitpid</function> or <function>waitid</function>,
      and hope that the status is not collected by an event loop
      first.
    </para>
  </section>
  <section>
    <title><literal>SUID</literal>/<literal>SGID</literal>
    processes</title>
    <!-- ??? need to document real vs effective UID -->
    <para>
      Programs can be marked in the file system to indicate to the
      kernel that a trust transition should happen if the program is
      run.  The <literal>SUID</literal> file permission bit indicates
      that an executable should run with the effective user ID equal
      to the owner of the executable file.  Similarly, with the
      <literal>SGID</literal> bit, the effective group ID is set to
      the group of the executable file.
    </para>
    <para>
      Linux supports <emphasis>fscaps</emphasis>, which can grant
      additional capabilities to a process in a finer-grained manner.
      Additional mechanisms can be provided by loadable security
      modules.
    </para>
    <para>
      When such a trust transition has happened, the process runs in a
      potentially hostile environment.  Additional care is necessary
      not to rely on any untrusted information.  These concerns also
      apply to libraries which can be linked into such processes.
    </para>
    <section id="sect-Defensive_Coding-Tasks-secure_getenv">
      <title>Accessing Environment Variables</title>
      <para>
 	The following steps are required so that a program does not
 	accidentally pick up untrusted data from environment
 	variables.
      </para>
      <itemizedlist>
 	<listitem><para>
 	  Compile your C/C++ sources with <literal>-D_GNU_SOURCE</literal>.
 	  The Autoconf macro <literal>AC_GNU_SOURCE</literal> ensures this.
 	</para></listitem>
 	<listitem><para>
 	  Check for the presence of the <function>secure_getenv</function>
 	  and <function>__secure_getenv</function> functions.  The Autoconf
 	  directive <literal>AC_CHECK_FUNCS([__secure_getenv secure_getenv])</literal>
 	  performs these checks.
 	</para></listitem>
 	<listitem><para>
 	  Arrange for a proper definition of the
 	  <function>secure_getenv</function> function.  See <xref
 	  linkend="ex-Defensive_Coding-Tasks-secure_getenv"/>.
 	</para></listitem>
 	<listitem><para>
 	  Use <function>secure_getenv</function> instead of
 	  <function>getenv</function> to obtain the value of critical
 	  environment variables.  <function>secure_getenv</function>
 	  will pretend the variable has not been set if the process
 	  environment is not trusted.
 	</para></listitem>
      </itemizedlist>
      <para>
 	Critical environment variables are debugging flags,
 	configuration file locations, plug-in and log file locations,
 	and anything else that might be used to bypass security
 	restrictions or cause a privileged process to behave in an
 	unexpected way.
      </para>
      <para>
 	Either the <function>secure_getenv</function> function or the
 	<function>__secure_getenv</function> function is available from GNU libc.
      </para>
      <example id="ex-Defensive_Coding-Tasks-secure_getenv">
 	<title>Obtaining a definition for <function>secure_getenv</function></title>
 	<programlisting language="C">
 <![CDATA[
 #include <stdlib.h>
 #ifndef HAVE_SECURE_GETENV
 #  ifdef HAVE__SECURE_GETENV
 #    define secure_getenv __secure_getenv
 #  else
 #    error neither secure_getenv nor __secure_getenv are available
 #  endif
 #endif
 ]]>
 	</programlisting>
      </example>
    </section>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Processes-Daemons">
    <title>Daemons</title>
    <para>
      Background processes providing system services
      (<emphasis>daemons</emphasis>) need to decouple themselves from
      the controlling terminal and the parent process environment:
    </para>
    <itemizedlist>
      <listitem>
 	<para>Fork.</para>
      </listitem>
      <listitem>
 	<para>
 	  In the child process, call <function>setsid</function>.  The
 	  parent process can simply exit (using
 	  <function>_exit</function>, to avoid running clean-up
 	  actions twice).
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  In the child process, fork again.  Processing continues in
 	  the child process.  Again, the parent process should just
 	  exit.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Replace the descriptors 0, 1, 2 with a descriptor for
 	  <filename>/dev/null</filename>.  Logging should be
 	  redirected to <application>syslog</application>.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      Older instructions for creating daemon processes recommended a
      call to <literal>umask(0)</literal>.  This is risky because it
      often leads to world-writable files and directories, resulting
      in security vulnerabilities such as arbitrary process
      termination by untrusted local users, or log file truncation.
      If the <emphasis>umask</emphasis> needs setting, a restrictive
      value such as <literal>027</literal> or <literal>077</literal>
      is recommended.
    </para>
    <para>
      Other aspects of the process environment may have to be changed as
      well (environment variables, signal handler disposition).
    </para>
    <para>
      It is increasingly common that server processes do not run as
      background processes, but as regular foreground processes under a
      supervising master process (such as
      <application>systemd</application>).  Server processes should
      offer a command line option which disables forking and
      replacement of the standard output and standard error streams.
      Such an option is also useful for debugging.
    </para>
  </section>
  <section>
    <title>Semantics of Command-line Arguments</title>
    <!-- ??? This applies in two ways, safely calling an other process
         and support for being called safely.  Also need to address
         untrusted current directory on USB sticks.  -->
    <para>
      After process creation and option processing, it is up to the
      child process to interpret the arguments. Arguments can be
      file names, host names, or URLs, and many other things.  URLs
      can refer to the local network, some server on the Internet,
      or to the local file system.  Some applications even accept
      arbitrary code in arguments (for example,
      <application>python</application> with the
      <option>-c</option> option).
    </para>
    <para>
      Similar concerns apply to environment variables, the contents
      of the current directory and its subdirectories.
      <!-- ??? refer to section on temporary directories -->
    </para>
    <para>
      Consequently, careful analysis is required to determine whether it is safe to
      pass untrusted data to another program.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Processes-Fork-Parallel">
    <title><function>fork</function> as a Primitive for Parallelism</title>
    <para>
      A call to <function>fork</function> which is not immediately
      followed by a call to <function>execve</function> (perhaps after
      rearranging and closing file descriptors) is typically unsafe,
      especially from a library which does not control the state of
      the entire process.  Such use of <function>fork</function>
      should be replaced with proper child processes or threads.
    </para>
  </section>
 </chapter>
--- a/en-US/Tasks-Serialization.xml
+++ b/en-US/Tasks-Serialization.xml
@ -1,610 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Tasks-Serialization">
  <title>Serialization and Deserialization</title>
  <para>
    Protocol decoders and file format parsers are often the
    most-exposed part of an application because they are exposed with
    little or no user interaction and before any authentication and
    security checks are made.  They are also difficult to write
    robustly in languages which are not memory-safe.
  </para>
  <section id="sect-Defensive_Coding-Tasks-Serialization-Decoders">
    <title>Recommendations for Manually-written Decoders</title>
    <para>
      For C and C++, the advice in <xref
      linkend="sect-Defensive_Coding-C-Pointers"/> applies.  In
      addition, avoid non-character pointers directly into input
      buffers.  Pointer misalignment causes crashes on some
      architectures.
    </para>
    <para>
      When reading variable-sized objects, do not allocate large
      amounts of data solely based on the value of a size field.  If
      possible, grow the data structure as more data is read from the
      source, and stop when no data is available.  This helps to avoid
      denial-of-service attacks where small amounts of input data
      result in enormous memory allocations during decoding.
      Alternatively, you can impose reasonable bounds on memory
      allocations, but some protocols do not permit this.
    </para>
  </section>
  <section>
    <title>Protocol Design</title>
    <para>
      Binary formats with explicit length fields are more difficult to
      parse robustly than those where the length of dynamically-sized
      elements is derived from sentinel values.  A protocol which does
      not use length fields and can be written in printable ASCII
      characters simplifies testing and debugging.  However, binary
      protocols with length fields may be more efficient to parse.
    </para>
    <para>
      In new datagram-oriented protocols, unique numbers such as
      sequence numbers or identifiers for fragment reassembly (see
      <xref
      linkend="sect-Defensive_Coding-Tasks-Serialization-Fragmentation"/>)
      should be at least 64 bits large, and really should not be
      smaller than 32 bits in size.  Protocols should not permit
      fragments with overlapping contents.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Serialization-Fragmentation">
    <title>Fragmentation</title>
    <para>
      Some serialization formats use frames or protocol data units
      (PDUs) on lower levels which are smaller than the PDUs on higher
      levels.  With such an architecture, higher-level PDUs may have
      to be <emphasis>fragmented</emphasis> into smaller frames during
      serialization, and frames may need
      <emphasis>reassembly</emphasis> into large PDUs during
      deserialization.
    </para>
    <para>
      Serialization formats may use conceptually similar structures
      for completely different purposes, for example storing multiple
      layers and color channels in a single image file.
    </para>
    <para>
      When fragmenting PDUs, establish a reasonable lower bound for
      the size of individual fragments (as large as possible—limits as
      low as one or even zero can add substantial overhead).  Avoid
      fragmentation if at all possible, and try to obtain the maximum
      acceptable fragment length from a trusted data source.
    </para>
    <para>
      When implementing reassembly, consider the following aspects.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Avoid allocating a significant amount of resources without
 	  proper authentication.  Allocate memory for the unfragmented
 	  PDU as more and more fragments are encountered, and not
 	  based on the initially advertised unfragmented PDU size,
 	  unless there is a sufficiently low limit on the unfragmented
 	  PDU size, so that over-allocation cannot lead to performance
 	  problems.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Reassembly queues on top of datagram-oriented transports
 	  should be bounded, both in the combined size of the arrived
 	  partial PDUs waiting for reassembly, and the total number of
 	  partially reassembled fragments.  The latter limit helps to
 	  reduce the risk of accidental reassembly of unrelated
 	  fragments, as it can happen with small fragment IDs (see
 	  <xref linkend="sect-Defensive_Coding-Tasks-Serialization-Fragmentation-ID"/>).
 	  It also guards to some extent against deliberate injection of fragments,
 	  by guessing fragment IDs.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Carefully keep track of which bytes in the unfragmented PDU
 	  have been covered by fragments so far.  If message
 	  reordering is a concern, the most straightforward data
 	  structure for this is an array of bits, with one bit for
 	  every byte (or other atomic unit) in the unfragmented PDU.
 	  Complete reassembly can be determined by increasing a
 	  counter of set bits in the bit array as the bit array is
 	  updated, taking overlapping fragments into consideration.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Reject overlapping fragments (that is, multiple fragments
 	  which provide data at the same offset of the PDU being
 	  fragmented), unless the protocol explicitly requires
 	  accepting overlapping fragments.  The bit array used for
 	  tracking already arrived bytes can be used for this purpose.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Check for conflicting values of unfragmented PDU lengths (if
 	  this length information is part of every fragment) and
 	  reject fragments which are inconsistent.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Validate fragment lengths and offsets of individual
 	  fragments against the unfragmented PDU length (if they are
 	  present).  Check that the last byte in the fragment does not
 	  lie after the end of the unfragmented PDU.  Avoid integer
 	  overflows in these computations (see <xref
 	  linkend="sect-Defensive_Coding-C-Arithmetic"/>).
 	</para>
      </listitem>
    </itemizedlist>
    <section id="sect-Defensive_Coding-Tasks-Serialization-Fragmentation-ID">
      <title>Fragment IDs</title>
      <para>
 	If the underlying transport is datagram-oriented (so that PDUs
 	can be reordered, duplicated or be lost, like with UDP),
 	fragment reassembly needs to take into account endpoint
 	addresses of the communication channel, and there has to be
 	some sort of fragment ID which identifies the individual
 	fragments as part of a larger PDU.  In addition, the
 	fragmentation protocol will typically involve fragment offsets
 	and fragment lengths, as mentioned above.
      </para>
      <para>
 	If the transport may be subject to blind PDU injection (again,
 	like UDP), the fragment ID must be generated randomly.  If the
 	fragment ID is 64 bit or larger (strongly recommended), it can
 	be generated in a completely random fashion for most traffic
 	volumes.  If it is less than 64 bits large (so that accidental
 	collisions can happen if a lot of PDUs are transmitted), the
 	fragment ID should be incremented sequentially from a starting
 	value.  The starting value should be derived using a HMAC-like
 	construction from the endpoint addresses, using a long-lived
 	random key.  This construction ensures that despite the
 	limited range of the ID, accidental collisions are as unlikely
 	as possible.  (This will not work reliably with really short
 	fragment IDs, such as the 16 bit IDs used by the Internet
 	Protocol.)
      </para>
    </section>
  </section>
  <section>
    <title id="sect-Defensive_Coding-Tasks-Serialization-Library">Library
    Support for Deserialization</title>
    <para>
      For some languages, generic libraries are available which allow
      serializing and deserializing user-defined objects.  The
      deserialization part comes in one of two flavors, depending on
      the library.  The first kind uses type information in the data
      stream to control which objects are instantiated.  The second
      kind uses type definitions supplied by the programmer.  The
      first one allows arbitrary object instantiation, the second one
      generally does not.
    </para>
    <para>
      The following serialization frameworks are in the first category,
      are known to be unsafe, and must not be used for untrusted data:
    </para>
    <itemizedlist>
      <listitem><para>
 	Python's <package>pickle</package> and <package>cPickle</package>
 	modules, and wrappers such as <package>shelve</package>
      </para></listitem>
      <listitem><para>
 	Perl's <package>Storable</package> package
      </para></listitem>
      <listitem><para>
 	Java serialization (<type>java.io.ObjectInputStream</type>),
 	even if encoded in other formats (as with
 	<type>java.beans.XMLDecoder</type>)
      </para></listitem>
      <listitem><para>
 	PHP serialization (<function>unserialize</function>)
      </para></listitem>
      <listitem><para>
 	Most implementations of YAML
      </para></listitem>
    </itemizedlist>
    <para>
      When using a type-directed deserialization format where the
      types of the deserialized objects are specified by the
      programmer, make sure that the objects which can be instantiated
      cannot perform any destructive actions in their destructors,
      even when the data members have been manipulated.
    </para>
    <para>
      In general, JSON decoders do not suffer from this problem.  But
      you must not use the <function>eval</function> function to parse
      JSON objects in JavaScript; even with the regular expression
      filter from RFC 4627, there are still information leaks
      remaining.  JSON-based formats can still turn out risky if they
      serve as an encoding form for any of the serialization
      frameworks listed above.
    </para>
    <para>
      For serialization in C and C++ projects, the Protocol Buffers serialization
      (<package>protobuf</package>) provides type safe automated serialization
      by relying on code generation. It is positioned as similar to, but simpler and
      more efficient than, XML serialization.
    </para>
  </section>
  <section id="sect-Defensive_Coding-Tasks-Serialization-XML">
    <title>XML Serialization</title>
    <para>
    </para>
    <section id="sect-Defensive_Coding-Tasks-Serialization-XML-External">
      <title>External References</title>
      <para>
 	XML documents can contain external references.  They can occur
 	in various places.
      </para>
      <itemizedlist>
 	<listitem>
 	  <para>
 	    In the DTD declaration in the header of an XML document:
 	  </para>
 	  <informalexample>
 	    <programlisting language="XML">
 <![CDATA[<!DOCTYPE html PUBLIC
  "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">]]>
 	    </programlisting>
 	  </informalexample>
 	</listitem>
 	<listitem>
 	  <para>
 	    In a namespace declaration:
 	  </para>
 	  <informalexample>
 	    <programlisting language="XML">
 <![CDATA[<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">]]>
 	    </programlisting>
 	  </informalexample>
 	</listitem>
 	<listitem>
 	  <para>
 	    In an entity definition:
 	  </para>
 	  <informalexample>
 	    <programlisting language="XML">
 <![CDATA[<!ENTITY sys SYSTEM "http://www.example.com/ent.xml">
 <!ENTITY pub PUBLIC "-//Example//Public Entity//EN"
  "http://www.example.com/pub-ent.xml">]]>
 	    </programlisting>
 	  </informalexample>
 	</listitem>
 	<listitem>
 	  <para>
 	    In a notation:
 	  </para>
 	  <informalexample>
 	    <programlisting language="XML">
 <![CDATA[<!NOTATION not SYSTEM "../not.xml">]]>
 	    </programlisting>
 	  </informalexample>
 	</listitem>
      </itemizedlist>
      <para>
 	Originally, these external references were intended as unique
 	identifiers, but many XML implementations use them
 	for locating the data for the referenced element.  This causes
 	unwanted network traffic, and may disclose file system
 	contents or otherwise unreachable network resources, so this
 	functionality should be disabled.
      </para>
      <para>
 	Depending on the XML library, external references might be
 	processed not just when parsing XML, but also when generating
 	it.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Serialization-XML-Entities">
      <title>Entity Expansion</title>
      <para>
 	When external DTD processing is disabled, an internal DTD
 	subset can still contain entity definitions.  Entity
 	declarations can reference other entities.  Some XML libraries
 	expand entities automatically, and this processing cannot be
 	switched off in some places (such as attribute values or
 	content models).  Without limits on the entity nesting level,
 	this expansion results in data which can grow exponentially in
 	length with the size of the input.  (If there is a limit on the
 	nesting level, the growth is still polynomial, unless further
 	limits are imposed.)
      </para>
      <para>
 	Consequently, the processing of internal DTD subsets should be
 	disabled if possible, and only trusted DTDs should be
 	processed.  If a particular XML application does not permit
 	such restrictions, then application-specific limits are called
 	for.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Serialization-XML-XInclude">
      <title>XInclude Processing</title>
      <para>
 	XInclude processing can reference file and network resources
 	and include them into the document, much like external entity
 	references.  When parsing untrusted XML documents, XInclude
 	processing should be turned off.
      </para>
      <para>
 	XInclude processing is also fairly complex and may pull in
 	support for the XPointer and XPath specifications,
 	considerably increasing the amount of code required for XML
 	processing.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Serialization-XML-Validation">
      <title>Algorithmic Complexity of XML Validation</title>
      <para>
 	DTD-based XML validation uses regular expressions for content
 	models.  The XML specification requires that content models
 	are deterministic, which means that efficient validation is
 	possible.  However, some implementations do not enforce
 	determinism, and require an exponential (or just polynomial)
 	amount of space or time for validating some DTD/document
 	combinations.
      </para>
      <para>
 	XML schemas and RELAX NG (via the <literal>xsd:</literal>
 	prefix) directly support textual regular expressions which are
 	not required to be deterministic.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Serialization-XML-Expat">
      <title>Using Expat for XML Parsing</title>
      <para>
 	By default, Expat does not try to resolve external IDs, so no
 	steps are required to block them.  However, internal entity
 	declarations are processed.  Installing a callback which stops
 	parsing as soon as such entities are encountered disables
 	them, see <xref
 	linkend="ex-Defensive_Coding-Tasks-Serialization-XML-Expat-EntityDeclHandler"/>.
 	Expat does not perform any validation, so there are no
 	problems related to that.
      </para>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-Expat-EntityDeclHandler">
 	<title>Disabling XML entity processing with Expat</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-Expat-EntityDeclHandler.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	This handler must be installed when the
 	<literal>XML_Parser</literal> object is created (<xref
 	linkend="ex-Defensive_Coding-Tasks-Serialization-XML-Expat-Create"/>).
      </para>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-Expat-Create">
 	<title>Creating an Expat XML parser</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-Expat-Create.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	It is also possible to reject internal DTD subsets altogether,
 	using a suitable
 	<literal>XML_StartDoctypeDeclHandler</literal> handler
 	installed with <function>XML_SetDoctypeDeclHandler</function>.
      </para>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Serialization-Qt">
      <title>Using Qt for XML Parsing</title>
      <para>
 	The XML component of Qt, QtXml, does not resolve external IDs
 	by default, so it is not required to prevent such resolution.
 	Internal entities are processed, though.  To change that, a
 	custom <literal>QXmlDeclHandler</literal> and
 	<literal>QXmlSimpleReader</literal> subclasses are needed.  It
 	is not possible to use the
 	<function>QDomDocument::setContent(const QByteArray
 	&amp;)</function> convenience methods.
      </para>
      <para>
 	<xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityHandler"/>
 	shows an entity handler which always returns errors, causing
 	parsing to stop when encountering entity declarations.
      </para>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityHandler">
 	<title>A QtXml entity handler which blocks entity processing</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-Qt-NoEntityHandler.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	This handler is used in the custom
 	<literal>QXmlReader</literal> subclass in <xref
 	linkend="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityReader"/>.
 	Some parts of QtXml will call the
 	<function>setDeclHandler(QXmlDeclHandler *)</function> method.
 	Consequently, we prevent overriding our custom handler by
 	providing a definition of this method which does nothing.  In
 	the constructor, we activate namespace processing; this part
 	may need adjusting.
      </para>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-NoEntityReader">
 	<title>A QtXml XML reader which blocks entity processing</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-Qt-NoEntityReader.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	Our <literal>NoEntityReader</literal> class can be used with
 	one of the overloaded
 	<function>QDomDocument::setContent</function> methods.
 	<xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-QDomDocument"/>
 	shows how the <literal>buffer</literal> object (of type
 	<literal>QByteArray</literal>) is wrapped as a
 	<literal>QXmlInputSource</literal>.  After calling the
 	<function>setContent</function> method, you should check the
 	return value and report any error.
      </para>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-Qt-QDomDocument">
 	<title>Parsing an XML document with QDomDocument, without entity expansion</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-Qt-QDomDocument.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
    </section>
    <section id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse">
      <title>Using OpenJDK for XML Parsing and Validation</title>
      <para>
 	OpenJDK contains facilities for DOM-based, SAX-based, and
 	StAX-based document parsing.  Documents can be validated
 	against DTDs or XML schemas.
      </para>
      <para>
 	The approach taken to deal with entity expansion differs from
 	the general recommendation in <xref
 	linkend="sect-Defensive_Coding-Tasks-Serialization-XML-Entities"/>.
 	We enable the feature flag
 	<literal>javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING</literal>,
 	which enforces heuristic restrictions on the number of entity
 	expansions.  Note that this flag alone does not prevent
 	resolution of external references (system IDs or public IDs),
 	so it is slightly misnamed.
      </para>
      <para>
 	In the following sections, we use helper classes to prevent
 	external ID resolution.
      </para>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoEntityResolver">
 	<title>Helper class to prevent DTD external entity resolution in OpenJDK</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-OpenJDK-NoEntityResolver.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoResourceResolver">
 	<title>Helper class to prevent schema resolution in
 	OpenJDK</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-OpenJDK-NoResourceResolver.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <para>
 	<xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-Imports"/>
 	shows the imports used by the examples.
      </para>
      <example id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-Imports">
 	<title>Java imports for OpenJDK XML parsing</title>
 	<xi:include href="snippets/Tasks-Serialization-XML-OpenJDK-Imports.xml"
 		    xmlns:xi="http://www.w3.org/2001/XInclude" />
      </example>
      <section id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM">
 	<title>DOM-based XML parsing and DTD validation in OpenJDK</title>
 	<para>
 	  This approach produces a
 	  <literal>org.w3c.dom.Document</literal> object from an input
 	  stream.  <xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM"/>
 	  uses the data from the <literal>java.io.InputStream</literal>
 	  instance in the <literal>inputStream</literal> variable.
 	</para>
 	<example id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM">
 	  <title>DOM-based XML parsing in OpenJDK</title>
 	  <xi:include href="snippets/Tasks-Serialization-XML-OpenJDK_Parse-DOM.xml"
 		      xmlns:xi="http://www.w3.org/2001/XInclude" />
 	</example>
 	<para>
 	  External entity references are prohibited using the
 	  <literal>NoEntityResolver</literal> class in
 	  <xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoEntityResolver"/>.
 	  Because external DTD references are prohibited, DTD validation
 	  (if enabled) will only happen against the internal DTD subset
 	  embedded in the XML document.
 	</para>
 	<para>
 	  To validate the document against an external DTD, use a
 	  <literal>javax.xml.transform.Transformer</literal> class to
 	  add the DTD reference to the document, and an entity
 	  resolver which whitelists this external reference.
 	</para>
      </section>
      <section id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-SAX">
 	<title>XML Schema Validation in OpenJDK</title>
 	<para>
 	  <xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_SAX"/>
 	  shows how to validate a document against an XML Schema,
 	  using a SAX-based approach.  The XML data is read from a
 	  <literal>java.io.InputStream</literal> in the
 	  <literal>inputStream</literal> variable.
 	</para>
 	<example id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_SAX">
 	  <title>SAX-based validation against an XML schema in
 	  OpenJDK</title>
 	  <xi:include href="snippets/Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_SAX.xml"
 		      xmlns:xi="http://www.w3.org/2001/XInclude" />
 	</example>
 	<para>
 	  The <literal>NoResourceResolver</literal> class is defined
 	  in <xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK-NoResourceResolver"/>.
 	</para>
 	<para>
 	  If you need to validate a document against an XML schema,
 	  use the code in <xref
 	  linkend="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-DOM"/>
 	  to create the document, but do not enable validation at this
 	  point.  Then use
 	  <xref linkend="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_DOM"/>
 	  to perform the schema-based validation on the
 	  <literal>org.w3c.dom.Document</literal> instance
 	  <literal>document</literal>.
 	</para>
 	<example id="ex-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_DOM">
 	  <title>Validation of a DOM document against an XML schema in
 	  OpenJDK</title>
 	  <xi:include href="snippets/Tasks-Serialization-XML-OpenJDK_Parse-XMLSchema_DOM.xml"
 		      xmlns:xi="http://www.w3.org/2001/XInclude" />
 	</example>
      </section>
      <section id="sect-Defensive_Coding-Tasks-Serialization-XML-OpenJDK_Parse-Other">
 	<title>Other XML Parsers in OpenJDK</title>
 	<para>
 	  OpenJDK contains additional XML parsing and processing
 	  facilities.  Some of them are insecure.
 	</para>
 	<para>
 	  The class <type>java.beans.XMLDecoder</type> acts as a
 	  bridge between the Java object serialization format and XML.
 	  It is close to impossible to securely deserialize Java
 	  objects in this format from untrusted inputs, so its use is
 	  not recommended, as with the Java object serialization
 	  format itself.  See <xref
 	  linkend="sect-Defensive_Coding-Tasks-Serialization-Library"/>.
 	</para>
      </section>
    </section>
  </section>
  <section>
    <title>Protocol Encoders</title>
    <para>
      For protocol encoders, you should write bytes to a buffer which
      grows as needed, using an exponential sizing policy.  Explicit
      lengths can be patched in later, once they are known.
      Allocating the required number of bytes upfront typically
      requires separate code to compute the final size, which must be
      kept in sync with the actual encoding step, or vulnerabilities
      may result.  In multi-threaded code, parts of the object being
      serialized might change, so that the computed size is out of
      date.
    </para>
    <para>
      You should avoid copying data directly from a received packet
      during encoding, disregarding the format.  Propagating malformed
      data could enable attacks on other recipients of that data.
    </para>
    <para>
      When using C or C++ and copying whole data structures directly
      into the output, make sure that you do not leak information in
      padding bytes between fields or at the end of the
      <literal>struct</literal>.
    </para>
  </section>
 </chapter>
--- a/en-US/Tasks-Temporary_Files.xml
+++ b/en-US/Tasks-Temporary_Files.xml
@ -1,257 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Tasks-Temporary_Files">
  <title>Temporary Files</title>
  <para>
    In this chapter, we describe how to create temporary files and
    directories, how to remove them, and how to work with programs
    which do not create files in ways that are safe with a shared
    directory for temporary files.  General file system manipulation
    is treated in a separate chapter, <xref
    linkend="chap-Defensive_Coding-Tasks-File_System"/>.
  </para>
  <para>
    Secure creation of temporary files has four different aspects.
  </para>
  <itemizedlist>
    <listitem>
      <para>
 	The location of the directory for temporary files must be
 	obtained in a secure manner (that is, untrusted environment
 	variables must be ignored, see <xref
 	linkend="sect-Defensive_Coding-Tasks-secure_getenv"/>).
      </para>
    </listitem>
    <listitem>
      <para>
 	A new file must be created.  Reusing an existing file must be
 	avoided (the <filename class="directory">/tmp</filename> race
 	condition).  This is tricky because traditionally, system-wide
 	temporary directories shared by all users are used.
      </para>
    </listitem>
    <listitem>
      <para>
 	The file must be created in a way that makes it impossible for
 	other users to open it.
      </para>
    </listitem>
    <listitem>
      <para>
 	The descriptor for the temporary file should not leak to
 	subprocesses.
      </para>
    </listitem>
  </itemizedlist>
  <para>
    All functions mentioned below will take care of these aspects.
  </para>
  <para>
    Traditionally, temporary files are often used to reduce memory
    usage of programs.  More and more systems use RAM-based file
    systems such as <literal>tmpfs</literal> for storing temporary
    files, to increase performance and decrease wear on Flash storage.
    As a result, spooling data to temporary files does not result in
    any memory savings, and the related complexity can be avoided if
    the data is kept in process memory.
  </para>
  <section id="chap-Defensive_Coding-Tasks-Temporary_Files-Location">
    <title>Obtaining the Location of the Temporary Directory</title>
    <para>
      Some functions below need the location of a directory which
      stores temporary files.  For C/C++ programs, use the following
      steps to obtain that directory:
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Use <function>secure_getenv</function> to obtain the value
 	  of the <literal>TMPDIR</literal> environment variable.  If
 	  it is set, convert the path to a fully-resolved absolute
 	  path, using <literal>realpath(path, NULL)</literal>.  Check
 	  if the new path refers to a directory and is writeable.  In
 	  this case, use it as the temporary directory.
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Fall back to <filename class="directory">/tmp</filename>.
 	</para>
      </listitem>
    </itemizedlist>
    <para>
      In Python, you can use the <varname>tempfile.tempdir</varname>
      variable.
    </para>
    <para>
      Java does not support SUID/SGID programs, so you can use the
      <function>java.lang.System.getenv(String)</function> method to
      obtain the value of the <literal>TMPDIR</literal> environment
      variable, and follow the two steps described above.  (Java's
      default directory selection does not honor
      <literal>TMPDIR</literal>.)
    </para>
  </section>
  <section>
    <title>Named Temporary Files</title>
    <para>
      The <function>mkostemp</function> function creates a named
      temporary file.  You should specify the
      <literal>O_CLOEXEC</literal> flag to avoid file descriptor leaks
      to subprocesses. (Applications which do not use multiple threads
      can also use <function>mkstemp</function>, but libraries should
      use <function>mkostemp</function>.)  For determining the
      directory part of the file name pattern, see <xref
      linkend="chap-Defensive_Coding-Tasks-Temporary_Files-Location"/>.
    </para>
    <para>
      The file is not removed automatically.  It is not safe to rename
      or delete the file before processing, or transform the name in
      any way (for example, by adding a file extension).  If you need
      multiple temporary files, call <function>mkostemp</function>
      multiple times.  Do not create additional file names derived
      from the name provided by a previous
      <function>mkostemp</function> call.  However, it is safe to close
      the descriptor returned by <function>mkostemp</function> and
      reopen the file using the generated name.
    </para>
    <para>
      The Python class <literal>tempfile.NamedTemporaryFile</literal>
      provides similar functionality, except that the file is deleted
      automatically by default.  Note that you may have to use the
      <literal>file</literal> attribute to obtain the actual file
      object because some programming interfaces cannot deal with
      file-like objects. The C function <function>mkostemp</function>
      is also available as <function>tempfile.mkstemp</function>.
    </para>
    <para>
      In Java, you can use the
      <function>java.io.File.createTempFile(String, String,
      File)</function> function, using the temporary file location
      determined according to <xref
      linkend="chap-Defensive_Coding-Tasks-Temporary_Files-Location"/>.
      Do not use <function>java.io.File.deleteOnExit()</function> to
      delete temporary files, and do not register a shutdown hook for
      each temporary file you create.  In both cases, the deletion
      hint cannot be removed from the system if you delete the
      temporary file prior to termination of the VM, causing a memory
      leak.
    </para>
  </section>
  <section>
    <title>Temporary Files without Names</title>
    <para>
      The <function>tmpfile</function> function creates a temporary
      file and immediately deletes it, while keeping the file open.
      As a result, the file lacks a name and its space is deallocated
      as soon as the file descriptor is closed (including the implicit
      close when the process terminates).  This avoids cluttering the
      temporary directory with orphaned files.
    </para>
    <para>
      Alternatively, if the maximum size of the temporary file is
      known beforehand, the <function>fmemopen</function> function can
      be used to create a <literal>FILE *</literal> object which is
      backed by memory.
    </para>
    <para>
      In Python, unnamed temporary files are provided by the
      <literal>tempfile.TemporaryFile</literal> class, and the
      <literal>tempfile.SpooledTemporaryFile</literal> class provides
      a way to avoid creation of small temporary files.
    </para>
    <para>
      Java does not support unnamed temporary files.
    </para>
  </section>
  <section id="chap-Defensive_Coding-Tasks-Temporary_Directory">
    <title>Temporary Directories</title>
    <para>
      The <function>mkdtemp</function> function can be used to create
      a temporary directory.  (For determining the directory part of
      the file name pattern, see <xref
      linkend="chap-Defensive_Coding-Tasks-Temporary_Files-Location"/>.)
      The directory is not automatically removed.  In Python, this
      function is available as <function>tempfile.mkdtemp</function>.
      In Java 7, temporary directories can be created using the
      <function>java.nio.file.Files.createTempDirectory(Path, String,
      FileAttribute...)</function> function.
    </para>
    <para>
      When creating files in the temporary directory, use
      automatically generated names, e.g., derived from a sequential
      counter.  Files with externally provided names could be picked
      up in unexpected contexts, and crafted names could actually
      point outside of the temporary directory (due to
      <emphasis>directory traversal</emphasis>).
    </para>
    <para>
      Removing a directory tree in a completely safe manner is
      complicated.  Unless there are overriding performance concerns,
      the <application>rm</application> program should be used, with
      the <option>-rf</option> and <option>--</option> options.
    </para>
  </section>
  <section>
    <title>Compensating for Unsafe File Creation</title>
    <para>
      There are two ways to make a function or program which expects a
      file name safe for use with temporary files. See
      <xref linkend="sect-Defensive_Coding-Tasks-Processes-Creation"/>,
      for details on subprocess creation.
    </para>
    <itemizedlist>
      <listitem>
 	<para>
 	  Create a temporary directory and place the file there.  If
 	  possible, run the program in a subprocess which uses the
 	  temporary directory as its current directory, with a
 	  restricted environment.
 	  Use generated names for all files in that temporary
 	  directory.  (See <xref
 	  linkend="chap-Defensive_Coding-Tasks-Temporary_Directory"/>.)
 	</para>
      </listitem>
      <listitem>
 	<para>
 	  Create the temporary file and pass the generated file name
 	  to the function or program.  This only works if the function
 	  or program can cope with a zero-length existing file.  It is
 	  safe only under additional assumptions:
 	</para>
 	<itemizedlist>
 	  <listitem>
 	    <para>
 	      The function or program must not create additional files
 	      whose name is derived from the specified file name or
 	      are otherwise predictable.
 	    </para>
 	  </listitem>
 	  <listitem>
 	    <para>
 	      The function or program must not delete the file before
 	      processing it.
 	    </para>
 	  </listitem>
 	  <listitem>
 	    <para>
 	      It must not access any existing files in the same
 	      directory.
 	    </para>
 	  </listitem>
 	</itemizedlist>
 	<para>
 	  It is often difficult to check whether these additional
 	  assumptions are matched, therefore this approach is not
 	  recommended.
 	</para>
      </listitem>
    </itemizedlist>
  </section>
 </chapter>
--- a/en-US/Vala.xml
+++ b/en-US/Vala.xml
@ -1,53 +0,0 @@
 <?xml version='1.0' encoding='utf-8' ?>
 <!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
 ]>
 <chapter id="chap-Defensive_Coding-Vala">
 <title>The Vala Programming Language</title>
 <para>
  Vala is a programming language mainly targeted at GNOME developers.
 </para>
 <para>
  Its syntax is inspired by C# (and thus, indirectly, by Java).  But
  unlike C# and Java, Vala does not attempt to provide memory safety:
  Vala is compiled to C, and the C code is compiled with GCC using
  typical compiler flags.  Basic operations like integer arithmetic
  are directly mapped to C constructs.  As a result, the
  recommendations in <xref linkend="chap-Defensive_Coding-C"/> apply.
 </para>
 <para>
  In particular, the following Vala language constructs can result in
  undefined behavior at run time:
 </para>
 <itemizedlist>
  <listitem>
    <para>
      Integer arithmetic, as described in <xref
      linkend="sect-Defensive_Coding-C-Arithmetic"/>.
    </para>
  </listitem>
  <listitem>
    <para>
      Pointer arithmetic, string subscripting and the
      <literal>substring</literal> method on strings (the
      <literal>string</literal> class in the
      <literal>glib-2.0</literal> package) are not range-checked.  It
      is the responsibility of the calling code to ensure that the
      arguments being passed are valid.  This applies even to cases
      (like <literal>substring</literal>) where the implementation
      would have range information to check the validity of indexes.
      See <xref linkend="sect-Defensive_Coding-C-Pointers"/>.
    </para>
  </listitem>
  <listitem>
    <para>
      Similarly, Vala only performs garbage collection (through
      reference counting) for <literal>GObject</literal> values.  For
      plain C pointers (such as strings), the programmer has to ensure
      that storage is deallocated once it is no longer needed (to
      avoid memory leaks), and that storage is not being deallocated
      while it is still being used (see <xref
      linkend="sect-Defensive_Coding-C-Use-After-Free"/>).
    </para>
  </listitem>
 </itemizedlist>
 </chapter>
--- a/en-US/Web_Applications.xml.txt
+++ b/en-US/Web_Applications.xml.txt
--- a/en-US/images/icon.svg
+++ b/en-US/images/icon.svg
--- a/en-US/schemas.xml
+++ b/en-US/schemas.xml
@ -1,4 +0,0 @@
 <?xml version="1.0"?>
 <locatingRules xmlns="http://thaiopensource.com/ns/locating-rules/1.0">
  <include rules="../schemas.xml"/>
 </locatingRules>
--- a/publican.cfg
+++ b/publican.cfg
@ -1,10 +0,0 @@
 xml_lang: en-US
 brand: fedora
 chunk_section_depth: 3
 #product: Defensive_Coding
 #mainfile: Defensive_Coding
 #docname: Defensive_Coding
 web_version_label: UNUSED
 version: 1
 #git_branch: eng-docs-rhel-6
 doc_url: https://gitlab.com/redhat-sectech/defensive-coding-guide
--- a/schemas.xml
+++ b/schemas.xml
@ -1,6 +0,0 @@
 <?xml version="1.0"?>
 <locatingRules xmlns="http://thaiopensource.com/ns/locating-rules/1.0">
  <documentElement localName="book" uri="docbook-schema/docbook.rnc"/>
  <documentElement localName="chapter" uri="docbook-schema/docbook.rnc"/>
  <documentElement localName="section" uri="docbook-schema/docbook.rnc"/>
 </locatingRules>
		`@ -1,2 +0,0 @@`
			`<!ENTITY YEAR "2012-2017">`
			`<!ENTITY HOLDER "Red Hat, Inc">`