
0c@_=                 @   s  d  d l  m Z m Z m Z d  d l m Z d  d l Z d  d l m Z m	 Z	 d d l
 m Z m Z m Z d d l
 m Z m Z m Z d d l m Z m Z d  d	 l m Z d
 j e  d Z e j d e d  Z e j d e d  Z i  Z e d  d k Z x e e j    D] \ Z  Z! e o9e e!  d k pRe oRe e!  d k rXqe! d k re e!  d k re j" e!  Z! n e# e!  Z! e! e k pe  j$   re  e e! <qWd d   Z% e d e%  d d d d  Z& Gd d   d e'  Z( Gd d   d e)  Z* d S)    )absolute_importdivisionunicode_literals)	text_typeN)register_errorxmlcharrefreplace_errors   )voidElementsbooleanAttributesspaceCharacters)rcdataElementsentitiesxmlEntities)treewalkers_utils)escape z"'=<>`[]u_    	
 /`  ᠎᠏               　]u   􏿿   &c       
      C   s  t  |  t t f  rzg  } g  } d } x t |  j |  j |  j   D] \ } } | rb d } qG | |  j } t j |  j | t	 |  j | d g    r t j
 |  j | | d   } d } n t |  } | j |  qG Wx} | D]u } t j |  }	 |	 r?| j d  | j |	  |	 j d  s`| j d  q | j d t |  d d    q Wd j |  |  j f St |   Sd  S)NFr   Tr   ;z&#x%s;r   )
isinstanceUnicodeEncodeErrorUnicodeTranslateError	enumerateobjectstartendr   isSurrogatePairminsurrogatePairToCodepointordappend_encode_entity_mapgetendswithhexjoinr   )
excres
codepointsskipicindex	codepointcpe r3   >/tmp/pip-build-jynh7p1z/pip/pip/_vendor/html5lib/serializer.pyhtmlentityreplace_errors*   s0    ,/ 	%r5   htmlentityreplaceetreec             K   s1   t  j |  } t |   } | j | |   |  S)a  Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

    )r   getTreeWalkerHTMLSerializerrender)inputtreeencodingserializer_optswalkersr3   r3   r4   	serializeK   s    rA   c               @   s   e  Z d  Z d Z d Z d Z d Z d Z d Z d Z	 d Z
 d Z d Z d Z d Z d Z d Z d! Z d d   Z d d   Z d d   Z d d d  Z d d d  Z d d d   Z d S)"r9   legacy"TFquote_attr_values
quote_charuse_best_quote_charomit_optional_tagsminimize_boolean_attributesuse_trailing_solidusspace_before_trailing_solidusescape_lt_in_attrsescape_rcdataresolve_entitiesalphabetical_attributesinject_meta_charsetstrip_whitespacesanitizec          	   K   s   t  |  t  |  j  } t |  d k rG t d t t |     d | k r\ d |  _ x6 |  j D]+ } t |  | | j | t	 |  |    qf Wg  |  _
 d |  _ d S)aB
  Initialize HTMLSerializer

        :arg inject_meta_charset: Whether or not to inject the meta charset.

            Defaults to ``True``.

        :arg quote_attr_values: Whether to quote attribute values that don't
            require quoting per legacy browser behavior (``"legacy"``), when
            required by the standard (``"spec"``), or always (``"always"``).

            Defaults to ``"legacy"``.

        :arg quote_char: Use given quote character for attribute quoting.

            Defaults to ``"`` which will use double quotes unless attribute
            value contains a double quote, in which case single quotes are
            used.

        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
            values.

            Defaults to ``False``.

        :arg escape_rcdata: Whether to escape characters that need to be
            escaped within normal elements within rcdata elements such as
            style.

            Defaults to ``False``.

        :arg resolve_entities: Whether to resolve named character entities that
            appear in the source tree. The XML predefined entities &lt; &gt;
            &amp; &quot; &apos; are unaffected by this setting.

            Defaults to ``True``.

        :arg strip_whitespace: Whether to remove semantically meaningless
            whitespace. (This compresses all whitespace to a single space
            except within ``pre``.)

            Defaults to ``False``.

        :arg minimize_boolean_attributes: Shortens boolean attributes to give
            just the attribute value, for example::

              <input disabled="disabled">

            becomes::

              <input disabled>

            Defaults to ``True``.

        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
            start tag of void elements (empty elements whose end tag is
            forbidden). E.g. ``<hr/>``.

            Defaults to ``False``.

        :arg space_before_trailing_solidus: Places a space immediately before
            the closing slash in a tag using a trailing solidus. E.g.
            ``<hr />``. Requires ``use_trailing_solidus=True``.

            Defaults to ``True``.

        :arg sanitize: Strip all unsafe or unknown constructs from output.
            See :py:class:`html5lib.filters.sanitizer.Filter`.

            Defaults to ``False``.

        :arg omit_optional_tags: Omit start/end tags that are optional.

            Defaults to ``True``.

        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

            Defaults to ``False``.

        r   z2__init__() got an unexpected keyword argument '%s'rE   FN)	frozensetoptionslen	TypeErrornextiterrF   setattrr%   getattrerrorsstrict)selfkwargsZunexpected_argsattrr3   r3   r4   __init__   s    O	)	zHTMLSerializer.__init__c             C   s9   t  | t  s t  |  j r1 | j |  j d  S| Sd  S)Nr6   )r   r   AssertionErrorr=   encode)r\   stringr3   r3   r4   ra      s    	zHTMLSerializer.encodec             C   s9   t  | t  s t  |  j r1 | j |  j d  S| Sd  S)Nr[   )r   r   r`   r=   ra   )r\   rb   r3   r3   r4   encodeStrict   s    	zHTMLSerializer.encodeStrictNc             c   s8  | |  _  d } g  |  _ | rF |  j rF d d l m } | | |  } |  j rk d d l m } | |  } |  j r d d l m } | |  } |  j	 r d d l
 m } | |  } |  j r d d l m } | |  } xW| D]O} | d } | d k rd | d } | d r*| d	 | d 7} n | d
 r>| d 7} | d
 r| d
 j d  d k r| d
 j d  d k r|  j d  d } n d } | d | | d
 | f 7} | d 7} |  j |  Vq | d5 k rF| d k s| r+| r| d j d  d k r|  j d  |  j | d  Vq0|  j t | d   Vq | d6 k r| d }	 |  j d |	  V|	 t k r|  j rd } n | r|  j d  x| d j   D]\ \ }
 } } | } | } |  j d  V|  j |  V|  j s+| t j |	 t    k r| t j d t    k r|  j d  V|  j d k sZt |  d k rcd } nZ |  j d  k rt j |  d  k	 } n3 |  j d! k rt j |  d  k	 } n t d"   | j d# d$  } |  j  r| j d% d&  } | r|  j! } |  j" rAd | k r#d | k r#d } n d | k rAd | k rAd } | d k rb| j d d'  } n | j d d(  } |  j |  V|  j |  V|  j |  Vq|  j |  VqW|	 t# k r|  j$ r|  j% r|  j d)  Vn |  j d*  V|  j d  Vq | d+ k rT| d }	 |	 t k r,d } n | r?|  j d  |  j d, |	  Vq | d- k r| d } | j d.  d k r|  j d/  |  j d0 | d  Vq | d1 k r| d }	 |	 d2 } | t& k r|  j d3 |	  |  j' r| t( k rt& | } n
 d4 |	 } |  j |  Vq |  j | d  q Wd  S)7NFr   )FiltertypeDoctypez<!DOCTYPE %snamepublicIdz PUBLIC "%s"systemIdz SYSTEMrC   r   'zBSystem identifier contains both single and double quote charactersz %s%s%s>
CharactersSpaceCharactersdataz</zUnexpected </ in CDATAStartTagEmptyTagz<%sTz+Unexpected child element of a CDATA element r   =alwaysspecrB   z?quote_attr_values must be one of: 'always', 'spec', or 'legacy'r   z&amp;<z&lt;z&#39;z&quot;z //EndTagz</%s>Commentz--zComment contains --z	<!--%s-->Entityr   zEntity %s not recognizedz&%s;)z
CharacterszSpaceCharacters)zStartTagzEmptyTag))r=   rZ   rO   filters.inject_meta_charsetrd   rN   filters.alphabeticalattributesrP   filters.whitespacerQ   filters.sanitizerrG   filters.optionaltagsfindserializeErrorrc   ra   r   r   rL   itemsrH   r
   r%   tuplerD   rT   _quoteAttributeSpecsearch_quoteAttributeLegacy
ValueErrorreplacerK   rE   rF   r	   rI   rJ   r   rM   r   )r\   
treewalkerr=   in_cdatard   tokenre   doctyperE   rg   _	attr_name
attr_valuekv
quote_attrrn   keyr3   r3   r4   rA      s    						




	

	#
!						
	



zHTMLSerializer.serializec             C   sE   | r% d j  t |  j | |    Sd j  t |  j |    Sd S)an  Serializes the stream from the treewalker into a string

        :arg treewalker: the treewalker to serialize

        :arg encoding: the string encoding to use

        :returns: the serialized tree

        Example:

        >>> from html5lib import parse, getTreeWalker
        >>> from html5lib.serializer import HTMLSerializer
        >>> token_stream = parse('<html><body>Hi!</body></html>')
        >>> walker = getTreeWalker('etree')
        >>> serializer = HTMLSerializer(omit_optional_tags=False)
        >>> serializer.render(walker(token_stream))
        '<html><head></head><body>Hi!</body></html>'

            r   N)r(   listrA   )r\   r   r=   r3   r3   r4   r:   w  s    zHTMLSerializer.renderzXXX ERROR MESSAGE NEEDEDc             C   s#   |  j  j |  |  j r t  d  S)N)rZ   r#   r[   SerializeError)r\   rn   r3   r3   r4   r     s    	zHTMLSerializer.serializeError)zquote_attr_valuesz
quote_charzuse_best_quote_charzomit_optional_tagszminimize_boolean_attributeszuse_trailing_soliduszspace_before_trailing_soliduszescape_lt_in_attrszescape_rcdatazresolve_entitieszalphabetical_attributeszinject_meta_charsetzstrip_whitespacezsanitize)__name__
__module____qualname__rD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rS   r_   ra   rc   rA   r:   r   r3   r3   r3   r4   r9   h   s4        Yr9   c               @   s   e  Z d  Z d Z d S)r   zError in serialized treeN)r   r   r   __doc__r3   r3   r3   r4   r     s   r   )+
__future__r   r   r   Zpip._vendor.sixr   recodecsr   r   	constantsr	   r
   r   r   r   r   r   r   r   xml.sax.saxutilsr   r(   Z_quoteAttributeSpecCharscompiler   r   r$   rT   Z_is_ucs4r   r   r   r   r!   r"   islowerr5   rA   r   r9   	Exceptionr   r3   r3   r3   r4   <module>   s:   
	 0