U
    Sh5                     @  s^  d Z ddlmZ ddlZddlZddlmZmZ ddlm	Z	 ddl
mZmZmZmZ G dd deZG d	d
 d
ejZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd  d eZG d!d" d"ZG d#d$ d$e	Zd,d'd'd(dd)d*d+Z dS )-a  A simple but complete HTML to Abstract Syntax Tree (AST) parser.

The AST can also reproduce the HTML text.

Example::

    >> text = '<div class="note"><p>text</p></div>'
    >> ast = tokenize_html(text)
    >> list(ast.walk(include_self=True))
    [Root(''), Tag('div', {'class': 'note'}), Tag('p'), Data('text')]
    >> str(ast)
    '<div class="note"><p>text</p></div>'
    >> str(ast[0][0])
    '<p>text</p>'

Note: optional tags are not accounted for
(see https://html.spec.whatwg.org/multipage/syntax.html#optional-tags)

    )annotationsN)abcdeque)
HTMLParser)AnyCallableIterableIteratorc                   @  s@   e Zd ZdZdddddZedddd	Zddd
dZdS )	Attributez'This class holds the tags's attributes.str)keyreturnc                 C  s   |  |dS )z+If self doesn't have the key it returns ''. )get)selfr    r   Q/root/rtd-docs/venv/lib/python3.8/site-packages/myst_parser/parsers/parse_html.py__getitem__!   s    zAttribute.__getitem__z	list[str]r   c                 C  s   | d   S )z!Return 'class' attribute as list.class)splitr   r   r   r   classes%   s    zAttribute.classesc                 C  s   d dd |  D S )z0Return a htmlized representation for attributes. c                 s  s"   | ]\}}| d | dV  qdS )z=""Nr   ).0r   valuer   r   r   	<genexpr>,   s     z$Attribute.__str__.<locals>.<genexpr>)joinitemsr   r   r   r   __str__*   s    zAttribute.__str__N)__name__
__module____qualname____doc__r   propertyr   r    r   r   r   r   r
      s
   r
   c                   @  s@  e Zd ZdZdAdddddd	Zed
dddZeddddZdBdddddZdd dddZ	dd dddZ
ddddZddd d!Zd"dd#d$Zdd dd%d&Zd dd'd(Zddd)d*ZdCd+dd,d-d.Zddd/d0Zd1dd2d3d4ZdDdd"d5d6d7ZdEddd d8d9d:ZdFd<dd=ddd"d>d?d@ZdS )GElementz]An Element of the xml/html document.

    All xml/html entities inherit from this class.
    r   Nr   zdict | NoneNone)nameattrr   c                 C  s$   || _ t|pi | _d| _g | _dS )zInitialise the element.N)r(   r
   attrs_parent	_childrenr   r(   r)   r   r   r   __init__5   s    zElement.__init__zElement | Noner   c                 C  s   | j S )zReturn parent.)r+   r   r   r   r   parent<   s    zElement.parentzlist[Element]c                 C  s   | j dd S )zReturn copy of children.Nr,   r   r   r   r   childrenA   s    zElement.childrenFbool)r1   deepcopyc                 C  sn   g }t |D ]V\}}t|ts"t|r.| }|jd kr@| |_n|j| krXtd| || q|| _d S )Nz&different parent already set for item )	enumerate
isinstancer&   AssertionErrorr3   r+   appendr,   )r   r1   r3   Znew_childreniitemr   r   r   reset_childrenF   s    

zElement.reset_childrenint)indexr   c                 C  s
   | j | S Nr0   r   r<   r   r   r   r   S   s    zElement.__getitem__)r<   r9   c                 C  sD   t |tst|jd k	r0|j| kr0td|| |_| j||S Nz"different parent already set for: )r5   r&   r6   r+   r,   __setitem__r   r<   r9   r   r   r   r@   V   s
    zElement.__setitem__)r<   c                 C  s   | j |S r=   )r,   __delitem__r>   r   r   r   rB   ]   s    zElement.__delitem__c                 C  s
   | j  S r=   )r,   __len__r   r   r   r   rC   `   s    zElement.__len__zIterator[Element]c                 c  s   | j E d H  d S r=   r0   r   r   r   r   __iter__c   s    zElement.__iter__c                 C  sD   t |tst|jd k	r0|j| kr0td|| |_| j||S r?   )r5   r&   r6   r+   r,   insertrA   r   r   r   rE   f   s
    zElement.insertc                 C  s0   |  | j| j}| D ]}| }|| q|S )z#Recursively copy and remove parent.)	__class__r(   r*   r3   r7   )r   _copychildZ_copy_childr   r   r   r3   m   s
    zElement.deepcopyc                 C  s6   | j j d| j}| jr*|d| j7 }|d7 }|S )N(z, ))rF   r!   r(   r*   r   textr   r   r   __repr__u   s
    zElement.__repr__0dict[str, Callable[[Element, dict], str]] | Nonetag_overridesr   c                 K  s   t dS )zReturns a HTML string representation of the element.

        :param tag_overrides: Provide a dictionary of render function
            for specific tag names, to override the normal render format

        N)NotImplementedErrorr   rP   kwargsr   r   r   render|   s    zElement.renderc                 C  s   |   S r=   rT   r   r   r   r   r       s    zElement.__str__r   )r9   r   c                 C  s   || kS r=   r   )r   r9   r   r   r   __eq__   s    zElement.__eq__)include_selfr   c                 c  s,   |r
| V  | D ]}|V  |  E dH  qdS )zWalk through the xml/html AST.N)walk)r   rW   rH   r   r   r   rX      s
    zElement.walk)inplacerecurser   c                 C  sF   | }|s|   }|dd |jD  |rB|D ]}|jddd q.|S )zdReturn copy with all `Data` tokens
        that only contain whitespace / newlines removed.
        c                 S  s(   g | ] }t |tr |j d ks|qS )r   )r5   Datadatastrip)r   er   r   r   
<listcomp>   s   
 z!Element.strip.<locals>.<listcomp>T)rY   rZ   )r3   r:   r1   r]   )r   rY   rZ   elementrH   r   r   r   r]      s    zElement.stripTzstr | type[Element]zIterable[str] | None)
identifierr*   r   rW   rZ   r   c                 #  s   |r|   n| }|r"t| g|}t r8 fddn
 fdd}|dk	rTt|n|}|D ]R}||r\|dk	r||jjsq\|pi 	 D ]\}	}
|j|	 |
kr q\q|V  q\dS )z:Find all elements that match name and specific attributes.c                   s
   t |  S r=   )r5   cra   r   r   <lambda>       zElement.find.<locals>.<lambda>c                   s
   | j  kS r=   r(   rb   rd   r   r   re      rf   N)
rX   	itertoolschaininspectisclasssetissubsetr*   r   r   )r   ra   r*   r   rW   rZ   iteratorZ	test_funcrH   r   r   r   rd   r   find   s     	
zElement.find)r   N)F)N)F)FF)NNFT)r!   r"   r#   r$   r.   r%   r/   r1   r:   r   r@   rB   rC   rD   rE   r3   rM   rT   r    rV   rX   r]   ro   r   r   r   r   r&   /   s4   	     r&   c                   @  s   e Zd ZdZddddZdS )RootzThe root of the AST tree.r   r   c                   s   d  fdd| D S )z6Returns a string HTML representation of the structure.r   c                 3  s   | ]}|j f  V  qd S r=   rU   r   rH   rS   r   r   r      s     zRoot.render.<locals>.<genexpr>)r   r   rS   r   rr   r   rT      s    zRoot.renderNr!   r"   r#   r$   rT   r   r   r   r   rp      s   rp   c                   @  s"   e Zd ZdZddddddZdS )	TagzKRepresent xml/html tags under the form: <name key="value" ...> ... </name>.NrN   r   rO   c                   sh   r| j kr| j  | S d| j  | jr0dnd | j dd fdd| D  d| j  d S )N<r   r   >c                 3  s"   | ]}|j f d i V  qdS )rP   NrU   rq   rS   rP   r   r   r      s    zTag.render.<locals>.<genexpr>z</)r(   r*   r   rR   r   rx   r   rT      s     z
Tag.render)Nrt   r   r   r   r   ru      s    ru   c                   @  s"   e Zd ZdZddddddZdS )	XTagzGRepresent XHTML style tags with no children, like `<img src="t.gif" />`NrN   r   rO   c                 K  sD   |d k	r"| j |kr"|| j  | |S d| j  | jr4dnd | j dS )Nrv   r   r   z/>r(   r*   rR   r   r   r   rT      s    zXTag.render)Nrt   r   r   r   r   ry      s    ry   c                   @  s   e Zd ZdZddddZdS )VoidTagzJRepresent tags with no children, only start tag, like `<img src="t.gif" >`r   r   c                 K  s"   d| j  | jrdnd | j dS )Nrv   r   r   rw   rz   rs   r   r   r   rT      s    zVoidTag.renderNrt   r   r   r   r   r{      s   r{   c                      s>   e Zd Zdd fddZddddZd ddd	Z  ZS )
TerminalElementr   r\   c                   s   t  d || _d S )Nr   )superr.   r\   r   r\   rF   r   r   r.      s    zTerminalElement.__init__r   c                 C  s6   | j }t|dkr"|d d d }| jj d|dS )N      z...rI   rJ   )r\   lenrF   r!   rK   r   r   r   rM      s    zTerminalElement.__repr__c                 C  s   |  | j}|S )zCopy and remove parent.)rF   r\   )r   rG   r   r   r   r3     s    zTerminalElement.deepcopy)r!   r"   r#   r.   rM   r3   __classcell__r   r   r   r   r|      s   r|   c                   @  s   e Zd ZdZddddZdS )r[   z8Represent data inside xml/html documents, like raw text.r   r   c                 K  s   | j S r=   r}   rs   r   r   r   rT   
  s    zData.renderNrt   r   r   r   r   r[     s   r[   c                   @  s   e Zd ZdZddddZdS )Declarationz.Represent declarations, like `<!DOCTYPE html>`r   r   c                 K  s   d| j  dS )Nz<!rw   r}   rs   r   r   r   rT     s    zDeclaration.renderNrt   r   r   r   r   r     s   r   c                   @  s   e Zd ZdZddddZdS )CommentzRepresent HTML commentsr   r   c                 K  s   d| j  dS )Nz<!--z-->r}   rs   r   r   r   rT     s    zComment.renderNrt   r   r   r   r   r     s   r   c                   @  s   e Zd ZdZddddZdS )Piz<Represent processing instructions like `<?xml-stylesheet ?>`r   r   c                 K  s   d| j  dS )Nz<?rw   r}   rs   r   r   r   rT     s    z	Pi.renderNrt   r   r   r   r   r     s   r   c                   @  s   e Zd ZdZddddZdS )Charz%Represent character codes like: `&#0`r   r   c                 K  s   d| j  dS )Nz&#;r}   rs   r   r   r   rT   &  s    zChar.renderNrt   r   r   r   r   r   #  s   r   c                   @  s   e Zd ZdZddddZdS )EntityzRepresent entities like `&amp`r   r   c                 K  s   d| j  dS )N&r   r}   rs   r   r   r   rT   -  s    zEntity.renderNrt   r   r   r   r   r   *  s   r   c                   @  s   e Zd ZdZdddddZdd Zd	d
ddZdddddZdddddZdddddZ	dddddZ
ddddZdS )Treez*The engine class to generate the AST tree.r   r   rg   c                 C  s*   || _ t|| _t | _| j| j dS )zInitialise TreeN)r(   rp   outmostr   stackr7   r   r(   r   r   r   r.   4  s    
zTree.__init__c                 C  s(   t | j| _| j  | j| j dS )z.Clear the outmost and stack for a new parsing.N)rp   r(   r   r   clearr7   r   r   r   r   r   ;  s    
z
Tree.clearr&   r   c                 C  s
   | j d S )z<Return the last pointer which point to the actual tag scope.)r   r   r   r   r   lastA  s    z	Tree.lastdictrz   c                 C  s:   | j  }t||}|| | j | | j | dS )z[Nest a given tag at the bottom of the tree using
        the last stack's pointer.
        N)r   popru   r7   )r   r(   r*   Zpointerr9   r   r   r   nest_tagE  s
    


zTree.nest_tagc                 C  s    |   }t||}|| dS )zNest an XTag onto the tree.N)r   ry   r7   r   r(   r*   topr9   r   r   r   	nest_xtagO  s    
zTree.nest_xtagc                 C  s    |   }t||}|| dS )zNest a VoidTag onto the tree.N)r   r{   r7   r   r   r   r   	nest_vtagU  s    
zTree.nest_vtagztype[TerminalElement])klassr\   c                 C  s   |   }||}|| dS )zNest the data onto the tree.N)r   r7   )r   r   r\   r   r9   r   r   r   nest_terminal[  s    zTree.nest_terminalc                 C  sJ   d}t | jD ]}|d }|j|kr q.qd}t|D ]}| j  q6dS )zWhen a closing tag is found, pop the pointer's scope from the stack,
        to then point to the earlier scope's tag.
        r      N)reversedr   r(   ranger   )r   r(   countind_r   r   r   enclosea  s    
zTree.encloseN)r   )r!   r"   r#   r$   r.   r   r   r   r   r   r   r   r   r   r   r   r   1  s   
r   c                      s   e Zd ZdZdddddddd	d
dddddhZd2ddd fddZddd fddZddddZddddZddd d!Z	dd"d#d$Z
dd%d&d'Zdd%d(d)Zdd"d*d+Zdd"d,d-Zdd"d.d/Zdd"d0d1Z  ZS )3	HtmlToAstzThe tokenizer class.ZareabasebrcolZembedhrZimginputlinkmetaparamsourcetrackZwbrr   Fr   r2   )r(   convert_charrefsc                   s   t  j|d t|| _d S N)r   )r~   r.   r   struct)r   r(   r   r   r   r   r.     s    zHtmlToAst.__init__rp   )r   r   c                   s   | j   t | | j jS )zParse the source string.)r   r   r~   feedr   )r   r   r   r   r   r     s    
zHtmlToAst.feedrg   c                 C  s,   || j kr| j|| n| j|| dS )z5When found an opening tag then nest it onto the tree.N)void_elementsr   r   r   r-   r   r   r   handle_starttag  s    
zHtmlToAst.handle_starttagc                 C  s   | j || dS )z9When found a XHTML tag style then nest it up to the tree.N)r   r   r-   r   r   r   handle_startendtag  s    zHtmlToAst.handle_startendtagc                 C  s   || j kr| j| dS )z@When found a closing tag then makes it point to the right scope.N)r   r   r   r   r   r   r   handle_endtag  s    
zHtmlToAst.handle_endtagr}   c                 C  s   | j t| dS )zNest data onto the tree.N)r   r   r[   r   r   r   r   handle_data  s    zHtmlToAst.handle_data)declc                 C  s   | j t| d S r=   r   r   r   r   r   r   r   r   handle_decl  s    zHtmlToAst.handle_declc                 C  s   | j t| d S r=   r   r   r   r   r   unknown_decl  s    zHtmlToAst.unknown_declc                 C  s   | j t| d S r=   )r   r   r   r   r   r   r   handle_charref  s    zHtmlToAst.handle_charrefc                 C  s   | j t| d S r=   )r   r   r   r   r   r   r   handle_entityref  s    zHtmlToAst.handle_entityrefc                 C  s   | j t| d S r=   )r   r   r   r   r   r   r   	handle_pi  s    zHtmlToAst.handle_pic                 C  s   | j t| d S r=   )r   r   r   r   r   r   r   handle_comment  s    zHtmlToAst.handle_comment)r   F)r!   r"   r#   r$   r   r.   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  s8   r   r   Fr   r2   )rL   r(   r   r   c                 C  s   t ||d}|| S r   )r   r   )rL   r(   r   parserr   r   r   tokenize_html  s    r   )r   F)!r$   
__future__r   rj   rh   collectionsr   r   html.parserr   typingr   r   r   r	   r   r
   MutableSequencer&   rp   ru   ry   r{   r|   r[   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s.    AF