œ_#ÁÕ§TE NAŒ“KeÉ:”(åŽÖJÞùY’‚ñùž7; «]Û ý`8g“¯B© jdÖÖ¸ðzœ¸¦4Ç3Kó^(ÍÖ¼ Õ€pvìwšõB4df$Èü^0˜…åÌC$#2FŽÑ§±¦ÛZ/÷š&m£ñzÒÖ ’.Î]!Î;ƒ(Õ–¢d/—#Kª+tZyuÏB>NÛÖ†(¸ŒSà'³„Y˜´-_•¦¼´˜OlNK§¶ÒàŠˆTHµƒeTPå·fïM’…þuÏÍüp6دªE£åü‡ZØ'CKF#â«;‹eyO Qp„†l"ö1èíÙP ÏŒúl! BÝ2ñª•_VÁÉ÷3eu`–F¸ìI--ö<¿žë¯4õ캿¢)34Å{wMÉ2ÆÖFŸ¥`e9Ú¶¸P‡.”FÔï rY ‚²ÈTB,{ÛœéJ}«àQ4¹0Rû4D‚B§S‘ dO•v¾„™Sן¯3FeŸ™«+ÓâwH dÕÛÌì·P4ë&¥#rÜÉ Ù¦ê†ý·xòqk¯2,¹§™E\ék‚×Sá”ÚºÙ⺷ö£6…à ʾ qSá³Å|;àû}4Ÿ($â¹VY~óÍ!èÜÒŒËX½Ù1j‚VíÍŸš³+œ]«½g{_{/vµ½\¢¶vÉWKÿ:ñám½ ¥ S²x‘t ŽšÝÙÿÀÇ^ný PK IW™k‚½÷ á _rels/.relsUT dìd dìd dìd’ÏNÃ0‡ï{ŠÈ÷ÕÝ@¡¥» ¤Ý*`%îÑ&QâÁöö‚J£ì°cœŸ¿|¶²ÙÆA½rL½wVE Šñ¶w†çúay * 9Kƒw¬áÈ ¶ÕbóÄIîI]’Ê—4t"á1™ŽGJ…ìòMããH’±Å@æ…ZÆuYÞ`üÍ€jÂT;«!îì T}|Û7MoøÞ›ýÈNN<|v–í2ÄÜ¥ÏèšbË¢Ázó˜Ë )„"£OÏ7ú{ZYÈ’yÞç#1'tuÉM?6o>Z´_å9›ëKÚ˜}?þ³žÏÌ·N>fµx PK IWª½e ¢ U € word/document.xmlUT dìdPK IWþË3” z €J¢ word/settings.xmlUT dìdPK IWC‡{š' ƒ €¤ docProps/custom.xmlUT dìdPK IW츱=Œ €‡¥ [Content_Types].xmlUT dìdPK IWV%ë±" €U§ docProps/app.xmlUT dìdPK IW€RŒ 3 €¶¨ docProps/core.xmlUT dìdPK IWkòDn ô €ª word/_rels/document.xml.relsUT dìdPK IW;$î €Î« word/fontTable.xmlUT dìdPK IW+åäz] ÷. €ý¬ word/numbering.xmlUT dìdPK IW¤2×r- ¿ €›° word/styles.xmlUT dìdPK IWMFÒ ø €´ word/header1.xmlUT dìdPK IWF— T e €· word/media/image1.jpegUT dìdPK IW!Yéáå €°Ë word/media/image2.pngUT dìdPK IW°Àºë ú €ÙÌ word/media/image3.pngUT dìdPK IW$“†ª L €Î word/footer1.xmlUT dìdPK IWzaGôM €ñÑ word/footer2.xmlUT dìdPK IW–µâº P €}Õ word/theme/theme1.xmlUT dìdPK IW™k‚½÷ á €{Û _rels/.relsUT PK ! bîh^ [Content_Types].xml ¢( ¬”ËNÃ0E÷HüCä-Jܲ@5í‚Ç*Q>Àēƪc[žiiÿž‰ûB¡j7±ÏÜ{2ñÍh²nm¶‚ˆÆ»R‹ÈÀU^7/ÅÇì%¿’rZYï @1__f› ˜q·ÃR4DáAJ¬h>€ãÚÇV߯¹ªZ¨9ÈÛÁàNVÞ8Ê©ÓãÑÔji){^óã-I‹"{Üv^¥P!XS)bR¹rú—K¾s(¸3Õ`cÞ0†½ÝÎß»¾7M4²©ŠôªZÆk+¿|\|z¿(Ž‹ôPúº6h_-[ž@!‚ÒØ Pk‹´2nÏ}Ä?£LËð Ýû%áÄßdºždN"m,à¥ÇžDO97*‚~§Èɸ8ÀOíc|n¦Ñ äEøÿöéºóÀBÉÀ!$}‡íàÈé;{ìÐå[ƒîñ–é2þ ÿÿ PK ! 
µU0#ô L _rels/.rels ¢( ¬’MOÃ0†ïHü‡È÷ÕÝBKwAH»!T~€Iܵ£$Ý¿'TƒG½~üÊÛÝ<êÈ!öâ4¬‹;#¶w†—úqu *&r–Fq¬áÄvÕõÕö™GJy(v½*«¸¨¡KÉß#FÓñD±Ï.W ¥†=™ZÆMYÞbø®ÕBS톰·7 ê“Ï›×–¦é ?ˆ9LìÒ™ÈsbgÙ®|Èl!õùUSh9i°bžr:"y_dlÀóD›¿ý|-NœÈR"4ø2ÏGÇ% õZ´4ñËyÄ7 ëÈðÉ‚‹¨Þ ÿÿ PK ! Q48wÛ — xl/workbook.xml¤UÙnâ0}iþ!cñ‡ *–¢AšVU×$dC¬&vÆv UÕŸë@XÊK§/¹p|Žï¹N÷b“¥Ö •Š ÞC¸î"‹òHÄŒ¯zèá~b·‘¥4á1I§=ôJºèÿüÑ] ù¼âÙ ®z(Ñ:GE ͈ª‹œrˆ,…̈†©\9*—”Ä*¡Tg©ã¹nàd„q´Eåg0ÄrÉ":Q‘Q®· ’¦D}•°\UhYô¸ŒÈç"·#‘å ±`)Ó¯%(²²(œ®¸d‘‚ì nZ w v¡ñª• t¶TÆ")”Xê:@;[Ògú±ë`|²›ó=ø’ïHúÂL÷¬dðEVÁ+8€a÷Ûh¬Uz%„Íû"ZsÏÍCýî’¥ôqk]‹äù5ÉL¦Rd¥Dé˘i÷P ¦bM/|dÉ",…¨çãFNoçiûéë>aêiçsó#ðÄ ÕTr¢éHp ÜIú®ÝJìQ"ÀÜÖ-ý[0I¡¦ÀZ Z…d¡nˆN¬B¦=4 g %PDF-1.4 %âãÏÓ 3 0 obj << /Linearized 1 /L 422775 ÿØÿà JFIF ÿÛ C ÿÛ C ÿÀ X" ÿÄ ÿÄ H !1A"Qaq2‘¡#±ÁBRÑ3Cbrá$S‚¢²ð4ñ%6DTc’ÂsÿÄ ÿÄ = !1AQ"aq‘Á2R¡±BÑð#3br’²4á$‚¢ÂñÿÚ ? áHBßÝ`„! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! !@B„ „! ! stream
"""
An interface to html5lib that mimics the lxml.html interface.
"""
import sys
import string
from html5lib import HTMLParser as _HTMLParser
from html5lib.treebuilders.etree_lxml import TreeBuilder
from lxml import etree
from lxml.html import Element, XHTML_NAMESPACE, _contains_block_level_tag
# Python 2/3 compatibility shims.
try:
    # Python 2: ``basestring`` exists and is used as the string type check.
    _strings = basestring
except NameError:
    # Python 3: no ``basestring``; accept both byte and text strings.
    _strings = (bytes, str)
try:
    # Python 2 module layout.
    from urllib2 import urlopen
except ImportError:
    # Python 3 module layout.
    from urllib.request import urlopen
try:
    # Python 2 module layout.
    from urlparse import urlparse
except ImportError:
    # Python 3 module layout.
    from urllib.parse import urlparse
class HTMLParser(_HTMLParser):
    """An html5lib HTML parser with lxml as tree."""

    def __init__(self, strict=False, **kwargs):
        # Always build the tree with the lxml ``TreeBuilder``; ``strict`` and
        # any other keyword arguments are forwarded to the html5lib parser.
        _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)
# XHTML support is optional: the wrapper class and its default instance are
# only defined when ``html5lib`` exposes an ``XHTMLParser`` to import.
try:
    from html5lib import XHTMLParser as _XHTMLParser
except ImportError:
    pass
else:
    class XHTMLParser(_XHTMLParser):
        """An html5lib XHTML Parser with lxml as tree."""

        def __init__(self, strict=False, **kwargs):
            # Always build the tree with the lxml ``TreeBuilder``; other
            # keyword arguments are forwarded to the html5lib parser.
            _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)

    # Module-level XHTML parser instance (only exists if the import worked).
    xhtml_parser = XHTMLParser()
def _find_tag(tree, tag):
    """Look up ``tag`` in ``tree``, with or without the XHTML namespace.

    The bare tag name is tried first through ``tree.find``.  Only when that
    lookup yields ``None`` is it repeated with the tag qualified by
    ``XHTML_NAMESPACE``; the result of that second lookup is returned as-is.
    """
    match = tree.find(tag)
    if match is None:
        qualified = '{%s}%s' % (XHTML_NAMESPACE, tag)
        match = tree.find(qualified)
    return match
def document_fromstring(html, guess_charset=None, parser=None):
    """
    Parse a string holding a whole document and return the root element
    of the resulting tree.

    `html` must be a byte or text string; anything else raises
    `TypeError`.  When `parser` is not given, the module-level
    `html_parser` is used.

    If `guess_charset` is true, or if it is left unset and the input is
    not Unicode but a byte string, the `chardet` library will perform
    charset guessing on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    # html5lib does not accept useChardet as an argument, if it
    # detected the html argument would produce unicode objects.  The
    # option is therefore only passed when a value was given or derived.
    if guess_charset is None and isinstance(html, bytes):
        guess_charset = True
    if guess_charset is None:
        parse_kwargs = {}
    else:
        parse_kwargs = {'useChardet': guess_charset}

    document = active_parser.parse(html, **parse_kwargs)
    return document.getroot()
def fragments_fromstring(html, no_leading_text=False,
                         guess_charset=None, parser=None):
    """Parse a run of HTML elements and return them as a list.

    The fragment is parsed in the context of a ``div`` container.  The
    first item of the returned list may be a string holding leading text.
    If `no_leading_text` is true, non-whitespace leading text raises
    ``etree.ParserError`` and whitespace-only leading text is dropped, so
    the result then contains elements only.

    `html` must be a byte or text string; anything else raises
    ``TypeError``.  When `parser` is not given, the module-level
    ``html_parser`` is used.

    If `guess_charset` is true, the `chardet` library will perform charset
    guessing on the string.  For byte input with `guess_charset` unset,
    guessing is explicitly switched off.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    active_parser = html_parser if parser is None else parser

    # html5lib does not accept useChardet as an argument, if it
    # detected the html argument would produce unicode objects.
    if guess_charset is None and isinstance(html, bytes):
        guess_charset = False
    if guess_charset is None:
        parse_kwargs = {}
    else:
        parse_kwargs = {'useChardet': guess_charset}

    nodes = active_parser.parseFragment(html, 'div', **parse_kwargs)

    if nodes and isinstance(nodes[0], _strings) and no_leading_text:
        leading = nodes[0]
        if leading.strip():
            raise etree.ParserError('There is leading text: %r' % leading)
        # Whitespace-only leading text is silently discarded.
        del nodes[0]
    return nodes
def fragment_fromstring(html, create_parent=False,
                        guess_charset=None, parser=None):
    """Parse a single HTML element and return it.

    Without `create_parent`, the input must consist of exactly one
    element: no elements, more than one element, or non-whitespace text
    before or after the element raises ``etree.ParserError``.  The
    returned element's tail is reset to ``None``.

    If `create_parent` is true, a new parent element is created to hold
    the parsed content, and leading or trailing text is allowed.  A
    string value is used as the parent's tag name; any other true value
    produces a ``div``.

    `html` must be a byte or text string; anything else raises
    ``TypeError``.

    If `guess_charset` is true, the `chardet` library will perform charset
    guessing on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')

    # Leading text is only tolerated when a parent element will hold it.
    wrap_in_parent = bool(create_parent)
    parts = fragments_fromstring(
        html, no_leading_text=not wrap_in_parent,
        guess_charset=guess_charset, parser=parser)

    if not create_parent:
        if not parts:
            raise etree.ParserError('No elements found')
        if len(parts) > 1:
            raise etree.ParserError('Multiple elements found')
        only = parts[0]
        if only.tail and only.tail.strip():
            raise etree.ParserError('Element followed by text: %r' % only.tail)
        only.tail = None
        return only

    if isinstance(create_parent, _strings):
        parent_tag = create_parent
    else:
        parent_tag = 'div'
    wrapper = Element(parent_tag)
    if parts:
        # A leading string becomes the wrapper's text, not a child.
        if isinstance(parts[0], _strings):
            wrapper.text = parts[0]
            del parts[0]
        wrapper.extend(parts)
    return wrapper
def fromstring(html, guess_charset=None, parser=None):
    """Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document.  The input is always parsed as a full
    document first; what is returned depends on what it looked like:

    * input starting with ``<html`` or ``<!doctype``, or producing a
      non-empty head: the document root;
    * a body holding exactly one element and no significant text: that
      element;
    * anything else: the body itself, retagged as ``div`` if it contains
      block-level tags and as ``span`` otherwise.

    `html` must be a byte or text string; anything else raises
    ``TypeError``.

    If `guess_charset` is true, or if the input is not Unicode but a
    byte string, the `chardet` library will perform charset guessing
    on the string.
    """
    if not isinstance(html, _strings):
        raise TypeError('string required')
    document = document_fromstring(html, guess_charset=guess_charset,
                                   parser=parser)

    # document starts with doctype or <html>, full document!
    prefix = html[:50]
    if isinstance(prefix, bytes):
        # Allow text comparison in python3.
        # Decode as ascii, that also covers latin-1 and utf-8 for the
        # characters we need.
        prefix = prefix.decode('ascii', 'replace')
    prefix = prefix.lstrip().lower()
    if prefix.startswith(('<html', '<!doctype')):
        return document

    # if the head is not empty we have a full document
    if len(_find_tag(document, 'head')):
        return document

    body = _find_tag(document, 'body')

    # The body has just one element, so it was probably a single
    # element passed in
    if len(body) == 1:
        no_leading_text = not (body.text and body.text.strip())
        if no_leading_text:
            trailing = body[-1].tail
            if not (trailing and trailing.strip()):
                return body[0]

    # Now we have a body which represents a bunch of tags which have the
    # content that was passed in.  We will create a fake container, which
    # is the body tag, except <body> implies too much structure.
    body.tag = 'div' if _contains_block_level_tag(body) else 'span'
    return body
def parse(filename_url_or_file, guess_charset=None, parser=None):
    """Parse a filename, URL, or file-like object into an HTML document
    tree.  Note: this returns a tree, not an element.  Use
    ``parse(...).getroot()`` to get the document root.

    If ``guess_charset`` is true, the ``useChardet`` option is passed into
    html5lib to enable character detection.  This option is on by default
    when parsing from URLs, off by default when parsing from file(-like)
    objects (which tend to return Unicode more often than not), and on by
    default when parsing from a file path (which is read in binary mode).

    When ``parser`` is not given, the module-level ``html_parser`` is used.

    Streams opened by this function (file paths and URLs) are closed once
    parsing has finished, whether it succeeded or raised.  File-like
    objects passed in by the caller are left open; closing them remains
    the caller's responsibility.
    """
    if parser is None:
        parser = html_parser

    # Only streams we open ourselves may be closed by us.
    opened_here = False
    if not isinstance(filename_url_or_file, _strings):
        fp = filename_url_or_file
        if guess_charset is None:
            # assume that file-like objects return Unicode more often than bytes
            guess_charset = False
    elif _looks_like_url(filename_url_or_file):
        fp = urlopen(filename_url_or_file)
        opened_here = True
        if guess_charset is None:
            # assume that URLs return bytes
            guess_charset = True
    else:
        fp = open(filename_url_or_file, 'rb')
        opened_here = True
        if guess_charset is None:
            guess_charset = True

    options = {}
    # html5lib does not accept useChardet as an argument, if it
    # detected the html argument would produce unicode objects.
    if guess_charset:
        options['useChardet'] = guess_charset

    # try/finally rather than ``with``: the Python 2 ``urlopen`` result is
    # not a context manager, but it does provide ``close()``.
    try:
        return parser.parse(fp, **options)
    finally:
        if opened_here:
            fp.close()
def _looks_like_url(str):
    """Tell whether the given string should be treated as a URL.

    The decision is based on the scheme component reported by
    ``urlparse``: no scheme means "not a URL".  On ``win32`` a scheme
    that is a single ASCII letter is taken to be a drive letter of an
    absolute path, not a URL scheme.  Anything else counts as a URL.
    """
    scheme = urlparse(str)[0]
    if not scheme:
        return False
    # looks like a 'normal' absolute path (e.g. a Windows drive letter)
    is_drive_letter = (sys.platform == 'win32'
                       and scheme in string.ascii_letters
                       and len(scheme) == 1)
    return not is_drive_letter
# Default parser instance; the module-level parsing functions fall back to
# it when their ``parser`` argument is ``None``.
html_parser = HTMLParser()