"""
 wget - web client for accessing web-pages from python scripts
        this script uses urllib2, BeautifulSoup and SoupSelector
"""

# Module metadata.
__author__ = "Tomas Pokorny (tomas.zemres@gmail.com)"
__version__ = "0.2"
__license__ = "GPL"

# Public API: only WGet is exported by `from <module> import *`.
__all__ = ["WGet"]

import urllib
import urllib2
import re
import SoupSelector
import BeautifulSoup

# Matches a Content-Type value that starts with text/html or text/xml;
# group(1) is the subtype ('html' or 'xml').
RE_FORMAT = re.compile(r'^text/(html|xml)')


class WGet(object):
    """ Web client for accessing web pages based on urllib2

    One instance owns a single opener (built with an HTTPCookieProcessor)
    and remembers the state of the most recent request.  Call request()
    first, then read the result through the content / soup / format
    properties or the select() / select_first() helpers.
    """

    # Headers installed on the opener in __init__.
    base_headers = {
            'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
        }

    def __init__(self):
        """ Constructor - build the opener and reset the request state """
        processor = urllib2.HTTPCookieProcessor()
        self.opener = urllib2.build_opener( processor )
        self.opener.addheaders = list(self.base_headers.items())

        # State of the last request; filled in by request().
        self.request_url = None
        self._request = None
        self.response = None
        self._content = None
        self._soup = None


    def request(self, url, data=None, headers=None):
        """ Make request to given url

        url     -- address to open
        data    -- optional request data; when truthy it is encoded with
                   urllib.urlencode() before being given to urllib2.Request
        headers -- optional dict of headers passed to urllib2.Request

        The response is stored in self.response; the cached content and
        soup of a previous request are dropped.
        """
        if data:
            data = urllib.urlencode(data)
        if headers is None:
            # Fresh dict per call instead of a shared mutable default.
            headers = {}

        self.request_url = url
        self._request = urllib2.Request(url, data, headers)
        self.response = self.opener.open(self._request)
        self._content = None
        self._soup    = None


    @property
    def content(self):
        """ Response content

        Read from self.response on first access and cached afterwards.
        The response is closed after the read, also when read() raises.
        """
        if self._content is None:
            try:
                self._content = self.response.read()
            finally:
                # Always release the connection, even on a failed read.
                self.response.close()
        return self._content


    @property
    def soup(self):
        """ BeautifulSoup instance

        Built lazily from self.content and cached.  BeautifulStoneSoup is
        used when format is 'xml', BeautifulSoup otherwise.
        """
        if self._soup is None:
            if self.format == 'xml':
                parserClass = BeautifulSoup.BeautifulStoneSoup
            else:
                parserClass = BeautifulSoup.BeautifulSoup

            self._soup = parserClass(self.content, convertEntities='xhtml')
        return self._soup


    def select(self, selector, **kwargs):
        """ Do CSS selector query on HTML content """
        # NOTE(review): select() is presumably added to the soup classes by
        # the SoupSelector import - confirm against that module.
        return self.soup.select(selector, **kwargs)


    def select_first(self, selector, **kwargs):
        """ Do CSS selector query on HTML content and return first item """
        # NOTE(review): select_first() is presumably provided by
        # SoupSelector as well - confirm against that module.
        return self.soup.select_first(selector, **kwargs)


    @property
    def format(self):
        """ get content format ('xml' or 'html' or None)

        Derived from the Content-Type response header; None is returned
        when the header is missing or is not text/html or text/xml.
        """
        content_type = self.response.headers.get('Content-Type')
        if not content_type:
            # No Content-Type header at all: format is unknown.
            return None
        m = RE_FORMAT.match(content_type)
        if m:
            return m.group(1)
        return None

