federation/federation/utils/network.py

94 wiersze
4.2 KiB
Python

# -*- coding: utf-8 -*-
import logging
import requests
from requests.exceptions import RequestException, HTTPError, SSLError
from federation import __version__
logger = logging.getLogger("social-federation")
USER_AGENT = "python/social-federation/%s" % __version__
def fetch_document(url=None, host=None, path="/", timeout=10, raise_ssl_errors=True):
"""Helper method to fetch remote document.
Must be given either the ``url`` or ``host``.
If ``url`` is given, only that will be tried without falling back to http from https.
If ``host`` given, `path` will be added to it. Will fall back to http on non-success status code.
:arg url: Full url to fetch, including protocol
:arg host: Domain part only without path or protocol
:arg path: Path without domain (defaults to "/")
:arg timeout: Seconds to wait for response (defaults to 10)
:arg raise_ssl_errors: Pass False if you want to try HTTP even for sites with SSL errors (default True)
:returns: Tuple of document (str or None), status code (int or None) and error (an exception class instance or None)
:raises ValueError: If neither url nor host are given as parameters
"""
if not url and not host:
raise ValueError("Need url or host.")
logger.debug("fetch_document: url=%s, host=%s, path=%s, timeout=%s, raise_ssl_errors=%s",
url, host, path, timeout, raise_ssl_errors)
headers = {'user-agent': USER_AGENT}
if url:
# Use url since it was given
logger.debug("fetch_document: trying %s", url)
try:
response = requests.get(url, timeout=timeout, headers=headers)
logger.debug("fetch_document: found document, code %s", response.status_code)
return response.text, response.status_code, None
except RequestException as ex:
logger.debug("fetch_document: exception %s", ex)
return None, None, ex
# Build url with some little sanitizing
host_string = host.replace("http://", "").replace("https://", "").strip("/")
path_string = path if path.startswith("/") else "/%s" % path
url = "https://%s%s" % (host_string, path_string)
logger.debug("fetch_document: trying %s", url)
try:
response = requests.get(url, timeout=timeout, headers=headers)
logger.debug("fetch_document: found document, code %s", response.status_code)
response.raise_for_status()
return response.text, response.status_code, None
except (HTTPError, SSLError) as ex:
if isinstance(ex, SSLError) and raise_ssl_errors:
logger.debug("fetch_document: exception %s", ex)
return None, None, ex
# Try http then
url = url.replace("https://", "http://")
logger.debug("fetch_document: trying %s", url)
try:
response = requests.get(url, timeout=timeout, headers=headers)
logger.debug("fetch_document: found document, code %s", response.status_code)
response.raise_for_status()
return response.text, response.status_code, None
except RequestException as ex:
logger.debug("fetch_document: exception %s", ex)
return None, None, ex
except RequestException as ex:
logger.debug("fetch_document: exception %s", ex)
return None, None, ex
def send_document(url, data, timeout=10, *args, **kwargs):
"""Helper method to send a document via POST.
Additional ``*args`` and ``**kwargs`` will be passed on to ``requests.post``.
:arg url: Full url to send to, including protocol
:arg data: POST data to send (dict)
:arg timeout: Seconds to wait for response (defaults to 10)
:returns: Tuple of status code (int or None) and error (exception class instance or None)
"""
logger.debug("send_document: url=%s, data=%s, timeout=%s", url, data, timeout)
headers = {'user-agent': USER_AGENT}
try:
response = requests.post(url, data=data, timeout=timeout, headers=headers, *args, **kwargs)
logger.debug("send_document: response status code %s", response.status_code)
return response.status_code, None
except RequestException as ex:
logger.debug("send_document: exception %s", ex)
return None, ex