# Source code for fundamentals.download.extract_filename_from_url

#!/usr/local/bin/python
# encoding: utf-8
"""
*Try and extract the name of the document located at the given URL*

Author
: David Young
"""

from builtins import str
import sys
import os

os.environ["TERM"] = "vt100"
from fundamentals import tools



# [docs]
def extract_filename_from_url(log, url):
    """
    *get the filename from a URL.*

    *Will return 'untitled.html', if no filename is found. Any query string (``?...``) or fragment (``#...``) is ignored, and '.html' is appended to a name that contains no '.'.*

    **Key Arguments**

    - ``log`` -- logger
    - ``url`` -- the url to extract filename from


    Returns:
        - ``filename`` -- the filename, or ``None`` if the url could not be processed (e.g. it is not a string); a warning is logged in that case

    **Usage**

    ```python
    from fundamentals.download import extract_filename_from_url
    name = extract_filename_from_url(
        log=log,
        url="https://en.wikipedia.org/wiki/Docstring"
    )
    print(name)
    # OUT: Docstring.html
    ```

    """
    import re

    # EXTRACT THE FILENAME FROM THE URL
    try:
        log.debug("extracting filename from url " + url)
        # DROP THE FRAGMENT AND QUERY STRING SO THEY ARE NOT MISTAKEN FOR
        # THE DOCUMENT NAME (e.g. 'paper.pdf?download=1' -> 'paper.pdf')
        path = url.split("#", 1)[0].split("?", 1)[0]
        # TRAILING RUN OF FILENAME CHARACTERS; THE PATTERN CAN MATCH THE
        # EMPTY STRING SO A MATCH OBJECT IS ALWAYS RETURNED
        filename = re.search(r"[\w.\-]*$", path).group(0)
        if not filename:
            filename = "untitled.html"
        elif "." not in filename:
            filename = filename + ".html"
    except Exception as e:
        # BEST-EFFORT: REPORT THE PROBLEM AND SIGNAL FAILURE WITH None
        filename = None
        log.warning("could not extracting filename from url : " + str(e) + "\n")

    return filename