#!/usr/local/bin/python
# encoding: utf-8
"""
*Take key-values from a yaml file including a tablename(s) and add them to a mysql database table*
Usage:
yaml2db [-d] -s <pathToSettingsFile> <pathToYaml>
yaml2db [-d] --host=<host> --user=<user> --passwd=<passwd> --dbName=<dbName> <pathToYaml>
Options:
pathToYaml path to a single yaml file or directory of yaml files
pathToSettingsFile path to a settings file with logging and database information (yaml file)
--host=<host> the database host
--user=<user> database user
--passwd=<passwd> database user password
--dbName=<dbName> name of the database to add the table content to
-d, --delete delete yaml file(s) once added to database
-h, --help show this help message
-v, --version show version
-s, --settings the settings file
"""
from __future__ import print_function
from builtins import object
import sys
import os
import yaml
import re
import glob
import docopt
from fundamentals import tools, times
from fundamentals.mysql import convert_dictionary_to_mysql_table
# -- command-line entry point --
def main(arguments=None):
    """
    *The main function used when ``yaml_to_database.py`` is run as a command-line tool*

    **Key Arguments**

    - ``arguments`` -- handed unchanged to ``fundamentals.tools`` together with this module's docstring. Default *None*

    **Return**

    - None
    """
    # setup the command-line util settings
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False,
    )
    arguments, settings, log, dbConn = su.setup()

    # Collect the command-line values in a dictionary, keyed by the same
    # names the option/argument keys are normalised to (``--delete`` ->
    # ``deleteFlag``, ``<pathToYaml>`` -> ``pathToYaml``). Building local
    # variables with ``exec`` does not work inside a function on Python 3
    # and breaks on values that contain quote characters.
    clArgs = {}
    for arg, val in arguments.items():
        if arg.startswith("-"):
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        clArgs[varname] = val
        if arg == "--dbConn":
            dbConn = val
        log.debug("%s = %s" % (varname, val))

    pathToYaml = clArgs["pathToYaml"]
    deleteFlag = clArgs.get("deleteFlag", False)

    from fundamentals.mysql import yaml_to_database

    if os.path.isfile(pathToYaml):
        # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
        yaml2db = yaml_to_database(log=log, settings=settings, dbConn=dbConn)
        yaml2db.add_yaml_file_content_to_database(
            filepath=pathToYaml, deleteFile=deleteFlag
        )
        basename = os.path.basename(pathToYaml)
        print("Content of %s added to database" % (basename,))
    else:
        # ANYTHING THAT IS NOT A REGULAR FILE IS TREATED AS A DIRECTORY OF
        # YAML FILES
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn,
            pathToInputDir=pathToYaml,
            deleteFiles=deleteFlag,
        )
        yaml2db.ingest()
        print("Content of %s directory added to database" % (pathToYaml,))

    return
# -- yaml file ingester --
class yaml_to_database(object):
    """
    *Take key-values from yaml files including a tablename(s) and add them to a mysql database table*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary. Default *False*
    - ``pathToInputDir`` -- path to the directory containing the yaml files that will be added to the database table(s). Default *False*
    - ``dbConn`` -- connection to database to add the content to
    - ``deleteFiles`` -- delete the yaml files once their content has been added to the database. Default *False*

    **Usage**

    To setup your logger, settings and database connections, please use the ``fundamentals`` package (see tutorial here https://fundamentals.readthedocs.io/en/master/initialisation.html).

    To initiate a ``yaml2db`` object, use the following:

    ```python
    from fundamentals.mysql import yaml_to_database
    yaml2db = yaml_to_database(
        log=log,
        settings=settings,
        dbConn=dbConn,
        pathToInputDir="/path/to/yaml/directory",
        deleteFiles=False
    )
    ```

    And here's an example of the content in a yaml file that this ``yaml2db`` object can parse:

    ```yaml
    title: Why you should do most of your text editing in : Sublime Text | Sublime Text Tips
    url: http://sublimetexttips.com/why-you-should-do-most-of-your-text-editing-in-sublime-text/?utm_source=drip&utm_medium=email&utm_campaign=editor-proliferation
    kind: webpage
    subtype: article
    table: web_articles,podcasts
    ```
    """

    # Matches one ``key: value`` pair. The files are read with this pattern
    # rather than a YAML parser, so values may themselves contain ``: `` (see
    # the ``title`` line in the class docstring example).
    _KEY_VALUE_REGEX = re.compile(
        r"(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)",
        flags=re.M | re.S,
    )

    # Initialisation
    def __init__(
        self, log, dbConn, pathToInputDir=False, settings=False, deleteFiles=False
    ):
        self.log = log
        log.debug("instansiating a new 'yaml_to_database' object")
        self.settings = settings
        self.pathToInputDir = pathToInputDir
        self.dbConn = dbConn
        self.deleteFiles = deleteFiles
        # xt-self-arg-tmpx

    def ingest(self):
        """
        *ingest the contents of the directory of yaml files into a database*

        Every regular file directly inside ``pathToInputDir`` whose name contains ``yaml`` (case-insensitive) is passed to ``add_yaml_file_content_to_database``.

        **Return**

        - None

        **Usage**

        To import an entire directory of yaml files into a database, use the following:

        ```python
        from fundamentals.mysql import yaml_to_database
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn,
            pathToInputDir="/path/to/yaml/directory",
            deleteFiles=False
        )
        yaml2db.ingest()
        ```
        """
        self.log.debug("starting the ``ingest`` method")

        # ``os.listdir`` returns names in arbitrary order; sort them so that
        # the files are always processed in the same sequence
        for name in sorted(os.listdir(self.pathToInputDir)):
            filepath = os.path.join(self.pathToInputDir, name)
            if os.path.isfile(filepath) and "yaml" in name.lower():
                self.add_yaml_file_content_to_database(
                    filepath=filepath,
                    deleteFile=self.deleteFiles,
                )

        self.log.debug("completed the ``ingest`` method")
        return None

    def add_yaml_file_content_to_database(self, filepath, deleteFile=False):
        """*given a path to a yaml file, add the yaml file content to the database*

        **Key Arguments**

        - ``filepath`` -- the path to the yaml file
        - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*

        **Return**

        - None

        **Raises**

        - ``IOError`` -- if the yaml file cannot be opened

        A file with no ``table`` value (missing or blank) is skipped with a warning and is never deleted.

        **Usage**

        To parse and import the contents of a single yaml file into the database, use the following:

        ```python
        from fundamentals.mysql import yaml_to_database
        # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn
        )
        yaml2db.add_yaml_file_content_to_database(
            filepath="/path/to/file.yaml",
            deleteFile=True
        )
        ```
        """
        self.log.debug("starting the ``add_yaml_file_content_to_database`` method")

        import codecs

        try:
            self.log.debug("attempting to open the file %s" % (filepath,))
            # the context manager closes the file on success and on failure
            with codecs.open(filepath, encoding="utf-8", mode="r") as readFile:
                thisData = readFile.read()
        except IOError:
            message = "could not open the file %s" % (filepath,)
            self.log.critical(message)
            raise IOError(message)

        yamlContent = self._parse_key_values(thisData)

        # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE
        dbTables = [
            t.strip() for t in yamlContent.pop("table", "").split(",") if t.strip()
        ]
        if not dbTables:
            # without a target table nothing can be inserted, so return before
            # the file could be deleted
            self.log.warning(
                "A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s"
                % {"filepath": filepath}
            )
            return None

        # UNSHORTEN URL
        if "url" in yamlContent:
            yamlContent["url"] = self._unshorten_url(yamlContent["url"])
            uniqueKeyList = ["url"]
        else:
            uniqueKeyList = []

        yamlContent["original_yaml_path"] = filepath

        for t in dbTables:
            convert_dictionary_to_mysql_table(
                dbConn=self.dbConn,
                log=self.log,
                dictionary=yamlContent,
                dbTableName=t,
                uniqueKeyList=uniqueKeyList,
                dateModified=True,
                returnInsertOnly=False,
                replace=True,
            )

        if deleteFile:
            os.remove(filepath)

        self.log.debug("completed the ``add_yaml_file_content_to_database`` method")
        return None

    def _parse_key_values(self, text):
        """*return a dictionary of the ``key: value`` pairs found in the text, with one layer of matching surrounding quotes removed from each value*"""
        content = {}
        for match in self._KEY_VALUE_REGEX.finditer(text):
            content[match.group("key")] = self._strip_matching_quotes(
                match.group("value")
            )
        return content

    @staticmethod
    def _strip_matching_quotes(value):
        """*remove one pair of matching single or double quotes wrapping the value, if present*"""
        # the length check keeps empty values (``key: `` followed by a
        # newline) from raising an IndexError and leaves a lone quote
        # character untouched
        if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
            return value[1:-1]
        return value

    def _unshorten_url(self, url):
        """*follow redirects with a HEAD request and return the final URL, or the URL unchanged if the request fails*"""
        # imported here so that files without a ``url`` key never need the
        # ``requests`` package
        import requests
        import requests.packages.urllib3

        requests.packages.urllib3.disable_warnings()

        try:
            response = requests.head(url, allow_redirects=True)
        except Exception as e:
            # un-shortening is best-effort: a failed request must not stop
            # the ingest, but it is logged instead of silently ignored
            self.log.warning("could not resolve the url %s: %s" % (url, e))
            return url
        return response.url
# use the tab-trigger below for new method
# xt-class-method
# Run the command-line interface only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()