#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filename: registry.py
# Project: utils
# Author: Brian Cherinka
# Created: Tuesday, 20th October 2020 1:44:45 pm
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Tuesday, 20th October 2020 1:44:45 pm
# Modified By: Brian Cherinka
from __future__ import print_function, division, absolute_import
import pathlib
from typing import Union, Type
# astropy is an optional dependency: when it cannot be imported, Table and
# Column are set to None so that this module can still be imported
# (display_table checks Table and raises ImportError when it is unset)
try:
    from astropy.table import Table, Column
except ImportError:
    Table = None
    Column = None

# names exported by "from <module> import *"
__all__ = ['list_databases', 'display_table']

# module-level registry cache; None means "not built yet".  It is replaced by
# the result of update_db_registry() once that function has been defined.
db_registry = None
def update_db_registry() -> dict:
    """ Build, or fetch from the cache, the registry of sdssdb databases

    Uses pathlib to traverse the directory two levels above this file and
    records every directory whose immediate parent directory is named
    "peewee" or "sqlalchemy" as a database of that ORM.  For each database
    the stems of the ``.py`` files directly inside it whose names start with
    a lowercase letter are recorded as its schema.  Assumes a given
    structure of "orm/database/schema.py".

    The module-level ``db_registry`` is used as a cache: when it is not None
    it is returned unchanged and the filesystem is not scanned.  This
    function does not assign ``db_registry`` itself; the caller is expected
    to store the returned value.

    Returns
    -------
    dict
        A dictionary of all databases and schema organized by ORM, of the
        form ``{orm: {database: {'schema': [schema, ...]}}}``, with each
        schema list sorted alphabetically
    """
    # if global dict already populated, return the cache
    if db_registry is not None:
        return db_registry

    sdssdb_path = pathlib.Path(__file__).parent.parent
    registry = {'peewee': {}, 'sqlalchemy': {}}

    for path in sdssdb_path.rglob('*'):
        # The ORM is taken from the name of the parent directory only.
        # Testing for a substring of the full path would misfile databases
        # whenever the install location itself contains "peewee".
        orm = path.parent.name

        # reject entries whose parent is not an ORM directory, names ending
        # in '_' (e.g. __pycache__) and anything that is not a directory;
        # the cheap name checks run first so is_dir() is only called on
        # plausible candidates
        if orm not in registry or path.name.endswith('_') or not path.is_dir():
            continue

        # look for schema.py files; sorted so the result does not depend on
        # the order in which the filesystem enumerates entries
        schema = sorted(s.stem for s in path.glob('[a-z]*.py'))
        registry[orm][path.stem] = {'schema': schema}

    return registry
# build the registry once, when the module is imported; update_db_registry
# returns this stored value on any later call instead of rescanning
db_registry = update_db_registry()
# (stray "[docs]" link text from the rendered HTML source view; not part of the module)
def list_databases(orm: Union[str, None] = None, with_schema: bool = False) -> Union[dict, list]:
    """ Return a list of sdssdb databases

    Returns a list of available databases in sdssdb. When no orm is specified,
    returns a dict of orm:database key:values. If with_schema is specified, also
    returns a list of schema for each database.

    Parameters
    ----------
    orm : str, optional
        The type of ORM to select on, by default None.  Accepts 'peewee' or
        'sqlalchemy' and the short aliases 'pw' and 'sqla'.  Any falsy value
        is treated the same as None.
    with_schema : bool, optional
        If True, also includes the schemas for each database, by default False

    Returns
    -------
    Union[dict, list]
        A list of databases for a given ORM or a dict of database:schema values
        or a dict of orm:database values.  When with_schema is True the
        returned dict is the registry itself (or one of its sub-dicts), not
        a copy.

    Raises
    ------
    TypeError
        when input orm is not a string
    ValueError
        when input orm is not either peewee or sqlalchemy
    """
    # isinstance rather than an exact type comparison, so str subclasses pass
    if orm and not isinstance(orm, str):
        raise TypeError(f'Input {orm} must be a string.')

    # accepted spellings, mapped onto the keys used by the registry
    aliases = {'peewee': 'peewee', 'pw': 'peewee',
               'sqlalchemy': 'sqlalchemy', 'sqla': 'sqlalchemy'}
    if orm and orm not in aliases:
        raise ValueError(f"ORM {orm} can only be 'peewee', 'pw', 'sqla', or 'sqlalchemy'")

    # no ORM selected: report on both ORMs
    if not orm:
        if with_schema:
            return db_registry
        return {'peewee': list(db_registry['peewee']),
                'sqlalchemy': list(db_registry['sqlalchemy'])}

    # a single ORM selected: either its database:schema dict or just the names
    databases = db_registry[aliases[orm]]
    return databases if with_schema else list(databases)
def _mask_column(column: Type[Column], idx: list, fill: str = '') -> None:
""" Mask out duplicate elements in a given Astropy table.Column
Masks out elements in an table column. Given an array of indices of
unique elements, masks out the inverse with the specified fill value.
Parameters
----------
column : `~astropy.table.Column`
an Astropy table Column to mask
idx : numpy array
The array indices of unique column elements
fill : str, optional
The column mask fill value, by default ''
"""
column.mask[idx] = True
column.mask = ~column.mask
column.fill_value = fill
# (stray "[docs]" link text from the rendered HTML source view; not part of the module)
def display_table(pprint: bool = None, mask_dups: bool = False,
                  fill: str = '', **kwargs) -> Type[Table]:
    """ Show the sdssdb databases and their schema as an Astropy Table

    Flattens the database registry into one row per (orm, database, schema)
    combination and builds a masked table with columns "orm", "db", and
    "schema", grouped by orm and db and sorted on all three columns.

    Parameters
    ----------
    pprint : bool, optional
        If True, pretty print the table and return nothing, by default None
    mask_dups : bool, optional
        If True, masks repeated orm and db entries and fills them with
        ``fill``, by default False
    fill : str, optional
        The column mask fill value, by default ''
    kwargs :
        extra kwargs passed to Table.pprint

    Returns
    -------
    `~astropy.table.Table`
        an Astropy Table of sdssdb databases, or None when ``pprint`` is set

    Raises
    ------
    ImportError
        when astropy is not installed
    """
    if not Table:
        raise ImportError('No Table found. Astropy is not installed.')

    # one row per orm / database / schema combination
    rows = [
        {'orm': orm_name, 'db': db_name, 'schema': schema_name}
        for orm_name, databases in db_registry.items()
        for db_name, info in databases.items()
        for schema_name in info['schema']
    ]

    # masked table, grouped and then sorted
    table = Table(rows, names=['orm', 'db', 'schema'], masked=True)
    table = table.group_by(['orm', 'db'])
    table.sort(['orm', 'db', 'schema'])

    if mask_dups:
        # group boundaries give the row indices of the entries to keep;
        # both columns masked here belong to the orm-grouped table
        by_orm = table.group_by('orm')
        by_db = table.group_by(['orm', 'db'])
        _mask_column(by_orm['orm'], by_orm.groups.indices[:-1], fill=fill)
        _mask_column(by_orm['db'], by_db.groups.indices[:-1], fill=fill)
        table = by_orm.filled()

    if not pprint:
        return table

    # pretty print instead of returning the table
    table.pprint(**kwargs)
    return None