# !/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filename: registry.py
# Project: utils
# Author: Brian Cherinka
# Created: Tuesday, 20th October 2020 1:44:45 pm
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Tuesday, 20th October 2020 1:44:45 pm
# Modified By: Brian Cherinka
from __future__ import print_function, division, absolute_import
import pathlib
from typing import Union, Type
try:
from astropy.table import Table, Column
except ImportError:
Table = None
Column = None
__all__ = ["list_databases", "display_table"]
db_registry = None
def update_db_registry() -> dict:
"""Updates the global database registry
Uses pathlib to traverse the sdssdb directory structure and
parses content to identify databases and relevant schema for each
database. Assumes a given structure of "orm/database/schema.py"
Returns
-------
dict
A dictionary of all databases and schema organized by ORM
"""
# if global dict already populated, return the cache
if db_registry is not None:
return db_registry
sdssdb_path = pathlib.Path(__file__).parent.parent
registry = {"peewee": {}, "sqlalchemy": {}}
for i in sdssdb_path.rglob("./"):
# reject if not a directory, paths ending in '_' and if the parent
# directory is not peewee or sqlalchemy
if (
not i.is_dir()
or i.as_posix().endswith("_")
or i.parent.stem not in ["peewee", "sqlalchemy"]
):
continue
# convert to string
path = i.as_posix()
# look for schema.py files
schema = i.glob("[a-z]*.py")
if "peewee" in path:
registry["peewee"][i.stem] = {"schema": [s.stem for s in schema]}
elif "sqlalchemy" in path:
registry["sqlalchemy"][i.stem] = {"schema": [s.stem for s in schema]}
return registry
db_registry = update_db_registry()
[docs]
def list_databases(orm: str = None, with_schema: bool = False) -> Union[dict, list]:
"""Return a list of sdssdb databases
Returns a list of available databases in sdssdb. When no orm is specified,
returns a dict of orm:database key:values. If with_schema is specified, also
returns a list of schema for each database.
Parameters
----------
orm : str, optional
The type of ORM to select on, by default None
with_schema : bool, optional
If True, also includes the schemas for each database, by default False
Returns
-------
Union[dict, list]
A list of databases for a given ORM or a dict of database:schema values
or a dict of orm:database values
Raises
------
TypeError
when input orm is not a string
ValueError
when input orm is not either peewee or sqlalchemy
"""
if orm and type(orm) != str: # noqa: E721
raise TypeError(f"Input {orm} must be a string.")
if orm and orm not in ["peewee", "pw", "sqla", "sqlalchemy"]:
raise ValueError(f"ORM {orm} can only be 'peewee', 'pw', 'sqla', or 'sqlalchemy'")
if with_schema:
if not orm:
return db_registry
else:
orm = "peewee" if orm == "pw" else "sqlalchemy" if orm == "sqla" else orm
return db_registry[orm]
if not orm:
return {
"peewee": list(db_registry["peewee"].keys()),
"sqlalchemy": list(db_registry["sqlalchemy"].keys()),
}
else:
orm = "peewee" if orm == "pw" else "sqlalchemy" if orm == "sqla" else orm
return list(db_registry.get(orm, None).keys())
def _mask_column(column: Type[Column], idx: list, fill: str = "") -> None:
"""Mask out duplicate elements in a given Astropy table.Column
Masks out elements in an table column. Given an array of indices of
unique elements, masks out the inverse with the specified fill value.
Parameters
----------
column : `~astropy.table.Column`
an Astropy table Column to mask
idx : numpy array
The array indices of unique column elements
fill : str, optional
The column mask fill value, by default ''
"""
column.mask[idx] = True
column.mask = ~column.mask
column.fill_value = fill
[docs]
def display_table(
pprint: bool = None, mask_dups: bool = False, fill: str = "", **kwargs
) -> Type[Table]:
"""Display sdssdb databases and schema as an Astropy Table
Displays the list of available sdssdb databases organized by ORM
and includes the schema for each database. Produces a table with columns
"orm", "db", and "schema".
Parameters
----------
pprint : bool, optional
Pretty print the Astropy Table, by default None
mask_dups : bool, optional
If True, masks duplicate orm and db entries, by default False
fill : str, optional
The column mask fill value, by default ''
kwargs :
extra kwargs passed to Table.pprint
Returns
-------
`~astropy.table.Table`
an Astropy Table of sdssdb databases
Raises
------
ImportError
when astropy is not installed
"""
if not Table:
raise ImportError("No Table found. Astropy is not installed.")
tt = []
for k, v in db_registry.items():
for i, j in v.items():
for r in j["schema"]:
tt.append({"orm": k, "db": i, "schema": r})
# create a masked table
t = Table(tt, names=["orm", "db", "schema"], masked=True)
# group and sort the table
t = t.group_by(["orm", "db"])
t.sort(["orm", "db", "schema"])
# mask out duplicate rows for columns orm and db
if mask_dups:
og = t.group_by("orm")
dg = t.group_by(["orm", "db"])
_mask_column(og["orm"], og.groups.indices[:-1], fill=fill)
_mask_column(og["db"], dg.groups.indices[:-1], fill=fill)
t = og.filled()
# pretty print the table
if pprint:
t.pprint(**kwargs)
return
return t