Source code for sdssdb.utils.registry

# !/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filename: registry.py
# Project: utils
# Author: Brian Cherinka
# Created: Tuesday, 20th October 2020 1:44:45 pm
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Tuesday, 20th October 2020 1:44:45 pm
# Modified By: Brian Cherinka


from __future__ import print_function, division, absolute_import
import pathlib
from typing import Union, Type

try:
    from astropy.table import Table, Column
except ImportError:
    Table = None
    Column = None

# names exported by ``from sdssdb.utils.registry import *``
__all__ = ["list_databases", "display_table"]

# starts out empty so that update_db_registry() can tell "not scanned yet"
# (None) apart from an already-built registry
db_registry = None


def update_db_registry() -> dict:
    """Build (or return the cached) registry of sdssdb databases

    Uses pathlib to traverse the directory two levels above this file and
    records every directory whose parent directory is named "peewee" or
    "sqlalchemy" as a database of that ORM.  The stems of the lowercase-named
    ``*.py`` files found directly inside such a directory are recorded as
    its schema.  Assumes a given structure of "orm/database/schema.py".

    This function only returns the registry; it does not assign the
    module-level ``db_registry`` itself.

    Returns
    -------
    dict
        A dictionary of all databases and schema organized by ORM, of the
        form ``{orm: {database: {"schema": [schema, ...]}}}``
    """
    # if global dict already populated, return the cache
    if db_registry is not None:
        return db_registry

    sdssdb_path = pathlib.Path(__file__).parent.parent
    registry = {"peewee": {}, "sqlalchemy": {}}
    for i in sdssdb_path.rglob("./"):
        # the ORM is identified by the name of the parent directory only.
        # Testing for a substring anywhere in the full path would misfile
        # databases whenever e.g. the install location or a database name
        # happens to contain "peewee".
        orm = i.parent.stem

        # reject if not a directory, paths ending in '_' and if the parent
        # directory is not peewee or sqlalchemy
        if not i.is_dir() or i.as_posix().endswith("_") or orm not in registry:
            continue

        # look for schema.py files; sorted because glob order depends on the
        # filesystem and would otherwise vary between machines
        schema = sorted(s.stem for s in i.glob("[a-z]*.py"))
        registry[orm][i.stem] = {"schema": schema}
    return registry


# scan the package once, at import time, and keep the result as the module cache
db_registry = update_db_registry()


def list_databases(orm: str = None, with_schema: bool = False) -> Union[dict, list]:
    """Return a list of sdssdb databases

    Returns a list of available databases in sdssdb.  When no orm
    is specified, returns a dict of orm:database key:values.  If
    with_schema is specified, also returns a list of schema for each
    database.

    Parameters
    ----------
    orm : str, optional
        The type of ORM to select on, by default None.  Accepts "peewee",
        "sqlalchemy" or the short forms "pw" and "sqla".
    with_schema : bool, optional
        If True, also includes the schemas for each database, by default False

    Returns
    -------
    Union[dict, list]
        A list of databases for a given ORM or a dict of database:schema values
        or a dict of orm:database values.  With ``with_schema=True`` the
        returned dict is the module-level registry (or one of its
        sub-dicts) itself, not a copy.

    Raises
    ------
    TypeError
        when input orm is given (truthy) and is not a string
    ValueError
        when input orm is not either peewee or sqlalchemy (or their
        short forms)
    """
    # isinstance rather than an exact type comparison, so str subclasses work
    if orm and not isinstance(orm, str):
        raise TypeError(f"Input {orm} must be a string.")

    if orm and orm not in ["peewee", "pw", "sqla", "sqlalchemy"]:
        raise ValueError(f"ORM {orm} can only be 'peewee', 'pw', 'sqla', or 'sqlalchemy'")

    # expand the short aliases once, up front, instead of in every branch
    if orm:
        orm = {"pw": "peewee", "sqla": "sqlalchemy"}.get(orm, orm)

    if with_schema:
        return db_registry[orm] if orm else db_registry

    if not orm:
        return {
            "peewee": list(db_registry["peewee"]),
            "sqlalchemy": list(db_registry["sqlalchemy"]),
        }

    # orm was validated above, so a plain lookup is safe here
    return list(db_registry[orm])
def _mask_column(column: Type[Column], idx: list, fill: str = "") -> None: """Mask out duplicate elements in a given Astropy table.Column Masks out elements in an table column. Given an array of indices of unique elements, masks out the inverse with the specified fill value. Parameters ---------- column : `~astropy.table.Column` an Astropy table Column to mask idx : numpy array The array indices of unique column elements fill : str, optional The column mask fill value, by default '' """ column.mask[idx] = True column.mask = ~column.mask column.fill_value = fill
def display_table(
    pprint: bool = None, mask_dups: bool = False, fill: str = "", **kwargs
) -> Type[Table]:
    """Display sdssdb databases and schema as an Astropy Table

    Builds a table with the columns "orm", "db", and "schema", holding one
    row per schema of every database in the registry, sorted by those
    three columns.  A database without any schema contributes no rows.

    Parameters
    ----------
    pprint : bool, optional
        If True, pretty print the Astropy Table and return nothing,
        by default None
    mask_dups : bool, optional
        If True, masks duplicate orm and db entries, by default False
    fill : str, optional
        The column mask fill value, by default ''
    kwargs
        extra kwargs passed to Table.pprint

    Returns
    -------
    `~astropy.table.Table`
        an Astropy Table of sdssdb databases, or None when ``pprint``
        is set and the table was printed instead

    Raises
    ------
    ImportError
        when astropy is not installed
    """
    if not Table:
        raise ImportError("No Table found. Astropy is not installed.")

    # flatten the nested registry into one row per (orm, database, schema)
    rows = [
        {"orm": orm_name, "db": db_name, "schema": schema_name}
        for orm_name, databases in db_registry.items()
        for db_name, info in databases.items()
        for schema_name in info["schema"]
    ]

    # create a masked table, then group and sort it
    table = Table(rows, names=["orm", "db", "schema"], masked=True)
    table = table.group_by(["orm", "db"])
    table.sort(["orm", "db", "schema"])

    # mask out duplicate rows for columns orm and db
    if mask_dups:
        by_orm = table.group_by("orm")
        by_db = table.group_by(["orm", "db"])
        # groups.indices[:-1] are the starting rows of each group
        _mask_column(by_orm["orm"], by_orm.groups.indices[:-1], fill=fill)
        _mask_column(by_orm["db"], by_db.groups.indices[:-1], fill=fill)
        table = by_orm.filled()

    if not pprint:
        return table

    # pretty print the table; nothing is returned in this mode
    table.pprint(**kwargs)
    return