Source code for smv.runinfo

#
# This file is licensed under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Easy Python access to SmvRunInfoCollector and related Scala classes.

Todo:
    * document example use
"""
import json
from pprint import pformat
from smv.utils import infer_full_name_from_part


[docs]class SmvRunInfo(object):
    """collection of a module's running info with:
    
        - metadata
        - metahistory
    """
    def __init__(self, meta, metahist):
        self.metadata = meta
        self.metadata_history = metahist

[docs]class SmvRunInfoCollector(object):
    """A list of SmvRunInfos from a run transaction, and methods
        to help reporting on them
    """
    def __init__(self):
        self.runinfos = {}

[docs]    def add_runinfo(self, fqn, meta, meta_hist):
        self.runinfos.update({fqn: SmvRunInfo(meta, meta_hist)})

[docs]    def fqns(self):
        """Returns a list of FQNs for all datasets that ran"""
        return [fqn for fqn in self.runinfos]

    def _infer_fqn(self, ds_name):
        """ds_name for user to use could be partial name, infer full fqn from partial name"""
        return infer_full_name_from_part(self.fqns(), ds_name)

[docs]    def dqm_validation(self, ds_name):
        """Returns the DQM validation result for a given dataset

        Returns:
            A dictionary representation of the dqm validation result

        Raises:
            py4j.protocol.Py4JError: if there is java call error or
                there is no validation result for the specified dataset
                (e.g. caused by a typo in the name)

        """
        metadata = self.metadata(ds_name)
        if (not metadata):
            return {}
        return metadata["_dqmValidation"]

[docs]    def dqm_state(self, ds_name):
        """Returns the DQM state for a given dataset

        Returns:
            A dictionary representation of the dqm state

        Raises:
            py4j.protocol.Py4JError: if there is java call error or
                there is no validation result or dqm state for the
                specified dataset (e.g. caused by a typo in the name)

        """
        validation = self.dqm_validation(ds_name)
        if 'dqmStateSnapshot' in validation:
            return validation['dqmStateSnapshot']
        return {}

[docs]    def metadata(self, ds_name):
        """Returns the metadata for a given dataset as a dict
        """
        return self.runinfos.get(self._infer_fqn(ds_name)).metadata._metadata

[docs]    def metadata_history(self, ds_name):
        """Returns the metadata history for a given dataset as a list(dict)
        """
        return self.runinfos.get(self._infer_fqn(ds_name)).metadata_history._hist_list

[docs]    def show_report(self, ds_name=None, show_history=False):
        """Print detailed report of information collected

            Args:
                ds_name (str): report only of named ds if not None
                show_history (bool): include metadata history in report if True (default False)
        """
        if ds_name is None:
            fqns = self.fqns()
        else:
            fqns = [self._infer_fqn(ds_name)]
        msg = 'datasets: %s' % fqns

        def items_to_report(fqn):
            validation = self.dqm_validation(fqn)
            metadata = self.metadata(fqn)
            # Remove validation results from metadata (if they exist) as we are reporting it above
            try:
                del metadata['_dqmValidation']
            except:
                pass

            items = [("dqm validation", validation), ("metadata", metadata)]
            if show_history:
                history = self.metadata_history(fqn)
                items.append(("metadata history", history))

            return items

        for fqn_to_report in fqns:
            msg += '\n+ %s' % fqn_to_report
            for name, value in items_to_report(fqn_to_report):
                msg += '\n|- {}:'.format(name)
                msg += '\n     ' + pformat(value, indent=5)

        print(msg)