Source code for smv.smvdriver

import sys

from pyspark.sql import SparkSession
from smv import SmvApp

class SmvDriver(object):
    """Driver for an SMV application

    SmvDriver handles the boilerplate around parsing driver args,
    constructing an SmvApp, and running an application.

    To use SmvDriver, override `main`; then, in the main block of your
    driver script, construct your driver and call `run`.
    """

    def create_smv_app(self, smv_args, driver_args):
        """Override this to define how this driver's SmvApp is created

        The default obtains a Hive-enabled SparkSession and passes it,
        together with `smv_args`, to `SmvApp.createInstance` with python
        module hotload disabled. Note that it's important to use
        `createInstance` to ensure that the singleton app is set.

        SmvDriver will parse the full CLI args to distinguish the SMV args
        from the args to your driver.

        Args:
            smv_args (list(str)): CLI args for SMV - should be passed to
                `SmvApp`
            driver_args (list(str)): CLI args for the driver (not used by
                the default implementation)

        Returns:
            The app returned by `SmvApp.createInstance`.
        """
        spark_session = (
            SparkSession.builder
            .enableHiveSupport()
            .getOrCreate()
        )

        # When SmvDriver is in use, user will call smv-run and interact
        # through command-line, so no need to do py module hotload
        return SmvApp.createInstance(
            smv_args, spark_session, py_module_hotload=False
        )

    def main(self, app, driver_args):
        """Override this to define the driver logic

        Default is to just call `run` on the `SmvApp`.

        Args:
            app (SmvApp): app which was constructed
            driver_args (list(str)): CLI args for the driver (not used by
                the default implementation)
        """
        app.run()

    def run(self):
        """Run the driver

        Splits `sys.argv[1:]` at the first "--script" token: everything
        before it is treated as SMV args, and everything after the token
        that follows "--script" (the script name) is treated as driver
        args. When "--script" is absent, all args are SMV args and the
        driver args are empty. The app is then built with
        `create_smv_app` and handed to `main`.
        """
        args = sys.argv[1:]

        try:
            smv_args_end = args.index("--script")
        except ValueError:
            # No "--script" marker: every argument belongs to SMV
            smv_args_end = len(args)

        smv_args = args[:smv_args_end]

        # First arg after smv_args_end is --script
        # Second is the script name
        # Then the driver args start
        driver_args = args[smv_args_end + 2:]

        app = self.create_smv_app(smv_args, driver_args)
        self.main(app, driver_args)