Source code for emat.analysis.cart

import numpy as np
import pandas as pd
from ..scope.box import Box
from ..workbench.analysis import cart
from ..workbench.analysis import scenario_discovery_util as sdutil

[docs]class CART(cart.CART): """ Classification and Regression Tree Algorithm CART can be used in a manner similar to PRIM. It provides access to the underlying tree, but it can also show the boxes described by the tree in a table or graph form similar to prim. Args: x (DataFrame): The independent variables, generally the experimental design inputs. y (array-like, 1 dimension): The dependent variable of interest. mass_min (float, default 0.05): A value between 0 and 1 indicating the minimum fraction of data points in a terminal leaf. mode ({BINARY, CLASSIFICATION, REGRESSION}): Indicates the mode in which CART is used. Binary indicates binary classification, classification is multiclass, and regression is regression. scope (Scope): The EMAT exploratory scope, used primarily to facilitate visualization. """ def __init__(self, x, y, mass_min=0.05, mode=sdutil.RuleInductionType.BINARY, scope=None, explorer=None): super().__init__(x, y, mass_min, mode=mode) self._target_name = getattr(y, 'name', None) self.scope = scope if explorer is not None: self._explorer = explorer elif explorer is False: self._explorer = None else: self._explorer = None if hasattr(x, 'scope') and scope is None: scope = x.scope if scope is not None: from .explore_2.explore_visualizer import Visualizer self._explorer = Visualizer(x, scope=scope)
[docs] def select(self, i): """ Select a leaf from the CART tree. This will update the CART box to this selected box, as well as update the `explorer`, if one is attached to this CART. Args: i (int): The index of the box to select. """ out = CartBox(self, i) explorer = getattr(self, '_explorer', None) if explorer is not None: from .explore_2.explore_base import DataFrameExplorerBase if isinstance(explorer, DataFrameExplorerBase): name_t = f"CART Box {i} Target [{self._target_name}]" name_s = f"CART Box {i} Solution [{self._target_name}]" explorer.new_selection( out, name=name_t, activate=False, ) explorer.new_selection( out.to_emat_box(), name=name_s, activate=False, ) if explorer.active_selection_name() not in (name_t, name_s): explorer.set_active_selection_name(name_t) else: explorer.set_active_selection_name( explorer.active_selection_name(), force_update=True, ) else: # for old explorer interface explorer.set_box(out.to_emat_box()) return out
class _CartEntry(object): '''a descriptor for the current leaf''' # def __init__(self, name): # self.name = name def __set_name__(self, cls, name): self.name = name def __get__(self, instance, _): try: return instance.cart_alg.stats[instance._cur_box][self.name] except KeyError: return instance.cart_alg.stats[instance._cur_box][self.name.replace("_"," ")] def __set__(self, instance, value): raise ValueError("this property cannot be assigned to") class CartBox(): """ Information for a specific CART box, corresponding to a leaf of the tree. """ coverage = _CartEntry() density = _CartEntry() res_dim = _CartEntry() mass = _CartEntry() def __init__(self, cart_alg, n=0): self.cart_alg = cart_alg self._cur_box = n def to_emat_box(self, i=None, name=None, src_name=None): if i is None: i = self._cur_box if name is None: name = f'CART Box {i}' if src_name is not None: name = name + f" [{src_name}]" limits = self.cart_alg.boxes[i] b = Box(name) for col in limits.columns: if isinstance(self.cart_alg.x.dtypes[col], pd.CategoricalDtype): if set(self.cart_alg.x[col].cat.categories) != limits[col].iloc[0]: b.replace_allowed_set(col, limits[col].iloc[0]) else: if limits[col].iloc[0] != self.cart_alg.x[col].min(): b.set_lower_bound(col, limits[col].iloc[0]) if limits[col].iloc[1] != self.cart_alg.x[col].max(): b.set_upper_bound(col, limits[col].iloc[1]) b.coverage = self.cart_alg.stats[i]['coverage'] b.density = self.cart_alg.stats[i]['density'] b.mass = self.cart_alg.stats[i]['mass'] return b def __repr__(self): i = self._cur_box head = f"<{self.__class__.__name__} leaf {i} of {len(self.cart_alg.boxes)}>\n" return head + repr(self.to_emat_box()).split("\n", 1)[1] def to_json(self): state = {} for i in range(len(self.cart_alg.boxes)): state[i] = self.to_emat_box(i, name=str(i)).to_json() import json return json.dumps(state) @property def _explorer(self): return self.cart_alg._explorer @_explorer.setter def _explorer(self, x): self.cart_alg._explorer = x def explore(self, scope=None, data=None): if getattr(self, '_explorer', None) is None: from .explore_2.explore_visualizer import Visualizer if data is None: data = self.cart_alg.x if scope is None: scope = getattr(data, 'scope', None) if scope is None: scope = getattr(self.cart_alg, 'scope', None) if scope is None: raise ValueError("failed to initialize visualizer, cannot find scope") self._explorer = Visualizer(scope=scope, data=data) self._explorer["CART Target"] = self.to_emat_box() return self._explorer def splom(self, rows=None, cols=None): """ Generate a scatter plot matrix showing this CartBox. Args: rows, cols (list-like, optional): The dimensions to display as rows and columns of the resulting scatter plot matrix. If not provided, each defaults to the set of restricted dimensions on the current CartBox. Returns: plotly.FigureWidget """ if rows is None: rows = sorted(self.to_emat_box().demanded_features) if cols is None: cols = sorted(self.to_emat_box().demanded_features) fig = self.explore().splom( f"{rows}|{cols}", rows=rows, cols=cols, ) return fig def hmm(self, rows=None, cols=None): """ Generate a heatmap matrix showing this CartBox. Args: rows, cols (list-like, optional): The dimensions to display as rows and columns of the resulting heatmap matrix. If not provided, each defaults to the set of restricted dimensions on the current CartBox. Returns: plotly.FigureWidget """ if rows is None: rows = sorted(self.to_emat_box().demanded_features) if cols is None: cols = sorted(self.to_emat_box().demanded_features) fig = self.explore().hmm( f"{rows}|{cols}", rows=rows, cols=cols, ) return fig