Python sklearn.base 模块,BaseEstimator() 实例源码
我们从Python开源项目中,提取了以下21个代码示例,用于说明如何使用sklearn.base.BaseEstimator()。
def __init__(self, datasets, pipelines):
    """Validate and store the datasets and pipelines.

    Parameters
    ----------
    datasets : BaseDataset or list of BaseDataset
        A single dataset instance is wrapped into a one-element list.
    pipelines : dict
        Mapping whose values must all be ``BaseEstimator`` instances.

    Raises
    ------
    ValueError
        If ``datasets`` is neither a list nor a dataset, if any list element
        is not a dataset, if ``pipelines`` is not a dict, or if any of its
        values is not a ``BaseEstimator``.
    """
    # Datasets: accept a lone instance by wrapping it, reject anything else.
    if not isinstance(datasets, list):
        if not isinstance(datasets, BaseDataset):
            raise ValueError("datasets must be a list or a dataset instance")
        datasets = [datasets]
    if not all(isinstance(entry, BaseDataset) for entry in datasets):
        raise ValueError("datasets must only contains dataset instance")
    self.datasets = datasets

    # Pipelines: only a dict of estimators is accepted.
    if not isinstance(pipelines, dict):
        raise ValueError("pipelines must be a dict or a Pipeline instance")
    if any(not isinstance(candidate, BaseEstimator)
           for candidate in pipelines.values()):
        raise ValueError("pipelines must only contains Pipelines instance")
    self.pipelines = pipelines
def is_estimator(model):
    """
    Report whether ``model`` is an estimator class or an estimator instance.

    Parameters
    ----------
    model : class or instance
        The object to test. Classes are checked with ``issubclass`` and
        everything else with ``isinstance``, both against ``BaseEstimator``.

    Returns
    -------
    bool
        True when ``model`` derives from (or is an instance of)
        ``BaseEstimator``.
    """
    # Pick the predicate matching the kind of object we were handed.
    predicate = issubclass if inspect.isclass(model) else isinstance
    return predicate(model, BaseEstimator)
# Alias for closer name to isinstance and issubclass
def test_subclass(self):
    """
    Assert the feature visualizer is in its rightful place
    """
    visualizer = FeatureVisualizer()
    # unittest spells this assertion ``assertIsInstance``; the all-lowercase
    # form is not a TestCase method and fails with AttributeError.
    self.assertIsInstance(visualizer, TransformerMixin)
    self.assertIsInstance(visualizer, BaseEstimator)
    self.assertIsInstance(visualizer, Visualizer)
# def test_interface(self):
# """
# Test the feature visualizer interface
# """
#
# visualizer = FeatureVisualizer()
# with self.assertRaises(NotImplementedError):
# visualizer.poof()
def test_subclass(self):
    """
    Assert the text visualizer is subclassed correctly
    """
    visualizer = TextVisualizer()
    # unittest spells this assertion ``assertIsInstance``; the all-lowercase
    # form is not a TestCase method and fails with AttributeError.
    self.assertIsInstance(visualizer, Visualizer)
# def test_interface(self):
# """
# Test the feature visualizer interface
# """
#
# visualizer = TextVisualizer()
# with self.assertRaises(NotImplementedError):
# visualizer.poof()
def test_sample_weight_adaboost_regressor():
    """
    Check AdaBoostRegressor with a base estimator lacking sample_weight.

    The dummy base estimator's ``fit`` accepts only ``X`` and ``y``; the
    boosted ensemble must still fit and end up with as many estimator
    weights as estimator errors.
    """
    class DummyEstimator(BaseEstimator):
        # ``fit`` deliberately takes no ``sample_weight`` argument.
        def fit(self, X, y):
            pass

        # Constant prediction: one zero per input row.
        def predict(self, X):
            return np.zeros(X.shape[0])

    ensemble = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    ensemble.fit(X, y_regr)
    weight_count = len(ensemble.estimator_weights_)
    error_count = len(ensemble.estimator_errors_)
    assert_equal(weight_count, error_count)
def _check_sklearn_model(model):
    """Raise RuntimeError unless ``model`` is a scikit-learn style regressor.

    The model must be an instance of both ``BaseEstimator`` and
    ``RegressorMixin``; nothing is returned when the check passes.
    """
    is_regressor = (isinstance(model, BaseEstimator)
                    and isinstance(model, RegressorMixin))
    if is_regressor:
        return
    raise RuntimeError('Needs to supply an instance of a scikit-learn '
                       'compatible regression class.')
def predict_regression(x_test, trained_estimator):
    """
    Produce a regression prediction from feature data and a trained estimator.

    Args:
        x_test: feature data handed unchanged to the estimator's ``predict``
        trained_estimator (sklearn.base.BaseEstimator): a trained scikit-learn estimator

    Returns:
        whatever ``trained_estimator.predict(x_test)`` returns
    """
    # Reject non-estimators up front; validate_estimator raises on failure.
    validate_estimator(trained_estimator)
    return trained_estimator.predict(x_test)
def predict_classification(x_test, trained_estimator):
    """
    Given feature data and a trained estimator, return a classification prediction

    Args:
        x_test: feature data handed unchanged to the estimator's ``predict_proba``
        trained_estimator (sklearn.base.BaseEstimator): a trained scikit-learn estimator

    Returns:
        a prediction: column 1 of ``predict_proba``'s output with
        single-dimensional axes squeezed out (presumably the positive-class
        probability of a binary classifier; confirm against callers)
    """
    # Reject non-estimators up front; validate_estimator raises on failure.
    validate_estimator(trained_estimator)
    prediction = np.squeeze(trained_estimator.predict_proba(x_test)[:, 1])
    return prediction
def validate_estimator(possible_estimator):
    """
    Ensure an object is a scikit-learn BaseEstimator, raising otherwise.

    Args:
        possible_estimator (object): Object of any type.

    Returns:
        True when the object's type derives from BaseEstimator (the return
        value exists only so tests can assert on it).

    Raises:
        HealthcareAIError: when the object's type is not a BaseEstimator subclass.
    """
    candidate_type = type(possible_estimator)
    if issubclass(candidate_type, BaseEstimator):
        return True

    message = 'Predictions require an estimator. You passed in {},which is of type: {}'.format(
        possible_estimator, type(possible_estimator))
    raise HealthcareAIError(message)
def default(self, obj):
    """Convert objects the stock JSON encoder cannot serialise.

    Handles, in order: numpy integers, dtypes, floats, booleans and arrays;
    sklearn estimators (wrapped in a ``Configuration`` built from their class
    name and ``get_params()``); ``Configuration`` objects (flattened into an
    ordered mapping); ``Configurable`` objects (via ``get_config()``); and
    sets (sorted). Anything else is delegated to the parent encoder, falling
    back to ``str(obj)`` if the parent raises ``TypeError``.

    Raises:
        ValueError: if a ``Configuration``'s params contain the key
            ``"name"`` or ``"version"``, which would collide with the
            fields written for the configuration itself.
    """
    # numpy scalars/arrays -> plain Python equivalents
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.dtype):
        return str(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()

    if isinstance(obj, BaseEstimator):  # handle sklearn estimators
        return Configuration(obj.__class__.__name__, 0, obj.get_params())

    if isinstance(obj, Configuration):
        if "version" in obj.params or "name" in obj.params:
            # Previously a bare ValueError() with no hint about the cause.
            raise ValueError(
                "Configuration params must not contain the reserved keys "
                "'name' or 'version'")
        out = OrderedDict()
        out["name"] = obj.name
        # Version 0 is left implicit in the output.
        if obj.version != 0:
            out["version"] = obj.version
        out.update(obj.params)
        return out

    if isinstance(obj, Configurable):
        return obj.get_config()

    if isinstance(obj, set):
        return sorted(obj)  # Ensure deterministic order

    try:
        return super().default(obj)
    except TypeError:
        # Last resort: fall back to the object's string form.
        return str(obj)
def setclassifier(self, estimator=None):
    """Assign classifier for which decision boundary should be plotted.

    Parameters
    ----------
    estimator : BaseEstimator instance, optional (default=KNeighborsClassifier(n_neighbors=10)).
        Classifier for which the decision boundary should be plotted. Must have
        probability estimates enabled (i.e. estimator.predict_proba must work).
        Make sure it is possible for probability estimates to get close to 0.5
        (more specifically, as close as specified by acceptance_threshold).
        When omitted (or ``None``), a fresh
        ``KNeighborsClassifier(n_neighbors=10)`` is created for this call.
    """
    # A default built in the signature is created once at definition time and
    # shared by every caller; build it per call so instances stay independent.
    if estimator is None:
        estimator = KNeighborsClassifier(n_neighbors=10)
    self.classifier = estimator
def _generate_bases_test(est, pd_est):
    """Build a test method comparing the bases of ``est`` and ``pd_est``.

    The returned function (to be attached to a ``TestCase``) asserts that
    ``pd_est`` is a ``FrameMixin`` and a ``base.BaseEstimator``, that ``est``
    is not a ``FrameMixin``, and that both objects agree on membership of
    each sklearn mixin class.
    """
    def test(self):
        self.assertTrue(isinstance(pd_est, FrameMixin), pd_est)
        self.assertFalse(isinstance(est, FrameMixin))
        self.assertTrue(isinstance(pd_est, base.BaseEstimator))
        try:
            mixins = [
                base.ClassifierMixin,
                base.ClusterMixin,
                base.BiclusterMixin,
                base.TransformerMixin,
                base.DensityMixin,
                base.MetaEstimatorMixin,
                base.RegressorMixin,
            ]
        except AttributeError:
            # Only attribute lookups on ``base`` can fail above. A missing
            # mixin is tolerated solely when ``_sklearn_ver`` is at most 17.
            if _sklearn_ver > 17:
                raise
            mixins = [
                base.ClassifierMixin,
                base.RegressorMixin,
            ]
        for mixin in mixins:
            self.assertEqual(
                isinstance(pd_est, mixin),
                isinstance(est, mixin),
                msg=mixin.__name__)
    return test
def test_check_estimator():
    """Check that ``check_estimator`` fails on deliberately bad estimators.

    Not a complete test of all checks, which are very extensive. Each case
    passes the expected message to ``assert_raises_regex`` together with
    ``check_estimator`` and the estimator class under test.
    """
    # check that we have a set_params and can clone
    msg = "it does not implement a 'get_params' methods"
    assert_raises_regex(TypeError, msg, check_estimator, object)
    # check that we have a fit method
    msg = "object has no attribute 'fit'"
    assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator)
    # check that fit does input validation
    msg = "TypeError not raised by fit"
    assert_raises_regex(AssertionError, msg, check_estimator,
                        BaseBadClassifier)
    # check that predict does input validation (doesn't accept dicts in input)
    msg = "Estimator doesn't check for NaN and inf in predict"
    assert_raises_regex(AssertionError, msg, check_estimator,
                        NoCheckinPredict)
    # check for sparse matrix input handling
    name = NoSparseClassifier.__name__
    msg = "Estimator " + name + " doesn't seem to fail gracefully on sparse data"
    # the check for sparse input handling prints to the stdout,
    # instead of raising an error, so as not to remove the original traceback.
    # that means we need to jump through some hoops to catch it.
    old_stdout = sys.stdout
    string_buffer = StringIO()
    sys.stdout = string_buffer
    try:
        check_estimator(NoSparseClassifier)
    except Exception:
        # Best effort on purpose: only the captured output matters here.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
    finally:
        sys.stdout = old_stdout
    assert_true(msg in string_buffer.getvalue())
    # doesn't error on actual estimator
    check_estimator(AdaBoostClassifier)
    check_estimator(MultiTaskElasticNet)
def get_attributes(obj):
    """Extract the state of ``obj`` in a form that depends on its type.

    * ``TfidfVectorizer``: delegated to ``get_tfidf_attributes``.
    * ``XGBClassifier``: the whole object pickled to bytes.
    * other ``BaseEstimator``: a dict of the attributes whose names end with
      an underscore, do not start with one, and are not in
      ``skip_attributes``.
    * ``None``: returns ``None``.

    Raises:
        TypeError: for any other non-None object.
    """
    if isinstance(obj, TfidfVectorizer):
        return get_tfidf_attributes(obj)
    if isinstance(obj, XGBClassifier):
        return pickle.dumps(obj)
    if isinstance(obj, BaseEstimator):
        collected = {}
        for attr_name in dir(obj):
            if (attr_name.startswith('_')
                    or not attr_name.endswith('_')
                    or attr_name in skip_attributes):
                continue
            collected[attr_name] = getattr(obj, attr_name)
        return collected
    if obj is not None:
        raise TypeError(type(obj))
    return None
def set_attributes(parent, field, attributes):
    """Restore state onto ``parent.<field>`` according to its type.

    * ``TfidfVectorizer``: delegated to ``set_ifidf_attributes``.
    * ``XGBClassifier``: ``attributes`` is unpickled and the resulting object
      replaces ``parent.<field>``.
    * other ``BaseEstimator``: every ``(name, value)`` pair in ``attributes``
      is set on the existing object.
    * ``None``: nothing happens.

    Raises:
        AttributeError: if an attribute cannot be set on the estimator.
        TypeError: if the current field value is of any other non-None type.
    """
    obj = getattr(parent, field)
    if isinstance(obj, TfidfVectorizer):
        set_ifidf_attributes(obj, attributes)
    elif isinstance(obj, XGBClassifier):
        # setattr needs the attribute name; without ``field`` the call raised
        # TypeError and the classifier was never restored.
        # NOTE(security): pickle.loads executes arbitrary code; ``attributes``
        # must only ever come from a trusted source.
        setattr(parent, field, pickle.loads(attributes))
    elif isinstance(obj, BaseEstimator):
        for k, v in attributes.items():
            try:
                setattr(obj, k, v)
            except AttributeError:
                # Re-raise with the attribute and object named.
                raise AttributeError(
                    'can\'t set attribute {} on {}'.format(k, obj))
    elif obj is not None:
        raise TypeError(type(obj))
def fit(self, y=None, **params):
    """Load the configured datasets into ``self.tsList_`` and return ``self``.

    Loading happens here rather than in ``__init__`` so that the object
    works with BaseEstimator cloning. ``y`` and ``params`` are accepted but
    not used.
    """
    # Gather the loader options from the instance configuration.
    loader_options = dict(
        seed=self.seed,
        whichExamples=self.whichExamples,
        instancesPerTs=self.instancesPerTs,
        minNumInstances=self.minNumInstances,
        maxnumInstances=self.maxnumInstances,
        cropDataLength=self.cropDataLength,
    )
    self.tsList_ = loadDatasets(self.datasetName, **loader_options)
    return self
def run(self):
    """Generate the API ``.rst`` pages and build the documentation.

    For every public name in ``sklearn.__all__`` the matching
    ``sklearn.<name>`` module is imported; each attribute that is a subclass
    of ``sklearn.base.BaseEstimator`` gets its own page rendered from
    ``docs/source/api_class.rst.jinja2``. An index is then rendered from
    ``docs/source/api.rst.jinja2``, after which ``make text`` and
    ``make html`` are run in ``docs`` (the latter with the extra
    ``spelling lint linkcheck`` targets unless ``self.reduced_checks`` is set).

    Raises:
        subprocess.CalledProcessError: if the ``make html`` step fails
            (``make text`` is run without checking its exit status).
    """
    import sklearn
    from sklearn import base
    from jinja2 import Template

    with open('docs/source/api_class.rst.jinja2') as template_file:
        class_template = Template(template_file.read())

    sklearn_modules = {}
    for mod_name in sklearn.__all__:
        if mod_name.startswith('_'):
            continue
        try:
            orig = __import__('sklearn.%s' % mod_name, fromlist=[''])
        except Exception:
            # Report the module *name*: ``orig`` is unbound (or stale from a
            # previous iteration) when the import itself failed.
            # The message is repeated, as before, so it stands out in the log.
            for _ in range(20):
                print('Failed to import %s' % mod_name)
            continue

        sklearn_modules[mod_name] = []
        for name in dir(orig):
            c = getattr(orig, name)
            try:
                if not issubclass(c, base.BaseEstimator):
                    continue
            except TypeError:
                # ``c`` is not a class at all.
                continue

            full_class_name = 'ibex.sklearn.%s.%s' % (mod_name, name)
            sklearn_modules[mod_name].append(full_class_name)
            content = class_template.render(
                class_name=name,
                full_class_name=full_class_name)
            f_name = 'docs/source/api_ibex_sklearn_%s_%s.rst' % (
                mod_name, name.lower())
            with open(f_name, 'w') as out_file:
                out_file.write(content)

    with open('docs/source/api.rst.jinja2') as template_file:
        index_template = Template(template_file.read())
    content = index_template.render(sklearn_modules=sklearn_modules)
    with open('docs/source/api.rst', 'w') as out_file:
        out_file.write(content)

    run_str = 'make text'
    subprocess.call(run_str.split(' '), cwd='docs')

    run_str = 'make html'
    if not self.reduced_checks:
        run_str += ' spelling lint linkcheck'
    subprocess.check_call(run_str.split(' '), cwd='docs')
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 [email protected] 举报,一经查实,本站将立刻删除。