As of January 1, 2020 this library no longer supports Python 2 on the latest released version. Library versions released prior to that date will continue to be available. For more information please visit Python 2 support on Google Cloud.

Source code for google.cloud.automl_v1beta1.services.tables.tables_client

# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A tables helper for the google.cloud.automl_v1beta1 AutoML API"""

import copy
import logging

from google.api_core import exceptions
from google.api_core.gapic_v1 import client_info
from google.protobuf import struct_pb2

import google.cloud.automl_v1beta1
from google.cloud.automl_v1beta1 import AutoMlClient, PredictionServiceClient
from google.cloud.automl_v1beta1 import gapic_version as package_version
from google.cloud.automl_v1beta1.services.tables import gcs_client
from google.cloud.automl_v1beta1.types import data_items

# Version string of the generated AutoML package; TablesClient reports it as
# both the user-agent suffix and the gapic_version of its ClientInfo.
_GAPIC_LIBRARY_VERSION = package_version.__version__
# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)


def to_proto_value(value):
    """Translate a JSON-like Python value into a ``google.protobuf.Value``.

    The accepted inputs follow the JSON data model: ``None``, ``bool``,
    ``int``, ``float``, ``str``, ``dict`` and ``list``. Dicts and lists are
    converted recursively, and the first conversion error aborts the whole
    translation.

    Args:
        value: The Python value to be translated.

    Returns:
        A ``(proto_value, error)`` tuple. On success ``error`` is ``None``.
        When ``value`` (or something nested inside it) has an unsupported
        type, ``proto_value`` is ``None`` and ``error`` is a message string.
    """
    if value is None:
        # JSON null maps to the explicit NULL_VALUE, not to an unset Value.
        return struct_pb2.Value(null_value=struct_pb2.NullValue.NULL_VALUE), None

    # bool must be tested before int: isinstance(True, int) is True.
    if isinstance(value, bool):
        return struct_pb2.Value(bool_value=value), None

    if isinstance(value, (int, float)):
        return struct_pb2.Value(number_value=value), None

    if isinstance(value, str):
        return struct_pb2.Value(string_value=value), None

    if isinstance(value, dict):
        struct = struct_pb2.Struct()
        for name, member in value.items():
            converted, error = to_proto_value(member)
            if error is not None:
                return None, error
            struct.fields[name].CopyFrom(converted)
        return struct_pb2.Value(struct_value=struct), None

    if isinstance(value, list):
        elements = []
        for member in value:
            converted, error = to_proto_value(member)
            if error is not None:
                return None, error
            elements.append(converted)
        wrapped = struct_pb2.ListValue(values=elements)
        return struct_pb2.Value(list_value=wrapped), None

    return None, "unsupport data type: {}".format(type(value))
class TablesClient(object):
    """
    AutoML Tables API helper.

    This is intended to simplify usage of the auto-generated python client,
    in particular for the `AutoML Tables product
    <https://cloud.google.com/automl-tables/>`_.
    """

    def __init__(
        self,
        *,
        project=None,
        region="us-central1",
        credentials=None,
        client=None,
        prediction_client=None,
        gcs_client=None,
        **kwargs,
    ):
        """Constructor.

        Example for US region:
            >>> from google.cloud import automl_v1beta1
            >>>
            >>> from google.oauth2 import service_account
            >>>
            >>> client = automl_v1beta1.TablesClient(
            ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
            ...     project='my-project', region='us-central1')
            ...

        Example for EU region:
            >>> from google.cloud import automl_v1beta1
            >>>
            >>> from google.oauth2 import service_account
            >>>
            >>> client_options = {'api_endpoint': 'eu-automl.googleapis.com:443'}
            >>> client = automl_v1beta1.TablesClient(
            ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
            ...     project='my-project', region='eu', client_options=client_options)
            ...

        Args:
            project (Optional[str]): The project ID of the GCP project all
                future calls will default to. Most methods take `project`
                as an optional parameter, and can override the value
                supplied here.
            region (Optional[str]): The region all future calls will
                default to. Most methods take `region` as an optional
                parameter, and can override the value supplied here.
            credentials (Optional[google.auth.credentials.Credentials]):
                The authorization credentials to attach to requests. They
                are handed to any client this constructor has to create
                and are stored for later use.
            client (Optional[google.automl_v1beta1.AutoMlClient]): An
                AutoMl client to use for requests. One is created when
                omitted.
            prediction_client (Optional[google.automl_v1beta1.PredictionClient]):
                A prediction client to use for requests. One is created
                when omitted.
            gcs_client (Optional[google.automl_v1beta1.GcsClient]): A
                storage client to use for requests. It is stored as given.
            client_options (Union[dict, google.api_core.client_options.ClientOptions]):
                Custom options for the client; forwarded through
                ``**kwargs`` to the clients created here.
            client_info (google.api_core.gapic_v1.client_info.ClientInfo):
                The client info used to send a user-agent string along
                with API requests. Its ``user_agent`` and
                ``gapic_version`` are overwritten with this wrapper's
                values.
        """
        wrapper_version = _GAPIC_LIBRARY_VERSION
        wrapper_agent = "automl-tables-wrapper/{}".format(wrapper_version)

        # client_info is always taken out of kwargs: it is passed explicitly
        # to the generated clients below and must not appear twice.
        info = kwargs.pop("client_info", None)
        if info is not None:
            info.user_agent = wrapper_agent
            info.gapic_version = wrapper_version
        else:
            info = client_info.ClientInfo(
                user_agent=wrapper_agent, gapic_version=wrapper_version
            )

        self.auto_ml_client = (
            client
            if client is not None
            else AutoMlClient(credentials=credentials, client_info=info, **kwargs)
        )
        self.prediction_client = (
            prediction_client
            if prediction_client is not None
            else PredictionServiceClient(
                credentials=credentials, client_info=info, **kwargs
            )
        )

        self.project = project
        self.region = region
        self.credentials = credentials
        self.gcs_client = gcs_client

    def __lookup_by_display_name(self, object_type, items, display_name):
        """Return the single item in `items` whose display_name matches.

        Raises `exceptions.NotFound` when nothing matches and `ValueError`
        when more than one item matches. `object_type` is only used to
        build the error messages.
        """
        matches = [item for item in items if item.display_name == display_name]

        if not matches:
            raise exceptions.NotFound(
                "The {} with display_name='{}' was not found.".format(
                    object_type, display_name
                )
            )

        if len(matches) > 1:
            listing = ", ".join(str(match) for match in matches)
            raise ValueError(
                (
                    "Multiple {}s match display_name='{}': {}. Please use the `.name` (unique identifier) field instead."
                ).format(object_type, display_name, listing)
            )

        return matches[0]

    def __location_path(self, *, project=None, region=None):
        """Build ``projects/<project>/locations/<region>``.

        Arguments left as ``None`` fall back to the values stored on the
        client; `ValueError` is raised when neither source provides one.
        """
        resolved_project = self.project if project is None else project
        if resolved_project is None:
            raise ValueError(
                "Either initialize your client with a value for 'project', or provide 'project' as a parameter for this method."
            )

        resolved_region = self.region if region is None else region
        if resolved_region is None:
            raise ValueError(
                "Either initialize your client with a value for 'region', or provide 'region' as a parameter for this method."
            )

        return f"projects/{resolved_project}/locations/{resolved_region}"

    # the returned metadata object doesn't allow for updating fields, so
    # we need to manually copy user-updated fields over
    def __update_metadata(self, metadata, k, v):
        """Return a dict copy of the three column-spec id fields with `k` set to `v`."""
        carried_fields = (
            "ml_use_column_spec_id",
            "weight_column_spec_id",
            "target_column_spec_id",
        )
        updated = {field: getattr(metadata, field) for field in carried_fields}
        updated[k] = v
        return updated

    def __dataset_from_args(
        self,
        *,
        dataset=None,
        dataset_display_name=None,
        dataset_name=None,
        project=None,
        region=None,
    ):
        """Fetch a dataset from whichever of the three identifiers was given."""
        identifiers = (dataset, dataset_display_name, dataset_name)
        if all(identifier is None for identifier in identifiers):
            raise ValueError(
                "One of 'dataset', 'dataset_name' or 'dataset_display_name' must be set."
            )

        # we prefer to make a live call here in the case that the
        # dataset object is out-of-date
        lookup_name = dataset_name if dataset is None else dataset.name

        return self.get_dataset(
            dataset_display_name=dataset_display_name,
            dataset_name=lookup_name,
            project=project,
            region=region,
        )

    def __model_from_args(
        self,
        *,
        model=None,
        model_display_name=None,
        model_name=None,
        project=None,
        region=None,
    ):
        """Fetch a model from whichever of the three identifiers was given."""
        identifiers = (model, model_display_name, model_name)
        if all(identifier is None for identifier in identifiers):
            raise ValueError(
                "One of 'model', 'model_name' or 'model_display_name' must be set."
            )

        # we prefer to make a live call here in the case that the
        # model object is out-of-date
        lookup_name = model_name if model is None else model.name

        return self.get_model(
            model_display_name=model_display_name,
            model_name=lookup_name,
            project=project,
            region=region,
        )

    def __dataset_name_from_args(
        self,
        *,
        dataset=None,
        dataset_display_name=None,
        dataset_name=None,
        project=None,
        region=None,
    ):
        """Resolve the three dataset identifiers to a dataset name.

        An explicit `dataset_name` wins, then `dataset.name`, then a
        lookup by display name.
        """
        identifiers = (dataset, dataset_display_name, dataset_name)
        if all(identifier is None for identifier in identifiers):
            raise ValueError(
                "One of 'dataset', 'dataset_name' or 'dataset_display_name' must be set."
            )

        if dataset_name is not None:
            # we do this to force a NotFound error when needed
            self.get_dataset(dataset_name=dataset_name, project=project, region=region)
            return dataset_name

        if dataset is None:
            dataset = self.get_dataset(
                dataset_display_name=dataset_display_name,
                project=project,
                region=region,
            )
        return dataset.name

    def __table_spec_name_from_args(
        self,
        *,
        table_spec_index=0,
        dataset=None,
        dataset_display_name=None,
        dataset_name=None,
        project=None,
        region=None,
    ):
        """Return the name of the dataset's table spec at `table_spec_index`."""
        resolved_dataset = self.__dataset_name_from_args(
            dataset=dataset,
            dataset_name=dataset_name,
            dataset_display_name=dataset_display_name,
            project=project,
            region=region,
        )

        # Materialise the listing so it can be indexed.
        specs = list(self.list_table_specs(dataset_name=resolved_dataset))
        return specs[table_spec_index].name

    def __model_name_from_args(
        self,
        *,
        model=None,
        model_display_name=None,
        model_name=None,
        project=None,
        region=None,
    ):
        """Resolve the three model identifiers to a model name.

        An explicit `model_name` wins, then `model.name`, then a lookup by
        display name.
        """
        identifiers = (model, model_display_name, model_name)
        if all(identifier is None for identifier in identifiers):
            raise ValueError(
                "One of 'model', 'model_name' or 'model_display_name' must be set."
            )

        if model_name is not None:
            # we do this to force a NotFound error when needed
            self.get_model(model_name=model_name, project=project, region=region)
            return model_name

        if model is None:
            model = self.get_model(
                model_display_name=model_display_name,
                project=project,
                region=region,
            )
        return model.name

    def __log_operation_info(self, message, op):
        """Log that `op` runs in the background, then return `op` unchanged."""
        # Any missing link in op.operation.name (including op being None)
        # surfaces as AttributeError and leaves the placeholder in place.
        try:
            reported_name = op.operation.name
        except AttributeError:
            reported_name = None
        op_name = "UNKNOWN" if reported_name is None else reported_name

        notice = (
            "Operation '{}' is running in the background. The returned Operation '{}' can be used to query or block on the status of this operation. Ending your python session will _not_ cancel this operation. Read the documentation here:\n\n \thttps://googleapis.dev/python/google-api-core/latest/operation.html\n\n for more information on the Operation class."
        ).format(message, op_name)
        _LOGGER.info(notice)

        return op

    def __column_spec_name_from_args(
        self,
        *,
        dataset=None,
        dataset_display_name=None,
        dataset_name=None,
        table_spec_name=None,
        table_spec_index=0,
        column_spec_name=None,
        column_spec_display_name=None,
        project=None,
        region=None,
    ):
        """Resolve a column spec name, checking it against the listed specs.

        `column_spec_display_name` takes precedence over `column_spec_name`.
        Raises `exceptions.NotFound` when the column is not among the
        listed specs and `ValueError` when neither identifier is given.
        """
        specs = self.list_column_specs(
            dataset=dataset,
            dataset_display_name=dataset_display_name,
            dataset_name=dataset_name,
            table_spec_name=table_spec_name,
            table_spec_index=table_spec_index,
            project=project,
            region=region,
        )

        if column_spec_display_name is not None:
            by_display_name = {spec.display_name: spec for spec in specs}
            found = by_display_name.get(column_spec_display_name)
            if found is None:
                raise exceptions.NotFound(
                    "No column with column_spec_display_name: '{}' found".format(
                        column_spec_display_name
                    )
                )
            return found.name

        if column_spec_name is not None:
            by_name = {spec.name: spec for spec in specs}
            if by_name.get(column_spec_name) is None:
                raise exceptions.NotFound(
                    "No column with column_spec_name: '{}' found".format(
                        column_spec_name
                    )
                )
            return column_spec_name

        raise ValueError(
            "Either supply 'column_spec_name' or 'column_spec_display_name' for the column to update"
        )

    def __ensure_gcs_client_is_initialized(self, credentials, project):
        """Create the GCS client on first use; do nothing if one exists.

        Args:
            credentials (google.auth.credentials.Credentials): The
                authorization credentials passed to the new GCS client.
            project (str): The ID of the project passed to the new GCS
                client.
        """
        if self.gcs_client is not None:
            return
        self.gcs_client = gcs_client.GcsClient(
            project=project, credentials=credentials
        )

    def __process_request_kwargs(self, request, **kwargs):
        """Add request kwargs to the request and return remaining kwargs.

        Some kwargs are for the request object and others are for
        the method itself (retry, metadata).

        Args:
            request (proto.Message): The request object.

        Returns:
            dict: kwargs to be passed to the method. Values are deep
            copies of the ones supplied.
        """
        leftover = copy.deepcopy(kwargs)
        for field, field_value in kwargs.items():
            try:
                setattr(request, field, field_value)
            except (AttributeError, KeyError):
                # Not a request field: keep it as a method kwarg.
                continue
            leftover.pop(field, None)
        return leftover
[docs] def list_datasets(self, *, project=None, region=None, **kwargs): """List all datasets in a particular project and region. Example: >>> from google.cloud import automl_v1beta1 >>> >>> from google.oauth2 import service_account >>> >>> client = automl_v1beta1.TablesClient( ... credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'), ... project='my-project', region='us-central1') ... >>> ds = client.list_datasets() >>> >>> for d in ds: ... # do something ... pass ... Args: project (Optional[str]): The ID of the project that owns the datasets. If you have initialized the client with a value for `project` it will be used if this parameter is not supplied. Keep in mind, the service account this client was initialized with must have access to this project. region (Optional[str]): If you have initialized the client with a value for `region` it will be used if this parameter is not supplied. Returns: A :class:`~google.api_core.page_iterator.PageIterator` instance. An iterable of :class:`~google.cloud.automl_v1beta1.types.Dataset` instances. You can also iterate over the pages of the response using its `pages` property. Raises: google.api_core.exceptions.GoogleAPICallError: If the request failed for any reason. google.api_core.exceptions.RetryError: If the request failed due to a retryable error and retry attempts failed. ValueError: If required parameters are missing. """ request = google.cloud.automl_v1beta1.ListDatasetsRequest( parent=self.__location_path(project=project, region=region), ) method_kwargs = self.__process_request_kwargs(request, **kwargs) return self.auto_ml_client.list_datasets(request=request, **method_kwargs)
[docs] def get_dataset( self, *, project=None, region=None, dataset_name=None, dataset_display_name=None, **kwargs, ): """Gets a single dataset in a particular project and region. Example: >>> from google.cloud import automl_v1beta1 >>> >>> from google.oauth2 import service_account >>> >>> client = automl_v1beta1.TablesClient( ... credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'), ... project='my-project', region='us-central1') ... >>> d = client.get_dataset(dataset_display_name='my_dataset') >>> Args: project (Optional[str]): The ID of the project that owns the dataset. If you have initialized the client with a value for `project` it will be used if this parameter is not supplied. Keep in mind, the service account this client was initialized with must have access to this project. region (Optional[str]): If you have initialized the client with a value for `region` it will be used if this parameter is not supplied. dataset_name (Optional[str]): This is the fully-qualified name generated by the AutoML API for this dataset. This is not to be confused with the human-assigned `dataset_display_name` that is provided when creating a dataset. Either `dataset_name` or `dataset_display_name` must be provided. dataset_display_name (Optional[str]): This is the name you provided for the dataset when first creating it. Either `dataset_name` or `dataset_display_name` must be provided. Returns: A :class:`~google.cloud.automl_v1beta1.types.Dataset` instance if found, `None` otherwise. Raises: google.api_core.exceptions.GoogleAPICallError: If the request failed for any reason. google.api_core.exceptions.RetryError: If the request failed due to a retryable error and retry attempts failed. ValueError: If required parameters are missing. """ if dataset_name is None and dataset_display_name is None: raise ValueError( "One of 'dataset_name' or 'dataset_display_name' must be set." 
) if dataset_name is not None: request = google.cloud.automl_v1beta1.GetDatasetRequest( name=dataset_name, ) method_kwargs = self.__process_request_kwargs(request, **kwargs) return self.auto_ml_client.get_dataset(request=request, **method_kwargs) return self.__lookup_by_display_name( "dataset", self.list_datasets(project=project, region=region), dataset_display_name, )
[docs] def create_dataset( self, dataset_display_name, *, metadata={}, project=None, region=None, **kwargs ): """Create a dataset. Keep in mind, importing data is a separate step. Example: >>> from google.cloud import automl_v1beta1 >>> >>> from google.oauth2 import service_account >>> >>> client = automl_v1beta1.TablesClient( ... credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'), ... project='my-project', region='us-central1') ... >>> d = client.create_dataset(dataset_display_name='my_dataset') >>> Args: project (Optional[str]): The ID of the project that will own the dataset. If you have initialized the client with a value for `project` it will be used if this parameter is not supplied. Keep in mind, the service account this client was initialized with must have access to this project. region (Optional[str]): If you have initialized the client with a value for `region` it will be used if this parameter is not supplied. dataset_display_name (str): A human-readable name to refer to this dataset by. Returns: A :class:`~google.cloud.automl_v1beta1.types.Dataset` instance. Raises: google.api_core.exceptions.GoogleAPICallError: If the request failed for any reason. google.api_core.exceptions.RetryError: If the request failed due to a retryable error and retry attempts failed. ValueError: If required parameters are missing. """ request = google.cloud.automl_v1beta1.CreateDatasetRequest( parent=self.__location_path(project=project, region=region), dataset={ "display_name": dataset_display_name, "tables_dataset_metadata": metadata, }, ) method_kwargs = self.__process_request_kwargs(request, **kwargs) return self.auto_ml_client.create_dataset(request=request, **method_kwargs)
[docs] def delete_dataset( self, *, dataset=None, dataset_display_name=None, dataset_name=None, project=None, region=None, **kwargs, ): """Deletes a dataset. This does not delete any models trained on this dataset. Example: >>> from google.cloud import automl_v1beta1 >>> >>> from google.oauth2 import service_account >>> >>> client = automl_v1beta1.TablesClient( ... credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'), ... project='my-project', region='us-central1') ... >>> op = client.delete_dataset(dataset_display_name='my_dataset') >>> >>> op.result() # blocks on delete request >>> Args: project (Optional[str]): The ID of the project that owns the dataset. If you have initialized the client with a value for `project` it will be used if this parameter is not supplied. Keep in mind, the service account this client was initialized with must have access to this project. region (Optional[str]): If you have initialized the client with a value for `region` it will be used if this parameter is not supplied. dataset_display_name (Optional[str]): The human-readable name given to the dataset you want to delete. This must be supplied if `dataset` or `dataset_name` are not supplied. dataset_name (Optional[str]): The AutoML-assigned name given to the dataset you want to delete. This must be supplied if `dataset_display_name` or `dataset` are not supplied. dataset (Optional[Dataset]): The `Dataset` instance you want to delete. This must be supplied if `dataset_display_name` or `dataset_name` are not supplied. Returns: google.api_core.operation.Operation: An operation future that can be used to check for completion synchronously or asynchronously. Raises: google.api_core.exceptions.GoogleAPICallError: If the request failed for any reason. google.api_core.exceptions.RetryError: If the request failed due to a retryable error and retry attempts failed. ValueError: If required parameters are missing. 
""" try: dataset_name = self.__dataset_name_from_args( dataset=dataset, dataset_name=dataset_name, dataset_display_name=dataset_display_name, project=project, region=region, ) # delete is idempotent except exceptions.NotFound: return None request = google.cloud.automl_v1beta1.DeleteDatasetRequest( name=dataset_name, ) method_kwargs = self.__process_request_kwargs(request, **kwargs) op = self.auto_ml_client.delete_dataset(request=request, **method_kwargs) self.__log_operation_info("Delete dataset", op) return op
[docs] def import_data( self, *, dataset=None, dataset_display_name=None, dataset_name=None, pandas_dataframe=None, gcs_input_uris=None, bigquery_input_uri=None, project=None, region=None, credentials=None, **kwargs, ): """Imports data into a dataset. Example: >>> from google.cloud import automl_v1beta1 >>> >>> from google.oauth2 import service_account >>> >>> client = automl_v1beta1.TablesClient( ... credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'), ... project='my-project', region='us-central1') ... >>> d = client.create_dataset(dataset_display_name='my_dataset') >>> >>> response = client.import_data(dataset=d, ... gcs_input_uris='gs://cloud-ml-tables-data/bank-marketing.csv') ... >>> def callback(operation_future): ... result = operation_future.result() ... >>> response.add_done_callback(callback) >>> Args: project (Optional[str]): The ID of the project that owns the dataset. If you have initialized the client with a value for `project` it will be used if this parameter is not supplied. Keep in mind, the service account this client was initialized with must have access to this project. region (Optional[str]): If you have initialized the client with a value for `region` it will be used if this parameter is not supplied. credentials (Optional[google.auth.credentials.Credentials]): The authorization credentials to attach to requests. These credentials identify this application to the service. If none are specified, the client will attempt to ascertain the credentials from the environment. dataset_display_name (Optional[str]): The human-readable name given to the dataset you want to import data into. This must be supplied if `dataset` or `dataset_name` are not supplied. dataset_name (Optional[str]): The AutoML-assigned name given to the dataset you want to import data into. This must be supplied if `dataset_display_name` or `dataset` are not supplied. 
dataset (Optional[Dataset]): The `Dataset` instance you want to import data into. This must be supplied if `dataset_display_name` or `dataset_name` are not supplied. pandas_dataframe (Optional[pandas.DataFrame]): A Pandas Dataframe object containing the data to import. The data will be converted to CSV, and this CSV will be staged to GCS in `gs://{project}-automl-tables-staging/{uploaded_csv_name}` This parameter must be supplied if neither `gcs_input_uris` nor `bigquery_input_uri` is supplied. gcs_input_uris (Optional[Union[str, Sequence[str]]]): Either a single `gs://..` prefixed URI, or a list of URIs referring to GCS-hosted CSV files containing the data to import. This must be supplied if neither `bigquery_input_uri` nor `pandas_dataframe` is supplied. bigquery_input_uri (Optional[str]): A URI pointing to the BigQuery table containing the data to import. This must be supplied if neither `gcs_input_uris` nor `pandas_dataframe` is supplied. Returns: google.api_core.operation.Operation: An operation future that can be used to check for completion synchronously or asynchronously. Raises: google.api_core.exceptions.GoogleAPICallError: If the request failed for any reason. google.api_core.exceptions.RetryError: If the request failed due to a retryable error and retry attempts failed. ValueError: If required parameters are missing. 
""" dataset_name = self.__dataset_name_from_args( dataset=dataset, dataset_name=dataset_name, dataset_display_name=dataset_display_name, project=project, region=region, ) request = {} if pandas_dataframe is not None: project = project or self.project region = region or self.region credentials = credentials or self.credentials self.__ensure_gcs_client_is_initialized(credentials, project) self.gcs_client.ensure_bucket_exists(project, region) gcs_input_uri = self.gcs_client.upload_pandas_dataframe(pandas_dataframe) request = {"gcs_source": {"input_uris": [gcs_input_uri]}} elif gcs_input_uris is not None: if type(gcs_input_uris) != list: gcs_input_uris = [gcs_input_uris] request = {"gcs_source": {"input_uris": gcs_input_uris}} elif bigquery_input_uri is not None: request = {"bigquery_source": {"input_uri": bigquery_input_uri}} else: raise ValueError( "One of 'gcs_input_uris', or 'bigquery_input_uri', or 'pandas_dataframe' must be set." ) req = google.cloud.automl_v1beta1.ImportDataRequest( name=dataset_name, input_config=request ) method_kwargs = self.__process_request_kwargs(req, **kwargs) op = self.auto_ml_client.import_data(request=req, **method_kwargs) self.__log_operation_info("Data import", op) return op
[docs] def export_data( self, *, dataset=None, dataset_display_name=None, dataset_name=None, gcs_output_uri_prefix=None, bigquery_output_uri=None, project=None, region=None, **kwargs, ): """Exports data from a dataset. Example: >>> from google.cloud import automl_v1beta1 >>> >>> from google.oauth2 import service_account >>> >>> client = automl_v1beta1.TablesClient( ... credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'), ... project='my-project', region='us-central1') ... >>> d = client.create_dataset(dataset_display_name='my_dataset') >>> >>> response = client.export_data(dataset=d, ... gcs_output_uri_prefix='gs://cloud-ml-tables-data/bank-marketing.csv') ... >>> def callback(operation_future): ... result = operation_future.result() ... >>> response.add_done_callback(callback) >>> Args: project (Optional[str]): The ID of the project that owns the dataset. If you have initialized the client with a value for `project` it will be used if this parameter is not supplied. Keep in mind, the service account this client was initialized with must have access to this project. region (Optional[str]): If you have initialized the client with a value for `region` it will be used if this parameter is not supplied. dataset_display_name (Optional[str]): The human-readable name given to the dataset you want to export data from. This must be supplied if `dataset` or `dataset_name` are not supplied. dataset_name (Optional[str]): The AutoML-assigned name given to the dataset you want to export data from. This must be supplied if `dataset_display_name` or `dataset` are not supplied. dataset (Optional[Dataset]): The `Dataset` instance you want to export data from. This must be supplied if `dataset_display_name` or `dataset_name` are not supplied. gcs_output_uri_prefix (Optional[Union[str, Sequence[str]]]): A single `gs://..` prefixed URI to export to. This must be supplied if `bigquery_output_uri` is not. 
bigquery_output_uri (Optional[str]): A URI pointing to the BigQuery table containing the data to export. This must be supplied if `gcs_output_uri_prefix` is not. Returns: google.api_core.operation.Operation: An operation future that can be used to check for completion synchronously or asynchronously. Raises: google.api_core.exceptions.GoogleAPICallError: If the request failed for any reason. google.api_core.exceptions.RetryError: If the request failed due to a retryable error and retry attempts failed. ValueError: If required parameters are missing. """ dataset_name = self.__dataset_name_from_args( dataset=dataset, dataset_name=dataset_name, dataset_display_name=dataset_display_name, project=project, region=region, ) request = {} if gcs_output_uri_prefix is not None: request = {"gcs_destination": {"output_uri_prefix": gcs_output_uri_prefix}} elif bigquery_output_uri is not None: request = {"bigquery_destination": {"output_uri": bigquery_output_uri}} else: raise ValueError( "One of 'gcs_output_uri_prefix', or 'bigquery_output_uri' must be set." ) req = google.cloud.automl_v1beta1.ExportDataRequest( name=dataset_name, output_config=request ) method_kwargs = self.__process_request_kwargs(req, **kwargs) op = self.auto_ml_client.export_data(request=req, **method_kwargs) self.__log_operation_info("Export data", op) return op
[docs] def get_table_spec(self, table_spec_name, *, project=None, region=None, **kwargs): """Gets a single table spec in a particular project and region. Example: >>> from google.cloud import automl_v1beta1 >>> >>> from google.oauth2 import service_account >>> >>> client = automl_v1beta1.TablesClient( ... credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'), ... project='my-project', region='us-central1') ... >>> d = client.get_table_spec('my_table_spec') >>> Args: table_spec_name (str): This is the fully-qualified name generated by the AutoML API for this table spec. project (Optional[str]): The ID of the project that owns the table. If you have initialized the client with a value for `project` it will be used if this parameter is not supplied. Keep in mind, the service account this client was initialized with must have access to this project. region (Optional[str]): If you have initialized the client with a value for `region` it will be used if this parameter is not supplied. Returns: A :class:`~google.cloud.automl_v1beta1.types.TableSpec` instance. Raises: google.api_core.exceptions.GoogleAPICallError: If the request failed for any reason. google.api_core.exceptions.RetryError: If the request failed due to a retryable error and retry attempts failed. ValueError: If required parameters are missing. """ request = google.cloud.automl_v1beta1.GetTableSpecRequest( name=table_spec_name, ) method_kwargs = self.__process_request_kwargs(request, **kwargs) return self.auto_ml_client.get_table_spec(request=request, **method_kwargs)
def list_table_specs(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """List the table specs that belong to a dataset.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> for s in client.list_table_specs(dataset_display_name='my_dataset'):
        ...     # process the spec
        ...     pass
        ...

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to read specs from. This
            must be supplied if `dataset_display_name` or `dataset_name`
            are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            read specs from. This must be supplied if `dataset` or
            `dataset_name` are not supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            read specs from. This must be supplied if
            `dataset_display_name` or `dataset` are not supplied.
        project (Optional[str]):
            The ID of the project that owns the dataset. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``list_table_specs`` call.

    Returns:
        A :class:`~google.api_core.page_iterator.PageIterator` instance.
        An iterable of
        :class:`~google.cloud.automl_v1beta1.types.TableSpec` instances.
        You can also iterate over the pages of the response using its
        `pages` property.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    # Whatever identifies the dataset is first reduced to its resource
    # name, which becomes the parent of the listing request.
    parent_name = self.__dataset_name_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )

    listing_request = google.cloud.automl_v1beta1.ListTableSpecsRequest(
        parent=parent_name,
    )
    call_options = self.__process_request_kwargs(listing_request, **kwargs)

    return self.auto_ml_client.list_table_specs(
        request=listing_request, **call_options
    )
def get_column_spec(self, column_spec_name, *, project=None, region=None, **kwargs):
    """Fetch a single column spec by its fully-qualified resource name.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> d = client.get_column_spec('my_column_spec')
        >>>

    Args:
        column_spec_name (str):
            The fully-qualified name generated by the AutoML API for
            this column spec. It is sent as the ``name`` of the request.
        project (Optional[str]):
            The ID of the project that owns the column. Accepted for
            signature parity with the other methods; this method's body
            does not read it because `column_spec_name` is already
            fully qualified.
        region (Optional[str]):
            Region of the column. Like `project`, it is accepted but
            not read by this method's body.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``get_column_spec`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.ColumnSpec`
        instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: Documented for missing required parameters; nothing
            in this method's own body raises it directly.
    """
    spec_request = google.cloud.automl_v1beta1.GetColumnSpecRequest(
        name=column_spec_name,
    )
    call_options = self.__process_request_kwargs(spec_request, **kwargs)

    return self.auto_ml_client.get_column_spec(request=spec_request, **call_options)
def list_column_specs(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    table_spec_name=None,
    table_spec_index=0,
    project=None,
    region=None,
    **kwargs,
):
    """List the column specs of one table.

    The table is either named explicitly through `table_spec_name`, or
    picked by position (`table_spec_index`) from the table specs of the
    dataset identified by the `dataset*` arguments.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> for s in client.list_column_specs(dataset_display_name='my_dataset'):
        ...     # process the spec
        ...     pass
        ...

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to read specs from. If no
            `table_spec_name` is supplied, this will be used together
            with `table_spec_index` to infer the name of table to read
            specs from. This must be supplied if `table_spec_name`,
            `dataset_name` or `dataset_display_name` are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            read specs from. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_name` are not
            supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            read specs from. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_display_name` are
            not supplied.
        table_spec_name (Optional[str]):
            The AutoML-assigned name for the table whose specs you want
            to read. If not supplied, the client determines this name
            from the dataset's table specs.
        table_spec_index (Optional[int]):
            If no `table_spec_name` was provided, this index selects
            which of the dataset's table specs to read column specs
            from.
        project (Optional[str]):
            The ID of the project that owns the columns. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``list_column_specs`` call.
            They are not forwarded to the table-spec lookup.

    Returns:
        A :class:`~google.api_core.page_iterator.PageIterator` instance.
        An iterable of
        :class:`~google.cloud.automl_v1beta1.types.ColumnSpec`
        instances. You can also iterate over the pages of the response
        using its `pages` property.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
        IndexError: If `table_spec_index` is out of range for the
            dataset's table specs (raised by the list indexing below).
    """
    parent_table = table_spec_name
    if parent_table is None:
        # No explicit table: materialize the dataset's table specs so
        # one can be selected by position.
        available_tables = list(
            self.list_table_specs(
                dataset=dataset,
                dataset_display_name=dataset_display_name,
                dataset_name=dataset_name,
                project=project,
                region=region,
            )
        )
        parent_table = available_tables[table_spec_index].name

    listing_request = google.cloud.automl_v1beta1.ListColumnSpecsRequest(
        parent=parent_table,
    )
    call_options = self.__process_request_kwargs(listing_request, **kwargs)

    return self.auto_ml_client.list_column_specs(
        request=listing_request, **call_options
    )
def update_column_spec(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    table_spec_name=None,
    table_spec_index=0,
    column_spec_name=None,
    column_spec_display_name=None,
    type_code=None,
    nullable=None,
    project=None,
    region=None,
    **kwargs,
):
    """Update the data type (type code and/or nullability) of a column.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.update_column_spec(dataset_display_name='my_dataset',
        ...     column_spec_display_name='Outcome',
        ...     type_code=automl_v1beta1.TypeCode.CATEGORY)
        ...

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to update specs on. If no
            `table_spec_name` is supplied, this will be used together
            with `table_spec_index` to infer the name of table to
            update specs on. This must be supplied if
            `table_spec_name`, `dataset_name` or `dataset_display_name`
            are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            update specs on. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_name` are not
            supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            update specs on. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_display_name` are
            not supplied.
        table_spec_name (Optional[str]):
            The AutoML-assigned name for the table whose specs you want
            to update. If not supplied, the client can determine this
            name from a source `Dataset` object.
        table_spec_index (Optional[int]):
            If no `table_spec_name` was provided, we use this index to
            determine which table to update column specs on.
        column_spec_name (Optional[str]):
            The AutoML-assigned name for the column you want to update.
        column_spec_display_name (Optional[str]):
            The human-readable name of the column you want to update.
            If this is supplied in place of `column_spec_name`, you
            also need to provide either a way to lookup the source
            dataset (using one of the `dataset*` kwargs), or the
            `table_spec_name` of the table this column belongs to.
        type_code (Optional[str]):
            The desired 'type_code' of the column; it is converted with
            ``TypeCode(type_code)`` before being sent. When omitted,
            the column's current type code is read back and re-sent,
            because the type code must always be set. For more
            information on the available types, please see the
            documentation:
            https://cloud.google.com/automl-tables/docs/reference/rpc/google.cloud.automl.v1beta1#typecode
        nullable (Optional[bool]):
            Set to `True` or `False` to specify whether this column's
            value is expected to be present in all rows or not. Left
            out of the update when `None`.
        project (Optional[str]):
            The ID of the project that owns the columns. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``update_column_spec`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.ColumnSpec`
        instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    column_spec_name = self.__column_spec_name_from_args(
        dataset=dataset,
        dataset_display_name=dataset_display_name,
        dataset_name=dataset_name,
        table_spec_name=table_spec_name,
        table_spec_index=table_spec_index,
        column_spec_name=column_spec_name,
        column_spec_display_name=column_spec_display_name,
        project=project,
        region=region,
    )

    # type code must always be set
    if type_code is None:
        specs_by_name = {}
        for spec in self.list_column_specs(
            dataset=dataset,
            dataset_display_name=dataset_display_name,
            dataset_name=dataset_name,
            table_spec_name=table_spec_name,
            table_spec_index=table_spec_index,
            project=project,
            region=region,
        ):
            specs_by_name[spec.name] = spec
        # this lookup is safe, we would have already thrown a NotFound
        # had the column_spec_name not existed
        type_code = specs_by_name[column_spec_name].data_type.type_code

    # "nullable" is only included when the caller expressed a preference.
    data_type = {} if nullable is None else {"nullable": nullable}
    data_type["type_code"] = google.cloud.automl_v1beta1.TypeCode(type_code)

    update_request = google.cloud.automl_v1beta1.UpdateColumnSpecRequest(
        column_spec={"name": column_spec_name, "data_type": data_type}
    )
    call_options = self.__process_request_kwargs(update_request, **kwargs)

    return self.auto_ml_client.update_column_spec(
        request=update_request, **call_options
    )
def set_target_column(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    table_spec_name=None,
    table_spec_index=0,
    column_spec_name=None,
    column_spec_display_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Set the target column of a dataset.

    The column is resolved to its AutoML name, and the trailing path
    segment of that name is written to the dataset's
    ``target_column_spec_id`` metadata field.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.set_target_column(dataset_display_name='my_dataset',
        ...     column_spec_display_name='Income')
        ...

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to update the target column
            of. If no `table_spec_name` is supplied, this will be used
            together with `table_spec_index` to infer the name of table
            to update the target column of. This must be supplied if
            `table_spec_name`, `dataset_name` or `dataset_display_name`
            are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            update the target column of. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_name` are not
            supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            update the target column of. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_display_name` are
            not supplied.
        table_spec_name (Optional[str]):
            The AutoML-assigned name for the table whose target column
            you want to set. If not supplied, the client can determine
            this name from a source `Dataset` object.
        table_spec_index (Optional[int]):
            If no `table_spec_name` or `column_spec_name` was provided,
            we use this index to determine which table to set the
            target column on.
        column_spec_name (Optional[str]):
            The AutoML-assigned name for the column you want to set as
            the target column.
        column_spec_display_name (Optional[str]):
            The human-readable name of the column you want to set as
            the target column. If this is supplied in place of
            `column_spec_name`, you also need to provide either a way
            to lookup the source dataset (using one of the `dataset*`
            kwargs), or the `table_spec_name` of the table this column
            belongs to.
        project (Optional[str]):
            The ID of the project that owns the table. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``update_dataset`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.Dataset` instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    resolved_column = self.__column_spec_name_from_args(
        dataset=dataset,
        dataset_display_name=dataset_display_name,
        dataset_name=dataset_name,
        table_spec_name=table_spec_name,
        table_spec_index=table_spec_index,
        column_spec_name=column_spec_name,
        column_spec_display_name=column_spec_display_name,
        project=project,
        region=region,
    )
    # Only the last path segment of the column's resource name is stored.
    target_id = resolved_column.rsplit("/", 1)[-1]

    owning_dataset = self.__dataset_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )
    updated_metadata = self.__update_metadata(
        owning_dataset.tables_dataset_metadata, "target_column_spec_id", target_id
    )

    update_request = google.cloud.automl_v1beta1.UpdateDatasetRequest(
        dataset={
            "name": owning_dataset.name,
            "tables_dataset_metadata": updated_metadata,
        }
    )
    call_options = self.__process_request_kwargs(update_request, **kwargs)

    return self.auto_ml_client.update_dataset(request=update_request, **call_options)
def set_time_column(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    table_spec_name=None,
    table_spec_index=0,
    column_spec_name=None,
    column_spec_display_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Sets the time column which designates which data will be of type
    timestamp and will be used for the timeseries data.
    This column must be of type timestamp.

    The column is resolved to its AutoML name and the trailing path
    segment of that name is written to ``time_column_spec_id`` of the
    table spec. When `table_spec_name` is given, that table spec is the
    one updated, so the column and the table it is set on always agree;
    otherwise the table spec is derived from the dataset.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.set_time_column(dataset_display_name='my_dataset',
        ...     column_spec_display_name='Unix Time')
        ...

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to update the time column
            of. If no `table_spec_name` is supplied, this is used to
            infer the name of table to update the time column of. This
            must be supplied if `table_spec_name`, `dataset_name` or
            `dataset_display_name` are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            update the time column of. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_name` are not
            supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            update the time column of. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_display_name` are
            not supplied.
        table_spec_name (Optional[str]):
            The AutoML-assigned name for the table whose time column
            you want to set. When supplied it is used both to look up
            the column and as the table spec that gets updated. If not
            supplied, the client determines the table from the dataset.
        table_spec_index (Optional[int]):
            If no `table_spec_name` or `column_spec_name` was provided,
            this index is used when looking up the column. It is not
            forwarded to the table-spec lookup (see the review note in
            the body).
        column_spec_name (Optional[str]):
            The AutoML-assigned name for the column you want to set as
            the time column.
        column_spec_display_name (Optional[str]):
            The human-readable name of the column you want to set as
            the time column. If this is supplied in place of
            `column_spec_name`, you also need to provide either a way
            to lookup the source dataset (using one of the `dataset*`
            kwargs), or the `table_spec_name` of the table this column
            belongs to.
        project (Optional[str]):
            The ID of the project that owns the table. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``update_table_spec`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.TableSpec`
        instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    column_spec_name = self.__column_spec_name_from_args(
        dataset=dataset,
        dataset_display_name=dataset_display_name,
        dataset_name=dataset_name,
        table_spec_name=table_spec_name,
        table_spec_index=table_spec_index,
        column_spec_name=column_spec_name,
        column_spec_display_name=column_spec_display_name,
        project=project,
        region=region,
    )
    # Only the last path segment of the column's resource name is stored.
    column_spec_id = column_spec_name.rsplit("/", 1)[-1]

    if table_spec_name is not None:
        # An explicitly named table is the one the column was resolved
        # against, so it must also be the one that is updated.
        table_spec_full_id = table_spec_name
    else:
        dataset_name = self.__dataset_name_from_args(
            dataset=dataset,
            dataset_name=dataset_name,
            dataset_display_name=dataset_display_name,
            project=project,
            region=region,
        )
        # NOTE(review): `table_spec_index` is not forwarded here; the
        # helper's signature is not visible from this method, so the
        # original call is kept as-is. Confirm whether it should be
        # passed so a non-zero index updates the matching table.
        table_spec_full_id = self.__table_spec_name_from_args(
            dataset_name=dataset_name
        )

    request = google.cloud.automl_v1beta1.UpdateTableSpecRequest(
        table_spec={
            "name": table_spec_full_id,
            "time_column_spec_id": column_spec_id,
        }
    )
    method_kwargs = self.__process_request_kwargs(request, **kwargs)

    return self.auto_ml_client.update_table_spec(request=request, **method_kwargs)
def clear_time_column(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Clears the time column which designates which data will be of
    type timestamp and will be used for the timeseries data.

    The dataset is resolved to its name, a table spec name is derived
    from it, and an update is sent with ``time_column_spec_id`` set to
    ``None``.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.clear_time_column(dataset_display_name='my_dataset')
        >>>

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to clear the time column
            of. This must be supplied if `dataset_name` or
            `dataset_display_name` are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            clear the time column of. This must be supplied if
            `dataset` or `dataset_name` are not supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            clear the time column of. This must be supplied if
            `dataset` or `dataset_display_name` are not supplied.
        project (Optional[str]):
            The ID of the project that owns the table. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``update_table_spec`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.TableSpec`
        instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    resolved_dataset_name = self.__dataset_name_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )
    target_table = self.__table_spec_name_from_args(
        dataset_name=resolved_dataset_name
    )

    # A None id is what marks the time column as cleared in the update.
    update_request = google.cloud.automl_v1beta1.UpdateTableSpecRequest(
        table_spec={"name": target_table, "time_column_spec_id": None}
    )
    call_options = self.__process_request_kwargs(update_request, **kwargs)

    return self.auto_ml_client.update_table_spec(
        request=update_request, **call_options
    )
def set_weight_column(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    table_spec_name=None,
    table_spec_index=0,
    column_spec_name=None,
    column_spec_display_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Set the weight column of a dataset.

    The column is resolved to its AutoML name, and the trailing path
    segment of that name is written to the dataset's
    ``weight_column_spec_id`` metadata field.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.set_weight_column(dataset_display_name='my_dataset',
        ...     column_spec_display_name='Income')
        ...

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to update the weight column
            of. If no `table_spec_name` is supplied, this will be used
            together with `table_spec_index` to infer the name of table
            to update the weight column of. This must be supplied if
            `table_spec_name`, `dataset_name` or `dataset_display_name`
            are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            update the weight column of. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_name` are not
            supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            update the weight column of. Used like `dataset` when no
            `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_display_name` are
            not supplied.
        table_spec_name (Optional[str]):
            The AutoML-assigned name for the table whose weight column
            you want to set. If not supplied, the client can determine
            this name from a source `Dataset` object.
        table_spec_index (Optional[int]):
            If no `table_spec_name` or `column_spec_name` was provided,
            we use this index to determine which table to set the
            weight column on.
        column_spec_name (Optional[str]):
            The AutoML-assigned name for the column you want to set as
            the weight column.
        column_spec_display_name (Optional[str]):
            The human-readable name of the column you want to set as
            the weight column. If this is supplied in place of
            `column_spec_name`, you also need to provide either a way
            to lookup the source dataset (using one of the `dataset*`
            kwargs), or the `table_spec_name` of the table this column
            belongs to.
        project (Optional[str]):
            The ID of the project that owns the table. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``update_dataset`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.Dataset` instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    resolved_column = self.__column_spec_name_from_args(
        dataset=dataset,
        dataset_display_name=dataset_display_name,
        dataset_name=dataset_name,
        table_spec_name=table_spec_name,
        table_spec_index=table_spec_index,
        column_spec_name=column_spec_name,
        column_spec_display_name=column_spec_display_name,
        project=project,
        region=region,
    )
    # Only the last path segment of the column's resource name is stored.
    weight_id = resolved_column.rsplit("/", 1)[-1]

    owning_dataset = self.__dataset_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )
    updated_metadata = self.__update_metadata(
        owning_dataset.tables_dataset_metadata, "weight_column_spec_id", weight_id
    )

    update_request = google.cloud.automl_v1beta1.UpdateDatasetRequest(
        dataset={
            "name": owning_dataset.name,
            "tables_dataset_metadata": updated_metadata,
        }
    )
    call_options = self.__process_request_kwargs(update_request, **kwargs)

    return self.auto_ml_client.update_dataset(request=update_request, **call_options)
def clear_weight_column(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Clear the weight column of a dataset.

    The dataset is resolved, its ``weight_column_spec_id`` metadata
    field is set to ``None``, and the dataset is updated.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.clear_weight_column(dataset_display_name='my_dataset')
        >>>

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to clear the weight column
            of. This must be supplied if `dataset_name` or
            `dataset_display_name` are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            clear the weight column of. This must be supplied if
            `dataset` or `dataset_name` are not supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            clear the weight column of. This must be supplied if
            `dataset` or `dataset_display_name` are not supplied.
        project (Optional[str]):
            The ID of the project that owns the table. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``update_dataset`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.Dataset` instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    owning_dataset = self.__dataset_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )
    # A None id is what marks the weight column as cleared.
    cleared_metadata = self.__update_metadata(
        owning_dataset.tables_dataset_metadata, "weight_column_spec_id", None
    )

    update_request = google.cloud.automl_v1beta1.UpdateDatasetRequest(
        dataset={
            "name": owning_dataset.name,
            "tables_dataset_metadata": cleared_metadata,
        }
    )
    call_options = self.__process_request_kwargs(update_request, **kwargs)

    return self.auto_ml_client.update_dataset(request=update_request, **call_options)
def set_test_train_column(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    table_spec_name=None,
    table_spec_index=0,
    column_spec_name=None,
    column_spec_display_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Sets the test/train (ml_use) column which designates which data
    belongs to the test and train sets. This column must be categorical.

    The column is resolved to its AutoML name, and the trailing path
    segment of that name is written to the dataset's
    ``ml_use_column_spec_id`` metadata field.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.set_test_train_column(dataset_display_name='my_dataset',
        ...     column_spec_display_name='TestSplit')
        ...

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to update the test/train
            column of. If no `table_spec_name` is supplied, this will
            be used together with `table_spec_index` to infer the name
            of table to update the test/train column of. This must be
            supplied if `table_spec_name`, `dataset_name` or
            `dataset_display_name` are not supplied.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to
            update the test/train column of. Used like `dataset` when
            no `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_name` are not
            supplied.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            update the test/train column of. Used like `dataset` when
            no `table_spec_name` is supplied. This must be supplied if
            `table_spec_name`, `dataset` or `dataset_display_name` are
            not supplied.
        table_spec_name (Optional[str]):
            The AutoML-assigned name for the table whose test/train
            column you want to set. If not supplied, the client can
            determine this name from a source `Dataset` object.
        table_spec_index (Optional[int]):
            If no `table_spec_name` or `column_spec_name` was provided,
            we use this index to determine which table to set the
            test/train column on.
        column_spec_name (Optional[str]):
            The AutoML-assigned name for the column you want to set as
            the test/train column.
        column_spec_display_name (Optional[str]):
            The human-readable name of the column you want to set as
            the test/train column. If this is supplied in place of
            `column_spec_name`, you also need to provide either a way
            to lookup the source dataset (using one of the `dataset*`
            kwargs), or the `table_spec_name` of the table this column
            belongs to.
        project (Optional[str]):
            The ID of the project that owns the table. If you have
            initialized the client with a value for `project` it will
            be used if this parameter is not supplied. Keep in mind,
            the service account this client was initialized with must
            have access to this project.
        region (Optional[str]):
            If you have initialized the client with a value for
            `region` it will be used if this parameter is not supplied.
        **kwargs:
            Passed, together with the request, to the client's
            request-kwargs processing helper; the helper's result is
            expanded into the underlying ``update_dataset`` call.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.Dataset` instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    resolved_column = self.__column_spec_name_from_args(
        dataset=dataset,
        dataset_display_name=dataset_display_name,
        dataset_name=dataset_name,
        table_spec_name=table_spec_name,
        table_spec_index=table_spec_index,
        column_spec_name=column_spec_name,
        column_spec_display_name=column_spec_display_name,
        project=project,
        region=region,
    )
    # Only the last path segment of the column's resource name is stored.
    ml_use_id = resolved_column.rsplit("/", 1)[-1]

    owning_dataset = self.__dataset_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )
    updated_metadata = self.__update_metadata(
        owning_dataset.tables_dataset_metadata, "ml_use_column_spec_id", ml_use_id
    )

    update_request = google.cloud.automl_v1beta1.UpdateDatasetRequest(
        dataset={
            "name": owning_dataset.name,
            "tables_dataset_metadata": updated_metadata,
        }
    )
    call_options = self.__process_request_kwargs(update_request, **kwargs)

    return self.auto_ml_client.update_dataset(request=update_request, **call_options)
def clear_test_train_column(
    self,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Remove the test/train (``ml_use``) column designation from a dataset.

    The dataset's ``ml_use_column_spec_id`` metadata entry is reset by
    updating it with ``None``.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.clear_test_train_column(dataset_display_name='my_dataset')
        >>>

    Args:
        dataset (Optional[Dataset]):
            The `Dataset` instance whose test/train column should be
            cleared. This must be supplied if `dataset_name` and
            `dataset_display_name` are not.
        dataset_display_name (Optional[str]):
            The human-readable name of the dataset to update. This must be
            supplied if `dataset` and `dataset_name` are not.
        dataset_name (Optional[str]):
            The AutoML-assigned name of the dataset to update. This must
            be supplied if `dataset` and `dataset_display_name` are not.
        project (Optional[str]):
            The ID of the project that owns the table. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `auto_ml_client.update_dataset`.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.Dataset` instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    target = self.__dataset_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )

    # Passing None as the new value is what clears the column designation.
    cleared_metadata = self.__update_metadata(
        target.tables_dataset_metadata, "ml_use_column_spec_id", None
    )

    request = google.cloud.automl_v1beta1.UpdateDatasetRequest(
        dataset={"name": target.name, "tables_dataset_metadata": cleared_metadata}
    )
    call_options = self.__process_request_kwargs(request, **kwargs)
    return self.auto_ml_client.update_dataset(request=request, **call_options)
def list_models(self, *, project=None, region=None, **kwargs):
    """Enumerate every model in the given project and region.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> ms = client.list_models()
        >>>
        >>> for m in ms:
        ...     # do something
        ...     pass
        ...

    Args:
        project (Optional[str]):
            The ID of the project that owns the models. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `auto_ml_client.list_models`.

    Returns:
        A :class:`~google.api_core.page_iterator.PageIterator` instance.
        An iterable of :class:`~google.cloud.automl_v1beta1.types.Model`
        instances. You can also iterate over the pages of the response
        using its `pages` property.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    parent_path = self.__location_path(project=project, region=region)
    request = google.cloud.automl_v1beta1.ListModelsRequest(parent=parent_path)
    call_options = self.__process_request_kwargs(request, **kwargs)
    return self.auto_ml_client.list_models(request=request, **call_options)
def list_model_evaluations(
    self,
    *,
    project=None,
    region=None,
    model=None,
    model_display_name=None,
    model_name=None,
    **kwargs,
):
    """Enumerate the evaluations that exist for one model.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> ms = client.list_model_evaluations(model_display_name='my_model')
        >>>
        >>> for m in ms:
        ...     # do something
        ...     pass
        ...

    Args:
        project (Optional[str]):
            The ID of the project that owns the model. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        model (Optional[Model]):
            The `Model` instance to list evaluations for. This must be
            supplied if `model_display_name` and `model_name` are not.
        model_display_name (Optional[str]):
            The human-readable name of the model to list evaluations for.
            This must be supplied if `model` and `model_name` are not.
        model_name (Optional[str]):
            The AutoML-assigned name of the model to list evaluations for.
            This must be supplied if `model` and `model_display_name` are
            not.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to
            `auto_ml_client.list_model_evaluations`.

    Returns:
        A :class:`~google.api_core.page_iterator.PageIterator` instance.
        An iterable of
        :class:`~google.cloud.automl_v1beta1.types.ModelEvaluation`
        instances. You can also iterate over the pages of the response
        using its `pages` property.

        For a regression model, there will only be one evaluation. For a
        classification model there will be one for each classification
        label, as well as one for micro-averaged metrics. See more
        documentation here:
        https://cloud.google.com/automl-tables/docs/evaluate#automl-tables-list-model-evaluations-cli-curl

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    model_lookup = {
        "model": model,
        "model_name": model_name,
        "model_display_name": model_display_name,
        "project": project,
        "region": region,
    }
    # Evaluations are listed with the resolved model name as their parent.
    parent_model_name = self.__model_name_from_args(**model_lookup)

    request = google.cloud.automl_v1beta1.ListModelEvaluationsRequest(
        parent=parent_model_name,
    )
    call_options = self.__process_request_kwargs(request, **kwargs)
    return self.auto_ml_client.list_model_evaluations(
        request=request, **call_options
    )
def create_model(
    self,
    model_display_name,
    *,
    dataset=None,
    dataset_display_name=None,
    dataset_name=None,
    train_budget_milli_node_hours=None,
    optimization_objective=None,
    project=None,
    region=None,
    model_metadata=None,
    include_column_spec_names=None,
    exclude_column_spec_names=None,
    disable_early_stopping=False,
    **kwargs,
):
    """Create a model. This will train your model on the given dataset.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> m = client.create_model(
        ...     'my_model',
        ...     dataset_display_name='my_dataset',
        ...     train_budget_milli_node_hours=1000
        ... )
        >>>
        >>> m.result() # blocks on result
        >>>

    Args:
        model_display_name (str):
            A human-readable name to refer to this model by.
        dataset (Optional[Dataset]):
            The `Dataset` instance you want to train your model on. This
            must be supplied if `dataset_display_name` and `dataset_name`
            are not.
        dataset_display_name (Optional[str]):
            The human-readable name given to the dataset you want to train
            your model on. This must be supplied if `dataset` and
            `dataset_name` are not.
        dataset_name (Optional[str]):
            The AutoML-assigned name given to the dataset you want to
            train your model on. This must be supplied if `dataset` and
            `dataset_display_name` are not.
        train_budget_milli_node_hours (int):
            The amount of time (in thousandths of an hour) to spend
            training. Required despite the `None` default: the value must
            be between 1,000 and 72,000 inclusive (between 1 and 72
            hours).
        optimization_objective (Optional[str]):
            The metric AutoML tables should optimize for.
        project (Optional[str]):
            The ID of the project that will own the model. Falls back to
            the project the client was initialized with. The client's
            service account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        model_metadata (Optional[Dict]):
            Optional model metadata to supply to the client. The dict is
            copied (shallowly); the caller's object is not modified.
        include_column_spec_names (Optional[List[str]]):
            Display names of the columns to train on. Mutually exclusive
            with `exclude_column_spec_names`.
        exclude_column_spec_names (Optional[List[str]]):
            Display names of the columns to leave out of training.
            Mutually exclusive with `include_column_spec_names`.
        disable_early_stopping (Optional[bool]):
            True to disable early stopping. By default, the early stopping
            feature is enabled, which means that AutoML Tables might stop
            training before the entire training budget has been used.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `auto_ml_client.create_model`.

    Returns:
        google.api_core.operation.Operation:
            An operation future that can be used to check for
            completion synchronously or asynchronously.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing, the training
            budget is out of range, or both column filters are given.
    """
    if (
        train_budget_milli_node_hours is None
        or train_budget_milli_node_hours < 1000
        or train_budget_milli_node_hours > 72000
    ):
        raise ValueError(
            "'train_budget_milli_node_hours' must be a value between 1,000 and 72,000 inclusive"
        )

    # Truthiness treats None and every empty sequence (list, tuple, ...) as
    # "not set", matching how the filters are applied further down.
    if include_column_spec_names and exclude_column_spec_names:
        raise ValueError(
            "Cannot set both 'exclude_column_spec_names' and 'include_column_spec_names'"
        )

    dataset_name = self.__dataset_name_from_args(
        dataset=dataset,
        dataset_name=dataset_name,
        dataset_display_name=dataset_display_name,
        project=project,
        region=region,
    )

    # Copy so that the keys added below never leak into the caller's dict.
    model_metadata = {} if model_metadata is None else dict(model_metadata)
    model_metadata["train_budget_milli_node_hours"] = train_budget_milli_node_hours
    if optimization_objective is not None:
        model_metadata["optimization_objective"] = optimization_objective
    if disable_early_stopping:
        model_metadata["disable_early_stopping"] = True

    # The request wants the bare dataset ID: the last path segment of the name.
    dataset_id = dataset_name.rsplit("/", 1)[-1]

    # Column specs are only fetched when a filter actually needs them.
    if include_column_spec_names or exclude_column_spec_names:
        columns = self.list_column_specs(
            dataset=dataset,
            dataset_name=dataset_name,
            dataset_display_name=dataset_display_name,
        )
        if include_column_spec_names:
            selected_columns = [
                c for c in columns if c.display_name in include_column_spec_names
            ]
        else:
            selected_columns = [
                c
                for c in columns
                if c.display_name not in exclude_column_spec_names
            ]
        model_metadata["input_feature_column_specs"] = selected_columns

    req = google.cloud.automl_v1beta1.CreateModelRequest(
        parent=self.__location_path(project=project, region=region),
        model=google.cloud.automl_v1beta1.Model(
            display_name=model_display_name,
            dataset_id=dataset_id,
            tables_model_metadata=google.cloud.automl_v1beta1.TablesModelMetadata(
                model_metadata
            ),
        ),
    )

    method_kwargs = self.__process_request_kwargs(req, **kwargs)
    op = self.auto_ml_client.create_model(request=req, **method_kwargs)
    self.__log_operation_info("Model creation", op)
    return op
def delete_model(
    self,
    *,
    model=None,
    model_display_name=None,
    model_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Delete a model, leaving any associated datasets untouched.

    If resolving the model raises `NotFound`, nothing is deleted and
    `None` is returned, so repeated deletions do not fail.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> op = client.delete_model(model_display_name='my_model')
        >>>
        >>> op.result() # blocks on delete request
        >>>

    Args:
        model (Optional[Model]):
            The `Model` instance to delete. This must be supplied if
            `model_display_name` and `model_name` are not.
        model_display_name (Optional[str]):
            The human-readable name of the model to delete. This must be
            supplied if `model` and `model_name` are not.
        model_name (Optional[str]):
            The AutoML-assigned name of the model to delete. This must be
            supplied if `model` and `model_display_name` are not.
        project (Optional[str]):
            The ID of the project that owns the model. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `auto_ml_client.delete_model`.

    Returns:
        google.api_core.operation.Operation:
            An operation future that can be used to check for
            completion synchronously or asynchronously. `None` when the
            model could not be found.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    try:
        resolved_name = self.__model_name_from_args(
            model=model,
            model_name=model_name,
            model_display_name=model_display_name,
            project=project,
            region=region,
        )
    except exceptions.NotFound:
        # delete is idempotent: a model that is already gone is not an error
        return None
    else:
        request = google.cloud.automl_v1beta1.DeleteModelRequest(
            name=resolved_name
        )
        call_options = self.__process_request_kwargs(request, **kwargs)
        operation = self.auto_ml_client.delete_model(
            request=request, **call_options
        )
        self.__log_operation_info("Delete model", operation)
        return operation
def get_model_evaluation(
    self, model_evaluation_name, *, project=None, region=None, **kwargs
):
    """Fetch a single model evaluation by its fully-qualified name.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> d = client.get_model_evaluation('my_model_evaluation')
        >>>

    Args:
        model_evaluation_name (str):
            This is the fully-qualified name generated by the AutoML API
            for this model evaluation.
        project (Optional[str]):
            The ID of the project that owns the model. Accepted for
            signature parity with the other methods; this method does not
            read it.
        region (Optional[str]):
            Accepted for signature parity with the other methods; this
            method does not read it.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `auto_ml_client.get_model_evaluation`.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.ModelEvaluation`
        instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    evaluation_request = google.cloud.automl_v1beta1.GetModelEvaluationRequest(
        name=model_evaluation_name
    )
    call_options = self.__process_request_kwargs(evaluation_request, **kwargs)
    return self.auto_ml_client.get_model_evaluation(
        request=evaluation_request, **call_options
    )
def get_model(
    self,
    *,
    project=None,
    region=None,
    model_name=None,
    model_display_name=None,
    **kwargs,
):
    """Gets a single model in a particular project and region.

    When `model_name` is given the model is fetched directly by name.
    Otherwise the models of the project/region are listed and the one
    matching `model_display_name` is looked up.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> d = client.get_model(model_display_name='my_model')
        >>>

    Args:
        project (Optional[str]):
            The ID of the project that owns the model. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        model_name (Optional[str]):
            This is the fully-qualified name generated by the AutoML API
            for this model. This is not to be confused with the
            human-assigned `model_display_name` that is provided when
            creating a model. Either `model_name` or
            `model_display_name` must be provided. Takes precedence when
            both are given.
        model_display_name (Optional[str]):
            This is the name you provided for the model when first
            creating it. Either `model_name` or `model_display_name`
            must be provided.
        **kwargs:
            Extra call options. On the by-name path they go through the
            client's request-kwargs processing and the remainder is
            forwarded to `auto_ml_client.get_model`; on the display-name
            path they are forwarded to `list_models`.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.Model` instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    if model_name is None and model_display_name is None:
        raise ValueError("One of 'model_name' or 'model_display_name' must be set.")

    if model_name is not None:
        # Build a request object and forward the caller's options, as the
        # sibling methods do, instead of silently dropping **kwargs.
        request = google.cloud.automl_v1beta1.GetModelRequest(name=model_name)
        method_kwargs = self.__process_request_kwargs(request, **kwargs)
        return self.auto_ml_client.get_model(request=request, **method_kwargs)

    return self.__lookup_by_display_name(
        "model",
        self.list_models(project=project, region=region, **kwargs),
        model_display_name,
    )
# TODO(jonathanskim): allow deployment from just model ID
def deploy_model(
    self,
    *,
    model=None,
    model_name=None,
    model_display_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Deploy a model so that it can serve online predictions.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> op = client.deploy_model(model_display_name='my_model')
        >>>
        >>> op.result() # blocks on deploy request
        >>>

    Args:
        model (Optional[Model]):
            The `Model` instance to deploy. This must be supplied if
            `model_display_name` and `model_name` are not.
        model_name (Optional[str]):
            The AutoML-assigned name of the model to deploy. This must be
            supplied if `model` and `model_display_name` are not.
        model_display_name (Optional[str]):
            The human-readable name of the model to deploy. This must be
            supplied if `model` and `model_name` are not.
        project (Optional[str]):
            The ID of the project that owns the model. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `auto_ml_client.deploy_model`.

    Returns:
        google.api_core.operation.Operation:
            An operation future that can be used to check for
            completion synchronously or asynchronously.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    model_lookup = {
        "model": model,
        "model_name": model_name,
        "model_display_name": model_display_name,
        "project": project,
        "region": region,
    }
    resolved_name = self.__model_name_from_args(**model_lookup)

    request = google.cloud.automl_v1beta1.DeployModelRequest(name=resolved_name)
    call_options = self.__process_request_kwargs(request, **kwargs)
    operation = self.auto_ml_client.deploy_model(request=request, **call_options)
    self.__log_operation_info("Deploy model", operation)
    return operation
def undeploy_model(
    self,
    *,
    model=None,
    model_name=None,
    model_display_name=None,
    project=None,
    region=None,
    **kwargs,
):
    """Undeploys a model.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> op = client.undeploy_model(model_display_name='my_model')
        >>>
        >>> op.result() # blocks on undeploy request
        >>>

    Args:
        model (Optional[Model]):
            The `Model` instance you want to undeploy. This must be
            supplied if `model_display_name` and `model_name` are not.
        model_name (Optional[str]):
            The AutoML-assigned name given to the model you want to
            undeploy. This must be supplied if `model` and
            `model_display_name` are not.
        model_display_name (Optional[str]):
            The human-readable name given to the model you want to
            undeploy. This must be supplied if `model` and `model_name`
            are not.
        project (Optional[str]):
            The ID of the project that owns the model. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `auto_ml_client.undeploy_model`.

    Returns:
        google.api_core.operation.Operation:
            An operation future that can be used to check for
            completion synchronously or asynchronously.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    model_name = self.__model_name_from_args(
        model=model,
        model_name=model_name,
        model_display_name=model_display_name,
        project=project,
        region=region,
    )

    request = google.cloud.automl_v1beta1.UndeployModelRequest(name=model_name)
    # The request is passed positionally, as at every other call site of this
    # helper, so it can never be mistaken for one of the caller's kwargs.
    method_kwargs = self.__process_request_kwargs(request, **kwargs)
    op = self.auto_ml_client.undeploy_model(request=request, **method_kwargs)
    self.__log_operation_info("Undeploy model", op)
    return op
## TODO(lwander): support pandas DataFrame as input type
def predict(
    self,
    inputs,
    *,
    model=None,
    model_name=None,
    model_display_name=None,
    feature_importance=False,
    project=None,
    region=None,
    **kwargs,
):
    """Makes a prediction on a deployed model. This will fail if the model
    was not deployed.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.predict(inputs={'Age': 30, 'Income': 12, 'Category': 'A'},
        ...     model_display_name='my_model')
        ...
        >>> client.predict([30, 12, 'A'], model_display_name='my_model')
        >>>

    Args:
        inputs (Union[List[str], Dict[str, str]]):
            Either the list of column values to predict with, ordered like
            the model's input feature columns, or a key-value map of
            column display name to value. With a map (any `dict`
            subclass), columns missing from it are sent as null and keys
            that match no column are ignored.
        model (Optional[Model]):
            The `Model` instance you want to predict with. This must be
            supplied if `model_display_name` and `model_name` are not.
        model_name (Optional[str]):
            The AutoML-assigned name given to the model you want to
            predict with. This must be supplied if `model` and
            `model_display_name` are not.
        model_display_name (Optional[str]):
            The human-readable name given to the model you want to predict
            with. This must be supplied if `model` and `model_name` are
            not.
        feature_importance (bool):
            True to enable feature importance explainability. The default
            is False.
        project (Optional[str]):
            The ID of the project that owns the model. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `prediction_client.predict`.

    Returns:
        A :class:`~google.cloud.automl_v1beta1.types.PredictResponse`
        instance.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing, the number of
            inputs does not match the model's columns, or an input value
            has an unsupported type.
    """
    model = self.__model_from_args(
        model=model,
        model_name=model_name,
        model_display_name=model_display_name,
        project=project,
        region=region,
    )

    column_specs = model.tables_model_metadata.input_feature_column_specs

    # isinstance rather than an exact type comparison: dict subclasses such as
    # OrderedDict must also be mapped by column display name, otherwise their
    # keys would be iterated and sent as if they were the column values.
    if isinstance(inputs, dict):
        inputs = [inputs.get(c.display_name, None) for c in column_specs]

    if len(inputs) != len(column_specs):
        raise ValueError(
            (
                "Dimension mismatch, the number of provided inputs ({}) does not match that of the model ({})"
            ).format(len(inputs), len(column_specs))
        )

    values = []
    for raw_value in inputs:
        proto_value, err = to_proto_value(raw_value)
        if err is not None:
            raise ValueError(err)
        values.append(proto_value)

    row = data_items.Row()

    # append each row value separately until issue is resolved
    # https://github.com/googleapis/proto-plus-python/issues/104
    for v in values:
        row.values.append(v)

    payload = data_items.ExamplePayload(row=row)

    params = None
    if feature_importance:
        params = {"feature_importance": "true"}

    request = google.cloud.automl_v1beta1.PredictRequest(
        name=model.name,
        payload=payload,
        params=params,
    )
    method_kwargs = self.__process_request_kwargs(request, **kwargs)
    return self.prediction_client.predict(request=request, **method_kwargs)
def batch_predict(
    self,
    *,
    pandas_dataframe=None,
    bigquery_input_uri=None,
    bigquery_output_uri=None,
    gcs_input_uris=None,
    gcs_output_uri_prefix=None,
    model=None,
    model_name=None,
    model_display_name=None,
    project=None,
    region=None,
    credentials=None,
    inputs=None,
    params=None,
    **kwargs,
):
    """Makes a batch prediction on a model. This does _not_ require the
    model to be deployed.

    Exactly one input source is used, chosen in this order of precedence:
    `pandas_dataframe`, `gcs_input_uris`, `bigquery_input_uri`. For the
    output, `gcs_output_uri_prefix` takes precedence over
    `bigquery_output_uri`.

    Example:
        >>> from google.cloud import automl_v1beta1
        >>>
        >>> from google.oauth2 import service_account
        >>>
        >>> client = automl_v1beta1.TablesClient(
        ...     credentials=service_account.Credentials.from_service_account_file('~/.gcp/account.json'),
        ...     project='my-project', region='us-central1')
        ...
        >>> client.batch_predict(
        ...     gcs_input_uris='gs://inputs/input.csv',
        ...     gcs_output_uri_prefix='gs://outputs/',
        ...     model_display_name='my_model'
        ... ).result()
        ...

    Args:
        pandas_dataframe (Optional[pandas.DataFrame]):
            A Pandas Dataframe object containing the data you want to
            predict off of. The data will be converted to CSV, and this
            CSV will be staged to GCS in
            `gs://{project}-automl-tables-staging/{uploaded_csv_name}`.
            This must be supplied if neither `gcs_input_uris` nor
            `bigquery_input_uri` is supplied.
        bigquery_input_uri (Optional[str]):
            The BigQuery table to input data from. This must be supplied
            if neither `pandas_dataframe` nor `gcs_input_uris` is
            supplied.
        bigquery_output_uri (Optional[str]):
            The BigQuery table to output data to. This must be supplied
            if `gcs_output_uri_prefix` is not.
        gcs_input_uris (Optional[Union[List[str], str]]):
            Either a list (or tuple) of GCS URIs or a single GCS URI
            containing the data you want to predict off of. This must be
            supplied if neither `pandas_dataframe` nor
            `bigquery_input_uri` is supplied.
        gcs_output_uri_prefix (Optional[str]):
            The folder in GCS you want to write output to. This must be
            supplied if `bigquery_output_uri` is not.
        model (Optional[Model]):
            The `Model` instance you want to predict with. This must be
            supplied if `model_display_name` and `model_name` are not.
        model_name (Optional[str]):
            The AutoML-assigned name given to the model you want to
            predict with. This must be supplied if `model` and
            `model_display_name` are not.
        model_display_name (Optional[str]):
            The human-readable name given to the model you want to predict
            with. This must be supplied if `model` and `model_name` are
            not.
        project (Optional[str]):
            The ID of the project that owns the model. Falls back to the
            project the client was initialized with. The client's service
            account must have access to this project.
        region (Optional[str]):
            Falls back to the region the client was initialized with.
        credentials (Optional[google.auth.credentials.Credentials]):
            The authorization credentials used for the GCS staging client
            when `pandas_dataframe` is given. Falls back to the client's
            own credentials.
        inputs:
            Accepted for backward compatibility; this method does not
            read it.
        params (Optional[dict]):
            Additional domain-specific parameters for the predictions,
            any string must be up to 25000 characters long. Defaults to
            an empty dict.
        **kwargs:
            Passed through the client's request-kwargs processing; whatever
            remains is forwarded to `prediction_client.batch_predict`.

    Returns:
        google.api_core.operation.Operation:
            An operation future that can be used to check for
            completion synchronously or asynchronously.

    Raises:
        google.api_core.exceptions.GoogleAPICallError: If the request
            failed for any reason.
        google.api_core.exceptions.RetryError: If the request failed due
            to a retryable error and retry attempts failed.
        ValueError: If required parameters are missing.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if params is None:
        params = {}

    model_name = self.__model_name_from_args(
        model=model,
        model_name=model_name,
        model_display_name=model_display_name,
        project=project,
        region=region,
    )

    input_request = None

    if pandas_dataframe is not None:
        # The dataframe is staged to GCS first; the upload needs concrete
        # project/region/credentials, so fall back to the client's own.
        project = project or self.project
        region = region or self.region
        credentials = credentials or self.credentials
        self.__ensure_gcs_client_is_initialized(credentials, project)
        self.gcs_client.ensure_bucket_exists(project, region)
        gcs_input_uri = self.gcs_client.upload_pandas_dataframe(pandas_dataframe)
        input_request = {"gcs_source": {"input_uris": [gcs_input_uri]}}
    elif gcs_input_uris is not None:
        # Accept a single URI as well as a list/tuple of URIs. isinstance
        # keeps tuples and list subclasses from being nested in another list.
        if isinstance(gcs_input_uris, (list, tuple)):
            gcs_input_uris = list(gcs_input_uris)
        else:
            gcs_input_uris = [gcs_input_uris]
        input_request = {"gcs_source": {"input_uris": gcs_input_uris}}
    elif bigquery_input_uri is not None:
        input_request = {"bigquery_source": {"input_uri": bigquery_input_uri}}
    else:
        raise ValueError(
            "One of 'pandas_dataframe'/'gcs_input_uris'/'bigquery_input_uri' must be set"
        )

    output_request = None
    if gcs_output_uri_prefix is not None:
        output_request = {
            "gcs_destination": {"output_uri_prefix": gcs_output_uri_prefix}
        }
    elif bigquery_output_uri is not None:
        output_request = {
            "bigquery_destination": {"output_uri": bigquery_output_uri}
        }
    else:
        raise ValueError(
            "One of 'gcs_output_uri_prefix'/'bigquery_output_uri' must be set"
        )

    req = google.cloud.automl_v1beta1.BatchPredictRequest(
        name=model_name,
        input_config=input_request,
        output_config=output_request,
        params=params,
    )

    method_kwargs = self.__process_request_kwargs(req, **kwargs)
    op = self.prediction_client.batch_predict(request=req, **method_kwargs)
    self.__log_operation_info("Batch predict", op)
    return op