# NOTE: As of January 1, 2020, this library no longer supports Python 2 in its
# latest released version. Library versions released prior to that date will
# continue to be available. For more information, please see "Python 2 support
# on Google Cloud".
# Source code for google.ai.generativelanguage_v1alpha.types.retriever
# -*- coding: utf-8 -*-
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import annotations
from typing import MutableMapping, MutableSequence
import google.protobuf.timestamp_pb2 as timestamp_pb2 # type: ignore
import proto # type: ignore
# Module-level proto-plus descriptor: the proto package name together with
# the manifest of message names declared in this file.
__protobuf__ = proto.module(
    package="google.ai.generativelanguage.v1alpha",
    manifest={
        "Corpus",
        "Document",
        "StringList",
        "CustomMetadata",
        "MetadataFilter",
        "Condition",
        "Chunk",
        "ChunkData",
    },
)
class Corpus(proto.Message):
    r"""A ``Corpus`` is a collection of ``Document``\ s. A project can
    create up to 5 corpora.

    Attributes:
        name (str):
            Immutable. Identifier. The ``Corpus`` resource name. The ID
            (name excluding the "corpora/" prefix) can contain up to 40
            characters that are lowercase alphanumeric or dashes (-).
            The ID cannot start or end with a dash. If the name is empty
            on create, a unique name will be derived from
            ``display_name`` along with a 12 character random suffix.
            Example: ``corpora/my-awesome-corpora-123a456b789c``
        display_name (str):
            Optional. The human-readable display name for the
            ``Corpus``. The display name must be no more than 512
            characters in length, including spaces. Example: "Docs on
            Semantic Retriever".
        create_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. The Timestamp of when the ``Corpus`` was
            created.
        update_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. The Timestamp of when the ``Corpus`` was last
            updated.
    """

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    display_name: str = proto.Field(
        proto.STRING,
        number=2,
    )
    create_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=3,
        message=timestamp_pb2.Timestamp,
    )
    update_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=4,
        message=timestamp_pb2.Timestamp,
    )
class Document(proto.Message):
    r"""A ``Document`` is a collection of ``Chunk``\ s. A ``Corpus`` can
    have a maximum of 10,000 ``Document``\ s.

    Attributes:
        name (str):
            Immutable. Identifier. The ``Document`` resource name. The
            ID (name excluding the "corpora/\*/documents/" prefix) can
            contain up to 40 characters that are lowercase alphanumeric
            or dashes (-). The ID cannot start or end with a dash. If
            the name is empty on create, a unique name will be derived
            from ``display_name`` along with a 12 character random
            suffix. Example:
            ``corpora/{corpus_id}/documents/my-awesome-doc-123a456b789c``
        display_name (str):
            Optional. The human-readable display name for the
            ``Document``. The display name must be no more than 512
            characters in length, including spaces. Example: "Semantic
            Retriever Documentation".
        custom_metadata (MutableSequence[google.ai.generativelanguage_v1alpha.types.CustomMetadata]):
            Optional. User provided custom metadata stored as key-value
            pairs used for querying. A ``Document`` can have a maximum
            of 20 ``CustomMetadata``.
        update_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. The Timestamp of when the ``Document`` was last
            updated.
        create_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. The Timestamp of when the ``Document`` was
            created.
    """

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    display_name: str = proto.Field(
        proto.STRING,
        number=2,
    )
    # "CustomMetadata" is referenced by name because the class is declared
    # further down in this module.
    custom_metadata: MutableSequence["CustomMetadata"] = proto.RepeatedField(
        proto.MESSAGE,
        number=3,
        message="CustomMetadata",
    )
    # Note: update_time (4) precedes create_time (5) in this message.
    update_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=4,
        message=timestamp_pb2.Timestamp,
    )
    create_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=5,
        message=timestamp_pb2.Timestamp,
    )
class StringList(proto.Message):
    r"""User provided string values assigned to a single metadata
    key.

    Attributes:
        values (MutableSequence[str]):
            The string values of the metadata to store.
    """

    values: MutableSequence[str] = proto.RepeatedField(
        proto.STRING,
        number=1,
    )
class CustomMetadata(proto.Message):
    r"""User provided metadata stored as key-value pairs.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        string_value (str):
            The string value of the metadata to store.

            This field is a member of `oneof`_ ``value``.
        string_list_value (google.ai.generativelanguage_v1alpha.types.StringList):
            The StringList value of the metadata to
            store.

            This field is a member of `oneof`_ ``value``.
        numeric_value (float):
            The numeric value of the metadata to store.

            This field is a member of `oneof`_ ``value``.
        key (str):
            Required. The key of the metadata to store.
    """

    # The next three fields are the members of the ``value`` oneof.
    string_value: str = proto.Field(
        proto.STRING,
        number=2,
        oneof="value",
    )
    string_list_value: "StringList" = proto.Field(
        proto.MESSAGE,
        number=6,
        oneof="value",
        message="StringList",
    )
    numeric_value: float = proto.Field(
        proto.FLOAT,
        number=7,
        oneof="value",
    )
    # Declared after the oneof members, but carries field number 1.
    key: str = proto.Field(
        proto.STRING,
        number=1,
    )
class MetadataFilter(proto.Message):
    r"""User provided filter to limit retrieval based on ``Chunk`` or
    ``Document`` level metadata values. Example (genre = drama OR genre
    = action): key = "document.custom_metadata.genre" conditions =
    [{string_value = "drama", operation = EQUAL}, {string_value =
    "action", operation = EQUAL}]

    Attributes:
        key (str):
            Required. The key of the metadata to filter
            on.
        conditions (MutableSequence[google.ai.generativelanguage_v1alpha.types.Condition]):
            Required. The ``Condition``\ s for the given key that will
            trigger this filter. Multiple ``Condition``\ s are joined by
            logical ORs.
    """

    key: str = proto.Field(
        proto.STRING,
        number=1,
    )
    # "Condition" is referenced by name because the class is declared
    # further down in this module.
    conditions: MutableSequence["Condition"] = proto.RepeatedField(
        proto.MESSAGE,
        number=2,
        message="Condition",
    )
class Condition(proto.Message):
    r"""Filter condition applicable to a single key.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        string_value (str):
            The string value to filter the metadata on.

            This field is a member of `oneof`_ ``value``.
        numeric_value (float):
            The numeric value to filter the metadata on.

            This field is a member of `oneof`_ ``value``.
        operation (google.ai.generativelanguage_v1alpha.types.Condition.Operator):
            Required. Operator applied to the given
            key-value pair to trigger the condition.
    """

    class Operator(proto.Enum):
        r"""Defines the valid operators that can be applied to a
        key-value pair.

        Values:
            OPERATOR_UNSPECIFIED (0):
                The default value. This value is unused.
            LESS (1):
                Supported by numeric.
            LESS_EQUAL (2):
                Supported by numeric.
            EQUAL (3):
                Supported by numeric & string.
            GREATER_EQUAL (4):
                Supported by numeric.
            GREATER (5):
                Supported by numeric.
            NOT_EQUAL (6):
                Supported by numeric & string.
            INCLUDES (7):
                Supported by string only when ``CustomMetadata`` value type
                for the given key has a ``string_list_value``.
            EXCLUDES (8):
                Supported by string only when ``CustomMetadata`` value type
                for the given key has a ``string_list_value``.
        """

        OPERATOR_UNSPECIFIED = 0
        LESS = 1
        LESS_EQUAL = 2
        EQUAL = 3
        GREATER_EQUAL = 4
        GREATER = 5
        NOT_EQUAL = 6
        INCLUDES = 7
        EXCLUDES = 8

    # The next two fields are the members of the ``value`` oneof.
    string_value: str = proto.Field(
        proto.STRING,
        number=1,
        oneof="value",
    )
    numeric_value: float = proto.Field(
        proto.FLOAT,
        number=6,
        oneof="value",
    )
    # Not part of the oneof; uses the nested Operator enum declared above.
    operation: Operator = proto.Field(
        proto.ENUM,
        number=5,
        enum=Operator,
    )
class Chunk(proto.Message):
    r"""A ``Chunk`` is a subpart of a ``Document`` that is treated as an
    independent unit for the purposes of vector representation and
    storage. A ``Corpus`` can have a maximum of 1 million ``Chunk``\ s.

    Attributes:
        name (str):
            Immutable. Identifier. The ``Chunk`` resource name. The ID
            (name excluding the "corpora/\*/documents/\*/chunks/" prefix)
            can contain up to 40 characters that are lowercase
            alphanumeric or dashes (-). The ID cannot start or end with
            a dash. If the name is empty on create, a random
            12-character unique ID will be generated. Example:
            ``corpora/{corpus_id}/documents/{document_id}/chunks/123a456b789c``
        data (google.ai.generativelanguage_v1alpha.types.ChunkData):
            Required. The content for the ``Chunk``, such as the text
            string. The maximum number of tokens per chunk is 2043.
        custom_metadata (MutableSequence[google.ai.generativelanguage_v1alpha.types.CustomMetadata]):
            Optional. User provided custom metadata stored as key-value
            pairs. The maximum number of ``CustomMetadata`` per chunk is
            20.
        create_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. The Timestamp of when the ``Chunk`` was
            created.
        update_time (google.protobuf.timestamp_pb2.Timestamp):
            Output only. The Timestamp of when the ``Chunk`` was last
            updated.
        state (google.ai.generativelanguage_v1alpha.types.Chunk.State):
            Output only. Current state of the ``Chunk``.
    """

    class State(proto.Enum):
        r"""States for the lifecycle of a ``Chunk``.

        Values:
            STATE_UNSPECIFIED (0):
                The default value. This value is used if the
                state is omitted.
            STATE_PENDING_PROCESSING (1):
                ``Chunk`` is being processed (embedding and vector storage).
            STATE_ACTIVE (2):
                ``Chunk`` is processed and available for querying.
            STATE_FAILED (10):
                ``Chunk`` failed processing.
        """

        STATE_UNSPECIFIED = 0
        STATE_PENDING_PROCESSING = 1
        STATE_ACTIVE = 2
        # Note the gap: the failure state is numbered 10, not 3.
        STATE_FAILED = 10

    name: str = proto.Field(
        proto.STRING,
        number=1,
    )
    # "ChunkData" is referenced by name because the class is declared
    # further down in this module.
    data: "ChunkData" = proto.Field(
        proto.MESSAGE,
        number=2,
        message="ChunkData",
    )
    custom_metadata: MutableSequence["CustomMetadata"] = proto.RepeatedField(
        proto.MESSAGE,
        number=3,
        message="CustomMetadata",
    )
    create_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=4,
        message=timestamp_pb2.Timestamp,
    )
    update_time: timestamp_pb2.Timestamp = proto.Field(
        proto.MESSAGE,
        number=5,
        message=timestamp_pb2.Timestamp,
    )
    state: State = proto.Field(
        proto.ENUM,
        number=6,
        enum=State,
    )
class ChunkData(proto.Message):
    r"""Extracted data that represents the ``Chunk`` content.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        string_value (str):
            The ``Chunk`` content as a string. The maximum number of
            tokens per chunk is 2043.

            This field is a member of `oneof`_ ``data``.
    """

    # Currently the only member of the ``data`` oneof.
    string_value: str = proto.Field(
        proto.STRING,
        number=1,
        oneof="data",
    )
# Public API: the manifest's message names as an alphabetically sorted tuple.
__all__ = (*sorted(__protobuf__.manifest),)