Class: Google::Cloud::DocumentAI::V1::DocumentSchema

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/documentai/v1/document_schema.rb

Overview

The schema defines the output of the processed document by a processor.

Defined Under Namespace

Classes: EntityType, Metadata

Instance Attribute Summary collapse

Instance Attribute Details

#description::String

Returns Description of the schema.

Returns:

  • (::String)

    Description of the schema.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'proto_docs/google/cloud/documentai/v1/document_schema.rb', line 37

# The schema defines the output of the processed document by a processor.
#
# Attributes documented for this message:
#
# - `description` (`::String`): Description of the schema.
# - `display_name` (`::String`): Display name to show to users.
# - `entity_types`
#   (`::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType>`):
#   Entity types of the schema.
# - `metadata` (`::Google::Cloud::DocumentAI::V1::DocumentSchema::Metadata`):
#   Metadata of the schema.
class DocumentSchema
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # EntityType is the wrapper of a label of the corresponding model with
  # detailed attributes and limitations for entity-based processors. Multiple
  # types can also compose a dependency tree to represent nested types.
  # @!attribute [rw] enum_values
  #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::EnumValues]
  #     If specified, lists all the possible values for this entity.  This
  #     should not be more than a handful of values.  If the number of values
  #     is >10 or could change frequently, use the `EntityType.value_ontology`
  #     field and specify a list of all possible values in a value ontology
  #     file.
  # @!attribute [rw] display_name
  #   @return [::String]
  #     User defined name for the type.
  # @!attribute [rw] name
  #   @return [::String]
  #     Name of the type. It must be unique within the schema file and
  #     cannot be a "Common Type".  The following naming conventions are used:
  #
  #     - Use `snake_casing`.
  #     - Name matching is case-sensitive.
  #     - Maximum 64 characters.
  #     - Must start with a letter.
  #     - Allowed characters: ASCII letters `[a-z0-9_-]`.  (For backward
  #       compatibility internal infrastructure and tooling can handle any ASCII
  #       character.)
  #     - The `/` is sometimes used to denote a property of a type.  For example
  #       `line_item/amount`.  This convention is deprecated, but will still be
  #       honored for backward compatibility.
  # @!attribute [rw] base_types
  #   @return [::Array<::String>]
  #     The entity type that this type is derived from.  For now, one and only
  #     one should be set.
  # @!attribute [rw] properties
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property>]
  #     Describes the nested structure, or composition, of an entity.
  class EntityType
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Defines a list of enum values.
    # @!attribute [rw] values
    #   @return [::Array<::String>]
    #     The individual values that this enum values type can include.
    class EnumValues
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # Defines properties that can be part of the entity type.
    # @!attribute [rw] name
    #   @return [::String]
    #     The name of the property.  Follows the same guidelines as the
    #     EntityType name.
    # @!attribute [rw] display_name
    #   @return [::String]
    #     User defined name for the property.
    # @!attribute [rw] value_type
    #   @return [::String]
    #     A reference to the value type of the property.  This type is subject
    #     to the same conventions as the `EntityType.base_types` field.
    # @!attribute [rw] occurrence_type
    #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property::OccurrenceType]
    #     Occurrence type limits the number of instances an entity type appears
    #     in the document.
    class Property
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods

      # Types of occurrences of the entity type in the document.  This
      # represents the number of instances, not mentions, of an entity.
      # For example, a bank statement might only have one
      # `account_number`, but this account number can be mentioned in several
      # places on the document.  In this case, the `account_number` is
      # considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      # expect a bank statement to contain the status of multiple different
      # accounts for the customers, the occurrence type is set to
      # `REQUIRED_MULTIPLE`.
      module OccurrenceType
        # Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0

        # There will be zero or one instance of this entity type.  The same
        # entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1

        # The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2

        # The entity type will only appear exactly once.  The same
        # entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3

        # The entity type will appear one or more times.
        REQUIRED_MULTIPLE = 4
      end
    end
  end

  # Metadata for global schema behavior.
  # @!attribute [rw] document_splitter
  #   @return [::Boolean]
  #     If true, a `document` entity type can be applied to a subdocument
  #     (splitting). Otherwise, it can only be applied to the entire document
  #     (classification).
  # @!attribute [rw] document_allow_multiple_labels
  #   @return [::Boolean]
  #     If true, on a given page, there can be multiple `document` annotations
  #     covering it.
  # @!attribute [rw] prefixed_naming_on_properties
  #   @return [::Boolean]
  #     If set, all the nested entities must be prefixed with the parents.
  # @!attribute [rw] skip_naming_validation
  #   @return [::Boolean]
  #     If set, we will skip the naming format validation in the schema. So the
  #     string values in `DocumentSchema.EntityType.name` and
  #     `DocumentSchema.EntityType.Property.name` will not be checked.
  class Metadata
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#display_name::String

Returns Display name to show to users.

Returns:

  • (::String)

    Display name to show to users.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'proto_docs/google/cloud/documentai/v1/document_schema.rb', line 37

# The schema defines the output of the processed document by a processor.
#
# Attributes documented for this message:
#
# - `description` (`::String`): Description of the schema.
# - `display_name` (`::String`): Display name to show to users.
# - `entity_types`
#   (`::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType>`):
#   Entity types of the schema.
# - `metadata` (`::Google::Cloud::DocumentAI::V1::DocumentSchema::Metadata`):
#   Metadata of the schema.
class DocumentSchema
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # EntityType is the wrapper of a label of the corresponding model with
  # detailed attributes and limitations for entity-based processors. Multiple
  # types can also compose a dependency tree to represent nested types.
  # @!attribute [rw] enum_values
  #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::EnumValues]
  #     If specified, lists all the possible values for this entity.  This
  #     should not be more than a handful of values.  If the number of values
  #     is >10 or could change frequently, use the `EntityType.value_ontology`
  #     field and specify a list of all possible values in a value ontology
  #     file.
  # @!attribute [rw] display_name
  #   @return [::String]
  #     User defined name for the type.
  # @!attribute [rw] name
  #   @return [::String]
  #     Name of the type. It must be unique within the schema file and
  #     cannot be a "Common Type".  The following naming conventions are used:
  #
  #     - Use `snake_casing`.
  #     - Name matching is case-sensitive.
  #     - Maximum 64 characters.
  #     - Must start with a letter.
  #     - Allowed characters: ASCII letters `[a-z0-9_-]`.  (For backward
  #       compatibility internal infrastructure and tooling can handle any ASCII
  #       character.)
  #     - The `/` is sometimes used to denote a property of a type.  For example
  #       `line_item/amount`.  This convention is deprecated, but will still be
  #       honored for backward compatibility.
  # @!attribute [rw] base_types
  #   @return [::Array<::String>]
  #     The entity type that this type is derived from.  For now, one and only
  #     one should be set.
  # @!attribute [rw] properties
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property>]
  #     Describes the nested structure, or composition, of an entity.
  class EntityType
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Defines a list of enum values.
    # @!attribute [rw] values
    #   @return [::Array<::String>]
    #     The individual values that this enum values type can include.
    class EnumValues
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # Defines properties that can be part of the entity type.
    # @!attribute [rw] name
    #   @return [::String]
    #     The name of the property.  Follows the same guidelines as the
    #     EntityType name.
    # @!attribute [rw] display_name
    #   @return [::String]
    #     User defined name for the property.
    # @!attribute [rw] value_type
    #   @return [::String]
    #     A reference to the value type of the property.  This type is subject
    #     to the same conventions as the `EntityType.base_types` field.
    # @!attribute [rw] occurrence_type
    #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property::OccurrenceType]
    #     Occurrence type limits the number of instances an entity type appears
    #     in the document.
    class Property
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods

      # Types of occurrences of the entity type in the document.  This
      # represents the number of instances, not mentions, of an entity.
      # For example, a bank statement might only have one
      # `account_number`, but this account number can be mentioned in several
      # places on the document.  In this case, the `account_number` is
      # considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      # expect a bank statement to contain the status of multiple different
      # accounts for the customers, the occurrence type is set to
      # `REQUIRED_MULTIPLE`.
      module OccurrenceType
        # Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0

        # There will be zero or one instance of this entity type.  The same
        # entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1

        # The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2

        # The entity type will only appear exactly once.  The same
        # entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3

        # The entity type will appear one or more times.
        REQUIRED_MULTIPLE = 4
      end
    end
  end

  # Metadata for global schema behavior.
  # @!attribute [rw] document_splitter
  #   @return [::Boolean]
  #     If true, a `document` entity type can be applied to a subdocument
  #     (splitting). Otherwise, it can only be applied to the entire document
  #     (classification).
  # @!attribute [rw] document_allow_multiple_labels
  #   @return [::Boolean]
  #     If true, on a given page, there can be multiple `document` annotations
  #     covering it.
  # @!attribute [rw] prefixed_naming_on_properties
  #   @return [::Boolean]
  #     If set, all the nested entities must be prefixed with the parents.
  # @!attribute [rw] skip_naming_validation
  #   @return [::Boolean]
  #     If set, we will skip the naming format validation in the schema. So the
  #     string values in `DocumentSchema.EntityType.name` and
  #     `DocumentSchema.EntityType.Property.name` will not be checked.
  class Metadata
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#entity_types::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType>

Returns Entity types of the schema.

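Returns:

  • (::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType>)

    Entity types of the schema.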



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'proto_docs/google/cloud/documentai/v1/document_schema.rb', line 37

# The schema defines the output of the processed document by a processor.
#
# Attributes documented for this message:
#
# - `description` (`::String`): Description of the schema.
# - `display_name` (`::String`): Display name to show to users.
# - `entity_types`
#   (`::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType>`):
#   Entity types of the schema.
# - `metadata` (`::Google::Cloud::DocumentAI::V1::DocumentSchema::Metadata`):
#   Metadata of the schema.
class DocumentSchema
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # EntityType is the wrapper of a label of the corresponding model with
  # detailed attributes and limitations for entity-based processors. Multiple
  # types can also compose a dependency tree to represent nested types.
  # @!attribute [rw] enum_values
  #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::EnumValues]
  #     If specified, lists all the possible values for this entity.  This
  #     should not be more than a handful of values.  If the number of values
  #     is >10 or could change frequently, use the `EntityType.value_ontology`
  #     field and specify a list of all possible values in a value ontology
  #     file.
  # @!attribute [rw] display_name
  #   @return [::String]
  #     User defined name for the type.
  # @!attribute [rw] name
  #   @return [::String]
  #     Name of the type. It must be unique within the schema file and
  #     cannot be a "Common Type".  The following naming conventions are used:
  #
  #     - Use `snake_casing`.
  #     - Name matching is case-sensitive.
  #     - Maximum 64 characters.
  #     - Must start with a letter.
  #     - Allowed characters: ASCII letters `[a-z0-9_-]`.  (For backward
  #       compatibility internal infrastructure and tooling can handle any ASCII
  #       character.)
  #     - The `/` is sometimes used to denote a property of a type.  For example
  #       `line_item/amount`.  This convention is deprecated, but will still be
  #       honored for backward compatibility.
  # @!attribute [rw] base_types
  #   @return [::Array<::String>]
  #     The entity type that this type is derived from.  For now, one and only
  #     one should be set.
  # @!attribute [rw] properties
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property>]
  #     Describes the nested structure, or composition, of an entity.
  class EntityType
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Defines a list of enum values.
    # @!attribute [rw] values
    #   @return [::Array<::String>]
    #     The individual values that this enum values type can include.
    class EnumValues
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # Defines properties that can be part of the entity type.
    # @!attribute [rw] name
    #   @return [::String]
    #     The name of the property.  Follows the same guidelines as the
    #     EntityType name.
    # @!attribute [rw] display_name
    #   @return [::String]
    #     User defined name for the property.
    # @!attribute [rw] value_type
    #   @return [::String]
    #     A reference to the value type of the property.  This type is subject
    #     to the same conventions as the `EntityType.base_types` field.
    # @!attribute [rw] occurrence_type
    #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property::OccurrenceType]
    #     Occurrence type limits the number of instances an entity type appears
    #     in the document.
    class Property
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods

      # Types of occurrences of the entity type in the document.  This
      # represents the number of instances, not mentions, of an entity.
      # For example, a bank statement might only have one
      # `account_number`, but this account number can be mentioned in several
      # places on the document.  In this case, the `account_number` is
      # considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      # expect a bank statement to contain the status of multiple different
      # accounts for the customers, the occurrence type is set to
      # `REQUIRED_MULTIPLE`.
      module OccurrenceType
        # Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0

        # There will be zero or one instance of this entity type.  The same
        # entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1

        # The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2

        # The entity type will only appear exactly once.  The same
        # entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3

        # The entity type will appear one or more times.
        REQUIRED_MULTIPLE = 4
      end
    end
  end

  # Metadata for global schema behavior.
  # @!attribute [rw] document_splitter
  #   @return [::Boolean]
  #     If true, a `document` entity type can be applied to a subdocument
  #     (splitting). Otherwise, it can only be applied to the entire document
  #     (classification).
  # @!attribute [rw] document_allow_multiple_labels
  #   @return [::Boolean]
  #     If true, on a given page, there can be multiple `document` annotations
  #     covering it.
  # @!attribute [rw] prefixed_naming_on_properties
  #   @return [::Boolean]
  #     If set, all the nested entities must be prefixed with the parents.
  # @!attribute [rw] skip_naming_validation
  #   @return [::Boolean]
  #     If set, we will skip the naming format validation in the schema. So the
  #     string values in `DocumentSchema.EntityType.name` and
  #     `DocumentSchema.EntityType.Property.name` will not be checked.
  class Metadata
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#metadata::Google::Cloud::DocumentAI::V1::DocumentSchema::Metadata

Returns Metadata of the schema.

Returns:

  • (::Google::Cloud::DocumentAI::V1::DocumentSchema::Metadata)

    Metadata of the schema.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'proto_docs/google/cloud/documentai/v1/document_schema.rb', line 37

# The schema defines the output of the processed document by a processor.
#
# Attributes documented for this message:
#
# - `description` (`::String`): Description of the schema.
# - `display_name` (`::String`): Display name to show to users.
# - `entity_types`
#   (`::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType>`):
#   Entity types of the schema.
# - `metadata` (`::Google::Cloud::DocumentAI::V1::DocumentSchema::Metadata`):
#   Metadata of the schema.
class DocumentSchema
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # EntityType is the wrapper of a label of the corresponding model with
  # detailed attributes and limitations for entity-based processors. Multiple
  # types can also compose a dependency tree to represent nested types.
  # @!attribute [rw] enum_values
  #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::EnumValues]
  #     If specified, lists all the possible values for this entity.  This
  #     should not be more than a handful of values.  If the number of values
  #     is >10 or could change frequently, use the `EntityType.value_ontology`
  #     field and specify a list of all possible values in a value ontology
  #     file.
  # @!attribute [rw] display_name
  #   @return [::String]
  #     User defined name for the type.
  # @!attribute [rw] name
  #   @return [::String]
  #     Name of the type. It must be unique within the schema file and
  #     cannot be a "Common Type".  The following naming conventions are used:
  #
  #     - Use `snake_casing`.
  #     - Name matching is case-sensitive.
  #     - Maximum 64 characters.
  #     - Must start with a letter.
  #     - Allowed characters: ASCII letters `[a-z0-9_-]`.  (For backward
  #       compatibility internal infrastructure and tooling can handle any ASCII
  #       character.)
  #     - The `/` is sometimes used to denote a property of a type.  For example
  #       `line_item/amount`.  This convention is deprecated, but will still be
  #       honored for backward compatibility.
  # @!attribute [rw] base_types
  #   @return [::Array<::String>]
  #     The entity type that this type is derived from.  For now, one and only
  #     one should be set.
  # @!attribute [rw] properties
  #   @return [::Array<::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property>]
  #     Describes the nested structure, or composition, of an entity.
  class EntityType
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Defines a list of enum values.
    # @!attribute [rw] values
    #   @return [::Array<::String>]
    #     The individual values that this enum values type can include.
    class EnumValues
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods
    end

    # Defines properties that can be part of the entity type.
    # @!attribute [rw] name
    #   @return [::String]
    #     The name of the property.  Follows the same guidelines as the
    #     EntityType name.
    # @!attribute [rw] display_name
    #   @return [::String]
    #     User defined name for the property.
    # @!attribute [rw] value_type
    #   @return [::String]
    #     A reference to the value type of the property.  This type is subject
    #     to the same conventions as the `EntityType.base_types` field.
    # @!attribute [rw] occurrence_type
    #   @return [::Google::Cloud::DocumentAI::V1::DocumentSchema::EntityType::Property::OccurrenceType]
    #     Occurrence type limits the number of instances an entity type appears
    #     in the document.
    class Property
      include ::Google::Protobuf::MessageExts
      extend ::Google::Protobuf::MessageExts::ClassMethods

      # Types of occurrences of the entity type in the document.  This
      # represents the number of instances, not mentions, of an entity.
      # For example, a bank statement might only have one
      # `account_number`, but this account number can be mentioned in several
      # places on the document.  In this case, the `account_number` is
      # considered a `REQUIRED_ONCE` entity type. If, on the other hand, we
      # expect a bank statement to contain the status of multiple different
      # accounts for the customers, the occurrence type is set to
      # `REQUIRED_MULTIPLE`.
      module OccurrenceType
        # Unspecified occurrence type.
        OCCURRENCE_TYPE_UNSPECIFIED = 0

        # There will be zero or one instance of this entity type.  The same
        # entity instance may be mentioned multiple times.
        OPTIONAL_ONCE = 1

        # The entity type will appear zero or multiple times.
        OPTIONAL_MULTIPLE = 2

        # The entity type will only appear exactly once.  The same
        # entity instance may be mentioned multiple times.
        REQUIRED_ONCE = 3

        # The entity type will appear one or more times.
        REQUIRED_MULTIPLE = 4
      end
    end
  end

  # Metadata for global schema behavior.
  # @!attribute [rw] document_splitter
  #   @return [::Boolean]
  #     If true, a `document` entity type can be applied to a subdocument
  #     (splitting). Otherwise, it can only be applied to the entire document
  #     (classification).
  # @!attribute [rw] document_allow_multiple_labels
  #   @return [::Boolean]
  #     If true, on a given page, there can be multiple `document` annotations
  #     covering it.
  # @!attribute [rw] prefixed_naming_on_properties
  #   @return [::Boolean]
  #     If set, all the nested entities must be prefixed with the parents.
  # @!attribute [rw] skip_naming_validation
  #   @return [::Boolean]
  #     If set, we will skip the naming format validation in the schema. So the
  #     string values in `DocumentSchema.EntityType.name` and
  #     `DocumentSchema.EntityType.Property.name` will not be checked.
  class Metadata
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end