Class: Google::Cloud::Bigquery::External::CsvSource

Inherits:
DataSource
  • Object
show all
Defined in:
lib/google/cloud/bigquery/external.rb

Overview

CsvSource

CsvSource is a subclass of DataSource and represents a CSV external data source, such as a file in Google Cloud Storage or Google Drive, that can be queried directly even though the data is not stored in BigQuery. Instead of loading or streaming the data, this object references the external data source.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.autodetect = true
  csv.skip_leading_rows = 1
end

data = bigquery.query "SELECT * FROM my_ext_table",
                      external: { my_ext_table: csv_table }

# Iterate over the first page of results
data.each do |row|
  puts row[:name]
end
# Retrieve the next page of results
data = data.next if data.next?

Instance Method Summary collapse

Methods inherited from DataSource

#autodetect, #autodetect=, #avro?, #backup?, #bigtable?, #compression, #compression=, #csv?, #format, #ignore_unknown, #ignore_unknown=, #json?, #max_bad_records, #max_bad_records=, #sheets?, #urls

Instance Method Details

#delimiter ⇒ String

The separator for fields in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"

Returns:

  • (String)


795
796
797
# File 'lib/google/cloud/bigquery/external.rb', line 795

# The separator for fields in a CSV file.
#
# Reads `field_delimiter` from the CSV options of the underlying gapi object.
#
# @return [String]
def delimiter
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter
end

#delimiter=(new_delimiter) ⇒ Object

Set the separator for fields in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"

Parameters:

  • new_delimiter (String)

    New delimiter value



816
817
818
819
# File 'lib/google/cloud/bigquery/external.rb', line 816

# Set the separator for fields in a CSV file.
#
# `frozen_check!` runs before the CSV options are touched.
#
# @param new_delimiter [String] New delimiter value
def delimiter=(new_delimiter)
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.field_delimiter = new_delimiter
end

#encoding ⇒ String

The character encoding of the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"

Returns:

  • (String)


706
707
708
# File 'lib/google/cloud/bigquery/external.rb', line 706

# The character encoding of the data.
#
# Reads `encoding` from the CSV options of the underlying gapi object.
#
# @return [String]
def encoding
  csv_opts = @gapi.csv_options
  csv_opts.encoding
end

#encoding=(new_encoding) ⇒ Object

Set the character encoding of the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"

Parameters:

  • new_encoding (String)

    New encoding value



727
728
729
730
# File 'lib/google/cloud/bigquery/external.rb', line 727

# Set the character encoding of the data.
#
# `frozen_check!` runs before the CSV options are touched.
#
# @param new_encoding [String] New encoding value
def encoding=(new_encoding)
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.encoding = new_encoding
end

#fields ⇒ Array<Schema::Field>

The fields of the schema.

Returns:

  • (Array<Schema::Field>)

978
979
980
# File 'lib/google/cloud/bigquery/external.rb', line 978

# The fields of the schema.
#
# Delegates to the object returned by `schema`.
#
# @return [Array<Schema::Field>]
def fields
  current_schema = schema
  current_schema.fields
end

#headers ⇒ Array<Symbol>

The names of the columns in the schema.

Returns:

  • (Array<Symbol>)

    An array of column names.



987
988
989
# File 'lib/google/cloud/bigquery/external.rb', line 987

# The names of the columns in the schema.
#
# Delegates to the object returned by `schema`.
#
# @return [Array<Symbol>] An array of column names.
def headers
  current_schema = schema
  current_schema.headers
end

#iso8859_1? ⇒ Boolean

Checks if the character encoding of the data is "ISO-8859-1".

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "ISO-8859-1"
end

csv_table.encoding #=> "ISO-8859-1"
csv_table.iso8859_1? #=> true

Returns:

  • (Boolean)


774
775
776
# File 'lib/google/cloud/bigquery/external.rb', line 774

# Checks if the character encoding of the data is "ISO-8859-1".
#
# @return [Boolean]
def iso8859_1?
  current_encoding = encoding
  current_encoding == "ISO-8859-1"
end

#jagged_rows ⇒ Boolean

Indicates if BigQuery should accept rows that are missing trailing optional columns.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true

Returns:

  • (Boolean)


617
618
619
# File 'lib/google/cloud/bigquery/external.rb', line 617

# Indicates if BigQuery should accept rows that are missing trailing
# optional columns.
#
# Reads `allow_jagged_rows` from the CSV options of the underlying gapi
# object.
#
# @return [Boolean]
def jagged_rows
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows
end

#jagged_rows=(new_jagged_rows) ⇒ Object

Set whether BigQuery should accept rows that are missing trailing optional columns.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true

Parameters:

  • new_jagged_rows (Boolean)

    New jagged_rows value



639
640
641
642
# File 'lib/google/cloud/bigquery/external.rb', line 639

# Set whether BigQuery should accept rows that are missing trailing
# optional columns.
#
# `frozen_check!` runs before the CSV options are touched.
#
# @param new_jagged_rows [Boolean] New jagged_rows value
def jagged_rows=(new_jagged_rows)
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_jagged_rows = new_jagged_rows
end

#param_types ⇒ Hash

The types of the fields in the schema, using the same format as the optional query parameter types.

Returns:

  • (Hash)

    A hash with field names as keys, and types as values.



997
998
999
# File 'lib/google/cloud/bigquery/external.rb', line 997

# The types of the fields in the schema, using the same format as the
# optional query parameter types.
#
# Delegates to the object returned by `schema`.
#
# @return [Hash] A hash with field names as keys, and types as values.
def param_types
  current_schema = schema
  current_schema.param_types
end

#quote ⇒ String

The value that is used to quote data sections in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"

Returns:

  • (String)


838
839
840
# File 'lib/google/cloud/bigquery/external.rb', line 838

# The value that is used to quote data sections in a CSV file.
#
# Reads `quote` from the CSV options of the underlying gapi object.
#
# @return [String]
def quote
  csv_opts = @gapi.csv_options
  csv_opts.quote
end

#quote=(new_quote) ⇒ Object

Set the value that is used to quote data sections in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"

Parameters:

  • new_quote (String)

    New quote value



859
860
861
862
# File 'lib/google/cloud/bigquery/external.rb', line 859

# Set the value that is used to quote data sections in a CSV file.
#
# `frozen_check!` runs before the CSV options are touched.
#
# @param new_quote [String] New quote value
def quote=(new_quote)
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.quote = new_quote
end

#quoted_newlines ⇒ Boolean

Indicates if BigQuery should allow quoted data sections that contain newline characters in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true

Returns:

  • (Boolean)


662
663
664
# File 'lib/google/cloud/bigquery/external.rb', line 662

# Indicates if BigQuery should allow quoted data sections that contain
# newline characters in a CSV file.
#
# Reads `allow_quoted_newlines` from the CSV options of the underlying gapi
# object.
#
# @return [Boolean]
def quoted_newlines
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines
end

#quoted_newlines=(new_quoted_newlines) ⇒ Object

Set whether BigQuery should allow quoted data sections that contain newline characters in a CSV file.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true

Parameters:

  • new_quoted_newlines (Boolean)

    New quoted_newlines value



684
685
686
687
# File 'lib/google/cloud/bigquery/external.rb', line 684

# Set whether BigQuery should allow quoted data sections that contain
# newline characters in a CSV file.
#
# `frozen_check!` runs before the CSV options are touched.
#
# @param new_quoted_newlines [Boolean] New quoted_newlines value
def quoted_newlines=(new_quoted_newlines)
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.allow_quoted_newlines = new_quoted_newlines
end

#schema(replace: false) {|schema| ... } ⇒ Google::Cloud::Bigquery::Schema

The schema for the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.schema do |schema|
    schema.string "name", mode: :required
    schema.string "email", mode: :required
    schema.integer "age", mode: :required
    schema.boolean "active", mode: :required
  end
end

Parameters:

  • replace (Boolean) (defaults to: false)

    Whether to replace the existing schema with the new schema. If true, the fields will replace the existing schema. If false, the fields will be added to the existing schema. The default value is false.

Yields:

  • (schema)

    a block for setting the schema

Yield Parameters:

  • schema (Schema)

    the object accepting the schema

Returns:

  • (Google::Cloud::Bigquery::Schema)

936
937
938
939
940
941
942
943
944
945
# File 'lib/google/cloud/bigquery/external.rb', line 936

# Returns the schema for the data, optionally yielding it to a block so the
# caller can configure it.
#
# @param replace [Boolean] when true, the current schema is discarded and a
#   new one built by `Schema.from_gapi` (called with no arguments) takes its
#   place. Defaults to false.
# @yield [schema] the schema object, when a block is given
# @return [Schema] the memoized schema object
def schema replace: false
  # Build the schema from the underlying gapi object on first access only.
  @schema ||= Schema.from_gapi @gapi.schema
  if replace
    # NOTE(review): frozen_check! is defined elsewhere; presumably it raises
    # when this data source is frozen — confirm.
    frozen_check!
    @schema = Schema.from_gapi
  end
  # Propagate this object's frozen state to the schema before handing it out.
  @schema.freeze if frozen?
  yield @schema if block_given?
  @schema
end

#schema=(new_schema) ⇒ Object

Set the schema for the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_schema = bigquery.schema do |schema|
  schema.string "name", mode: :required
  schema.string "email", mode: :required
  schema.integer "age", mode: :required
  schema.boolean "active", mode: :required
end

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url
csv_table.schema = csv_schema

Parameters:

  • new_schema (Schema)

    The schema object.



968
969
970
971
# File 'lib/google/cloud/bigquery/external.rb', line 968

# Set the schema for the data.
#
# `frozen_check!` runs before the stored schema is replaced.
#
# @param new_schema [Schema] The schema object.
def schema=(new_schema)
  frozen_check!

  @schema = new_schema
end

#skip_leading_rows ⇒ Integer

The number of rows at the top of a CSV file that BigQuery will skip when reading the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1

Returns:

  • (Integer)


882
883
884
# File 'lib/google/cloud/bigquery/external.rb', line 882

# The number of rows at the top of a CSV file that BigQuery will skip when
# reading the data.
#
# Reads `skip_leading_rows` from the CSV options of the underlying gapi
# object.
#
# @return [Integer]
def skip_leading_rows
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows
end

#skip_leading_rows=(row_count) ⇒ Object

Set the number of rows at the top of a CSV file that BigQuery will skip when reading the data.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1

Parameters:

  • row_count (Integer)

    New skip_leading_rows value



904
905
906
907
# File 'lib/google/cloud/bigquery/external.rb', line 904

# Set the number of rows at the top of a CSV file that BigQuery will skip
# when reading the data.
#
# `frozen_check!` runs before the CSV options are touched.
#
# @param row_count [Integer] New skip_leading_rows value
def skip_leading_rows=(row_count)
  frozen_check!
  csv_opts = @gapi.csv_options
  csv_opts.skip_leading_rows = row_count
end

#utf8? ⇒ Boolean

Checks if the character encoding of the data is "UTF-8". This is the default.

Examples:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"
csv_table.utf8? #=> true

Returns:

  • (Boolean)


751
752
753
754
# File 'lib/google/cloud/bigquery/external.rb', line 751

# Checks if the character encoding of the data is "UTF-8". This is the
# default, so an unset (nil) encoding also counts as UTF-8.
#
# @return [Boolean]
def utf8?
  encoding.nil? || encoding == "UTF-8"
end