As of January 1, 2020, this library no longer supports Python 2 on the latest released version. Library versions released prior to that date will continue to be available. For more information, please visit Python 2 support on Google Cloud.

Running Queries

Querying data

Start a query with the query() method, then iterate over the returned job to wait for it to finish and read its rows:

from google.cloud import bigquery

# Create the client that sends requests to the BigQuery API.
client = bigquery.Client()

# Sum the people per name for Texas and keep the 20 largest totals.
sql = """
    SELECT name, SUM(number) as total_people
    FROM `bigquery-public-data.usa_names.usa_1910_2013`
    WHERE state = 'TX'
    GROUP BY name, state
    ORDER BY total_people DESC
    LIMIT 20
"""
job = client.query(sql)  # API request: submits the query.

print("The query data:")
# Iterating over the job yields the result rows.
for record in job:
    # The same row supports access by position and by field name.
    name, total = record[0], record["total_people"]
    print("name={}, count={}".format(name, total))

Run a dry run query

from google.cloud import bigquery

# Create the client that sends requests to the BigQuery API.
client = bigquery.Client()

# Configure a dry run with the query cache disabled.
dry_run_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)

# Count the rows per name for Washington state.
sql = (
    "SELECT name, COUNT(*) as name_count "
    "FROM `bigquery-public-data.usa_names.usa_1910_2013` "
    "WHERE state = 'WA' "
    "GROUP BY name"
)

# Submit the query together with the dry-run configuration.
query_job = client.query(sql, job_config=dry_run_config)  # API request.

# A dry run query completes immediately, so the estimate can be read at once.
bytes_estimate = query_job.total_bytes_processed
print("This query will process {} bytes.".format(bytes_estimate))

Writing query results to a destination table

See BigQuery documentation for more information on writing query results.

from google.cloud import bigquery

# Create the client that sends requests to the BigQuery API.
client = bigquery.Client()

# TODO(developer): Set table_id to the ID of the destination table.
# table_id = "your-project.your_dataset.your_table_name"

# List each distinct corpus in the Shakespeare sample table.
sql = """
    SELECT corpus
    FROM `bigquery-public-data.samples.shakespeare`
    GROUP BY corpus;
"""

# Name the destination table in the job configuration.
destination_config = bigquery.QueryJobConfig(destination=table_id)

# Submit the query with that configuration, then block until the job is done.
query_job = client.query(sql, job_config=destination_config)  # API request.
query_job.result()

print("Query results loaded to the table {}".format(table_id))

Run a query using a named query parameter

See BigQuery documentation for more information on parameterized queries.

from google.cloud import bigquery

# Create the client that sends requests to the BigQuery API.
client = bigquery.Client()

# @corpus and @min_word_count are named parameters supplied via the job config.
query = """
    SELECT word, word_count
    FROM `bigquery-public-data.samples.shakespeare`
    WHERE corpus = @corpus
    AND word_count >= @min_word_count
    ORDER BY word_count DESC;
"""

# One scalar parameter per placeholder: name, BigQuery type, value.
corpus_param = bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet")
min_count_param = bigquery.ScalarQueryParameter("min_word_count", "INT64", 250)
job_config = bigquery.QueryJobConfig(
    query_parameters=[corpus_param, min_count_param]
)

query_job = client.query(query, job_config=job_config)  # API request.

# Print each word alongside its count.
for result_row in query_job:
    print("{}: \t{}".format(result_row.word, result_row.word_count))

Run a script

See BigQuery documentation for more information on scripting in BigQuery standard SQL.

from google.cloud import bigquery

# Construct a BigQuery client object.
client = bigquery.Client()

# Run a multi-statement SQL script: declare an array variable, fill it with
# names from the year 2000, then select those that also occur as words in
# the Shakespeare sample table.
sql_script = """
-- Declare a variable to hold names as an array.
DECLARE top_names ARRAY<STRING>;

-- Build an array of the top 100 names from the year 2000.
SET top_names = (
SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE year = 2000
);

-- Which names appear as words in Shakespeare's plays?
SELECT
name AS shakespeare_name
FROM UNNEST(top_names) AS name
WHERE name IN (
SELECT word
FROM `bigquery-public-data.samples.shakespeare`
);
"""
parent_job = client.query(sql_script)  # Make an API request.

# Wait for the whole script to finish.
rows_iterable = parent_job.result()
print("Script created {} child jobs.".format(parent_job.num_child_jobs))

# Fetch result rows for the final sub-job in the script.
# The rows are materialized into a list so they can be counted.
rows = list(rows_iterable)
print(
    "{} of the top 100 names from year 2000 also appear in Shakespeare's works.".format(
        len(rows)
    )
)

# Fetch jobs created by the SQL script and report how many rows each produced.
child_jobs_iterable = client.list_jobs(parent_job=parent_job)
for child_job in child_jobs_iterable:
    child_rows = list(child_job.result())
    print(
        "Child job with ID {} produced {} row(s).".format(
            child_job.job_id, len(child_rows)
        )
    )

Fork me on GitHub