As of January 1, 2020 this library no longer supports Python 2 on the latest released version.
Library versions released prior to that date will continue to be available. For more information please
visit Python 2 support on Google Cloud.
Running Queries¶
Querying data¶
Run a query and wait for it to finish with the
query_and_wait()
method:
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
query = """
SELECT name, SUM(number) as total_people
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE state = 'TX'
GROUP BY name, state
ORDER BY total_people DESC
LIMIT 20
"""
rows = client.query_and_wait(query) # Make an API request.
print("The query data:")
for row in rows:
# Row values can be accessed by field name or index.
print("name={}, count={}".format(row[0], row["total_people"]))
Run a dry run query¶
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
job_config = bigquery.QueryJobConfig(dry_run=True, use_query_cache=False)
# Start the query, passing in the extra configuration.
query_job = client.query(
(
"SELECT name, COUNT(*) as name_count "
"FROM `bigquery-public-data.usa_names.usa_1910_2013` "
"WHERE state = 'WA' "
"GROUP BY name"
),
job_config=job_config,
) # Make an API request.
# A dry run query completes immediately.
print("This query will process {} bytes.".format(query_job.total_bytes_processed))
Writing query results to a destination table¶
See BigQuery documentation for more information on writing query results.
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
# TODO(developer): Set table_id to the ID of the destination table.
# table_id = "your-project.your_dataset.your_table_name"
job_config = bigquery.QueryJobConfig(destination=table_id)
sql = """
SELECT corpus
FROM `bigquery-public-data.samples.shakespeare`
GROUP BY corpus;
"""
# Start the query, passing in the extra configuration.
client.query_and_wait(
sql, job_config=job_config
) # Make an API request and wait for the query to finish.
print("Query results loaded to the table {}".format(table_id))
Run a query using a named query parameter¶
See BigQuery documentation for more information on parameterized queries.
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
query = """
SELECT word, word_count
FROM `bigquery-public-data.samples.shakespeare`
WHERE corpus = @corpus
AND word_count >= @min_word_count
ORDER BY word_count DESC;
"""
job_config = bigquery.QueryJobConfig(
query_parameters=[
bigquery.ScalarQueryParameter("corpus", "STRING", "romeoandjuliet"),
bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
)
results = client.query_and_wait(
query, job_config=job_config
) # Make an API request.
for row in results:
print("{}: \t{}".format(row.word, row.word_count))
Run a script¶
See BigQuery documentation for more information on scripting in BigQuery standard SQL.
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
# Run a SQL script.
sql_script = """
-- Declare a variable to hold names as an array.
DECLARE top_names ARRAY<STRING>;
-- Build an array of the top 100 names from the year 2017.
SET top_names = (
SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
FROM `bigquery-public-data.usa_names.usa_1910_2013`
WHERE year = 2000
);
-- Which names appear as words in Shakespeare's plays?
SELECT
name AS shakespeare_name
FROM UNNEST(top_names) AS name
WHERE name IN (
SELECT word
FROM `bigquery-public-data.samples.shakespeare`
);
"""
parent_job = client.query(sql_script)
# Wait for the whole script to finish.
rows_iterable = parent_job.result()
print("Script created {} child jobs.".format(parent_job.num_child_jobs))
# Fetch result rows for the final sub-job in the script.
rows = list(rows_iterable)
print(
"{} of the top 100 names from year 2000 also appear in Shakespeare's works.".format(
len(rows)
)
)
# Fetch jobs created by the SQL script.
child_jobs_iterable = client.list_jobs(parent_job=parent_job)
for child_job in child_jobs_iterable:
child_rows = list(child_job.result())
print(
"Child job with ID {} produced {} row(s).".format(
child_job.job_id, len(child_rows)
)
)