
Commit d8733b6

Merge branch 'scheduler' of https://github.com/GoogleCloudPlatform/python-docs-samples into scheduler
2 parents f939ae5 + 708ea5d commit d8733b6

35 files changed: +1270 -665 lines

.kokoro/presubmit_tests_trace.cfg

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Download secrets from Cloud Storage.
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
+
+# Tell the trampoline which build file to use.
+env_vars: {
+    key: "TRAMPOLINE_BUILD_FILE"
+    value: "github/python-docs-samples/.kokoro/system_tests.sh"
+}
+
+env_vars: {
+    key: "NOX_SESSION"
+    value: "trace and py36 and not appengine"
+}

.kokoro/system_tests_trace.cfg

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Download secrets from Cloud Storage.
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
+
+# Tell the trampoline which build file to use.
+env_vars: {
+    key: "TRAMPOLINE_BUILD_FILE"
+    value: "github/python-docs-samples/.kokoro/system_tests.sh"
+}
+
+env_vars: {
+    key: "NOX_SESSION"
+    value: "trace and py36 and not appengine"
+}
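
Both Kokoro configs above select which tests to run through the `NOX_SESSION` value `trace and py36 and not appengine`, which reads like a nox keyword expression (e.g. as passed with `nox -k`) that `system_tests.sh` presumably consumes. As a rough sketch only — the session name, Python version, and paths below are assumptions, not this repository's actual nox setup — a session of the shape such an expression targets might look like:

```python
# Hypothetical noxfile.py fragment. The session name, Python version, and
# paths are illustrative assumptions, not the repository's real nox config.
import nox


@nox.session(python='3.6')
def trace(session):
    # Install the sample's dependencies and run its tests; a keyword
    # expression like the one in the config above would be used to pick
    # sessions of this shape while excluding App Engine sessions.
    session.install('-r', 'trace/requirements.txt')
    session.run('pytest', 'trace/')
```
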
Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
-google-endpoints==4.7.0
+google-endpoints==4.8.0
 google-endpoints-api-management==1.11.0

bigquery/cloud-client/user_credentials.py

Lines changed: 39 additions & 16 deletions
@@ -23,20 +23,18 @@
 import argparse
 
 
-def run_query(credentials, project, query):
-    from google.cloud import bigquery
-
-    client = bigquery.Client(project=project, credentials=credentials)
-    query_job = client.query(query)
-
-    # Print the results.
-    for row in query_job.result():  # Wait for the job to complete.
-        print(row)
-
-
-def authenticate_and_query(project, query, launch_browser=True):
+def main(project, launch_browser=True):
+    # [START bigquery_auth_user_flow]
     from google_auth_oauthlib import flow
 
+    # TODO: Uncomment the line below to set the `launch_browser` variable.
+    # launch_browser = True
+    #
+    # The `launch_browser` boolean variable indicates if a local server is used
+    # as the callback URL in the auth flow. A value of `True` is recommended,
+    # but a local server does not work if accessing the application remotely,
+    # such as over SSH or from a remote Jupyter notebook.
+
     appflow = flow.InstalledAppFlow.from_client_secrets_file(
         'client_secrets.json',
         scopes=['https://www.googleapis.com/auth/bigquery'])
@@ -46,7 +44,33 @@ def authenticate_and_query(project, query, launch_browser=True):
     else:
         appflow.run_console()
 
-    run_query(appflow.credentials, project, query)
+    credentials = appflow.credentials
+    # [END bigquery_auth_user_flow]
+
+    # [START bigquery_auth_user_query]
+    from google.cloud import bigquery
+
+    # TODO: Uncomment the line below to set the `project` variable.
+    # project = 'user-project-id'
+    #
+    # The `project` variable defines the project to be billed for query
+    # processing. The user must have the bigquery.jobs.create permission on
+    # this project to run a query. See:
+    # https://cloud.google.com/bigquery/docs/access-control#permissions
+
+    client = bigquery.Client(project=project, credentials=credentials)
+
+    query_string = """SELECT name, SUM(number) as total
+    FROM `bigquery-public-data.usa_names.usa_1910_current`
+    WHERE name = 'William'
+    GROUP BY name;
+    """
+    query_job = client.query(query_string)
+
+    # Print the results.
+    for row in query_job.result():  # Wait for the job to complete.
+        print("{}: {}".format(row['name'], row['total']))
+    # [END bigquery_auth_user_query]
 
 
 if __name__ == '__main__':
@@ -58,9 +82,8 @@ def authenticate_and_query(project, query, launch_browser=True):
         help='Use a local server flow to authenticate. ',
         action='store_true')
     parser.add_argument('project', help='Project to use for BigQuery billing.')
-    parser.add_argument('query', help='BigQuery SQL Query.')
 
     args = parser.parse_args()
 
-    authenticate_and_query(
-        args.project, args.query, launch_browser=args.launch_browser)
+    main(
+        args.project, launch_browser=args.launch_browser)
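
The refactored sample hard-codes `'William'` in `query_string`. If you later want to vary the name without editing the SQL, a sketch using google-cloud-bigquery query parameters (not part of this sample; `credentials` and `project` are assumed to come from the OAuth flow above) could look like:

```python
# Sketch: passing the name as a query parameter instead of hard-coding it.
# `project` and `credentials` are assumed to come from the OAuth flow above.
from google.cloud import bigquery

client = bigquery.Client(project=project, credentials=credentials)

query_string = """
    SELECT name, SUM(number) AS total
    FROM `bigquery-public-data.usa_names.usa_1910_current`
    WHERE name = @name
    GROUP BY name;
"""
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = [
    bigquery.ScalarQueryParameter('name', 'STRING', 'William'),
]

# Run the parameterized query and print each (name, total) row.
for row in client.query(query_string, job_config=job_config).result():
    print("{}: {}".format(row['name'], row['total']))
```
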

bigquery/cloud-client/user_credentials_test.py

Lines changed: 4 additions & 3 deletions
@@ -18,7 +18,7 @@
 import mock
 import pytest
 
-from user_credentials import authenticate_and_query
+from user_credentials import main
 
 
 PROJECT = os.environ['GCLOUD_PROJECT']
@@ -36,6 +36,7 @@ def mock_flow():
 
 
 def test_auth_query_console(mock_flow, capsys):
-    authenticate_and_query(PROJECT, 'SELECT 1+1;', launch_browser=False)
+    main(PROJECT, launch_browser=False)
     out, _ = capsys.readouterr()
-    assert '2' in out
+    # Fun fact: William P. Wood was the 1st director of the US Secret Service.
+    assert 'William' in out
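
The test depends on the `mock_flow` fixture (visible in the second hunk header) to skip the interactive OAuth step. A minimal sketch of how such a fixture might be written — the patch target and the use of Application Default Credentials are assumptions, not the repository's actual fixture:

```python
# Hypothetical sketch of a `mock_flow`-style fixture: patch InstalledAppFlow
# so no browser/console flow runs, and hand back the environment's
# Application Default Credentials instead.
import mock
import pytest
import google.auth


@pytest.fixture
def mock_flow():
    with mock.patch('google_auth_oauthlib.flow.InstalledAppFlow') as flow_class:
        credentials, _ = google.auth.default()
        # from_client_secrets_file(...) returns a mock "flow" whose
        # .credentials attribute is already populated.
        flow_class.from_client_secrets_file.return_value.credentials = credentials
        yield flow_class
```
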

composer/tools/copy_environment.py

Lines changed: 6 additions & 1 deletion
@@ -33,6 +33,7 @@
 import contextlib
 import json
 import os
+import platform
 import re
 import shutil
 import subprocess
@@ -542,7 +543,11 @@ def import_data(
     if proxy_subprocess:
         proxy_subprocess.kill()
     if fuse_dir:
-        subprocess.call(["fusermount", "-u", fuse_dir])
+        if platform.system().lower().startswith('darwin'):
+            # Mac OSX does not have fusermount
+            subprocess.call(["umount", fuse_dir])
+        else:
+            subprocess.call(["fusermount", "-u", fuse_dir])
     if tmp_dir_name:
         shutil.rmtree(tmp_dir_name)
 
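
For reference, the same platform check can be factored into a small standalone helper. This is only a sketch, not code from `copy_environment.py`, and the function name is hypothetical:

```python
# Standalone sketch of the cross-platform unmount logic added in the diff.
# The helper name `unmount_fuse_dir` is hypothetical, not part of the sample.
import platform
import subprocess


def unmount_fuse_dir(fuse_dir):
    """Unmount a FUSE mount, using `umount` on macOS and `fusermount` elsewhere."""
    if platform.system().lower().startswith('darwin'):
        # macOS ships `umount` but not `fusermount`.
        return subprocess.call(['umount', fuse_dir])
    return subprocess.call(['fusermount', '-u', fuse_dir])
```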

dataproc/python-api-walkthrough.md

Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
+# Use the Python Client Library to call Cloud Dataproc APIs
+
+Estimated completion time: <walkthrough-tutorial-duration duration="5"></walkthrough-tutorial-duration>
+
+## Overview
+
+This [Cloud Shell](https://cloud.google.com/shell/docs/) walkthrough leads you
+through the steps to use the
+[Google APIs Client Library for Python](http://code.google.com/p/google-api-python-client/)
+to programmatically interact with [Cloud Dataproc](https://cloud.google.com/dataproc/docs/).
+
+As you follow this walkthrough, you run Python code that calls
+[Cloud Dataproc REST API](https://cloud.google.com//dataproc/docs/reference/rest/)
+methods to:
+
+* create a Cloud Dataproc cluster
+* submit a small PySpark word sort job to run on the cluster
+* get job status
+* tear down the cluster after job completion
+
+## Using the walkthrough
+
+The `submit_job_to_cluster.py` file used in this walkthrough is opened in the
+Cloud Shell editor when you launch the walkthrough. You can view
+the code as you follow the walkthrough steps.
+
+**For more information**: See [Cloud Dataproc&rarr;Use the Python Client Library](https://cloud.google.com/dataproc/docs/tutorials/python-library-example) for
+an explanation of how the code works.
+
+**To reload this walkthrough:** Run the following command from the
+`~/python-docs-samples/dataproc` directory in Cloud Shell:
+
+    cloudshell launch-tutorial python-api-walkthrough.md
+
+**To copy and run commands**: Click the "Paste in Cloud Shell" button
+(<walkthrough-cloud-shell-icon></walkthrough-cloud-shell-icon>)
+on the side of a code box, then press `Enter` to run the command.
+
+## Prerequisites (1)
+
+1. Create or select a Google Cloud Platform project to use for this tutorial.
+    * <walkthrough-project-billing-setup permissions=""></walkthrough-project-billing-setup>
+
+1. Enable the Cloud Dataproc, Compute Engine, and Cloud Storage APIs in your project.
+    * <walkthrough-enable-apis apis="dataproc,compute_component,storage-component.googleapis.com"></walkthrough-enable-apis>
+
+## Prerequisites (2)
+
+1. This walkthrough uploads a PySpark file (`pyspark_sort.py`) to a
+   [Cloud Storage bucket](https://cloud.google.com/storage/docs/key-terms#buckets) in
+   your project.
+    * You can use the [Cloud Storage browser page](https://console.cloud.google.com/storage/browser)
+      in Google Cloud Platform Console to view existing buckets in your project.
+
+    &nbsp;&nbsp;&nbsp;&nbsp;**OR**
+
+    * To create a new bucket, run the following command. Your bucket name must be unique.
+      ```bash
+      gsutil mb -p {{project-id}} gs://your-bucket-name
+      ```
+
+1. Set environment variables.
+
+    * Set the name of your bucket.
+      ```bash
+      BUCKET=your-bucket-name
+      ```
+
+## Prerequisites (3)
+
+1. Set up a Python
+   [virtual environment](https://virtualenv.readthedocs.org/en/latest/)
+   in Cloud Shell.
+
+    * Create the virtual environment.
+      ```bash
+      virtualenv ENV
+      ```
+    * Activate the virtual environment.
+      ```bash
+      source ENV/bin/activate
+      ```
+
+1. Install library dependencies in Cloud Shell.
+    ```bash
+    pip install -r requirements.txt
+    ```
+
+## Create a cluster and submit a job
+
+1. Set a name for your new cluster.
+    ```bash
+    CLUSTER=new-cluster-name
+    ```
+
+1. Set a [zone](https://cloud.google.com/compute/docs/regions-zones/#available)
+   where your new cluster will be located. You can change the
+   "us-central1-a" zone that is pre-set in the following command.
+    ```bash
+    ZONE=us-central1-a
+    ```
+
+1. Run `submit_job_to_cluster.py` with the `--create_new_cluster` flag
+   to create a new cluster and submit the `pyspark_sort.py` job
+   to the cluster.
+
+    ```bash
+    python submit_job_to_cluster.py \
+        --project_id={{project-id}} \
+        --cluster_name=$CLUSTER \
+        --zone=$ZONE \
+        --gcs_bucket=$BUCKET \
+        --create_new_cluster
+    ```
+
+## Job Output
+
+Job output in Cloud Shell shows cluster creation, job submission,
+job completion, and then tear-down of the cluster.
+
+    ...
+    Creating cluster...
+    Cluster created.
+    Uploading pyspark file to GCS
+    new-cluster-name - RUNNING
+    Submitted job ID ...
+    Waiting for job to finish...
+    Job finished.
+    Downloading output file
+    .....
+    ['Hello,', 'dog', 'elephant', 'panther', 'world!']
+    ...
+    Tearing down cluster
+
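
The bracketed word list in the output above is produced by the `pyspark_sort.py` job. As a rough sketch (the Cloud Storage input path is a placeholder, and this is not necessarily the sample's exact code), a word-sort job of that shape can be as small as:

```python
# Hypothetical sketch of a pyspark_sort.py-style job: read a text file,
# split it into words, and print the sorted word list.
# The gs:// input path is a placeholder.
import pyspark

sc = pyspark.SparkContext()
lines = sc.textFile('gs://your-bucket-name/input/sample.txt')
words = sorted(lines.flatMap(lambda line: line.split()).collect())
print(words)
```
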
+## Congratulations on Completing the Walkthrough!
+<walkthrough-conclusion-trophy></walkthrough-conclusion-trophy>
+
+---
+
+### Next Steps:
+
+* **View job details from the Console.** View job details by selecting the
+  PySpark job from the Cloud Dataproc
+  [Jobs page](https://console.cloud.google.com/dataproc/jobs)
+  in the Google Cloud Platform Console.
+
+* **Delete resources used in the walkthrough.**
+  The `submit_job_to_cluster.py` job deletes the cluster that it created for this
+  walkthrough.
+
+  If you created a bucket to use for this walkthrough,
+  you can run the following command to delete the
+  Cloud Storage bucket (the bucket must be empty).
+    ```bash
+    gsutil rb gs://$BUCKET
+    ```
+  You can run the following command to delete the bucket **and all
+  objects within it. Note: the deleted objects cannot be recovered.**
+    ```bash
+    gsutil rm -r gs://$BUCKET
+    ```
+
+* **For more information.** See the [Cloud Dataproc documentation](https://cloud.google.com/dataproc/docs/)
+  for API reference and product feature information.
+
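
The walkthrough's `submit_job_to_cluster.py` drives cluster creation and job submission through the Google APIs Client Library for Python. A condensed, hypothetical sketch of the cluster-creation call it describes — the project ID, region, zone, and cluster name below are placeholders, not the walkthrough's actual code:

```python
# Condensed sketch of creating a Cloud Dataproc cluster with the
# Google APIs Client Library for Python. Project, region, zone, and
# cluster name are placeholder values; Application Default Credentials
# are assumed to be available.
from googleapiclient import discovery

project_id = 'your-project-id'
region = 'global'
zone = 'us-central1-a'
cluster_name = 'new-cluster-name'

dataproc = discovery.build('dataproc', 'v1')

cluster_data = {
    'projectId': project_id,
    'clusterName': cluster_name,
    'config': {
        'gceClusterConfig': {
            'zoneUri': 'https://www.googleapis.com/compute/v1/projects/'
                       '{}/zones/{}'.format(project_id, zone),
        },
    },
}

# Returns a long-running operation; the cluster is ready once it completes.
operation = dataproc.projects().regions().clusters().create(
    projectId=project_id, region=region, body=cluster_data).execute()
print(operation)
```
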

datastore/cloud-client/snippets.py

Lines changed: 2 additions & 0 deletions
@@ -331,6 +331,8 @@ def ancestor_query(client):
     client.put(task)
 
     # [START datastore_ancestor_query]
+    # Query filters are omitted in this example as any ancestor queries with a
+    # non-key filter require a composite index.
     ancestor = client.key('TaskList', 'default')
     query = client.query(kind='Task', ancestor=ancestor)
     # [END datastore_ancestor_query]
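
To illustrate what the new comment warns about, here is a brief sketch of the same ancestor query with a non-key property filter added. The `done` property is hypothetical, and running this combination against Cloud Datastore requires a composite index:

```python
# Sketch only: an ancestor query combined with a non-key property filter.
# The `done` property is hypothetical; this query needs a composite index.
from google.cloud import datastore

client = datastore.Client()

ancestor = client.key('TaskList', 'default')
query = client.query(kind='Task', ancestor=ancestor)
query.add_filter('done', '=', False)  # non-key filter -> composite index

tasks = list(query.fetch())
```
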
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-google-cloud-firestore==0.30.0
+google-cloud-firestore==0.31.0
