Skip to content

Commit c68e7e7

Browse files
committed
temporary commit
1 parent 5ec0018 commit c68e7e7

File tree

3 files changed

+266
-0
lines changed

3 files changed

+266
-0
lines changed

dlp/inspect_file.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
17+
18+
# [START inspect_file]
19+
def inspect_file(filename, info_types=None, min_likelihood=None,
                 max_findings=None, include_quote=True, mime_type=None):
    """Inspect a local file for protected data via the Data Loss Prevention API.

    The file is read in full as bytes and sent to the API as a single item;
    every finding in the response is printed to the terminal.

    Args:
        filename: The path to the file to inspect.
        info_types: A list of strings naming the info types to look for,
            e.g. 'US_MALE_NAME', 'US_FEMALE_NAME', 'EMAIL_ADDRESS',
            'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. Info type
            categories can be listed with the client's
            .list_root_categories(language_code) method, and the types in
            a category with .list_info_types(category, language_code).
            If omitted, the API will use a limited default set.
        min_likelihood: A string giving the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY',
            'VERY_LIKELY'.
        max_findings: The maximum number of findings to report; 0 means
            no maximum.
        include_quote: Boolean for whether to display a quote of the
            detected information in the results.
        mime_type: The MIME type of the file. When None, it is guessed
            from the filename with the standard library's mimetypes
            module, falling back to 'application/octet-stream'.

    Returns:
        None; the response from the API is printed to the terminal.
    """
    import mimetypes

    # Import the client library.
    import google.cloud.dlp

    client = google.cloud.dlp.DlpServiceClient()

    # The request takes info types as dictionaries (protos are also
    # accepted), so wrap each name string.
    if info_types is not None:
        info_types = [{'name': name} for name in info_types]

    # Keys whose value is None could equally be left out of this dictionary.
    inspect_config = {
        'info_types': info_types,
        'min_likelihood': min_likelihood,
        'max_findings': max_findings,
        'include_quote': include_quote,
    }

    # Without an explicit MIME type, guess one from the filename.
    if mime_type is None:
        guessed_type, _ = mimetypes.MimeTypes().guess_type(filename)
        mime_type = guessed_type or 'application/octet-stream'

    # Read the whole file as a binary string; it is the only item sent.
    with open(filename, mode='rb') as source:
        payload = source.read()
    items = [{'type': mime_type, 'data': payload}]

    response = client.inspect_content(inspect_config, items)

    # Exactly one item was submitted, so only the first result is reported.
    findings = response.results[0].findings
    if not findings:
        print('No findings.')
        return

    for finding in findings:
        # A finding may not carry a quote; skip that line if it is absent.
        try:
            print('Quote: {}'.format(finding.quote))
        except AttributeError:
            pass
        print('Info type: {}'.format(finding.info_type.name))
        print('Likelihood: {}'.format(finding.likelihood))
88+
# [END inspect_file]
89+
90+
91+
if __name__ == '__main__':
    # Command-line entry point. Both arguments are optional and default to
    # the values this script originally hard-coded, so running it with no
    # arguments behaves exactly as before.
    import argparse

    parser = argparse.ArgumentParser(
        description='Inspect a local file with the Data Loss Prevention API.')
    parser.add_argument(
        'filename', nargs='?',
        default='/usr/local/google/home/gorcester/Downloads/wQOVLom8Gsa.png',
        help='Path to the file to inspect.')
    parser.add_argument(
        '--info_types', nargs='+',
        default=['EMAIL_ADDRESS', 'US_MALE_NAME', 'US_FEMALE_NAME'],
        help='Info types to look for, separated by spaces.')
    args = parser.parse_args()

    inspect_file(args.filename, args.info_types)

dlp/inspect_gcs_file.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
17+
18+
# [START inspect_gcs_file]
19+
def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None,
                     max_findings=None):
    """Uses the Data Loss Prevention API to analyze a file on GCS.

    An inspect operation is created for the object at
    'gs://<bucket>/<filename>'. The call waits for the operation to finish,
    then looks up its findings and prints them to the terminal.

    Args:
        bucket: The name of the GCS bucket containing the file, as a string.
        filename: The name of the file in the bucket, including the path,
            as a string; e.g. 'images/myfile.png'.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API
            with the .list_root_categories(language_code) client method,
            and a list of types in a category with
            .list_info_types(category, language_code). Examples include
            'US_MALE_NAME', 'US_FEMALE_NAME', 'EMAIL_ADDRESS',
            'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. If
            info_types is omitted, the API will use a limited default set.
        min_likelihood: A string representing the minimum likelihood
            threshold that constitutes a match. One of:
            'LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY',
            'POSSIBLE', 'LIKELY', 'VERY_LIKELY'.
        max_findings: The maximum number of findings to report; 0 = no
            maximum.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Prepare info_type by converting the list of strings into a list of
    # dictionaries (protos are also accepted).
    if info_types is not None:
        info_types = [{'name': info_type} for info_type in info_types]

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        'info_types': info_types,
        'min_likelihood': min_likelihood,
        'max_findings': max_findings,
    }

    # Construct a cloud_storage_options dictionary with the file's URL.
    url = 'gs://{}/{}'.format(bucket, filename)
    storage_config = {
        'cloud_storage_options': {
            'file_set': {'url': url},
        },
    }

    # The third positional argument is passed as None, as in the original.
    operation = dlp.create_inspect_operation(inspect_config, storage_config,
                                             None)

    # Get the operation result name, which can be used to look up the full
    # results. This call blocks until the operation is complete; to avoid
    # blocking, use operation.add_done_callback(fn) instead.
    operation_result = operation.result()

    response = dlp.list_inspect_findings(operation_result.name)

    # TODO: the response shape used below (response.result.findings) has not
    # been confirmed against a real results object; verify before relying
    # on this output.
    if response.result.findings:
        for finding in response.result.findings:
            # A finding may not carry a quote; skip that line if absent.
            try:
                print('Quote: {}'.format(finding.quote))
            except AttributeError:
                pass
            print('Info type: {}'.format(finding.info_type.name))
            print('Likelihood: {}'.format(finding.likelihood))
    else:
        print('No findings.')
89+
# [END inspect_gcs_file]
90+
91+
if __name__ == '__main__':
    # Command-line entry point. All arguments are optional and default to
    # the values this script originally hard-coded, so running it with no
    # arguments behaves exactly as before.
    import argparse

    parser = argparse.ArgumentParser(
        description='Inspect a file on Google Cloud Storage with the Data '
                    'Loss Prevention API.')
    parser.add_argument(
        'bucket', nargs='?', default='andrewsg-test',
        help='Name of the GCS bucket containing the file.')
    parser.add_argument(
        'filename', nargs='?', default='wQOVLom8Gsa.png',
        help='Name of the file in the bucket, including the path.')
    parser.add_argument(
        '--info_types', nargs='+',
        default=['EMAIL_ADDRESS', 'US_MALE_NAME', 'US_FEMALE_NAME'],
        help='Info types to look for, separated by spaces.')
    args = parser.parse_args()

    inspect_gcs_file(args.bucket, args.filename, args.info_types)

dlp/inspect_string.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
17+
18+
# [START inspect_string]
19+
def inspect_string(item, info_types=None, min_likelihood=None,
                   max_findings=None, include_quote=True):
    """Inspect a string for protected data via the Data Loss Prevention API.

    The string is sent to the API as a single 'text/plain' item; every
    finding in the response is printed to the terminal.

    Args:
        item: The string to inspect.
        info_types: A list of strings naming the info types to look for,
            e.g. 'US_MALE_NAME', 'US_FEMALE_NAME', 'EMAIL_ADDRESS',
            'CANADA_SOCIAL_INSURANCE_NUMBER', 'JAPAN_PASSPORT'. Info type
            categories can be listed with the client's
            .list_root_categories(language_code) method, and the types in
            a category with .list_info_types(category, language_code).
            If omitted, the API will use a limited default set.
        min_likelihood: A string giving the minimum likelihood threshold
            that constitutes a match. One of: 'LIKELIHOOD_UNSPECIFIED',
            'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE', 'LIKELY',
            'VERY_LIKELY'.
        max_findings: The maximum number of findings to report; 0 means
            no maximum.
        include_quote: Boolean for whether to display a quote of the
            detected information in the results.

    Returns:
        None; the response from the API is printed to the terminal.
    """
    # Import the client library.
    import google.cloud.dlp

    client = google.cloud.dlp.DlpServiceClient()

    # The request takes info types as dictionaries (protos are also
    # accepted), so wrap each name string.
    if info_types is not None:
        info_types = [{'name': name} for name in info_types]

    # Keys whose value is None could equally be left out of this dictionary.
    inspect_config = {
        'info_types': info_types,
        'min_likelihood': min_likelihood,
        'max_findings': max_findings,
        'include_quote': include_quote,
    }

    # A one-element items list: the input string, tagged as plain text.
    text_item = {'type': 'text/plain', 'value': item}
    items = [text_item]

    response = client.inspect_content(inspect_config, items)

    # Exactly one item was submitted, so only the first result is reported.
    findings = response.results[0].findings
    if not findings:
        print('No findings.')
        return

    for finding in findings:
        # A finding may not carry a quote; skip that line if it is absent.
        try:
            print('Quote: {}'.format(finding.quote))
        except AttributeError:
            pass
        print('Info type: {}'.format(finding.info_type.name))
        print('Likelihood: {}'.format(finding.likelihood))
78+
# [END inspect_string]
79+
80+
81+
if __name__ == '__main__':
    # Command-line entry point. Both arguments are optional and default to
    # the values this script originally hard-coded, so running it with no
    # arguments behaves exactly as before.
    import argparse

    parser = argparse.ArgumentParser(
        description='Inspect a string with the Data Loss Prevention API.')
    parser.add_argument(
        'item', nargs='?',
        default="I'm Gary and my email is gary@example.com",
        help='The string to inspect.')
    parser.add_argument(
        '--info_types', nargs='+',
        default=['EMAIL_ADDRESS', 'US_MALE_NAME', 'US_FEMALE_NAME'],
        help='Info types to look for, separated by spaces.')
    args = parser.parse_args()

    inspect_string(args.item, args.info_types)

0 commit comments

Comments
 (0)