|
| 1 | +# Copyright 2017 Google Inc. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +from __future__ import print_function |
| 16 | + |
| 17 | + |
| 18 | +# [START inspect_gcs_file] |
def inspect_gcs_file(bucket, filename, info_types=None, min_likelihood=None,
                     max_findings=None):
    """Uses the Data Loss Prevention API to analyze a file stored on GCS.

    The file is addressed as 'gs://<bucket>/<filename>'. An inspect operation
    is created for it, the call blocks until that operation completes, and
    the findings are then listed and printed.

    Args:
        bucket: The name of the GCS bucket containing the file, as a string.
        filename: The name of the file in the bucket, including the path, as
            a string; e.g. 'images/myfile.png'.
        info_types: A list of strings representing info types to look for.
            A full list of info type categories can be fetched from the API
            with the .list_root_categories(language_code) client method, and
            a list of types in a category with .list_info_types(category,
            language_code). Examples include 'US_MALE_NAME', 'US_FEMALE_NAME',
            'EMAIL_ADDRESS', 'CANADA_SOCIAL_INSURANCE_NUMBER',
            'JAPAN_PASSPORT'. If info_types is omitted, the API will use a
            limited default set.
        min_likelihood: A string representing the minimum likelihood
            threshold that constitutes a match. One of:
            'LIKELIHOOD_UNSPECIFIED', 'VERY_UNLIKELY', 'UNLIKELY', 'POSSIBLE',
            'LIKELY', 'VERY_LIKELY'.
        max_findings: The maximum number of findings to report; 0 = no
            maximum.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp.DlpServiceClient()

    # Prepare info_types by converting the list of strings into a list of
    # dictionaries (protos are also accepted).
    if info_types is not None:
        info_types = [{'name': info_type} for info_type in info_types]

    # Construct the configuration dictionary. Keys which are None may
    # optionally be omitted entirely.
    inspect_config = {
        'info_types': info_types,
        'min_likelihood': min_likelihood,
        'max_findings': max_findings,
    }

    # Construct a cloud_storage_options dictionary with the file's URL.
    url = 'gs://{}/{}'.format(bucket, filename)
    storage_config = {
        'cloud_storage_options': {
            'file_set': {'url': url},
        },
    }

    # The third positional argument is deliberately None (presumably the
    # output configuration -- confirm against the client library reference).
    operation = dlp.create_inspect_operation(inspect_config, storage_config,
                                             None)

    # Get the operation result name, which can be used to look up the full
    # results. This call blocks until the operation is complete; to avoid
    # blocking, use operation.add_done_callback(fn) instead.
    operation_result = operation.result()

    response = dlp.list_inspect_findings(operation_result.name)

    # NOTE(review): carried over from the original "DO NOT SUBMIT" TODO --
    # a results object had not yet been obtained successfully, so the shape
    # assumed here (response.result.findings) still needs to be verified.
    findings = response.result.findings
    if findings:
        for finding in findings:
            # Best effort: a finding without a 'quote' attribute is still
            # reported, just without the quote line. Only the attribute
            # access is guarded so unrelated AttributeErrors are not hidden.
            try:
                quote = finding.quote
            except AttributeError:
                pass
            else:
                print('Quote: {}'.format(quote))
            print('Info type: {}'.format(finding.info_type.name))
            print('Likelihood: {}'.format(finding.likelihood))
    else:
        print('No findings.')
| 89 | +# [END inspect_gcs_file] |
| 90 | + |
if __name__ == '__main__':
    # Sample invocation using a hard-coded bucket, object name and info types.
    sample_info_types = ['EMAIL_ADDRESS', 'US_MALE_NAME', 'US_FEMALE_NAME']
    inspect_gcs_file('andrewsg-test', 'wQOVLom8Gsa.png', sample_info_types)
0 commit comments