add label_text test

dizcology · dizcology · commit 6e96d54b6e99 · 2019-04-04T12:33:56.000-07:00
diff --git a/datalabeling/label_text.py b/datalabeling/label_text.py
@@ -18,53 +18,58 @@
 
 
 # [START datalabeling_label_text_beta]
-def label_text(dataset_resource_name, instruction_resource_name,
-                             annotation_spec_set_resource_name):
+def label_text(dataset_resource_name, instruction_resource_name, annotation_spec_set_resource_name):
     """Labels a text dataset."""
     from google.cloud import datalabeling_v1beta1 as datalabeling
     client = datalabeling.DataLabelingServiceClient()
 
     basic_config = datalabeling.types.HumanAnnotationConfig(
-            instruction=instruction_resource_name,
-            annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME',
-            label_group='YOUR_LABEL_GROUP',
-            replica_count=1)
+        instruction=instruction_resource_name,
+        annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME',
+        label_group='YOUR_LABEL_GROUP',
+        replica_count=1
+    )
 
     feature = datalabeling.enums.LabelTextRequest.Feature.TEXT_ENTITY_EXTRACTION
 
     text_entity_extraction_config = datalabeling.types.TextEntityExtractionConfig(
-            annotation_spec_set=annotation_spec_set_resource_name)
+        annotation_spec_set=annotation_spec_set_resource_name)
 
     response = client.label_text(
-            dataset_resource_name,
-            basic_config,
-            feature,
-            text_entity_extraction_config=text_entity_extraction_config)
+        dataset_resource_name,
+        basic_config,
+        feature,
+        text_entity_extraction_config=text_entity_extraction_config
+    )
 
     print('Label_text operation name: {}'.format(response.operation.name))
     return response
 # [END datalabeling_label_text_beta]
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
-            description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
 
     parser.add_argument(
-            '--dataset-resource-name',
-            help='Dataset resource name. Required.',
-            required=True)
+        '--dataset-resource-name',
+        help='Dataset resource name. Required.',
+        required=True
+    )
 
     parser.add_argument(
-            '--instruction-resource-name',
-            help='Instruction resource name. Required.',
-            required=True)
+        '--instruction-resource-name',
+        help='Instruction resource name. Required.',
+        required=True
+    )
 
     parser.add_argument(
-            '--annotation-spec-set-resource-name',
-            help='Annotation spec set resource name. Required.',
-            required=True)
+        '--annotation-spec-set-resource-name',
+        help='Annotation spec set resource name. Required.',
+        required=True
+    )
 
     args = parser.parse_args()
 
-    label_text(args.dataset_resource_name, args.instruction_resource_name,
-                         args.annotation_spec_set_resource_name)
+    label_text(args.dataset_resource_name, args.instruction_resource_name, args.annotation_spec_set_resource_name)
diff --git a/datalabeling/label_text_test.py b/datalabeling/label_text_test.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+# Copyright 2019 Google, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pytest
+
+from google.cloud import datalabeling_v1beta1 as datalabeling
+import create_annotation_spec_set
+import create_instruction
+import import_data
+import label_text
+import manage_dataset
+
+PROJECT_ID = os.getenv('GCLOUD_PROJECT')
+
+
+@pytest.fixture(scope='function')
+def dataset():
+    # Create a temporary dataset; PROJECT_ID is read from GCLOUD_PROJECT.
+    dataset = manage_dataset.create_dataset(PROJECT_ID)
+
+    # Import the sample text CSV from Cloud Storage into the new dataset.
+    import_data.import_data(dataset.name, 'TEXT',
+        'gs://cloud-samples-data/datalabeling/text/text_dataset.csv')
+
+    yield dataset
+
+    # Tear down: delete the temporary dataset once the test has finished.
+    manage_dataset.delete_dataset(dataset.name)
+
+
+@pytest.fixture(scope='function')
+def annotation_spec_set():
+    # Create a temporary annotation spec set; PROJECT_ID is read from GCLOUD_PROJECT.
+    annotation_spec_set = create_annotation_spec_set.create_annotation_spec_set(PROJECT_ID)
+
+    yield annotation_spec_set
+
+    # Tear down: delete the temporary annotation spec set with a fresh client.
+    client = datalabeling.DataLabelingServiceClient()
+    client.delete_annotation_spec_set(annotation_spec_set.name)
+
+
+@pytest.fixture(scope='function')
+def instruction():
+    # Create a temporary instruction from the sample PDF in Cloud Storage.
+    instruction = create_instruction.create_instruction(
+            PROJECT_ID, 'TEXT',
+            'gs://cloud-samples-data/datalabeling/instruction/test.pdf')
+
+    yield instruction
+
+    # Tear down: delete the temporary instruction with a fresh client.
+    client = datalabeling.DataLabelingServiceClient()
+    client.delete_instruction(instruction.name)
+
+
+# dataset is passed as the last argument to test_label_text because it needs
+# to be deleted before the annotation_spec_set can be deleted.
+@pytest.mark.slow
+def test_label_text(capsys, annotation_spec_set, instruction, dataset):
+    """Start a text labeling operation, check its output, then cancel it."""
+    # Start labeling.
+    response = label_text.label_text(dataset.name, instruction.name, annotation_spec_set.name)
+    out, _ = capsys.readouterr()
+    assert 'Label_text operation name: ' in out
+    operation_name = response.operation.name
+
+    # Cancels the labeling operation.
+    response.cancel()
+    assert response.cancelled()
+    # Also request cancellation by name through the operations client.
+    client = datalabeling.DataLabelingServiceClient()
+    client.transport._operations_client.cancel_operation(operation_name)