Skip to content

Commit f8720a4

Browse files
committed
update import data and test
1 parent 0bc6269 commit f8720a4

File tree

4 files changed

+60
-43
lines changed

4 files changed

+60
-43
lines changed

datalabeling/export_data.py

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,45 +19,51 @@
1919

2020
# [START datalabeling_export_data_beta]
2121
def export_data(dataset_resource_name, annotated_dataset_resource_name,
22-
export_gcs_uri):
22+
export_gcs_uri):
2323
"""Exports a dataset from the given Google Cloud project."""
2424
from google.cloud import datalabeling_v1beta1 as datalabeling
2525
client = datalabeling.DataLabelingServiceClient()
2626

2727
gcs_destination = datalabeling.types.GcsDestination(
28-
output_uri=export_gcs_uri, mime_type='text/csv')
28+
output_uri=export_gcs_uri, mime_type='text/csv')
2929

3030
output_config = datalabeling.types.OutputConfig(
31-
gcs_destination=gcs_destination)
31+
gcs_destination=gcs_destination)
3232

33-
response = client.export_data(dataset_resource_name,
34-
annotated_dataset_resource_name, output_config)
33+
response = client.export_data(dataset_resource_name, annotated_dataset_resource_name, output_config)
3534

3635
print('Dataset ID: {}\n'.format(response.result().dataset))
3736
print('Output config:')
3837
print('\tGcs destination:')
3938
print('\t\tOutput URI: {}\n'.format(
40-
response.result().output_config.gcs_destination.output_uri))
39+
response.result().output_config.gcs_destination.output_uri))
4140
# [END datalabeling_export_data_beta]
4241

4342
if __name__ == '__main__':
4443
parser = argparse.ArgumentParser(
45-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
44+
description=__doc__,
45+
formatter_class=argparse.RawDescriptionHelpFormatter
46+
)
4647

4748
parser.add_argument(
48-
'--dataset-resource-name',
49-
help='Dataset resource name. Required.',
50-
required=True)
49+
'--dataset-resource-name',
50+
help='Dataset resource name. Required.',
51+
required=True
52+
)
5153

5254
parser.add_argument(
53-
'--annotated-dataset-resource-name',
54-
help='Annotated Dataset resource name. Required.',
55-
required=True)
55+
'--annotated-dataset-resource-name',
56+
help='Annotated Dataset resource name. Required.',
57+
required=True
58+
)
5659

5760
parser.add_argument(
58-
'--export-gcs-uri', help='The export GCS URI. Required.', required=True)
61+
'--export-gcs-uri',
62+
help='The export GCS URI. Required.',
63+
required=True
64+
)
5965

6066
args = parser.parse_args()
6167

62-
export_data(args.dataset_resource_name, args.annotated_dataset_resource_name,
63-
args.export_gcs_uri)
68+
export_data(args.dataset_resource_name,
69+
args.annotated_dataset_resource_name, args.export_gcs_uri)

datalabeling/import_data.py

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,35 +24,44 @@ def import_data(dataset_resource_name, data_type, input_gcs_uri):
2424
client = datalabeling.DataLabelingServiceClient()
2525

2626
gcs_source = datalabeling.types.GcsSource(
27-
input_uri=input_gcs_uri, mime_type='text/csv')
27+
input_uri=input_gcs_uri, mime_type='text/csv')
2828

2929
image_csv_input_config = datalabeling.types.InputConfig(
30-
data_type=data_type, gcs_source=gcs_source)
30+
data_type=data_type, gcs_source=gcs_source)
3131

3232
response = client.import_data(dataset_resource_name, image_csv_input_config)
3333

34+
result = response.result()
35+
3436
# The format of resource name: projects/{project_id}/datasets/{dataset_id}
35-
print('Dataset resource name: {}\n'.format(response.result().dataset))
37+
print('Dataset resource name: {}\n'.format(result.dataset))
38+
39+
return result
3640
# [END datalabeling_import_data_beta]
3741

3842
if __name__ == '__main__':
3943
parser = argparse.ArgumentParser(
40-
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
44+
description=__doc__,
45+
formatter_class=argparse.RawDescriptionHelpFormatter
46+
)
4147

4248
parser.add_argument(
43-
'--dataset-resource-name',
44-
help='Dataset resource name. Required.',
45-
required=True)
49+
'--dataset-resource-name',
50+
help='Dataset resource name. Required.',
51+
required=True
52+
)
4653

4754
parser.add_argument(
48-
'--data-type',
49-
help='Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.',
50-
required=True)
55+
'--data-type',
56+
help='Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.',
57+
required=True
58+
)
5159

5260
parser.add_argument(
53-
'--input-gcs-uri',
54-
help='The GCS URI of the input dataset. Required.',
55-
required=True)
61+
'--input-gcs-uri',
62+
help='The GCS URI of the input dataset. Required.',
63+
required=True
64+
)
5665

5766
args = parser.parse_args()
5867

datalabeling/import_data_test.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,22 @@
2121
import manage_dataset
2222

2323
PROJECT_ID = os.getenv('GCLOUD_PROJECT')
24+
INPUT_GCS_URI = 'gs://cloud-samples-data/datalabeling/image/image_dataset.csv'
2425

2526

26-
@pytest.mark.slow
27-
def test_import_data(capsys):
28-
# Generates a dataset_resource_name.
29-
manage_dataset.create_dataset(PROJECT_ID)
30-
out, _ = capsys.readouterr()
31-
create_dataset_output = out.splitlines()
32-
dataset_resource_name = create_dataset_output[0].split()[4]
27+
@pytest.fixture(scope='function')
28+
def dataset():
29+
# create a temporary dataset
30+
dataset = manage_dataset.create_dataset(PROJECT_ID)
31+
32+
yield dataset
33+
34+
# tear down
35+
manage_dataset.delete_dataset(dataset.name)
3336

34-
# Starts to test the import_data.
35-
import_data.import_data(
36-
dataset_resource_name, 'IMAGE',
37-
'gs://cloud-samples-data/datalabeling/image/image_dataset.csv')
37+
38+
@pytest.mark.slow
39+
def test_import_data(capsys, dataset):
40+
import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)
3841
out, _ = capsys.readouterr()
3942
assert 'Dataset resource name: ' in out
40-
41-
# Deletes the created dataset.
42-
manage_dataset.delete_dataset(dataset_resource_name)

datalabeling/manage_dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def create_dataset(project_id):
3838
print('Create time:')
3939
print('\tseconds: {}'.format(response.create_time.seconds))
4040
print('\tnanos: {}'.format(response.create_time.nanos))
41+
42+
return response
4143
# [END datalabeling_create_dataset_beta]
4244

4345

0 commit comments

Comments
 (0)