chore: update S3 snippets (windmill-labs#449)

gbouv · web-flow · commit edb6ddecf07d · 2023-12-22T16:28:16.000+01:00
diff --git a/blog/2023-11-24-data-pipeline-orchestrator/index.mdx b/blog/2023-11-24-data-pipeline-orchestrator/index.mdx
@@ -158,16 +158,22 @@ And more to come! With both Windmill providing the boilerplate code, and Polars
 In the end, a canonical pipeline step in Windmill will look something like this:
 
 ```python
+#requirements:
+#polars==0.19.19
+#s3fs==2023.12.0
+#wmill>=1.229.0
+
 import polars as pl
 import s3fs
 import datetime
 import wmill
 
-s3object = dict
-def main(input_dataset: s3object):
+
+def main(input_dataset: S3Object):
     # initialization: connect Polars to the workspace bucket
     s3_resource = wmill.get_resource("/path/to/resource")
-    s3 = s3fs.S3FileSystem(wmill.polars_connection_settings("/path/to/resource")["s3fs_args"])
+    s3fs_args = wmill.polars_connection_settings().s3fs_args
+    s3 = s3fs.S3FileSystem(**s3fs_args)
 
     # reading data from s3:
     bucket = s3_resource["bucket"]
@@ -194,9 +200,7 @@ def main(input_dataset: s3object):
         output.write_parquet(output_dataset)
 
     # returning the URI of the output for next steps to process it
-    return s3object({
-        "s3": output_dataset_uri
-    })
+    return S3Object(s3=output_dataset_uri)
 ```
 
 The example uses Polars. If you're more into SQL you can use DuckDB, but the code will have the same structure: initialization, reading from S3, transforming, writing back to S3.
diff --git a/docs/core_concepts/11_persistent_storage/index.mdx b/docs/core_concepts/11_persistent_storage/index.mdx
@@ -333,18 +333,21 @@ Then from Windmill, just [fill the S3 resource type](../../integrations/s3.md).
 <TabItem value="deno" label="TypeScript (Deno)" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>
 
 ```ts
-import * as wmill from 'npm:windmill-client@1';
+import type { S3Object } from 'npm:windmill-client@^1.229.0';
+import * as wmill from 'npm:windmill-client@^1.229.0';
 import { S3Client } from 'https://deno.land/x/s3_lite_client@0.2.0/mod.ts';
 
-type s3object = object;
+export async function main(inputFile: S3Object) {
+	// this will default to the workspace s3 resource
+	let args = await wmill.denoS3LightClientSettings();
+	// this will use the designated resource
+	// let args = await wmill.denoS3LightClientSettings("<PATH_TO_S3_RESOURCE>");
+	const s3Client = new S3Client(args);
 
-export async function main(inputFile: s3object) {
-	const s3Resource = await wmill.getResource('<PATH_TO_S3_RESOURCE>');
-	const s3Client = new S3Client(s3Resource);
 	const outputFile = 'output/hello.txt';
 
 	// read object from S3
-	const getObjectResponse = await s3Client.getObject(inputFile['s3']);
+	const getObjectResponse = await s3Client.getObject(inputFile.s3);
 	const inputObjContent = await getObjectResponse.text();
 	console.log(inputObjContent);
 
@@ -356,31 +359,35 @@ export async function main(inputFile: s3object) {
 		console.log(obj.key);
 	}
 
-	return {
+	const result: S3Object = {
 		s3: outputFile
 	};
+	return result;
 }
 ```
 
 </TabItem>
 <TabItem value="python" label="Python" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>
 
 ```python
+#requirements:
+#boto3==1.34.4
+#wmill>=1.229.0
+
 import wmill
+from wmill import S3Object
 import boto3
 
-s3object = dict
 
+def main(input_file: S3Object):
+    bucket = wmill.get_resource("<PATH_TO_S3_RESOURCE>")["bucket"]
+
+    # this will default to the workspace s3 resource
+    args = wmill.boto3_connection_settings()
+    # this will use the designated resource
+    # args = wmill.boto3_connection_settings("<PATH_TO_S3_RESOURCE>")
+    s3client = boto3.client("s3", **args)
 
-def main(input_file: s3object):
-    s3_resource = wmill.get_resource("<PATH_TO_S3_RESOURCE>")
-    bucket = s3_resource["bucket"]
-    s3client = boto3.client(
-        "s3",
-        region_name=s3_resource["region"],
-        aws_access_key_id=s3_resource["accessKey"],
-        aws_secret_access_key=s3_resource["secretKey"],
-    )
     output_file = "output/hello.txt"
 
     # read object from S3 and print its content
@@ -406,10 +413,9 @@ def main(input_file: s3object):
     # see https://boto3.amazonaws.com/v1/documentation/api/latest/guide/s3-examples.html
     # and https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html
     # for more code examples (listing object, deleting files, etc)
-
     return [
-        s3object({"s3": output_file}),
-        s3object({"s3": uploaded_file}),
+        S3Object(s3=output_file),
+        S3Object(s3=uploaded_file),
     ]
 ```
 
@@ -426,22 +432,26 @@ You can link a Windmill workspace to an S3 bucket and use it as source and/or ta
 <TabItem value="polars" label="Polars" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>
 
 ```python
+#requirements:
+#polars==0.19.19
+#s3fs==2023.12.0
+#wmill>=1.229.0
+
 import wmill
+from wmill import S3Object
 import polars as pl
 import s3fs
 
-s3object = dict
 
+def main(input_file: S3Object):
+    bucket = wmill.get_resource("u/admin/windmill-cloud-demo")["bucket"]
 
-def main(input_file: s3object):
-    s3 = s3fs.S3FileSystem(
-        # this will default to the workspace s3 resource
-        **wmill.polars_connection_settings()["s3fs_args"]
-        # this will use the designated resource
-        # **wmill.polars_connection_settings("<PATH_TO_S3_RESOURCE>")["s3fs_args"]
-    )
+    # this will default to the workspace s3 resource
+    args = wmill.polars_connection_settings().s3fs_args
+    # this will use the designated resource
+    # args = wmill.polars_connection_settings("<PATH_TO_S3_RESOURCE>").s3fs_args
+    s3 = s3fs.S3FileSystem(**args)
 
-    bucket = "<S3_BUCKET_NAME>"
     input_uri = "s3://{}/{}".format(bucket, input_file["s3"])
     output_file = "output/result.parquet"
     output_uri = "s3://{}/{}".format(bucket, output_file)
@@ -463,29 +473,38 @@ def main(input_file: s3object):
 
         # persist the output dataframe back to S3 and return it
         output_df.write_parquet(output_s3)
-    return s3object({"s3": output_file})
+
+    return S3Object(s3=output_file)
 ```
 
 </TabItem>
 <TabItem value="duckdb" label="DuckDB" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>
 
 ```python
+#requirements:
+#wmill>=1.229.0
+#duckdb==0.9.1
+
 import wmill
+from wmill import S3Object
 import duckdb
 
-s3object = dict
 
+def main(input_file: S3Object):
+    bucket = wmill.get_resource("u/admin/windmill-cloud-demo")["bucket"]
 
-def main(input_file: s3object):
     # create a DuckDB database in memory
     # see https://duckdb.org/docs/api/python/dbapi
     conn = duckdb.connect()
-    # connect duck db to the S3 bucket - this will default to the workspace s3 resource
-    conn.execute(wmill.duckdb_connection_settings()["connection_settings_str"])
+
+    # this will default to the workspace s3 resource
+    args = wmill.duckdb_connection_settings().connection_settings_str
     # this will use the designated resource
-    # conn.execute(wmill.duckdb_connection_settings("<PATH_TO_S3_RESOURCE>")["connection_settings_str"])
+    # args = wmill.duckdb_connection_settings("<PATH_TO_S3_RESOURCE>").connection_settings_str
+
+    # connect DuckDB to the S3 bucket using the connection settings selected above
+    conn.execute(args)
 
-    bucket = "<S3_BUCKET_NAME>"
     input_uri = "s3://{}/{}".format(bucket, input_file["s3"])
     output_file = "output/result.parquet"
     output_uri = "s3://{}/{}".format(bucket, output_file)
@@ -512,7 +531,7 @@ def main(input_file: s3object):
     )
 
     conn.close()
-    return s3object({"s3": output_file})
+    return S3Object(s3=output_file)
 ```
 
 </TabItem>