
Commit 3bc8141

Custom encryption not EE, read S3 file, end discount for Team plan (windmill-labs#529)
* Custom encryption not EE, read S3 file, end discount for Team plan
* Fix build
1 parent 284f685 commit 3bc8141

File tree

11 files changed: +966 -155 lines


docs/core_concepts/11_persistent_storage/index.mdx

Lines changed: 262 additions & 3 deletions
@@ -238,7 +238,7 @@ For best performance, [install MinIO locally](https://min.io/docs/minio/kubernet
[MinIO](https://min.io/) is an open-source, high-performance, and scalable object storage server that is compatible with Amazon S3 APIs, designed for building private and public cloud storage solutions.

- Then from Windmill, just [fill the S3 resource type](../../integrations/s3.md).
+ Then from Windmill, just [fill the S3 resource type](../../integrations/s3.mdx).
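
The resource only captures connection details. As a rough sketch, an S3 resource pointed at a local MinIO server might contain something like the following (field names follow Windmill's `s3` resource type; all values here are hypothetical — verify them against your instance):

```python
# Hypothetical connection details for a local MinIO server. Field names
# follow Windmill's s3 resource type; check them against your instance.
minio_s3_resource = {
    "bucket": "windmill",
    "region": "us-east-1",         # MinIO accepts an arbitrary region value
    "endPoint": "localhost:9000",  # MinIO's S3-compatible API endpoint
    "useSSL": False,
    "accessKey": "minioadmin",
    "secretKey": "minioadmin",
    "pathStyle": True,             # MinIO is typically addressed path-style
}
```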

#### Azure Blob

@@ -254,7 +254,7 @@ Then from Windmill, just [fill the S3 resource type](../../integrations/s3.md).
### Connect your Windmill workspace to your S3 bucket or your Azure Blob storage

- Once you've created an [S3 or Azure Blob resource](../../integrations/s3.md) in Windmill, go to the workspace settings > S3 Storage. Select the resource and click Save.
+ Once you've created an [S3 or Azure Blob resource](../../integrations/s3.mdx) in Windmill, go to the workspace settings > S3 Storage. Select the resource and click Save.

![S3 storage workspace settings](./workspace_settings.png)
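
Scripts can read the saved resource back through the Windmill client. A minimal sketch, assuming a resource stored at the hypothetical path `u/user/my_s3_resource`:

```python
import wmill


def main():
    # Hypothetical resource path; use the path of the resource you saved.
    s3_resource = wmill.get_resource("u/user/my_s3_resource")
    # The resource holds the connection details, e.g. the target bucket.
    return s3_resource["bucket"]
```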

@@ -275,7 +275,266 @@ When a script outputs a S3 file, it can be downloaded or previewed directly in W
![S3 file download](./file_download.png)

- For more info on how to use files and S3 files in Windmill, see [Handling files and binary data](/docs/core_concepts/files_binary_data)

#### Read a file from S3 within a script

<Tabs className="unique-tabs">

<TabItem value="bun" label="TypeScript (Bun)" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>

```ts
import * as wmill from 'windmill-client';
import { S3Object } from 'windmill-client';

export async function main(input_file: S3Object) {
  // Load the entire file_content as a Uint8Array
  const file_content = await wmill.loadS3File(input_file);

  const decoder = new TextDecoder();
  const file_content_str = decoder.decode(file_content);
  console.log(file_content_str);

  // Or load the file lazily as a Blob
  let fileContentBlob = await wmill.loadS3FileStream(input_file);
  console.log(await fileContentBlob.text());
}
```

</TabItem>

<TabItem value="deno" label="TypeScript (Deno)" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>

```ts
import * as wmill from 'npm:windmill-client@1.253.7';
import { S3Object } from 'npm:windmill-client@1.253.7';

export async function main(input_file: S3Object) {
  // Load the entire file_content as a Uint8Array
  const file_content = await wmill.loadS3File(input_file);

  const decoder = new TextDecoder();
  const file_content_str = decoder.decode(file_content);
  console.log(file_content_str);

  // Or load the file lazily as a Blob
  let fileContentBlob = await wmill.loadS3FileStream(input_file);
  console.log(await fileContentBlob.text());
}
```

</TabItem>

<TabItem value="python" label="Python" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>

```python
#requirements:
#wmill>=1.251.7
import wmill
from wmill import S3Object


def main(input_file: S3Object):
    # Load the entire file_content as a bytes array
    file_content = wmill.load_s3_file(input_file)
    print(file_content.decode('utf-8'))

    # Or load the file lazily as a BufferedReader:
    with wmill.load_s3_file_reader(input_file) as file_reader:
        print(file_reader.read())
```

</TabItem>
</Tabs>

![Read S3 file](../18_files_binary_data/s3_file_input.png)
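
The `input_file` argument above is a thin wrapper around the object's key inside the workspace bucket. As a minimal sketch, constructing one by hand (the key is hypothetical) looks like:

```python
from wmill import S3Object

# An S3Object only carries the key of the object within the workspace bucket.
input_file = S3Object(s3="path/to/input_file.txt")
```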

#### Create a file in S3 within a script

<Tabs className="unique-tabs">

<TabItem value="bun" label="TypeScript (Bun)" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>

```ts
import * as wmill from 'windmill-client';
import { S3Object } from 'windmill-client';

export async function main(s3_file_path: string) {
  const s3_file_output: S3Object = {
    s3: s3_file_path
  };

  const file_content = 'Hello Windmill!';
  // file_content can be either a string or ReadableStream<Uint8Array>
  await wmill.writeS3File(s3_file_output, file_content);
  return s3_file_output;
}
```

</TabItem>

<TabItem value="deno" label="TypeScript (Deno)" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>

```ts
import * as wmill from 'npm:windmill-client@1.253.7';
import { S3Object } from 'npm:windmill-client@1.253.7';

export async function main(s3_file_path: string) {
  const s3_file_output: S3Object = {
    s3: s3_file_path
  };

  const file_content = 'Hello Windmill!';
  // file_content can be either a string or ReadableStream<Uint8Array>
  await wmill.writeS3File(s3_file_output, file_content);
  return s3_file_output;
}
```

</TabItem>

<TabItem value="python" label="Python" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>

```python
#requirements:
#wmill>=1.251.7
import wmill
from wmill import S3Object


def main(s3_file_path: str):
    s3_file_output = S3Object(s3=s3_file_path)

    file_content = b"Hello Windmill!"
    # file_content can be either bytes or a BufferedReader
    wmill.write_s3_file(s3_file_output, file_content)
    return s3_file_output
```

</TabItem>
</Tabs>

![Write to S3 file](../18_files_binary_data/s3_file_output.png)

:::info
Certain file types, typically parquet files, can be directly rendered by Windmill.
:::

For more info on how to use files and S3 files in Windmill, see [Handling files and binary data](/docs/core_concepts/files_binary_data).

### Windmill embedded integration with Polars and DuckDB for data pipelines

ETLs can be easily implemented in Windmill using its integration with Polars and DuckDB, which facilitates working with tabular data. In this case, you don't need to manually interact with the S3 bucket; Polars/DuckDB does it natively and efficiently. Reading and writing datasets to S3 can be done seamlessly.

<Tabs className="unique-tabs">
<TabItem value="polars" label="Polars" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>

```python
#requirements:
#polars==0.20.2
#s3fs==2023.12.0
#wmill>=1.229.0

import wmill
from wmill import S3Object
import polars as pl
import s3fs


def main(input_file: S3Object):
    bucket = wmill.get_resource("<PATH_TO_S3_RESOURCE>")["bucket"]

    # this will default to the workspace s3 resource
    storage_options = wmill.polars_connection_settings().storage_options
    # this will use the designated resource
    # storage_options = wmill.polars_connection_settings("<PATH_TO_S3_RESOURCE>").storage_options

    # input is a parquet file, we use read_parquet in lazy mode.
    # Polars can read various file types, see
    # https://pola-rs.github.io/polars/py-polars/html/reference/io.html
    input_uri = "s3://{}/{}".format(bucket, input_file["s3"])
    input_df = pl.read_parquet(input_uri, storage_options=storage_options).lazy()

    # process the Polars dataframe. See Polars docs:
    # for dataframe: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/index.html
    # for lazy dataframe: https://pola-rs.github.io/polars/py-polars/html/reference/lazyframe/index.html
    output_df = input_df.collect()
    print(output_df)

    # To write back the result to S3, Polars needs an s3fs connection
    s3 = s3fs.S3FileSystem(**wmill.polars_connection_settings().s3fs_args)
    output_file = "output/result.parquet"
    output_uri = "s3://{}/{}".format(bucket, output_file)
    with s3.open(output_uri, mode="wb") as output_s3:
        # persist the output dataframe back to S3 and return it
        output_df.write_parquet(output_s3)

    return S3Object(s3=output_file)
```

</TabItem>
<TabItem value="duckdb" label="DuckDB" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>
473+
474+
```python
475+
#requirements:
476+
#wmill>=1.229.0
477+
#duckdb==0.9.1
478+
479+
import wmill
480+
from wmill import S3Object
481+
import duckdb
482+
483+
484+
def main(input_file: S3Object):
485+
bucket = wmill.get_resource("u/admin/windmill-cloud-demo")["bucket"]
486+
487+
# create a DuckDB database in memory
488+
# see https://duckdb.org/docs/api/python/dbapi
489+
conn = duckdb.connect()
490+
491+
# this will default to the workspace s3 resource
492+
args = wmill.duckdb_connection_settings().connection_settings_str
493+
# this will use the designated resource
494+
# args = wmill.duckdb_connection_settings("<PATH_TO_S3_RESOURCE>").connection_settings_str
495+
496+
# connect duck db to the S3 bucket - this will default to the workspace s3 resource
497+
conn.execute(args)
498+
499+
input_uri = "s3://{}/{}".format(bucket, input_file["s3"])
500+
output_file = "output/result.parquet"
501+
output_uri = "s3://{}/{}".format(bucket, output_file)
502+
503+
# Run queries directly on the parquet file
504+
query_result = conn.sql(
505+
"""
506+
SELECT * FROM read_parquet('{}')
507+
""".format(
508+
input_uri
509+
)
510+
)
511+
query_result.show()
512+
513+
# Write the result of a query to a different parquet file on S3
514+
conn.execute(
515+
"""
516+
COPY (
517+
SELECT COUNT(*) FROM read_parquet('{input_uri}')
518+
) TO '{output_uri}' (FORMAT 'parquet');
519+
""".format(
520+
input_uri=input_uri, output_uri=output_uri
521+
)
522+
)
523+
524+
conn.close()
525+
return S3Object(s3=output_file)
526+
```
527+
528+
</TabItem>
529+
</Tabs>
530+

:::info

Polars and DuckDB need to be configured to access S3 within the Windmill script. The job needs access to the S3 resource, which must either be accessible to the user running the job or be [set as public in the workspace settings](/docs/core_concepts/persistent_storage#connect-your-windmill-workspace-to-your-s3-bucket-or-your-azure-blob-storage).

:::

For more info on Data Pipelines in Windmill, see [Data Pipelines](../27_data_pipelines/index.mdx).

## Structured Databases: Postgres (Supabase, Neon.tech)

docs/core_concepts/18_files_binary_data/index.mdx

Lines changed: 2 additions & 2 deletions
@@ -193,7 +193,7 @@ Certain file types, typically parquet files, can be directly rendered by Windmil
### Windmill embedded integration with Polars and DuckDB for data pipelines

- ETL can be easily implemented in Windmill using its integration with Polars and DuckDB for facilitate working with tabular data. In this case, you don't need to manually interact with the S3 bucket, Polars/DuckDB does it natively and in a efficient way. Reading and Writing datasets to S3 can be done seamlessly.
+ ETLs can be easily implemented in Windmill using its integration with Polars and DuckDB, which facilitates working with tabular data. In this case, you don't need to manually interact with the S3 bucket; Polars/DuckDB does it natively and efficiently. Reading and writing datasets to S3 can be done seamlessly.

<Tabs className="unique-tabs">
<TabItem value="polars" label="Polars" attributes={{className: "text-xs p-4 !mt-0 !ml-0"}}>
@@ -303,7 +303,7 @@ def main(input_file: S3Object):
:::info

- Polars and DuckDB needs to be configured to access S3 within the Windmill script. The job will need to accessed the S3 resources, which either needs to be accessible to the user running the job, or the S3 resource needs to be [set as public in the workspace settings](/docs/core_concepts/persistent_storage#connect-your-windmill-workspace-to-your-s3-bucket-or-your-azure-blob-storage).
+ Polars and DuckDB need to be configured to access S3 within the Windmill script. The job needs access to the S3 resource, which must either be accessible to the user running the job or be [set as public in the workspace settings](/docs/core_concepts/persistent_storage#connect-your-windmill-workspace-to-your-s3-bucket-or-your-azure-blob-storage).

:::
