Share via


Copy an object from Amazon S3 to Azure Blob Storage

1. Introduction: StartCopyFromBlob

The Microsoft Azure Storage team has introduced an improvement in asynchronous copy blob https://blogs.msdn.com/b/windowsazurestorage/archive/2012/06/12/introducing-asynchronous-cross-account-copy-blob.aspx

Making copy asynchronous is a major change that greatly differs from previous versions. Previously, the Azure Blob service returned a successful response to the user only when the copy operation had completed. With the new version, the Azure Blob service instead schedules the copy operation to be completed asynchronously: a success response only indicates that the copy operation has been successfully scheduled. In our example we focus on Block Blobs only. Asynchronous blob copy is implemented through StartCopyFromBlob, a method of the CloudBlockBlob class belonging to the library Microsoft.WindowsAzure.Storage.Blob

When you call StartCopyFromBlob operation on a blob, Blob Storage Service puts the copy operation in a queue; since it's an asynchronous operation you wouldn't know when it will be processed.

The progress of the copy can be tracked by polling the CopyState property of CloudBlockBlob.

The Status of CopyState returns one of the following values:

Aborted: The copy operation has been aborted.
Failed: The copy operation encountered an error.
Invalid: The copy status is invalid.
Pending: The copy operation is pending.
Success: The copy operation succeeded.

With asynchronous copy, we can copy content from Amazon S3 to Azure blob basically with the logic:

  1. get the URL of the Amazon S3 source object.
  2. start a StartCopyFromBlob operation targeting a blob in the destination Azure blob container.
  3. track the copy status; if it has failed or has been aborted, start a new copy operation.

 

2. Copy from Amazon S3 to Azure Block Blob in .NET

A blog post has been written on how to copy from Amazon S3 to Azure blob in .NET. https://gauravmantri.com/2012/06/14/how-to-copy-an-object-from-amazon-s3-to-windows-azure-blob-storage-using-copy-blob/

I use roughly the same code, running in a Visual Studio C# console application and updated to run on the current Azure Storage .NET SDK.

using System;

using System.Collections.Generic;

using System.Linq;

using System.Text;

using Microsoft.WindowsAzure.Storage;

using Microsoft.WindowsAzure.Storage.Auth;

using Microsoft.WindowsAzure.Storage.Blob;

using Microsoft.WindowsAzure.Storage.Blob.Protocol;

using System.IO;

using System.Configuration;

namespace AzureBlobStorage01
{
    /// <summary>
    /// Console sample that copies one object from Amazon S3 to an Azure block blob
    /// with the asynchronous StartCopyFromBlob operation, then polls the destination
    /// blob's CopyState once per second until the copy is no longer pending.
    /// All inputs are read from the appSettings section of App.config.
    /// </summary>
    class Program
    {
        // amazonObjectUrl         : URL of the Amazon S3 source object
        // azureStorageAccountName : name of the destination Azure storage account
        // azureStorageAccountKey  : key of the destination Azure storage account
        // azureBlobContainerName  : name of the destination blob container
        // azureBlobName           : name of the destination blob
        private static readonly string amazonObjectUrl =
            ConfigurationManager.AppSettings["AmazonSourceObjectURL"];

        private static readonly string azureStorageAccountName =
            ConfigurationManager.AppSettings["DestinationStorageAccountName"];

        private static readonly string azureStorageAccountKey =
            ConfigurationManager.AppSettings["DestinationStorageAccountKey"];

        private static readonly string azureBlobContainerName =
            ConfigurationManager.AppSettings["DestinationBlobContainerName"];

        private static readonly string azureBlobName =
            ConfigurationManager.AppSettings["DestinationBlobName"];

        /// <summary>
        /// Creates the destination container if needed, schedules the copy from the
        /// Amazon S3 URL, reports progress until the copy leaves the Pending state,
        /// and prints the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Second constructor argument: true selects HTTPS endpoints (see SDK docs).
            CloudStorageAccount storageAccount =
                new CloudStorageAccount(new StorageCredentials(azureStorageAccountName, azureStorageAccountKey), true);
            CloudBlobClient cloudBlobClient = storageAccount.CreateCloudBlobClient();
            CloudBlobContainer blobContainer = cloudBlobClient.GetContainerReference(azureBlobContainerName);

            Console.WriteLine("Trying to create the blob container....");
            blobContainer.CreateIfNotExists();
            Console.WriteLine("Blob container created successfully");
            Console.WriteLine("------------------------------------");

            CloudBlockBlob blockBlob = blobContainer.GetBlockBlobReference(azureBlobName);
            Console.WriteLine("Created a reference for block blob in Windows Azure....");
            Console.WriteLine("Blob Uri: " + blockBlob.Uri.AbsoluteUri);

            Console.WriteLine("Now trying to initiate copy....");
            // Only schedules the copy; completion is observed through CopyState below.
            blockBlob.StartCopyFromBlob(new Uri(amazonObjectUrl), null, null, null);
            Console.WriteLine("Copy started....");

            Console.WriteLine("Now tracking blob's copy progress....");
            DateTime startTime = DateTime.UtcNow;
            bool continueLoop = true;

            while (continueLoop)
            {
                Console.WriteLine("");
                Console.WriteLine("Fetching lists of blobs in Azure blob container....");

                // BlobListingDetails.Copy asks the service to include copy state in the listing.
                IEnumerable<IListBlobItem> blobsList = blobContainer.ListBlobs(null, true, BlobListingDetails.Copy);

                foreach (var blob in blobsList)
                {
                    // The container may hold items that are not block blobs; skip them
                    // instead of failing with an InvalidCastException.
                    var tempBlockBlob = blob as CloudBlockBlob;
                    if (tempBlockBlob == null || tempBlockBlob.Name != azureBlobName)
                    {
                        continue;
                    }

                    var copyStatus = tempBlockBlob.CopyState;
                    if (copyStatus == null)
                    {
                        continue;
                    }

                    Console.WriteLine("Status of blob copy...." + copyStatus.Status);

                    // Byte counters may be unset while the copy is still queued; treat
                    // missing values as 0 and avoid dividing by zero.
                    long bytesCopied = copyStatus.BytesCopied.GetValueOrDefault();
                    long totalBytes = copyStatus.TotalBytes.GetValueOrDefault();
                    float percentComplete = totalBytes > 0
                        ? 100f * bytesCopied / totalBytes
                        : 0f;

                    Console.WriteLine("Total bytes to copy...." + copyStatus.TotalBytes);
                    Console.WriteLine("Total bytes copied....." + copyStatus.BytesCopied);
                    Console.WriteLine("Perc. byte copied......{0:N1}", percentComplete);

                    // Any status other than Pending (Success, Failed, Aborted, Invalid) is final.
                    if (copyStatus.Status != CopyStatus.Pending)
                    {
                        continueLoop = false;
                    }
                }

                Console.WriteLine("");
                Console.WriteLine("==============================================");

                // Wait between polls only; do not add a second to the elapsed time
                // once the copy has already finished.
                if (continueLoop)
                {
                    System.Threading.Thread.Sleep(1000);
                }
            }

            DateTime endTime = DateTime.UtcNow;
            TimeSpan diffTime = endTime - startTime;

            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("time transfer (D HH:mm:ss): " +
                  diffTime.Days + " " + diffTime.Hours + ":" +
                  diffTime.Minutes + ":" + diffTime.Seconds);
            Console.ResetColor();

            Console.WriteLine("Press any key to terminate the program....");
            Console.ReadLine();
        }
    }
}

 

The content of App.config file In Visual Studio project is shown below:

 

<?xml version="1.0" encoding="utf-8" ?>
<configuration>
  <startup>
    <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.5" />
  </startup>
  <!-- Replace the placeholder values below before running the program. -->
  <appSettings>
    <add key="AmazonSourceObjectURL" value="https://AMAZON_URL"/>
    <add key="DestinationStorageAccountName" value="AZURE_DESTINATION_STORAGE_ACCOUNT_NAME"/>
    <add key="DestinationStorageAccountKey" value="AZURE_DESTINATION_STORAGE_ACCOUNT_KEY"/>
    <add key="DestinationBlobContainerName" value="AZURE_BLOB_CONTAINER_NAME"/>
    <add key="DestinationBlobName" value="BLOB_NAME"/>
  </appSettings>
</configuration>

 

Before running the project set the right values in Visual Studio App.config file.

To compile and run the program you need to reference the Windows Azure Storage Library for .NET available on NuGet: https://www.nuget.org/packages/WindowsAzure.Storage/

Azure Storage library can be installed easily through Visual Studio Package Management; from Visual Studio open TOOLS -> NuGet Package Manager -> Package Manager Console

 Figure1 Visual Studio menu navigation to access to NuGet Package Manager 

The window below shows the Visual Studio Package Management (PM) Console

Figure2 Visual Studio Package Management Console

 

In Package Management (PM) Console write the command:

PM> Install-Package WindowsAzure.Storage

NuGet installs the following libraries (from the log):
Attempting to resolve dependency 'Microsoft.Data.OData (≥ 5.6.0)'.
Attempting to resolve dependency 'System.Spatial (= 5.6.0)'.
Attempting to resolve dependency 'Microsoft.Data.Edm (= 5.6.0)'.
Attempting to resolve dependency 'Newtonsoft.Json (≥ 5.0.6)'.
Attempting to resolve dependency 'Microsoft.Data.Services.Client (≥ 5.6.0)'.
Attempting to resolve dependency 'Microsoft.WindowsAzure.ConfigurationManager (≥ 1.8.0.0)'.

.........

The list of all libraries included through NuGet is reported in Figure 3

Figure3 list of libraries included through NuGet

 

To read the pairs (key, value) in App.config we need to add to the Visual Studio project the system library: System.Configuration

 

3. Copy from Amazon S3 to Azure Block Blob in Java

Microsoft has published the Windows Azure Storage SDK for Java at GitHub https://github.com/Azure/azure-storage-java and the related documentation is available at the link :https://dl.windowsazure.com/storage/javadoc

Using the Windows Azure Storage SDK for Java we can port the code from .NET (see code above) to Java:

 

// This java program copies the content from Amazon S3 to Azure Storage block blob

// Before running fill up the right input values for:

// azureStorageAccountName : Name of the Azure Storage Account

// azureStorageAccountKey : Primary Key of the Azure Storage Account

// azureBlobContainerName : Name of Azure blob destination container name

// amazonObjectUrl : URL of input file

// azureBlobName : Name of destination blob in Azure Storage Account

package test2;

import com.microsoft.windowsazure.storage.*;

import com.microsoft.windowsazure.storage.blob.*;

import java.net.URISyntaxException;

import java.security.InvalidKeyException;

import java.io.FileNotFoundException;

import java.io.IOException;

import java.net.URI;

import java.util.*;

 

publicclassS3toBlob {

  privatestatic final String amazonObjectUrl = "https://AMAZON_URL/";

  privatestatic final String azureStorageAccountName = "AZURE_DESTINATION_STORAGE_ACCOUNT_NAME";

  privatestatic final String azureStorageAccountKey = "AZURE_DESTINATION_STORAGE_ACCOUNT_KEY";

  privatestatic final String azureBlobContainerName = "AZURE_BLOB_CONTAINER_NAME";

  privatestatic final String azureBlobName = "AZURE_BLOB_NAME";

  privatestatic String storageConnectionString ;

   

  publicstaticvoid main(String[] args) throws

      URISyntaxException,

      StorageException,

      InvalidKeyException,FileNotFoundException,

      IOException {

      try

      {

           //Define the connection-string with your values

           StringBuilder _sb = new StringBuilder();

           _sb.append("DefaultEndpointsProtocol=http;").append("AccountName=");

           _sb.append(azureStorageAccountName);

           _sb.append(";");

           _sb.append("AccountKey=");

           _sb.append(azureStorageAccountKey);

           storageConnectionString = _sb.toString();

                                 

           // Retrieve storage account from connection-string

           // Retrieve storage account from connection-string

           CloudStorageAccount storageAccount = CloudStorageAccount.parse(storageConnectionString);

           CloudBlobClient blobClient = storageAccount.createCloudBlobClient();

           CloudBlobContainer container = blobClient.getContainerReference(azureBlobContainerName);

 

           // Create the container if it does not exist

           container.createIfNotExists();

           CloudBlockBlob blockBlob = container.getBlockBlobReference(azureBlobName);

           System.out.println("Created a reference for block blob in Windows Azure....");

           System.out.println("Blob Uri: " + blockBlob.getUri());

           System.out.println("Now trying to initiate copy....");

                    

           OperationContext op = new OperationContext();

           blockBlob.startCopyFromBlob(new URI(amazonObjectUrl), null, null, null,op);

                    

           System.out.println("Copy started....");

           System.out.println("Now tracking blob's copy progress....");

           Date startTime = new Date();

 

           boolean continueLoop = true;

             

           while (continueLoop)

           {

           System.out.println("");

           System.out.println("Fetching lists of blobs in Azure blob container....");

          Iterable<ListBlobItem> blobsList = container.listBlobs(null, true, EnumSet.of(BlobListingDetails.COPY), null, op);

          for (ListBlobItem blob : blobsList)

                {

          CloudBlockBlob tempBlockBlob = (CloudBlockBlob) blob;           

          System.out.println("Name blockblob in cast: "+ tempBlockBlob.getName().toString());

          System.out.println("URI : "+ tempBlockBlob.getUri().toString());

                   if ((tempBlockBlob.getName()).equals(azureBlobName))

                   {

                      CopyState copyStatus = (CopyState) tempBlockBlob.getCopyState();

         

                      System.out.println("Getting CopyState......");

                      if (tempBlockBlob != null)

                      {

                      System.out.println("Status blob copy......" + tempBlockBlob.getCopyState().getStatus().toString());

                      System.out.println("Total bytes..........." + tempBlockBlob.getCopyState().getTotalBytes());

                      System.out.println("Total bytes to copy..." + tempBlockBlob.getCopyState().getBytesCopied());

                      float percentComplete = 100*

                                ( tempBlockBlob.getCopyState().getBytesCopied()).floatValue() /

                                ( tempBlockBlob.getCopyState().getTotalBytes()).floatValue();

              System.out.print("Perc. byte copied......");

                      System.out.format("%.2f\n", percentComplete);

                       

                          if (copyStatus.getStatus() != CopyStatus.PENDING)

                          {

                              continueLoop = false;

                          }

                      }

             }

              }

              System.out.println("==============================================");

              Thread.sleep(1000);

          }

          Date endTime = new Date();

          long diffTime = endTime.getTime() - startTime.getTime();

          long diffSeconds = diffTime / 1000 % 60;

          long diffMinutes = diffTime / (60 * 1000) % 60;

          long diffHours = diffTime / (60 * 60 * 1000) % 24;

          long diffDays = diffTime / (24 * 60 * 60 * 1000);

 

          System.out.println("time transfer (D HH:mm:ss): "+

                   diffDays + " " +

                   diffHours +":"+

                   diffMinutes+":"+

                   diffSeconds);

          System.out.println("Press any key to terminate the program....");

          System.in.read();

        } catch (StorageException storageException) {

                   System.out.print("StorageException encountered: ");

                   System.out.println(storageException.getMessage());

                   System.exit(-1);

       } catch (URISyntaxException uriSyntaxException) {

                   System.out.print("URISyntaxException encountered: ");

                   System.out.println(uriSyntaxException.getMessage());

                   System.exit(-1);

        } catch (Exception e) {

                   System.out.print("Exception encountered: ");

                   System.out.println(e.getMessage());

                   System.exit(-1);

       }

  }

}