ЗАПИСИ
СЕНТЯБРЬ 11, 2014

Hi,

Just a note to myself on how to quickly create an AWS EMR cluster and run a Custom JAR job on it.

  #!/bin/bash
  #
  # Creates (or reuses) an AWS EMR cluster and submits a Custom JAR step to it.
  # Pass -d as the first argument to upload the input data and the job JAR first.

  # Name of the Custom JAR step and of the EMR cluster it is submitted to.
  jobName="MyReportJob"
  clusterName="EmrCluster1"
  # Set by create_cluster to the ID of the active cluster.
  clusterId=""
  # Extra arguments for `aws emr list-steps`. run_app expands this unquoted so
  # that it splits into separate words, hence a plain string with real "--".
  activeStepStates="--step-states PENDING RUNNING"

  # S3 bucket used for data, job, logs and results, plus the cluster settings.
  bucket="s3://emr-test"
  region="us-west-2"
  instanceType="m1.medium"
  instanceNumber=3

  # Local directory with input files and the glob selecting the ones to upload.
  resdir="resources/"
  filesPattern='*.log'

  # Local job JAR, the name it gets in S3, and the local copy made under that name.
  jarPath="my-job-1.0-SNAPSHOT-jar-with-dependencies.jar"
  jarRename="my-job-1.0-SNAPSHOT-wdeps.jar"
  jarRenamePath="/tmp/$jarRename"

  # Exit script gently
  #
  # Signal handler for SIGHUP, SIGINT and SIGTERM: reports the interruption on
  # stderr (it is a diagnostic, not script output) and exits with status 1.
  on_trap() {
      echo "Exiting script as of signal…" >&2
      exit 1
  }
  trap on_trap SIGHUP SIGINT SIGTERM

  # Run command securely
  #
  # Echoes the command line, runs it, and aborts the whole script if it fails.
  # Arguments: the command followed by its arguments, one word each.
  # Outputs:   the command line on stdout; an error message on stderr on failure.
  # Exits:     with the command's own status when that status is non-zero.
  do2() {
      local res=0
      echo "Running command: [$*]"
      # "$@" hands every argument over unchanged; an unquoted $* would re-split
      # arguments on whitespace and expand any glob characters in them.
      "$@" || res=$?
      if (( res != 0 )); then
          echo "ERROR: Command execution failed." >&2
          exit "$res"
      fi
  }

  # Prepare data
  #
  # Removes the logs/ and results/ prefixes of the previous run from the bucket,
  # uploads every file in $resdir that matches $filesPattern to $bucket/data/,
  # then copies the job JAR to $jarRenamePath and uploads it to $bucket/job/.
  # Globals: bucket, resdir, filesPattern, jarPath, jarRename, jarRenamePath (read)
  # Every external command goes through do2, so the first failure ends the script.
  prepare_data() {
      local prefix log_file

      # Clear previous run results
      for prefix in logs results; do
          do2 aws s3 rm "$bucket/$prefix/" --recursive
      done

      # Upload log files. The shell expands the glob itself, so file names with
      # spaces survive; $filesPattern stays unquoted because it holds the pattern.
      for log_file in "${resdir%/}"/$filesPattern; do
          # A glob without matches is left as literal text; skip it.
          [[ -e "$log_file" ]] || continue
          do2 aws s3 cp "$log_file" "$bucket/data/${log_file##*/}"
      done

      # Upload Hadoop job
      do2 rm -f "$jarRenamePath"
      do2 cp "$jarPath" "$jarRenamePath"
      do2 aws s3 cp "$jarRenamePath" "$bucket/job/$jarRename"
  }

  # Prints the ID of the first active cluster whose listing line contains
  # $clusterName, or nothing when there is none.
  # NOTE(review): takes the second whitespace-separated field as the ID, which
  # presumes the CLI's text output format -- confirm the configured output.
  _active_cluster_id() {
      aws emr list-clusters --active \
          | awk -v name="$clusterName" 'index($0, name) { print $2; exit }'
  }

  # Create EMR Hadoop cluster
  #
  # Reuses an active cluster named $clusterName when one exists; otherwise starts
  # a new one that does not auto-terminate and looks its ID up afterwards.
  # Globals: clusterName, region, instanceNumber, instanceType, bucket (read);
  #          clusterId (written)
  # Exits with status 1 when no cluster ID can be found after creation.
  create_cluster() {
      clusterId=$(_active_cluster_id)
      if [[ -n "$clusterId" ]]; then
          echo "Do not create $clusterName cluster as it's already exists."
          return 0
      fi

      echo "Creating $clusterName cluster. Doesn't exists yet."
      # Start Hadoop cluster
      do2 aws emr create-cluster \
          --no-auto-terminate \
          --region "$region" \
          --instance-count "$instanceNumber" \
          --instance-type "$instanceType" \
          --name "$clusterName" \
          --ami-version 3.0.4 \
          --hadoop-version 2.2.0 \
          --log-uri "$bucket/logs"

      # The ID is looked up again by name rather than parsed from the creation output.
      clusterId=$(_active_cluster_id)
      if [[ -z "$clusterId" ]]; then
          echo "ERROR Cannot create $clusterName cluster: Cannot get cluster ID. Try 'aws emr list-clusters'." >&2
          exit 1
      fi
  }

  # Prints the ID of the first step of cluster $clusterId that is in one of the
  # $activeStepStates states and whose listing line contains $jobName, or nothing.
  # NOTE(review): takes the third whitespace-separated field as the step ID, which
  # presumes the CLI's text output format -- confirm the configured output.
  _active_step_id() {
      # $activeStepStates is intentionally unquoted: it holds several CLI words.
      aws emr list-steps --cluster-id "$clusterId" $activeStepStates \
          | awk -v name="$jobName" 'index($0, name) { print $3; exit }'
  }

  # Run Hadoop job
  #
  # Adds the Custom JAR step $jobName to cluster $clusterId unless a step with
  # that name is already active, then checks that the new step shows up.
  # Globals: jobName, clusterName, clusterId, activeStepStates, bucket,
  #          jarRename (read); jarStepId (written)
  # Returns 0 when the step was added or was already active.
  # Exits 1 when $clusterId is empty, 2 when the step cannot be found after adding.
  run_app() {
      echo "Running job $jobName for $clusterName [$clusterId] cluster…"

      if [[ -z "$clusterId" ]]; then
          echo "ERROR No cluster ID found [$clusterId]. Try 'aws emr list-clusters'." >&2
          exit 1
      fi

      # Check whether the job is already active, to avoid submitting it twice
      jarStepId=$(_active_step_id)
      if [[ -n "$jarStepId" ]]; then
          echo "WARN Not going to add Custom JAR step $jobName because it's already exists and is in active state. Try 'aws emr list-steps --cluster-id $clusterId $activeStepStates'."
          return 0
      fi

      # Run uploaded job in Hadoop cluster
      do2 aws emr add-steps \
          --cluster-id "$clusterId" \
          --steps \
          "Type=CUSTOM_JAR,Name=$jobName,ActionOnFailure=CANCEL_AND_WAIT,Jar=$bucket/job/$jarRename,Args=$bucket/data,$bucket/results"

      jarStepId=$(_active_step_id)
      if [[ -z "$jarStepId" ]]; then
          echo "ERROR Cannot find/create Custom JAR step $jobName. Try 'aws emr list-steps --cluster-id $clusterId $activeStepStates'." >&2
          exit 2
      fi

      echo "Custom JAR $jobName with step ID [$jarStepId] on cluster $clusterName [$clusterId] has been successfully created."
  }

  # Business logic
  #
  # Entry point. With "-d" as the first argument the input data and the job JAR
  # are uploaded first; in every case the cluster is then created (or reused)
  # and the job is submitted.
  # Arguments: $1 - optional "-d" flag.
  main() {
      # ${1:-} keeps the comparison well-formed when no argument is given;
      # an unquoted $1 collapses to nothing and makes the test itself fail.
      if [[ "${1:-}" == "-d" ]]; then
          echo "Preparing data for the job(s)…"
          prepare_data
      else
          echo "Do not prepare data for the job(s)."
      fi

      create_cluster
      run_app
  }

  main "$@"

Нет комментариев