cloud.elasticity.elastman.Sensor.java Source code

Java tutorial

Introduction

Here is the source code for cloud.elasticity.elastman.Sensor.java

Source

/*
 * This file is part of the ElastMan Elasticity Manager
 * 
 * Copyright (C) 2013 Ahmad Al-Shishtawy
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cloud.elasticity.elastman;

import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.net.Socket;
import java.net.SocketException;
import java.util.ArrayList;

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

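/**
 * Monitoring and control thread of the elasticity manager.
 * <p>
 * Every sampling period the sensor polls all registered clients for their
 * read and mixed operation statistics, aggregates them, and appends one row
 * to a data file ("ident.dat" in identification mode, "control.dat" in
 * control mode). In control mode, once the warm-up iterations have passed,
 * the aggregated measurements are fed to a feed-forward classifier or a PID
 * controller whose output is turned into a rebalance request for the actuator.
 *
 * @author Ahmad Al-Shishtawy <ahmadas@kth.se>
 *
 */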
public class Sensor extends Thread {

    static Logger log = LoggerFactory.getLogger(Sensor.class);

    boolean controlMode;

    ArrayList<MyIO> clientSockets;
    ArrayList<MyIO> deadSockets;

    SummaryStatistics read_op, read_mean, read_stddiv, read_min, read_p95, read_p99, read_max;
    SummaryStatistics mixed_op, mixed_mean, mixed_stddiv, mixed_min, mixed_p95, mixed_p99, mixed_max;
    SummaryStatistics total_op;
    public boolean identifying = true;
    private int warmup = 2; // iterations to wait before controller
    //   private long rebalancing = 0;
    private Actuator actuator = null;
    private double ffThroughputDelta; // larger delta will indicate a spike thus we use FF

    final long period; // in seconds
    long timeStep = 0;
    //   SimpleBinaryClassifier ff = new SimpleBinaryClassifier(1800, 200, 0, 1000); // FF model
    SimpleBinaryClassifier ff; // FF model

    private long nextFF = 0;

    private double inOp;
    private double outOp;

    private PIDController pid;
    private Filter filter;

    private Cluster cluster;

    public Sensor(int sleepSec, boolean controlMode, Cluster cluster) { // if controlMode=false will do identification
        clientSockets = new ArrayList<MyIO>();
        deadSockets = new ArrayList<MyIO>();

        read_op = new SummaryStatistics();
        read_mean = new SummaryStatistics();
        read_stddiv = new SummaryStatistics();
        read_min = new SummaryStatistics();
        read_p95 = new SummaryStatistics();
        read_p99 = new SummaryStatistics();
        read_max = new SummaryStatistics();

        mixed_op = new SummaryStatistics();
        mixed_mean = new SummaryStatistics();
        mixed_stddiv = new SummaryStatistics();
        mixed_min = new SummaryStatistics();
        mixed_p95 = new SummaryStatistics();
        mixed_p99 = new SummaryStatistics();
        mixed_max = new SummaryStatistics();

        total_op = new SummaryStatistics();

        period = sleepSec;
        this.controlMode = controlMode;
        this.cluster = cluster;

        // TODO: change normalized setPoint to real setPoint
        pid = new PIDController(Props.control_inOp, Props.control_outOp, 0, Props.control_kp, Props.control_ki,
                Props.control_kd);

        this.inOp = Props.control_inOp;
        this.outOp = Props.control_outOp;

        actuator = new Actuator(cluster, Props.voldMin, Props.voldMax, Props.voldDeltaMax, Props.createVMs);

        filter = new Filter(Props.filter_alpha);
        warmup = Props.control_warmup;
        dead = Props.control_dead;
        ffThroughputDelta = Props.control_ff_throughputDelta;

        ff = new SimpleBinaryClassifier(Props.control_ffr1, Props.control_ffw1, Props.control_ffr2,
                Props.control_ffw2);

    }

    public synchronized void addClient(Socket cs) {
        try {
            cs.setSoTimeout(40000); // wait max 40 seconds;
        } catch (SocketException e) {
            log.error("Error: Can't set socket time out!");
            log.error(e.getMessage());
        }
        clientSockets.add(new MyIO(cs));
        log.info("Client added: {}", cs.getInetAddress());
    }

    public synchronized void remLastClient() {
        if (clientSockets.size() == 0) {
            log.error("No More Clients!!");
            return;
        }

        MyIO cs = clientSockets.get(clientSockets.size() - 1);
        log.info("Removing Client: " + cs.s.getInetAddress());
        try {
            cs.s.close();
        } catch (IOException e) {
            log.error(e.getMessage());
        }
        clientSockets.remove(clientSockets.size() - 1);

    }

    @Override
    public void run() {

        String filename;
        if (controlMode) {
            filename = "control.dat";
        } else {
            filename = "ident.dat";
        }

        // Open the data file
        FileWriter fstream;
        BufferedWriter out = null;
        try {
            fstream = new FileWriter(filename);
            out = new BufferedWriter(fstream);
        } catch (IOException e1) {
            // TODO Auto-generated catch block
            e1.printStackTrace();
        }

        // Write a header  
        try {
            out.write("T \t" + "PeriodSec \t" + "Clients \t" + "Servers \t" + "TotalOps \t" + "Throughput \t"
                    + "ThroPerServ \t" +

                    "tpsR \t" + "meanR \t" + "stddivR \t" + "minR \t" + "p95R \t" + "p99R \t" + "fp99R \t"
                    + "maxR \t" +

                    "tpSM \t" + "meanM \t" + "stddivM \t" + "minM \t" + "p95M \t" + "p99M \t" + "maxM \t" + "ntp \t"
                    + "nfp99 \t" + "opID \t" + "log\n");

            out.flush();
        } catch (IOException e) {
            log.error(e.getMessage());
        }

        boolean firstInput = true;
        double lastTps = 0;
        boolean bigTPChange = false;

        while (identifying) {
            if (warmup > 0) {
                warmup--;
            } else if (warmup == 0) {
                warmup--;
                //            lastTimeSec = System.nanoTime() / 1000000000; // for the controller  // NOT USED 
            }

            long start = System.nanoTime();

            //// sleep for sampling time then collect data
            try {
                Thread.sleep(period * 1000);
            } catch (InterruptedException e) {
                log.error(e.getMessage());
            }
            timeStep++;
            // loop and fetch data from each YCSB client
            updateMonitoringData();
            long end = System.nanoTime();
            long pInSec = (end - start) / 1000000000; // sampling period in seconds

            final double throughput = total_op.getSum() / pInSec;

            // Throughput per server
            final double tps = throughput / cluster.getActiveVoldVMsCount();
            // Read Throughput per server
            final double rtps = read_op.getSum() / pInSec / cluster.getActiveVoldVMsCount();
            // Write Throughput per server
            final double mtps = mixed_op.getSum() / pInSec / cluster.getActiveVoldVMsCount();

            // calculate a smoothed value of the p99 as well
            filter.step(read_p99.getMean());

            if (firstInput) {
                lastTps = tps;
                firstInput = false;
            }

            log.debug("Summary: " + timeStep + " \t" + pInSec + " \t" + (clientSockets.size() - deadSockets.size())
                    + " \t" + cluster.getActiveVoldVMsCount() + " \t" + total_op.getSum() + " \t"
                    + (long) (throughput) + " \t" + (long) (throughput / cluster.getActiveVoldVMsCount()) + " \t"
                    + rtps + " \t" + (long) read_mean.getMean() + " \t" + (long) read_stddiv.getMean() + " \t"
                    + (long) read_min.getMean() + " \t" + (long) read_p95.getMean() + " \t"
                    + (long) read_p99.getMean() + " \t" + (long) filter.getValue() + " \t"
                    + (long) read_max.getMean() + " \t" + mtps + " \t" + (long) mixed_mean.getMean() + " \t"
                    + (long) mixed_stddiv.getMean() + " \t" + (long) mixed_min.getMean() + " \t"
                    + (long) mixed_p95.getMean() + " \t" + (long) mixed_p99.getMean() + " \t"
                    + (long) mixed_max.getMean() + " \t"
                    + (long) ((throughput / cluster.getActiveVoldVMsCount()) - outOp) + " \t"
                    + (long) (filter.getValue() - inOp));

            try {
                out.write("" + timeStep + " \t" + pInSec + " \t" + (clientSockets.size() - deadSockets.size())
                        + " \t" + cluster.getActiveVoldVMsCount() + " \t" + total_op.getSum() + " \t"
                        + (long) (throughput) + " \t" + (long) (throughput / cluster.getActiveVoldVMsCount())
                        + " \t" + (long) rtps + " \t" + (long) read_mean.getMean() + " \t"
                        + (long) read_stddiv.getMean() + " \t" + (long) read_min.getMean() + " \t"
                        + (long) read_p95.getMean() + " \t" + (long) read_p99.getMean() + " \t"
                        + (long) filter.getValue() + " \t" + (long) read_max.getMean() + " \t" + (long) mtps + " \t"
                        + (long) mixed_mean.getMean() + " \t" + (long) mixed_stddiv.getMean() + " \t"
                        + (long) mixed_min.getMean() + " \t" + (long) mixed_p95.getMean() + " \t"
                        + (long) mixed_p99.getMean() + " \t" + (long) mixed_max.getMean() + " \t"
                        + (long) ((throughput / cluster.getActiveVoldVMsCount()) - outOp) + " \t"
                        + (long) (filter.getValue() - inOp) + " \t");
                if (!controlMode) {
                    out.write("-1 \tIdent\n");
                    out.flush();
                } // else -> later append control log and flush

            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }

            System.out.println("======================");

            //clear stats
            read_op.clear();
            read_mean.clear();
            read_stddiv.clear();
            read_min.clear();
            read_p95.clear();
            read_p99.clear();
            read_max.clear();

            mixed_op.clear();
            mixed_mean.clear();
            mixed_stddiv.clear();
            mixed_min.clear();
            mixed_p95.clear();
            mixed_p99.clear();
            mixed_max.clear();

            total_op.clear();

            // remove dead clients
            if (deadSockets.size() > 0) {
                clientSockets.removeAll(deadSockets);
                deadSockets.clear();
                System.out.println("Removind Dead Sockets!");
            }
            if (!controlMode && clientSockets.size() == 0) {
                identifying = false; // finished the identification
                System.out.println("Identification completed");
            }
            if (warmup == 0) { // next time the controller will be started!! so initialize;
                pid.reset();
                filter.reset(); // to remove any noise in startup
            }

            if (controlMode && warmup >= 0) {
                try {
                    out.write("0 \tWarmup\n");
                    out.flush();
                } catch (IOException e) {
                    // TODO Auto-generated catch block
                    e.printStackTrace();
                }
            } else if (controlMode && warmup < 0) {

                if (!isRebalancing() && Math.abs(lastTps - tps) > ffThroughputDelta) {
                    bigTPChange = true;
                    System.out.println("Big Throughput Change: " + (lastTps - tps));
                }

                // 0 - check
                cluster.updateVMs();
                if (actuator.isCreateVMs() && cluster.getActiveVoldVMsCount() != cluster.getVoldVMsCount()) { // then there is something wrong (e.g., didn't finish removing nodes)
                    System.out.println("Vold Count Error!!"); // Should never happen unless someone adds VoldVMs externally
                    pid.reset();
                    filter.reset();
                    try {
                        out.write("3 \tRebalanceNotComplete!\n");
                        out.flush();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
                // TODO: 1 - Error is very large for first time then do nothing
                //            else if (firstLargeValue && !(Math.abs(lastTps-tps)>ffThroughputDelta))  {   // this is probably noise, Ignore it
                //               // do nothing
                ////               pidReset(filter);
                //               System.out.println("Controller: Very large value for first time! Do nothing!");
                //               try {
                //                  out.write("4 \tFirstLarge\n");
                //                  out.flush();
                //               } catch (IOException e) {
                //                  // TODO Auto-generated catch block
                //                  e.printStackTrace();
                //               }
                //            }
                // 2 - if in dead zone then do nothing
                else if (inOp - 2 * dead <= filter.getValue() && filter.getValue() <= inOp + dead) {
                    System.out.println("Controller: in dead zone! Do nothing!");
                    pid.reset();
                    filter.reset();
                    try {
                        out.write("0 \tDeadZone\n");
                        out.flush();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                }
                // 3 - Rebalancing
                else if (isRebalancing()) {
                    System.out.println("Controller: Rebalancing! Do nothing!");
                    //               pidReset(filter);
                    try {
                        out.write("3 \tRebalancing\n");
                        out.flush();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                    // FIXME: now I give rebalance 2 period to finish.
                    // Should check the real status and update rebalance accordingly
                }
                // 3.5 - if current latency is less than desired and min servers if 3 then do nothing.
                else if (cluster.getActiveVoldVMsCount() <= 3 && filter.getValue() <= inOp + dead) { // should never be < 3
                    System.out.println(
                            "Controller: Having min=3 Vold VMs and the response time is OK! Not running controller");
                    pid.reset();
                    filter.reset();
                    try {
                        out.write("0 \tMinVMs\n");
                        out.flush();
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                    }
                } // 4 - 
                else {
                    boolean usePID = true, ffFail = false;

                    if (timeStep > nextFF && (bigTPChange || (filter.getValue() > (inOp + (inOp * 0.5))
                            || filter.getValue() < (inOp - (inOp * 0.5)))) /*Big change in load use ff*/) {//(filter>(inOp + (inOp*0.5)) || filter<(inOp - (inOp*0.5)))) {
                        usePID = false;
                        bigTPChange = false;
                        //   use binary classifier
                        nextFF = timeStep + 4; // TODO: Fix nextFF
                        System.out.println("Controller: Using FF");
                        double output = ff.classify(rtps, mtps);
                        // calculate number of servers needed to handle current throughput
                        double n = (throughput / output) - cluster.getActiveVoldVMsCount();

                        // TODO: Now I get ceil. Check if there is a better solution
                        int nn = 0;
                        //                  if(n>0) {
                        nn = (int) Math.ceil(n);
                        //                  } else {
                        //                     nn=(int)Math.floor(n);
                        //                  }

                        //int nn = (int)Math.round(n);

                        System.out.println(
                                "Controller: FF output = " + output + " that is " + n + " -> " + nn + " servers");

                        if ((filter.getValue() > (inOp + inOp * 0.5) && nn < 3)
                                || (filter.getValue() < (inOp - inOp * 0.5) && nn > -3)) {//Math.abs(nn)<3) {
                            // Very large error & add/rem few VMs! Must be outside of op region
                            // Fall back to FB
                            usePID = true;
                            ffFail = true;
                        } else {

                            try {
                                out.write("2 \tFF#" + output + "#" + n + "#" + nn + "\n");
                                out.flush();
                                pid.reset();
                                filter.reset();
                            } catch (IOException e) {
                                // TODO Auto-generated catch block
                                e.printStackTrace();
                            }
                            if (nn > 0 || (nn < 0 && cluster.getActiveVoldVMsCount() > 3)) {
                                actuator.scheduleRebalance(nn, false);

                            }
                        }
                    }
                    if (usePID) { // 4 - use PID
                        System.out.println("Controller: Using FB");
                        double delta = pid.step(filter.getValue()); // pid gives throughput per server

                        double output = tps + delta; // this is the new throughput per server
                        if (output < 50) {
                            output = 50;
                            System.err.println("WARNING!!! pid gave negative/small output!!");
                        }

                        // calculate number of servers needed to handle new throughput
                        double n = (throughput / output) - cluster.getActiveVoldVMsCount();

                        // TODO: Now I ceil. Check if there is a better solution
                        int nn = 0;
                        //                  if(n>0) {
                        nn = (int) Math.ceil(n);
                        //                  } else {
                        //                     nn=(int)Math.floor(n);
                        //                  }// int nn = (int)Math.round(n);

                        System.out.println(
                                "Controller: PID output = " + output + " that is " + n + " -> " + nn + " servers");

                        try {
                            out.write("1 \tFB#" + output + "#" + n + "#" + nn);
                            if (ffFail) {
                                out.write("#FFFail");
                            }
                            out.write("\n");
                            out.flush();
                        } catch (IOException e) {
                            // TODO Auto-generated catch block
                            e.printStackTrace();
                        }
                        if (nn > 0 || (nn < 0 && cluster.getActiveVoldVMsCount() > 3)) {
                            actuator.scheduleRebalance(nn, true);
                        }
                    }
                }
            }

            lastTps = tps;
        }
        try {
            out.close();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }

    private synchronized void updateMonitoringData() { // sync not to allow adding clients while in the loop
        for (MyIO io : clientSockets) {
            // Read operations
            long NR = 0;
            double meanR = 0, stddivR = 0, minR = 0, p95R = 0, p99R = 0, maxR = 0;
            // Mixed operations (read/write transactions) 
            long NM = 0;
            double meanM = 0, stddivM = 0, minM = 0, p95M = 0, p99M = 0, maxM = 0;

            try {
                io.out.writeInt(0); //ping the client to send data. client is waiting for any int from us
                NR = io.in.readLong();
                meanR = io.in.readDouble();
                stddivR = io.in.readDouble();
                minR = io.in.readDouble();
                p95R = io.in.readDouble();
                p99R = io.in.readDouble();
                maxR = io.in.readDouble();

                NM = io.in.readLong();
                meanM = io.in.readDouble();
                stddivM = io.in.readDouble();
                minM = io.in.readDouble();
                p95M = io.in.readDouble();
                p99M = io.in.readDouble();
                maxM = io.in.readDouble();

                if (NR == 0) { // This is a new client that did not start yet! Don't add this time
                    return;
                }

                // if everything went fine with no exceptions then add to stats, otherwise the socket will be removed after the loop
                read_op.addValue(NR);
                read_mean.addValue(meanR);
                read_stddiv.addValue(stddivR);
                read_min.addValue(minR);
                read_p95.addValue(p95R);
                read_p99.addValue(p99R);
                read_max.addValue(maxR);

                mixed_op.addValue(NM);
                mixed_mean.addValue(meanM);
                mixed_stddiv.addValue(stddivM);
                mixed_min.addValue(minM);
                mixed_p95.addValue(p95M);
                mixed_p99.addValue(p99M);
                mixed_max.addValue(maxM);

                total_op.addValue(NR + NM);

                log.debug((NR + NM) + " \t" + NR + " \t" + meanR + " \t" + stddivR + " \t" + minR + " \t" + p95R
                        + " \t" + p99R + " \t" + maxR + " \t" + NM + " \t" + meanM + " \t" + stddivM + " \t" + minM
                        + " \t" + p95M + " \t" + p99M + " \t" + maxM);
            } catch (IOException e) {

                // TODO: I'm not removing dead clients since the workload generator removes them in remLastClient()
                //System.out.println("Error: Dead Client NOT added to deadlist for removal.");   // Should not happen in normal system
                //deadSockets.add(io);   // to remove dead clients later. The workload generator just stops the VM running the client

                log.error(e.getMessage());
            }

        }

    }

    //   private long lastTimeSec = 0;
    //   private double lastInput = 0;
    //   private double lastError=0; 
    //   private double outMin=-4000, outMax=4000;    // TODO: check values of outMax and inMax

    //   private double mySetpoint = 0; // in nano // TODO: set the setpoint?
    //   private double kp, ki, kd, iTerm=0; // TODO: set the real gains

    private double dead;

    class MyIO {
        public DataInputStream in = null;
        public DataOutputStream out = null;
        public Socket s = null;

        public MyIO(Socket s) {
            this.s = s;
            try {
                in = new DataInputStream(s.getInputStream());
                out = new DataOutputStream(s.getOutputStream());
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
        }
    }

    private boolean isRebalancing() {
        if (actuator == null) {
            return false;
        }

        return actuator.isRebalancing();
    }

    //   // delete me: Testing pid! A quick hack to test the pid controller
    //   public static void main(String[] args) {
    //      
    //      FileInputStream propFile;
    //      try {
    //         propFile = new FileInputStream( "control.prop");
    //         ElastManServer.controlProps = new Properties(System.getProperties());
    //         ElastManServer.controlProps.load(propFile);
    //      } catch (FileNotFoundException e1) {
    //         System.err.println("Control properties not found!! using defaults");
    //         e1.printStackTrace();
    //      } catch (IOException e) {
    //         System.err.println("Control properties IO error!! using defaults");
    //         e.printStackTrace();
    //      }
    //      
    //      
    //      Scanner scanner = new Scanner(System.in);
    //      Sensor c = new Sensor(0, true);
    //      
    //      System.out.println("Testing PID: " +  c.kp + ", " + c.ki + ", " + c.kd);
    //      System.out.print("Enter First Input: ");
    //      
    //      Double in = scanner.nextDouble();
    ////      c.lastInput = in - c.inOp;
    //      c.pidReset(in);
    //      
    //      while(true) {
    //         Double out = c.pid(in);
    //         System.out.println("Controller says: " + out);
    //         System.out.print("Enter Next Input: ");
    //         in = scanner.nextDouble();
    //      }
    //   }

}