/*
 * Decompiled with CFR 0.152.
 */
package com.linkedin.tony;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.linkedin.tony.Constants;
import com.linkedin.tony.TaskScheduler;
import com.linkedin.tony.TonyConfigurationKeys;
import com.linkedin.tony.TonyPolicyProvider;
import com.linkedin.tony.events.ApplicationFinished;
import com.linkedin.tony.events.ApplicationInited;
import com.linkedin.tony.events.Event;
import com.linkedin.tony.events.EventHandler;
import com.linkedin.tony.events.EventType;
import com.linkedin.tony.events.Metric;
import com.linkedin.tony.events.TaskFinished;
import com.linkedin.tony.events.TaskStarted;
import com.linkedin.tony.models.JobMetadata;
import com.linkedin.tony.rpc.ApplicationRpc;
import com.linkedin.tony.rpc.ApplicationRpcServer;
import com.linkedin.tony.rpc.MetricsRpc;
import com.linkedin.tony.rpc.TaskInfo;
import com.linkedin.tony.rpc.impl.MetricsRpcServer;
import com.linkedin.tony.rpc.impl.TaskStatus;
import com.linkedin.tony.tensorflow.TonySession;
import com.linkedin.tony.util.Utils;
import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.lang.reflect.Method;
import java.net.ServerSocket;
import java.net.SocketException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.stream.Collectors;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.client.api.AMRMClient;
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync;
import org.apache.hadoop.yarn.client.api.async.NMClientAsync;
import org.apache.hadoop.yarn.client.api.async.impl.NMClientAsyncImpl;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.client.ClientToAMTokenIdentifier;
import org.apache.hadoop.yarn.security.client.ClientToAMTokenSecretManager;
import org.apache.hadoop.yarn.util.AbstractLivelinessMonitor;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.UTCClock;

public class ApplicationMaster {
    private static final Log LOG = LogFactory.getLog(ApplicationMaster.class);
    // Application id string, derived in init() from this AM's container id.
    private String appIdString;
    // File system created from hdfsConf in init(); handed to the TaskScheduler in start().
    private FileSystem resourceFs;
    // File system that tonyHistoryFolder resolves to; used for the job directory and config file.
    private FileSystem historyFs;
    // Value of "tony.history.location" (read in init()).
    private String tonyHistoryFolder;
    // Set by setupJobDir(); stays null when the "intermediate" directory is missing or cannot be checked.
    private Path jobDir = null;
    // Short user name of the current UGI user; stays null if the lookup in init() fails.
    private String user = null;
    // Queue handed to the EventHandler constructed in init().
    private BlockingQueue<Event> eventQueue = new LinkedBlockingQueue<Event>();
    // Value of "tony.am.retry-count"; decremented by run() on every retry.
    private int amRetryCount;
    // Value of "tony.worker.timeout"; passed to Utils.executeShell() by doPreprocessingJob().
    private long workerTimeout;
    // Value of the "hdfs_classpath" command-line option.
    private String hdfsClasspath;
    // Port reported by the application RPC server (see setupRPCService()).
    private int amPort;
    // Serialized credentials of the current user, filled in by setupContainerCredentials().
    private ByteBuffer allTokens;
    private Map<String, LocalResource> localResources = new ConcurrentHashMap<String, LocalResource>();
    // Loaded from "tony-final.xml" in init().
    private Configuration tonyConf = new Configuration(false);
    // Container id of this AM, parsed from the CONTAINER_ID environment variable.
    private ContainerId containerId;
    // Base environment copied into every launched container (see ContainerLauncher).
    private Map<String, String> containerEnv = new ConcurrentHashMap<String, String>();
    // Parsed from "tony.execution.envs"; seeds the environment of the preprocessing command.
    private Map<String, String> shellEnv = new HashMap<String, String>();
    // Containers keyed by session id; populated outside the visible part of this file.
    private Map<Integer, List<Container>> sessionContainersMap = new ConcurrentHashMap<Integer, List<Container>>();
    private Map<String, Map<String, LocalResource>> jobTypeToContainerResources = new HashMap<String, Map<String, LocalResource>>();
    private NMClientAsync nmClientAsync;
    private ExecutorService containersLauncherThreadPool = Executors.newCachedThreadPool();
    private AMRMClientAsync<AMRMClient.ContainerRequest> amRMClient;
    // Current session; replaced by buildTonySession() and by reset() on retry.
    private TonySession session = new TonySession();
    // Builder kept so that reset() can build a fresh session.
    private TonySession.Builder sessionBuilder;
    private Configuration yarnConf;
    private Configuration hdfsConf = new Configuration(false);
    private ApplicationRpcServer applicationRpcServer;
    // Value of "tony.application.security.enabled" (default true).
    private boolean secureMode;
    // True when Utils.getNumTotalTasks(tonyConf) is 0.
    private boolean singleNode;
    // Set to true by doPreprocessingJob() once the shell command has returned.
    private boolean preprocessFinished = false;
    // Exit code of the preprocessing command; monitor() stops on a non-zero value.
    private int preprocessExitCode = 0;
    private String proxyUrl;
    // Value of "tony.application.enable-preprocess" (default false).
    private boolean enablePreprocessing = false;
    // Set by processFinishedContainer() when a task of an untracked job type has failed.
    private boolean untrackedTaskFailed = false;
    // Value of "tony.application.timeout"; 0 means no deadline in monitor().
    private long appTimeout;
    // Polled by monitor() and stop(); the writer is not in the visible part of this file.
    private volatile boolean clientSignalToStop = false;
    private MetricsRpcServer metricsRpcServer;
    private EventHandler eventHandler;
    // Heartbeat monitor; its expire() callback calls onTaskDeemedDead().
    private final AbstractLivelinessMonitor<TonySession.TonyTask> hbMonitor;
    // Value of "tony.task.heartbeat-interval-ms" (default 1000).
    private int hbInterval;
    // Value of "tony.task.max-missed-heartbeats" (default 25).
    private int maxConsecutiveHBMiss;
    // Set by onTaskDeemedDead(); monitor() stops when it is true.
    private volatile boolean taskHasMissesHB = false;
    // Thread that executes run(); interrupted by onTaskDeemedDead().
    private Thread mainThread;
    // Created in start(); not created when start() returns early (single node or failed preprocessing).
    private TaskScheduler scheduler;

    /**
     * Creates the application master with a {@code new Configuration(false)} as YARN
     * configuration and an anonymous heartbeat liveliness monitor.
     *
     * <p>The monitor's {@code expire} callback delegates to {@link #onTaskDeemedDead}.
     * Its intervals are computed from {@code hbInterval} and {@code maxConsecutiveHBMiss}
     * when the monitor service starts, i.e. after {@code init} has read those values.
     */
    private ApplicationMaster() {
        this.yarnConf = new Configuration(false);
        this.hbMonitor = new AbstractLivelinessMonitor<TonySession.TonyTask>("Tony Task liveliness Monitor", (Clock)new UTCClock()){

            // Called by the monitor for a task it considers expired.
            protected void expire(TonySession.TonyTask task) {
                ApplicationMaster.this.onTaskDeemedDead(task);
            }

            protected void serviceStart() throws Exception {
                // The monitor interval is three heartbeat intervals.
                int monitorInterval = ApplicationMaster.this.hbInterval * 3;
                // setMonitorInterval is looked up by name and invoked reflectively instead of
                // being called directly (presumably because its parameter type differs between
                // Hadoop versions - TODO confirm).
                // NOTE(review): getDeclaredMethods() only lists methods declared by the runtime
                // class itself, which here is this anonymous subclass; an inherited
                // setMonitorInterval would not be found by this loop - confirm that the
                // interval is actually applied.
                for (Method m : ((Object)((Object)this)).getClass().getDeclaredMethods()) {
                    if (!m.getName().equals("setMonitorInterval")) continue;
                    m.invoke((Object)this, monitorInterval);
                    break;
                }
                // A task expires after max(3, maxConsecutiveHBMiss) heartbeat intervals.
                this.setExpireInterval(ApplicationMaster.this.hbInterval * Math.max(3, ApplicationMaster.this.maxConsecutiveHBMiss));
                super.serviceStart();
            }
        };
    }

    /**
     * Loads the TonY, YARN and HDFS configurations, parses the command line and fills
     * in the fields that are derived from them (timeouts, retry count, heartbeat
     * settings, container id, history file system, event handler, user name).
     *
     * @param args command-line arguments of the application master
     * @return {@code false} if a file system could not be created or the arguments
     *         could not be parsed, {@code true} otherwise; a failed user lookup only
     *         produces a warning
     */
    private boolean init(String[] args) {
        this.tonyConf.addResource(new Path("tony-final.xml"));
        Utils.initYarnConf(this.yarnConf);
        Utils.initHdfsConf(this.hdfsConf);
        try {
            this.resourceFs = FileSystem.get(this.hdfsConf);
        } catch (IOException ioe) {
            LOG.error("Failed to create FileSystem object", ioe);
            return false;
        }
        this.hbMonitor.init(this.tonyConf);

        final CommandLine parsedArgs;
        try {
            parsedArgs = new GnuParser().parse(Utils.getCommonOptions(), args);
        } catch (ParseException pe) {
            LOG.error("Got exception while parsing options", pe);
            return false;
        }

        // Environment settings for the local shell command and for launched containers.
        this.shellEnv = Utils.parseKeyValue(this.tonyConf.getStrings("tony.execution.envs"));
        this.containerEnv.putAll(Utils.parseKeyValue(this.tonyConf.getStrings("tony.containers.envs")));

        this.appTimeout = this.tonyConf.getInt("tony.application.timeout", 0);
        this.workerTimeout = this.tonyConf.getInt("tony.worker.timeout", 0);
        this.hdfsClasspath = parsedArgs.getOptionValue("hdfs_classpath");
        this.amRetryCount = this.tonyConf.getInt("tony.am.retry-count", 0);
        this.singleNode = Utils.getNumTotalTasks(this.tonyConf) == 0;
        this.secureMode = this.tonyConf.getBoolean("tony.application.security.enabled", true);
        this.enablePreprocessing = this.tonyConf.getBoolean("tony.application.enable-preprocess", false);

        // The container id of this AM comes from the process environment.
        String containerIdValue = System.getenv().get(ApplicationConstants.Environment.CONTAINER_ID.name());
        this.containerId = ContainerId.fromString(containerIdValue);
        this.appIdString = this.containerId.getApplicationAttemptId().getApplicationId().toString();

        this.hbInterval = this.tonyConf.getInt("tony.task.heartbeat-interval-ms", 1000);
        this.maxConsecutiveHBMiss = this.tonyConf.getInt("tony.task.max-missed-heartbeats", 25);
        this.tonyHistoryFolder = this.tonyConf.get("tony.history.location", "/path/to/tony-history");
        try {
            Path historyRoot = new Path(this.tonyHistoryFolder);
            this.historyFs = historyRoot.getFileSystem(this.hdfsConf);
        } catch (IOException ioe) {
            LOG.error("Failed to create history FileSystem object", ioe);
            return false;
        }
        this.eventHandler = new EventHandler(this.historyFs, this.eventQueue);

        try {
            this.user = UserGroupInformation.getCurrentUser().getShortUserName();
        } catch (IOException ioe) {
            // Not fatal: the user name simply stays unset.
            LOG.warn("Failed to fetch users", ioe);
        }
        return true;
    }

    /**
     * Creates the session builder from the TonY configuration, remembers it for
     * later rebuilds, and builds the current session from it.
     */
    private void buildTonySession() {
        String executorJvmArgs = this.tonyConf.get("tony.task.executor.jvm.opts", "-Xmx1536m");
        this.sessionBuilder = new TonySession.Builder()
                .setTonyConf(this.tonyConf)
                .setTaskExecutorJVMArgs(executorJvmArgs);
        this.session = this.sessionBuilder.build();
    }

    /**
     * Process entry point: runs one application master and exits the JVM with
     * status 0 on success or -1 on failure.
     *
     * @param args command-line arguments, forwarded to {@code run}
     * @throws IOException if {@code run} propagates an I/O failure
     */
    public static void main(String[] args) throws IOException {
        ApplicationMaster master = new ApplicationMaster();
        if (!master.run(args)) {
            LOG.info("Application Master failed. Exiting");
            System.exit(-1);
        } else {
            LOG.info("Application Master completed successfully. Exiting");
            System.exit(0);
        }
    }

    /**
     * Runs the application master end to end: initialization, preparation, the
     * start/monitor loop with retries, shutdown, and emission of the final history
     * event and metadata.
     *
     * <p>The loop is retried while the attempt failed, retries remain and the job is
     * not a single-node job.
     *
     * @param args command-line arguments of the application master
     * @return {@code true} if the last monitored attempt succeeded; {@code false} if
     *         initialization, preparation or start-up failed, if the
     *         {@code TEST_AM_CRASH} environment variable is {@code "true"}, or if the
     *         last attempt failed
     * @throws IOException if {@code prepare} propagates an I/O failure
     */
    private boolean run(String[] args) throws IOException {
        long started = System.currentTimeMillis();
        if (!this.init(args)) {
            return false;
        }
        if (!this.prepare()) {
            return false;
        }
        // Remembered so that onTaskDeemedDead() can interrupt the monitor loop.
        this.mainThread = Thread.currentThread();
        JobMetadata.Builder metadataBuilder = new JobMetadata.Builder().setId(this.appIdString).setConf(this.yarnConf).setStarted(started).setUser(this.user);
        JobMetadata metadata = metadataBuilder.build();
        this.eventHandler.setUpThread(this.jobDir, metadata);
        this.eventHandler.start();

        boolean succeeded;
        do {
            // Test hook: simulate an AM crash.
            if ("true".equals(System.getenv("TEST_AM_CRASH"))) {
                LOG.fatal("Error running ApplicationMaster !!");
                return false;
            }
            try {
                this.eventHandler.emitEvent(new Event(EventType.APPLICATION_INITED, new ApplicationInited(this.appIdString, Utils.getNumTotalTasks(this.tonyConf), Utils.getCurrentHostName(), this.containerId.toString()), System.currentTimeMillis()));
                this.start();
            } catch (Exception e) {
                LOG.error("Exception when we're starting TonyAM", e);
                return false;
            }
            succeeded = this.monitor();
            if (succeeded || this.amRetryCount == 0) {
                LOG.info("Result: " + succeeded + ", retry count: " + this.amRetryCount);
                break;
            }
            this.reset();
            // Decrement before logging so that the reported number is the count of
            // retries that are really left after this one.
            --this.amRetryCount;
            LOG.info("Retrying, remaining retry count: " + this.amRetryCount);
        } while (!this.singleNode);

        this.stop();
        long completed = System.currentTimeMillis();
        this.printTaskUrls();
        this.eventHandler.emitEvent(new Event(EventType.APPLICATION_FINISHED, new ApplicationFinished(this.appIdString, this.session.getNumCompletedTasks(), this.session.getNumFailedTasks(), new ArrayList<Metric>()), System.currentTimeMillis()));
        metadata = metadataBuilder.setCompleted(completed).setStatus(succeeded ? "SUCCEEDED" : "FAILED").build();
        this.eventHandler.stop(this.jobDir, metadata);
        return succeeded;
    }

    /**
     * Starts the services the application master depends on and registers it with
     * the resource manager.
     *
     * <p>In order: starts the NM client, creates the application RPC server,
     * reserves a port for the metrics RPC server, starts the RM client and registers
     * the AM, installs tokens when secure mode is on, sets up the history job
     * directory and config file, then starts both RPC servers and the heartbeat
     * monitor.
     *
     * @return {@code false} if registration fails with a {@code SocketException} or
     *         {@code YarnException}, or if writing the history files fails;
     *         {@code true} otherwise
     * @throws IOException for I/O failures that are not caught inside the method
     */
    private boolean prepare() throws IOException {
        String amHostPort;
        RegisterApplicationMasterResponse response;
        String hostNameOrIpFromTokenConf;
        LOG.info((Object)"Preparing application master..");
        NMCallbackHandler containerListener = this.createNMCallbackHandler();
        this.nmClientAsync = new NMClientAsyncImpl((NMClientAsync.CallbackHandler)containerListener);
        this.nmClientAsync.init(this.yarnConf);
        this.nmClientAsync.start();
        String amHostname = Utils.getCurrentHostName();
        // setupRPCService() also records the server's port in amPort.
        this.applicationRpcServer = this.setupRPCService(amHostname);
        // Published in the container environment for launched containers.
        this.containerEnv.put("AM_HOST", amHostname);
        this.containerEnv.put("AM_PORT", Integer.toString(this.amPort));
        // Bind to port 0 to obtain a free port number, then release it immediately;
        // the number is what the metrics RPC server is configured with below.
        // NOTE(review): the port is not held between close() and the later server
        // start, so another process could presumably take it in between - confirm.
        ServerSocket rpcSocket = new ServerSocket(0);
        int metricsRpcPort = rpcSocket.getLocalPort();
        rpcSocket.close();
        this.metricsRpcServer = new MetricsRpcServer();
        RPC.Builder metricsServerBuilder = new RPC.Builder(this.yarnConf).setProtocol(MetricsRpc.class).setInstance((Object)this.metricsRpcServer).setPort(metricsRpcPort);
        this.containerEnv.put("METRICS_RPC_PORT", Integer.toString(metricsRpcPort));
        RMCallbackHandler allocListener = new RMCallbackHandler();
        this.amRMClient = AMRMClientAsync.createAMRMClientAsync((int)1000, (AMRMClientAsync.CallbackHandler)allocListener);
        this.amRMClient.init(this.yarnConf);
        this.amRMClient.start();
        try {
            hostNameOrIpFromTokenConf = Utils.getHostNameOrIpFromTokenConf(this.yarnConf);
            response = this.amRMClient.registerApplicationMaster(amHostname, this.amPort, null);
            amHostPort = hostNameOrIpFromTokenConf + ":" + this.amPort;
        }
        // Only these two exception types turn into a "false" result; any other
        // IOException leaves the method through its throws clause.
        catch (SocketException | YarnException e) {
            LOG.error((Object)"Exception while preparing AM", e);
            return false;
        }
        if (this.secureMode) {
            ApplicationAttemptId appAttemptID = this.containerId.getApplicationAttemptId();
            ClientToAMTokenIdentifier identifier = new ClientToAMTokenIdentifier(appAttemptID, this.user);
            // The secret comes from the registration response.
            byte[] secret = response.getClientToAMTokenMasterKey().array();
            ClientToAMTokenSecretManager secretManager = new ClientToAMTokenSecretManager(appAttemptID, secret);
            // Both RPC servers share the same secret manager.
            this.applicationRpcServer.setSecretManager(secretManager);
            metricsServerBuilder.setSecretManager((SecretManager)secretManager);
            // One token per RPC endpoint, each bound to its host:port as service name.
            Token tensorflowClusterToken = new Token((TokenIdentifier)identifier, (SecretManager)secretManager);
            tensorflowClusterToken.setService(new Text(amHostPort));
            UserGroupInformation.getCurrentUser().addToken(tensorflowClusterToken);
            Token metricsToken = new Token((TokenIdentifier)identifier, (SecretManager)secretManager);
            metricsToken.setService(new Text(hostNameOrIpFromTokenConf + ":" + metricsRpcPort));
            UserGroupInformation.getCurrentUser().addToken(metricsToken);
            // Called after the tokens were added, so they are part of the serialized credentials.
            this.setupContainerCredentials();
        }
        try {
            // setupJobDir() may leave jobDir null; writeConfigFile() then does nothing.
            this.setupJobDir(this.historyFs, this.tonyHistoryFolder, this.appIdString);
            this.writeConfigFile(this.historyFs, this.jobDir);
        }
        catch (IOException e) {
            LOG.error((Object)"Error while setting up history files", (Throwable)e);
            return false;
        }
        LOG.info((Object)("Starting application RPC server at: " + amHostPort));
        this.applicationRpcServer.start();
        LOG.info((Object)("Starting metrics RPC server at: " + amHostname + ":" + metricsRpcPort));
        // The metrics server is built here, after the optional secret manager was set on the builder.
        RPC.Server metricsServer = metricsServerBuilder.build();
        if (this.yarnConf.getBoolean("hadoop.security.authorization", false)) {
            metricsServer.refreshServiceAclWithLoadedConfiguration(this.yarnConf, (PolicyProvider)new TonyPolicyProvider());
        }
        metricsServer.start();
        this.hbMonitor.start();
        return true;
    }

    /**
     * Resolves {@code <histFolder>/intermediate/<appId>} as this job's directory and
     * creates it if needed.
     *
     * <p>If the {@code intermediate} directory does not exist, or its existence cannot
     * be checked, an error is logged and {@code jobDir} is left unchanged.
     *
     * @param fs         file system holding the history folder
     * @param histFolder root of the history folder
     * @param appId      application id, used as directory name
     */
    private void setupJobDir(FileSystem fs, String histFolder, String appId) {
        Path intermediateDir = new Path(histFolder, "intermediate");
        boolean intermediateExists;
        try {
            intermediateExists = fs.exists(intermediateDir);
        } catch (IOException ioe) {
            LOG.error("Failed to check intermediate directory existence", ioe);
            return;
        }
        if (!intermediateExists) {
            LOG.error("Intermediate directory doesn't exist [" + intermediateDir.toString() + "]");
            return;
        }
        this.jobDir = new Path(intermediateDir, appId);
        Utils.createDirIfNotExists(fs, this.jobDir, Constants.PERM770);
    }

    /**
     * Writes the TonY configuration as {@code tony-final.xml} into the job directory.
     * Does nothing when no job directory is available.
     *
     * @param fs     file system to write to
     * @param jobDir target directory, may be {@code null}
     * @throws IOException wrapping the original failure if the file cannot be written
     */
    private void writeConfigFile(FileSystem fs, Path jobDir) throws IOException {
        if (jobDir != null) {
            Path target = new Path(jobDir, "tony-final.xml");
            try (FSDataOutputStream xmlOut = fs.create(target)) {
                this.tonyConf.writeXml(xmlOut);
            } catch (IOException cause) {
                throw new IOException("Failed to write config to XML", cause);
            }
        }
    }

    /**
     * Starts one attempt of the job.
     *
     * <p>Runs the local preprocessing/single-node command when preprocessing is
     * enabled or the job is single-node. A single-node job ends here with its final
     * status set from the exit code. Otherwise, provided the command succeeded (or
     * was not run), a new session is built and its tasks are scheduled.
     *
     * @throws Exception if the local command or the scheduling fails
     */
    private void start() throws Exception {
        boolean runLocalCommand = this.enablePreprocessing || this.singleNode;
        int exitCode = runLocalCommand ? this.doPreprocessingJob() : 0;

        if (this.singleNode) {
            LOG.info("Single node job exits with " + exitCode + ", exiting.");
            if (exitCode == 0) {
                this.session.setFinalStatus(FinalApplicationStatus.SUCCEEDED, "Single node job succeeded.");
            } else {
                this.session.setFinalStatus(FinalApplicationStatus.FAILED, "Single node training failed..");
            }
            return;
        }

        if (exitCode == 0) {
            this.buildTonySession();
            this.session.setResources(this.yarnConf, this.hdfsConf, this.localResources, this.containerEnv, this.hdfsClasspath);
            this.scheduler = new TaskScheduler(this.session, this.amRMClient, this.localResources, this.resourceFs, this.tonyConf, this.jobTypeToContainerResources);
            this.scheduler.scheduleTasks();
        }
    }

    /**
     * Prepares a retry: asks the node managers to stop every container of the
     * current session, clears the untracked-failure flag, builds a fresh session,
     * resets the application RPC server and increments the new session's id.
     *
     * <p>A session for which no containers were recorded is handled gracefully;
     * there is simply nothing to stop.
     */
    private void reset() {
        List<Container> containers = this.sessionContainersMap.get(this.session.sessionId);
        // The map has no entry when the failed attempt never got a container; the
        // same lookup is null-checked in stopRunningContainers().
        if (containers != null) {
            for (Container container : containers) {
                this.nmClientAsync.stopContainerAsync(container.getId(), container.getNodeId());
                LOG.info("Stop a task in container: containerId = " + container.getId() + ", containerNode = " + container.getNodeId().getHost());
            }
        }
        this.untrackedTaskFailed = false;
        this.session = this.sessionBuilder.build();
        this.applicationRpcServer.reset();
        ++this.session.sessionId;
    }

    /**
     * Blocks until the current attempt is over, polling every five seconds, and
     * reports whether the session ended successfully.
     *
     * <p>The loop ends on the first of: application timeout, stop signal from the
     * client, training finished, non-zero preprocessing exit code, finished
     * single-node job, missed heartbeats, failed untracked task, failed dependency
     * check, or all tracked tasks completed. The checks run in exactly that order.
     *
     * @return {@code true} if the session's final status is {@code SUCCEEDED}
     */
    private boolean monitor() {
        // NOTE(review): attempt is never changed, so ATTEMPT_NUMBER is always "0" here.
        int attempt = 0;
        this.containerEnv.put("ATTEMPT_NUMBER", String.valueOf(attempt));
        // An appTimeout of 0 means "no deadline".
        long expireTime = this.appTimeout == 0L ? Long.MAX_VALUE : System.currentTimeMillis() + this.appTimeout;
        int counter = 0;
        while (true) {
            ++counter;
            if (System.currentTimeMillis() > expireTime) {
                LOG.error((Object)"Application times out.");
                break;
            }
            if (this.clientSignalToStop) {
                LOG.info((Object)"Client signals AM to exit.");
                break;
            }
            if (this.session.isTrainingFinished()) {
                LOG.info((Object)"Training has finished.");
                break;
            }
            if (this.preprocessExitCode != 0) {
                LOG.error((Object)("Preprocess failed with exit code: " + this.preprocessExitCode));
                break;
            }
            if (this.singleNode && this.preprocessFinished) {
                LOG.info((Object)("Single node training finished with exit code: " + this.preprocessExitCode));
                break;
            }
            if (this.taskHasMissesHB) {
                LOG.error((Object)"Application failed due to missed heartbeats");
                break;
            }
            if (this.untrackedTaskFailed) {
                LOG.error((Object)"One of the untracked tasks has failed with a non-zero exit code.");
                break;
            }
            // scheduler is only assigned in start() on the multi-node path; the
            // single-node and failed-preprocessing cases leave the loop in the checks above.
            if (!this.scheduler.dependencyCheckPassed) {
                LOG.info((Object)"Terminating application due to failure to load dependency graph");
                break;
            }
            int numTotalTrackedTasks = this.session.getTotalTrackedTasks();
            if (numTotalTrackedTasks > 0) {
                int numCompletedTrackedTasks = this.session.getNumCompletedTrackedTasks();
                if (numCompletedTrackedTasks == numTotalTrackedTasks) {
                    Utils.printCompletedTrackedTasks(numCompletedTrackedTasks, numTotalTrackedTasks);
                    break;
                }
                // Progress is printed on the 1st, 21st, 41st, ... iteration only.
                if (counter % 20 == 1) {
                    Utils.printCompletedTrackedTasks(numCompletedTrackedTasks, numTotalTrackedTasks);
                }
            }
            try {
                Thread.sleep(5000L);
            }
            catch (InterruptedException e) {
                // onTaskDeemedDead() interrupts this thread after setting taskHasMissesHB;
                // the interrupt is only logged here and the loop re-evaluates its conditions.
                LOG.error((Object)"Thread interrupted", (Throwable)e);
            }
        }
        this.session.updateSessionStatus();
        FinalApplicationStatus status = this.session.getFinalStatus();
        String appMessage = this.session.getFinalMessage();
        if (status != FinalApplicationStatus.SUCCEEDED) {
            LOG.info((Object)("Tony session failed: " + appMessage));
        }
        return status == FinalApplicationStatus.SUCCEEDED;
    }

    /**
     * Collects the tasks of the current session that have no host yet.
     *
     * @return the non-null tasks whose {@code getHost()} is {@code null}
     */
    private Set<TonySession.TonyTask> getUnregisteredTasks() {
        Set<TonySession.TonyTask> withoutHost = new HashSet<>();
        for (TonySession.TonyTask[] tasksOfJob : this.session.getTonyTasks().values()) {
            for (TonySession.TonyTask candidate : tasksOfJob) {
                if (candidate != null && candidate.getHost() == null) {
                    withoutHost.add(candidate);
                }
            }
        }
        return withoutHost;
    }

    /**
     * Shuts the application master down: stops running containers, unregisters from
     * the resource manager with the session's final status and message, stops both
     * YARN clients, and then polls for the client's stop signal.
     *
     * <p>A failed unregistration is logged and does not abort the shutdown.
     */
    private void stop() {
        this.stopRunningContainers();

        FinalApplicationStatus finalStatus = this.session.getFinalStatus();
        String finalMessage = this.session.getFinalMessage();
        try {
            this.amRMClient.unregisterApplicationMaster(finalStatus, finalMessage, null);
        } catch (IOException | YarnException e) {
            LOG.error("Failed to unregister application", e);
        }

        this.nmClientAsync.stop();
        this.amRMClient.stop();

        boolean clientSignalled = Utils.poll(() -> this.clientSignalToStop, 1, 15);
        if (clientSignalled) {
            return;
        }
        LOG.warn("TonyClient didn't signal Tony AM to stop.");
    }

    /**
     * Asks the node managers to stop every container of the current session whose
     * task is known and not completed, then polls until all tasks of the session are
     * completed. A warning is logged if the poll does not succeed.
     */
    private void stopRunningContainers() {
        List<Container> sessionContainers = this.sessionContainersMap.get(this.session.sessionId);
        if (sessionContainers != null) {
            for (Container running : sessionContainers) {
                TonySession.TonyTask owner = this.session.getTask(running.getId());
                if (owner != null && !owner.isCompleted()) {
                    this.nmClientAsync.stopContainerAsync(running.getId(), running.getNodeId());
                }
            }
        }

        boolean allCompleted = Utils.poll(() -> this.session.getNumCompletedTasks() == this.session.getTotalTasks(), 1, 15);
        if (allCompleted) {
            return;
        }
        LOG.warn("Not all containers were stopped or completed. Only " + this.session.getNumCompletedTasks() + " out of " + this.session.getTotalTasks() + " finished.");
    }

    /**
     * Runs the AM-side shell command (the preprocessing job, or the whole job in
     * single-node mode) and records its outcome.
     *
     * <p>In single-node mode a free port is picked for TensorBoard, exported as
     * {@code TB_PORT} and registered as tracking URL before the command runs. After
     * a successful run the AM's stdout log is scanned for the first line containing
     * {@code "Model parameters: "}; the text after that marker is exported to
     * containers as {@code MODEL_PARAMS}.
     *
     * @return the exit code of the shell command
     * @throws Exception if resource extraction, URL registration, command execution
     *                   or reading the log file fails
     */
    private int doPreprocessingJob() throws Exception {
        Utils.extractResources();
        Map<String, String> extraEnv = new HashMap<String, String>(this.shellEnv);
        if (this.singleNode) {
            // try-with-resources releases the probe socket even if registering the
            // URL throws; the socket is only used to discover a free port number.
            try (ServerSocket tbSocket = new ServerSocket(0)) {
                int tbPort = tbSocket.getLocalPort();
                extraEnv.put("TB_PORT", String.valueOf(tbPort));
                String tbUrl = Utils.getCurrentHostName() + ":" + tbPort;
                this.proxyUrl = Utils.constructUrl(tbUrl);
                LOG.info("Registering TensorBoard url for single node training: " + tbUrl);
                this.registerTensorBoardUrlToRM(tbUrl);
            }
        }
        LOG.info("Start python preprocessing");
        extraEnv.put("PREPROCESSING_JOB", "true");
        extraEnv.put("HOME", System.getProperty("user.dir"));
        // The AM-specific command wins; the generic container command is the fallback.
        String taskCommand = this.tonyConf.get(TonyConfigurationKeys.getExecuteCommandKey("am"), this.tonyConf.get(TonyConfigurationKeys.getContainerExecuteCommandKey()));
        LOG.info("Executing command: " + taskCommand);
        int exitCode = Utils.executeShell(taskCommand, this.workerTimeout, extraEnv);
        this.preprocessExitCode = exitCode;
        this.preprocessFinished = true;
        if (exitCode != 0) {
            LOG.error("Preprocess job exits with " + exitCode + ", exiting.");
            this.session.setFinalStatus(FinalApplicationStatus.FAILED, "Preprocessing job failed.");
            return exitCode;
        }
        String stdoutLog = System.getProperty("yarn.app.container.log.dir") + File.separatorChar + "amstdout.log";
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(stdoutLog), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.contains("Model parameters: ")) {
                    continue;
                }
                // Only the first matching line is used.
                String params = line.substring(line.indexOf("Model parameters: ") + "Model parameters: ".length());
                this.containerEnv.put("MODEL_PARAMS", params);
                break;
            }
        }
        return exitCode;
    }

    /**
     * Prints the URL of every non-null task of the current session through
     * {@code Utils.printTaskUrl}. Does nothing when there is no session.
     */
    private void printTaskUrls() {
        if (this.session == null) {
            return;
        }
        for (TonySession.TonyTask[] tasksOfJob : this.session.getTonyTasks().values()) {
            for (TonySession.TonyTask current : tasksOfJob) {
                if (current != null) {
                    Utils.printTaskUrl(current.getTaskInfo(), LOG);
                }
            }
        }
    }

    /**
     * Creates the application RPC server for the given host and stores the port it
     * reports in {@code amPort}.
     *
     * @param hostname host name the server is created for
     * @return the created (not yet started) RPC server
     * @throws IOException if the server cannot be created
     */
    private ApplicationRpcServer setupRPCService(String hostname) throws IOException {
        RpcForClient clientFacingRpc = new RpcForClient();
        ApplicationRpcServer server = new ApplicationRpcServer(hostname, clientFacingRpc, this.yarnConf);
        this.amPort = server.getRpcPort();
        return server;
    }

    /**
     * Registers the given TensorBoard address as the application's tracking URL.
     *
     * <p>{@code updateTrackingUrl} is looked up reflectively on
     * {@code AMRMClientAsync}; when the method does not exist, a warning is logged
     * and the call still counts as successful.
     *
     * @param spec TensorBoard address to register
     * @return {@code "FAILED"} if {@code spec} or the application id is {@code null},
     *         {@code "SUCCEEDED"} otherwise
     * @throws Exception if the reflective invocation fails for a reason other than
     *                   the method being absent
     */
    private String registerTensorBoardUrlToRM(String spec) throws Exception {
        if (spec == null || this.appIdString == null) {
            return "FAILED";
        }
        try {
            Method updateTrackingUrl = AMRMClientAsync.class.getMethod("updateTrackingUrl", String.class);
            updateTrackingUrl.invoke(this.amRMClient, spec);
        } catch (NoSuchMethodException nsme) {
            // Deliberately best-effort: older Hadoop versions lack the method.
            LOG.warn("This Hadoop version doesn't have the YARN-7974 patch, TonY won't register TensorBoard URL with application's tracking URL");
        }
        return "SUCCEEDED";
    }

    /**
     * Serializes the current user's credentials into {@code allTokens} and adds the
     * same credentials to a remote-user UGI created for the name found in the
     * {@code USER} environment variable.
     *
     * @throws IOException if the current user or its credentials cannot be read or
     *                     written
     */
    private void setupContainerCredentials() throws IOException {
        UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
        Credentials creds = currentUser.getCredentials();

        DataOutputBuffer tokenBuffer = new DataOutputBuffer();
        creds.writeTokenStorageToStream(tokenBuffer);
        // Wrap only the bytes that were actually written.
        this.allTokens = ByteBuffer.wrap(tokenBuffer.getData(), 0, tokenBuffer.getLength());

        String submitter = System.getenv(ApplicationConstants.Environment.USER.name());
        UserGroupInformation.createRemoteUser(submitter).addCredentials(creds);
    }

    /**
     * Creates the callback handler that is passed to the NM client in {@code prepare}.
     *
     * @return a new handler instance
     */
    private NMCallbackHandler createNMCallbackHandler() {
        NMCallbackHandler handler = new NMCallbackHandler();
        return handler;
    }

    /**
     * Called when the heartbeat monitor expires a task: logs the failure, raises the
     * missed-heartbeat flag, marks the session as failed and interrupts the main
     * thread.
     *
     * @param task the task that stopped sending heartbeats
     */
    private void onTaskDeemedDead(TonySession.TonyTask task) {
        StringBuilder text = new StringBuilder();
        text.append("Task with id [").append(task.getId());
        text.append("] has missed [").append(this.maxConsecutiveHBMiss);
        text.append("] heartbeats. Ending application!");
        String failure = text.toString();

        LOG.error(failure);
        this.taskHasMissesHB = true;
        this.session.setFinalStatus(FinalApplicationStatus.FAILED, failure);
        this.mainThread.interrupt();
    }

    /**
     * Handles a finished container: updates the session and scheduler, emits a
     * {@code TASK_FINISHED} event, and flags the failure of an untracked task.
     *
     * <p>Containers without a known task only produce a warning; containers whose
     * task belongs to another session are ignored.
     *
     * @param containerId       id of the finished container
     * @param exitStatus        exit status of the container
     * @param diagnosticMessage diagnostics, attached to the event only when the exit
     *                          status is non-zero (otherwise {@code "NA"} is used)
     */
    private void processFinishedContainer(ContainerId containerId, int exitStatus, String diagnosticMessage) {
        TonySession.TonyTask finished = this.session.getTask(containerId);
        if (finished == null) {
            LOG.warn("No task found for container : [" + containerId + "]!");
            return;
        }
        if (finished.getSessionId() != this.session.sessionId) {
            return;
        }

        LOG.info("Container " + containerId + " for task " + finished + " finished with exitStatus " + exitStatus + ".");
        this.session.onTaskCompleted(finished.getJobName(), finished.getTaskIndex(), exitStatus);
        this.scheduler.registerDependencyCompleted(finished.getJobName());

        // Event fields are read after the session was updated, in the original order.
        String jobName = finished.getJobName();
        int taskIndex = Integer.parseInt(finished.getTaskIndex());
        String statusText = finished.getTaskInfo().getStatus().toString();
        List<Metric> metrics = this.metricsRpcServer.getMetrics(finished.getJobName(), Integer.parseInt(finished.getTaskIndex()));
        String diagnostics = exitStatus != 0 ? diagnosticMessage : "NA";
        TaskFinished payload = new TaskFinished(jobName, taskIndex, statusText, metrics, diagnostics);
        this.eventHandler.emitEvent(new Event(EventType.TASK_FINISHED, (Object)payload, System.currentTimeMillis()));

        boolean tracked = Utils.isJobTypeTracked(finished.getJobName(), this.tonyConf);
        if (!tracked && finished.isFailed()) {
            this.untrackedTaskFailed = true;
        }
    }

    /**
     * Test hook: when the {@code TEST_WORKER_TERMINATION} environment variable is set
     * and the given task is {@code worker:0}, asks the node managers to stop every
     * container of the current session whose task belongs to the {@code worker} job.
     *
     * <p>Sessions without recorded containers and containers without a known task
     * are skipped.
     *
     * @param taskId id of the task that triggered the check
     */
    private void killChiefWorkerIfTesting(String taskId) {
        if (System.getenv("TEST_WORKER_TERMINATION") == null || !taskId.equals("worker:0")) {
            return;
        }
        List<Container> containers = this.sessionContainersMap.get(this.session.sessionId);
        if (containers == null) {
            // No containers recorded for this session; same guard as stopRunningContainers().
            return;
        }
        for (Container container : containers) {
            TonySession.TonyTask task = this.session.getTask(container.getId());
            // getTask() can return null (see processFinishedContainer()).
            if (task == null || !task.getJobName().equals("worker")) {
                continue;
            }
            LOG.warn("Simulating worker termination for taskId: " + taskId);
            this.nmClientAsync.stopContainerAsync(container.getId(), container.getNodeId());
        }
    }

    /**
     * Launches one allocated container: binds it to the task selected for the container's
     * priority, assembles the launch context (local resources, environment, command, ACLs and
     * tokens) and asks the asynchronous NM client to start it.
     */
    private class ContainerLauncher
    implements Runnable {
        Container container;

        ContainerLauncher(Container container) {
            this.container = container;
        }

        /**
         * Performs the launch sequence for {@link #container}. Throws a
         * {@link NullPointerException} (via {@code Preconditions.checkNotNull}) when the session
         * has no task matching the container's priority.
         */
        @Override
        public void run() {
            final ApplicationMaster am = ApplicationMaster.this;

            // Attach the container to the task chosen for its priority and mark the task READY.
            TonySession.TonyTask task = am.session.getAndInitMatchingTaskByPriority(this.container.getPriority().getPriority());
            Preconditions.checkNotNull(task, "Task was null! Nothing to schedule.");
            task.setTaskInfo(this.container);
            TaskInfo info = task.getTaskInfo();
            info.setStatus(TaskStatus.READY);
            Map localResources = (Map)am.jobTypeToContainerResources.get(task.getJobName());
            task.addContainer(this.container);
            LOG.info("Setting Container [" + this.container.getId() + "] for task [" + task.getId() + "]..");

            // Environment and command line; stdout/stderr are redirected under <LOG_DIR>.
            ConcurrentHashMap<String, String> launchEnv = this.buildLaunchEnv(task);
            ImmutableList<String> commands = ImmutableList.of(String.join(" ", am.session.getTaskCommand(), "1><LOG_DIR>/stdout", "2><LOG_DIR>/stderr"));
            LOG.info("Constructed command: " + commands);
            LOG.info("Container environment: " + launchEnv);

            Map<ApplicationAccessType, String> acls = this.buildAcls();
            // Tokens are only attached when the application master runs in secure mode.
            ByteBuffer tokens = am.secureMode ? am.allTokens.duplicate() : null;
            ContainerLaunchContext launchContext = ContainerLaunchContext.newInstance(localResources, launchEnv, commands, null, tokens, acls);

            // Record the container under the current session before requesting the start.
            am.sessionContainersMap.computeIfAbsent(am.session.sessionId, key -> Collections.synchronizedList(new ArrayList<Container>())).add(this.container);
            Utils.printTaskUrl(task.getTaskInfo(), LOG);
            am.nmClientAsync.startContainerAsync(this.container, launchContext);
            info.setStatus(TaskStatus.RUNNING);

            // Publish a TASK_STARTED event; the host is the part of the node HTTP address before ':'.
            String startedJobName = task.getJobName();
            int startedIndex = Integer.parseInt(task.getTaskIndex());
            String host = this.container.getNodeHttpAddress().split(":")[0];
            String containerIdText = this.container.getId().toString();
            TaskStarted started = new TaskStarted(startedJobName, startedIndex, host, containerIdText);
            am.eventHandler.emitEvent(new Event(EventType.TASK_STARTED, started, System.currentTimeMillis()));
        }

        /**
         * Builds the environment for the container: a copy of the shared container environment,
         * overlaid with the Docker-related entries for the task's job type and the per-task
         * variables (JOB_NAME, JOB_ID, TASK_INDEX, TASK_NUM, optional IS_CHIEF, SESSION_ID).
         */
        private ConcurrentHashMap<String, String> buildLaunchEnv(TonySession.TonyTask task) {
            final ApplicationMaster am = ApplicationMaster.this;
            ConcurrentHashMap<String, String> env = new ConcurrentHashMap<String, String>(am.containerEnv);
            String jobName = task.getJobName();
            String taskIndex = task.getTaskIndex();
            env.putAll(Utils.getContainerEnvForDocker(am.tonyConf, jobName));
            env.put("JOB_NAME", jobName);
            env.put("JOB_ID", am.appIdString);
            env.put("TASK_INDEX", taskIndex);
            env.put("TASK_NUM", String.valueOf(am.session.getTotalTrackedTasks()));
            if (am.session.isChief(jobName, taskIndex)) {
                env.put("IS_CHIEF", Boolean.TRUE.toString());
            }
            env.put("SESSION_ID", String.valueOf(am.session.sessionId));
            return env;
        }

        /** Builds the application ACLs: VIEW_APP is "*", MODIFY_APP is a single space. */
        private Map<ApplicationAccessType, String> buildAcls() {
            Map<ApplicationAccessType, String> acls = new HashMap<ApplicationAccessType, String>(2);
            acls.put(ApplicationAccessType.VIEW_APP, "*");
            acls.put(ApplicationAccessType.MODIFY_APP, " ");
            return acls;
        }
    }

    private class RMCallbackHandler
    implements AMRMClientAsync.CallbackHandler {
        private RMCallbackHandler() {
        }

        public void onContainersCompleted(List<ContainerStatus> completedContainers) {
            LOG.info((Object)("Completed containers: " + completedContainers.size()));
            this.sleepForTesting();
            for (ContainerStatus containerStatus : completedContainers) {
                int exitStatus = containerStatus.getExitStatus();
                LOG.info((Object)("ContainerID = " + containerStatus.getContainerId() + ", state = " + containerStatus.getState() + ", exitStatus = " + exitStatus));
                String diagnostics = containerStatus.getDiagnostics();
                String errorInformation = null;
                if (0 != exitStatus) {
                    errorInformation = diagnostics;
                    LOG.error((Object)diagnostics);
                } else {
                    LOG.info((Object)diagnostics);
                }
                ApplicationMaster.this.processFinishedContainer(containerStatus.getContainerId(), exitStatus, errorInformation);
            }
        }

        private void sleepForTesting() {
            if (System.getenv("TEST_TASK_COMPLETION_NOTIFICATION_DELAYED") != null) {
                LOG.info((Object)"Sleeping for 1 second to simulate task completion notification delay");
                try {
                    Thread.sleep(1000L);
                }
                catch (InterruptedException e) {
                    LOG.error((Object)"Interrupted while sleeping", (Throwable)e);
                }
            }
        }

        public void onContainersAllocated(List<Container> containers) {
            LOG.info((Object)("Allocated: " + containers.size() + " containers."));
            for (Container container : containers) {
                LOG.info((Object)("Launching a task in container, containerId = " + container.getId() + ", containerNode = " + container.getNodeId().getHost() + ":" + container.getNodeId().getPort() + ", resourceRequest = " + container.getResource() + ", priority = " + container.getPriority()));
                ApplicationMaster.this.containersLauncherThreadPool.execute(new ContainerLauncher(container));
            }
        }

        public void onShutdownRequest() {
            LOG.info((Object)"onShutdownRequest called in RMCallbackHandler");
        }

        public void onNodesUpdated(List<NodeReport> list) {
            LOG.info((Object)"onNodesUpdated called in RMCallbackHandler");
        }

        public float getProgress() {
            int numTotalTrackedTasks = ApplicationMaster.this.session.getTotalTrackedTasks();
            return numTotalTrackedTasks > 0 ? (float)ApplicationMaster.this.session.getNumCompletedTrackedTasks() / (float)numTotalTrackedTasks : 0.0f;
        }

        public void onError(Throwable throwable) {
            LOG.error((Object)"Received error in AM to RM call", throwable);
            ApplicationMaster.this.stop();
        }
    }

    class NMCallbackHandler
    implements NMClientAsync.CallbackHandler {
        NMCallbackHandler() {
        }

        public void onContainerStopped(ContainerId containerId) {
            ApplicationMaster.this.processFinishedContainer(containerId, -105, "KILLED_BY_APPMASTER");
        }

        public void onContainerStatusReceived(ContainerId containerId, ContainerStatus containerStatus) {
            LOG.info((Object)("Container Status: id =" + containerId + ", status =" + containerStatus));
        }

        public void onContainerStarted(ContainerId containerId, Map<String, ByteBuffer> allServiceResponse) {
            LOG.info((Object)("Successfully started container " + containerId));
        }

        public void onStartContainerError(ContainerId containerId, Throwable t) {
            LOG.error((Object)("Failed to start container " + containerId), t);
        }

        public void onGetContainerStatusError(ContainerId containerId, Throwable t) {
            LOG.error((Object)("Failed to query the status of container " + containerId), t);
        }

        public void onStopContainerError(ContainerId containerId, Throwable t) {
            LOG.error((Object)("Failed to stop container " + containerId), t);
        }
    }

    /**
     * {@link ApplicationRpc} implementation backed by the enclosing application master. It
     * tracks which tasks have registered their spec, returns the cluster spec once all expected
     * tasks have registered, and relays heartbeat, result, TensorBoard and finish requests.
     */
    private final class RpcForClient
    implements ApplicationRpc {
        // Minimum time between two "awaiting registration" status reports, in milliseconds.
        private static final long REGISTRATION_STATUS_INTERVAL_MS = 15000L;
        // Read from "tony.container.allocation.timeout" (default 900000); values <= 0 disable the check.
        private long registrationTimeoutMs;
        // Ids of the tasks that have registered their spec since construction or the last reset().
        private Set<String> registeredTasks;
        // Time of construction or of the last status report, from System.currentTimeMillis().
        private long lastRegisterWorkerTime;

        private RpcForClient() {
            this.registrationTimeoutMs = ApplicationMaster.this.tonyConf.getInt("tony.container.allocation.timeout", 900000);
            this.registeredTasks = new HashSet<String>();
            this.lastRegisterWorkerTime = System.currentTimeMillis();
        }

        /** Forgets all task registrations received so far. */
        @Override
        public void reset() {
            this.registeredTasks = new HashSet<String>();
        }

        /**
         * Returns the task infos known to the application master.
         *
         * @return in single-node mode with a proxy URL, a "driver" and a "notebook" entry;
         *         otherwise, when not in single-node mode and the session has scheduled all
         *         tasks, the task info of every task in the session; otherwise an empty set
         */
        @Override
        public Set<TaskInfo> getTaskInfos() {
            if (ApplicationMaster.this.singleNode && ApplicationMaster.this.proxyUrl != null) {
                HashSet<TaskInfo> additionalTasks = new HashSet<TaskInfo>();
                additionalTasks.add(new TaskInfo("driver", "0", Utils.constructContainerUrl(Utils.getCurrentHostName() + ":" + System.getenv(ApplicationConstants.Environment.NM_HTTP_PORT.name()), ApplicationMaster.this.containerId)));
                additionalTasks.add(new TaskInfo("notebook", "0", ApplicationMaster.this.proxyUrl));
                return additionalTasks;
            }
            if (!ApplicationMaster.this.singleNode && ApplicationMaster.this.session != null && ApplicationMaster.this.session.allTasksScheduled()) {
                return ApplicationMaster.this.session.getTonyTasks().values().stream().flatMap(tasks -> Arrays.stream(tasks).map(TonySession.TonyTask::getTaskInfo)).collect(Collectors.toSet());
            }
            return Collections.emptySet();
        }

        /**
         * @return the session's cluster spec serialized as JSON
         * @throws IOException if the cluster spec cannot be serialized
         */
        @Override
        public String getClusterSpec() throws IOException {
            ObjectMapper objectMapper = new ObjectMapper();
            return objectMapper.writeValueAsString(ApplicationMaster.this.session.getClusterSpec());
        }

        /**
         * Forwards a heartbeat ping to the heartbeat monitor; logs a warning when the session
         * has no task with the given id.
         *
         * @param taskId id of the task sending the heartbeat
         */
        @Override
        public void taskExecutorHeartbeat(String taskId) {
            TonySession.TonyTask task = ApplicationMaster.this.session.getTask(taskId);
            if (task != null) {
                LOG.debug("[" + taskId + "] Received HB Ping !!");
                ApplicationMaster.this.hbMonitor.receivedPing(task);
            } else {
                LOG.warn("[" + taskId + "] Not registered for heartbeat monitoring !!");
            }
        }

        /**
         * Registers the spec of a task and reports whether all expected tasks have registered.
         *
         * <p>The first registration of a task (its host is still unset) stores the spec, adds
         * the task to the registered set and registers it with the heartbeat monitor. At most
         * once per {@code REGISTRATION_STATUS_INTERVAL_MS} the still-unregistered tasks are
         * logged; a task that has exceeded the registration timeout fails the application.
         *
         * @param taskId id of the registering task
         * @param spec   spec reported by the task
         * @return the cluster spec as JSON once all expected tasks have registered, otherwise
         *         {@code null}
         * @throws IOException if the cluster spec cannot be serialized
         * @throws IllegalArgumentException if the session has no task with the given id
         */
        @Override
        public String registerWorkerSpec(String taskId, String spec) throws IOException {
            TonySession.TonyTask task = ApplicationMaster.this.session.getTask(taskId);
            if (task == null) {
                // Fail with a descriptive error instead of an anonymous NullPointerException.
                String errorMsg = "[" + taskId + "] Cannot register worker spec: no such task in the current session";
                LOG.error(errorMsg);
                throw new IllegalArgumentException(errorMsg);
            }
            if (task.getHost() == null) {
                LOG.info("Received cluster spec registration request from task " + taskId + " with spec: " + spec);
                task.setHostPort(spec);
                this.registeredTasks.add(taskId);
                LOG.info("[" + taskId + "] Received Registration for HB !!");
                ApplicationMaster.this.hbMonitor.register(task);
                ApplicationMaster.this.killChiefWorkerIfTesting(taskId);
            }
            int numExpectedTasks = ApplicationMaster.this.session.getNumExpectedTasks();
            if (this.registeredTasks.size() == numExpectedTasks) {
                LOG.info("All " + numExpectedTasks + " expected tasks registered.");
                return this.getClusterSpec();
            }
            if (System.currentTimeMillis() - this.lastRegisterWorkerTime > REGISTRATION_STATUS_INTERVAL_MS) {
                this.reportUnregisteredTasks(numExpectedTasks);
                this.lastRegisterWorkerTime = System.currentTimeMillis();
            }
            return null;
        }

        /**
         * Logs the registration progress and every task that has not registered yet. A task
         * whose time since start exceeds the (positive) registration timeout marks the session
         * as FAILED and stops the application master.
         */
        private void reportUnregisteredTasks(int numExpectedTasks) {
            Set<TonySession.TonyTask> unregisteredTasks = ApplicationMaster.this.getUnregisteredTasks();
            LOG.info(String.format("Received registrations from %d tasks, awaiting registration from %d tasks.", this.registeredTasks.size(), numExpectedTasks - this.registeredTasks.size()));
            unregisteredTasks.forEach(t -> {
                if (this.registrationTimeoutMs > 0L && System.currentTimeMillis() - t.getStartTime() > this.registrationTimeoutMs) {
                    String errorMsg = String.format("Stopping AM for task [%s:%s] registration timeout: allocated container is %s on host %s", t.getJobName(), t.getTaskIndex(), this.containerIdOrNone(t), this.containerHostOrNone(t));
                    LOG.error(errorMsg);
                    ApplicationMaster.this.session.setFinalStatus(FinalApplicationStatus.FAILED, errorMsg);
                    ApplicationMaster.this.stop();
                } else {
                    LOG.info(String.format("Awaiting registration from task %s %s in %s on host %s", t.getJobName(), t.getTaskIndex(), this.containerIdOrNone(t), this.containerHostOrNone(t)));
                }
            });
        }

        /** Returns the id of the task's container as text, or "none" when no container is set. */
        private String containerIdOrNone(TonySession.TonyTask task) {
            return task.getContainer() != null ? task.getContainer().getId().toString() : "none";
        }

        /** Returns the host of the task's container, or "none" when no container is set. */
        private String containerHostOrNone(TonySession.TonyTask task) {
            return task.getContainer() != null ? task.getContainer().getNodeId().getHost() : "none";
        }

        /**
         * Handles a task's result registration by unregistering the task from the heartbeat
         * monitor; the exit code is only logged and {@code sessionId} is not used.
         *
         * @param exitCode  exit code reported by the task
         * @param jobName   job name of the task
         * @param jobIndex  index of the task within its job
         * @param sessionId session id reported by the task
         * @return always {@code "RECEIVED"}
         */
        @Override
        public String registerExecutionResult(int exitCode, String jobName, String jobIndex, String sessionId) {
            LOG.info("Received result registration request with exit code " + exitCode + " from " + jobName + " " + jobIndex);
            TonySession.TonyTask task = ApplicationMaster.this.session.getTask(jobName + ":" + jobIndex);
            if (task != null) {
                LOG.info("Unregistering task [" + task.getId() + "] from Heartbeat monitor..");
                ApplicationMaster.this.hbMonitor.unregister(task);
            } else {
                LOG.warn("Task " + jobName + " " + jobIndex + " was null!");
            }
            return "RECEIVED";
        }

        /**
         * Logs the request and delegates to {@code registerTensorBoardUrlToRM}.
         *
         * @param spec the TensorBoard URL spec to register
         * @return the value returned by {@code registerTensorBoardUrlToRM}
         * @throws Exception if the delegate fails
         */
        @Override
        public String registerTensorBoardUrl(String spec) throws Exception {
            LOG.info("Got request to update TensorBoard URL: " + spec);
            return ApplicationMaster.this.registerTensorBoardUrlToRM(spec);
        }

        /** Records the client's request to finish by setting {@code clientSignalToStop}. */
        @Override
        public void finishApplication() {
            LOG.info("Client signals AM to finish application.");
            ApplicationMaster.this.clientSignalToStop = true;
        }
    }
}

