/*
 * Metric definitions, written as a list of ClassAds: one bracketed ad per
 * metric.
 *
 * Attributes used in this file:
 *   Name / Regex  - metric name, or a pattern matching attribute names
 *                   (capture groups are referenced as \\1 in Title/Desc).
 *   Value         - expression to publish when it differs from Name.
 *   Title, Desc, Units, Group - presentation strings.
 *   Scale, Type, Derivative, Aggregate, Verbosity - numeric handling hints.
 *   TargetType    - comma-separated list of ad types the metric applies to.
 *
 * NOTE(review): the consumer of these ads is not part of this file. Scale
 * looks like a multiplier applied to the raw value (0.000277778 is 1/3600,
 * used with Units = "hours"; 1024 and 1048576 are used with
 * Units = "bytes") - confirm against the consumer's documentation.
 */

/* ---- Daemon self-monitoring (names are prefixed with the ad's MyType) ---- */

[
  Name = strcat(MyType,"CondorVersion");
  Value = CondorVersion;
  Desc = "Version String";
  TargetType = "Scheduler,Negotiator";
]
[
  Name = strcat(MyType,"UpdatesLost");
  Value = UpdatesLost;
  Verbosity = 2;
  Desc = "Number of ClassAd updates that were sent by this daemon but not received by the collector";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]
[
  Name = strcat(MyType,"UpdatesTotal");
  Value = UpdatesTotal;
  Verbosity = 2;
  Desc = "Number of ClassAd updates that were sent by this daemon";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]
[
  Name = strcat(MyType,"RecentDaemonCoreDutyCycle");
  Value = RecentDaemonCoreDutyCycle;
  Desc = "Recent fraction of busy time in the daemon event loop";
  Scale = 100;
  Units = "%";
  TargetType = "Scheduler,Negotiator";
]
/* Not a useful graph. Should be converted to a human readable string metric */
[
  Name = strcat(MyType,"MonitorSelfAge");
  Value = MonitorSelfAge;
  Verbosity = 99;
  Desc = "Age of this daemon";
  Units = "seconds";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]
[
  Name = strcat(MyType,"MonitorSelfCPUUsage");
  Value = MonitorSelfCPUUsage;
  Verbosity = 2;
  Desc = "The fraction of one CPU recently used by this daemon";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]
[
  Name = strcat(MyType,"MonitorSelfImageSize");
  Value = MonitorSelfImageSize;
  Verbosity = 1;
  Desc = "Memory allocated to this daemon (i.e. virtual image size)";
  Units = "bytes";
  Scale = 1024;
  Type = "float";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]
[
  Name = strcat(MyType,"MonitorSelfRegisteredSocketCount");
  Value = MonitorSelfRegisteredSocketCount;
  Verbosity = 2;
  Desc = "Number of sockets registered in this daemon's event loop";
  Units = "sockets";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]
[
  Name = strcat(MyType,"MonitorSelfResidentSetSize");
  Value = MonitorSelfResidentSetSize;
  Verbosity = 2;
  Desc = "RAM allocated to this daemon";
  Units = "bytes";
  Scale = 1024;
  Type = "float";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]
[
  Name = strcat(MyType,"MonitorSelfSecuritySessions");
  Value = MonitorSelfSecuritySessions;
  Verbosity = 2;
  Desc = "Number of security sessions in this daemon's cache";
  TargetType = "Scheduler,Negotiator,Machine_slot1";
]

/* ---- Scheduler: accumulated job times ---- */

[
  Name = "JobsAccumBadputTime";
  Desc = "Runtime of jobs that were aborted (removed or held) or (standard universe only) evicted without a checkpoint.";
  Scale = 0.000277778;
  Units = "hours";
  Type = "float";
  TargetType = "Scheduler";
]
[
  Name = "JobsAccumExecuteTime";
  Desc = "Time spent running jobs. Does not include file transfer and other job handling time.";
  Scale = 0.000277778;
  Units = "hours";
  Type = "float";
  TargetType = "Scheduler";
]
[
  Name = "JobsAccumPostExecuteTime";
  Verbosity = 2;
  Desc = "Time spent processing a completed job (includes output file transfer)";
  Scale = 0.000277778;
  Units = "hours";
  Type = "float";
  TargetType = "Scheduler";
]
[
  Name = "JobsAccumPreExecuteTime";
  Verbosity = 2;
  Desc = "Time spent preparing to run a job (includes input file transfer)";
  Scale = 0.000277778;
  Units = "hours";
  Type = "float";
  TargetType = "Scheduler";
]
[
  Name = "JobsAccumRunningTime";
  Desc = "Time spent running jobs that were not counted as badput (i.e. not removed or held). Includes file transfer and other handling time.";
  Units = "hours";
  Scale = 0.000277778;
  Type = "float";
  TargetType = "Scheduler";
]
[
  Name = "JobsAccumTimeToStart";
  Verbosity = 2;
  Desc = "Time between submit and running of a job";
  Scale = 0.000277778;
  Units = "hours";
  Type = "float";
  TargetType = "Scheduler";
]

/* ---- Scheduler: job and shadow counters ---- */

[
  Name = "JobsCheckpointed";
  Verbosity = 2;
  Desc = "Number of job run attempts that were interrupted and successfully checkpointed";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsCompleted";
  Desc = "Number of jobs that terminated normally (i.e. not via a signal or abort)";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsCoredumped";
  Verbosity = 1;
  Desc = "Number of jobs that crashed and generated a core file";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsDebugLogError";
  Verbosity = 2;
  Desc = "Count of shadows that exited due to debug log errors";
  Units = "shadows";
  TargetType = "Scheduler";
]
[
  Name = "JobsExecFailed";
  Verbosity = 1;
  Desc = "Count of job run attempts that failed to execute the specified command";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsExited";
  Verbosity = 2;
  Desc = "Count of job run attempts that have completed (successfully or not)";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsExitedAndClaimClosing";
  Verbosity = 2;
  Desc = "Count of job run attempts that have completed when claim was not accepting additional jobs";
  Units = "jobs";
  TargetType = "Scheduler";
]
/* JobsExitedNormally is the same as JobsCompleted, so don't bother. */
/*
[
  Name = "JobsExitedNormally";
  Desc = "";
  Units = "";
  TargetType = "Scheduler";
]
*/
[
  Name = "JobsExitException";
  Verbosity = 2;
  Desc = "Count of job run attempts that ended with a job handling exception (shadow exception)";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsKilled";
  Verbosity = 1;
  Desc = "Count of job run attempts in which the job was killed (i.e. evicted)";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsMissedDeferralTime";
  Verbosity = 2;
  Desc = "Count of job run attempts that failed because the specified deferral time was missed";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsNotStarted";
  Verbosity = 2;
  Desc = "Count of job run attempts that failed because the request to activate the claim failed";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsShadowNoMemory";
  Verbosity = 2;
  Desc = "Count of job run attempts that failed because there was not enough memory (RESERVED_SWAP)";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsShouldHold";
  Verbosity = 2;
  Desc = "Count of job run attempts that have resulted in the job going on hold";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsShouldRemove";
  Verbosity = 2;
  Desc = "Count of job run attempts that have resulted in the job being removed (e.g. periodic_remove policy)";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsShouldRequeue";
  Verbosity = 2;
  Desc = "Count of job run attempts that ended with the job being requeued due to handling failures or OnExitRemove=false";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsStarted";
  Verbosity = 1;
  Desc = "Number of job run attempts started";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "JobsSubmitted";
  Desc = "Number of jobs submitted";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "MaxJobsRunning";
  Verbosity = 1;
  Desc = "Configured limit on number of running jobs";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "NumUsers";
  Verbosity = 1;
  Desc = "Number of different users who currently have jobs in the queue";
  Units = "users";
  TargetType = "Scheduler";
]
[
  Name = "RecentStatsLifetime";
  Verbosity = 2;
  Desc = "Seconds elapsed since the beginning of the current stats collection window";
  Units = "seconds";
  TargetType = "Scheduler";
]
[
  Name = "ScheddSwapExhausted";
  Verbosity = 2;
  Desc = "Non-zero when jobs cannot be started due to RESERVED_SWAP";
  TargetType = "Scheduler";
]
[
  Name = "ShadowsRunning";
  Verbosity = 2;
  Desc = "Number of shadow processes currently running";
  Units = "shadows";
  TargetType = "Scheduler";
]
[
  Name = "ShadowsStarted";
  Verbosity = 2;
  Desc = "Number of shadow processes started";
  Units = "shadows";
  TargetType = "Scheduler";
]
[
  Name = "StatsLifetime";
  Verbosity = 2;
  Desc = "Seconds of elapsed time since the beginning of the schedd lifetime stat collection window";
  Units = "seconds";
  TargetType = "Scheduler";
]

/* ---- Scheduler: queue totals, each followed by its pool-wide SUM where defined ---- */

[
  Name = "TotalFlockedJobs";
  Desc = "Number of jobs from this schedd that are flocked to other pools";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalHeldJobs";
  Desc = "Number of jobs in this schedd that are on hold";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Aggregate = "SUM";
  Name = "Held Jobs in Pool";
  Value = TotalHeldJobs;
  Desc = "Number of jobs on hold in schedds reporting to this pool";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalIdleJobs";
  Desc = "Number of idle jobs in this schedd's queue";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Aggregate = "SUM";
  Name = "Idle Jobs in Pool";
  Value = TotalIdleJobs;
  Desc = "Number of idle jobs in schedds reporting to this pool";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalJobAds";
  Desc = "Number of jobs currently in this schedd's queue";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Aggregate = "SUM";
  Name = "Jobs in Pool";
  Value = TotalJobAds;
  Desc = "Number of jobs currently in schedds reporting to this pool";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalLocalJobsIdle";
  Verbosity = 2;
  Desc = "Number of idle local universe jobs in this schedd's queue";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalLocalJobsRunning";
  Verbosity = 2;
  Desc = "Number of running local universe jobs in this schedd's queue";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalRemovedJobs";
  Verbosity = 1;
  Desc = "Number of jobs that are in the process of being removed";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalRunningJobs";
  Desc = "Number of running jobs in this schedd's queue";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Aggregate = "SUM";
  Name = "Running Jobs in Pool";
  Value = TotalRunningJobs;
  Desc = "Number of running jobs in schedds reporting to this pool";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalSchedulerJobsIdle";
  Verbosity = 2;
  Desc = "Number of idle scheduler universe jobs in this schedd's queue";
  Units = "jobs";
  TargetType = "Scheduler";
]
[
  Name = "TotalSchedulerJobsRunning";
  Verbosity = 2;
  Desc = "Number of running scheduler universe jobs in this schedd's queue";
  Units = "jobs";
  TargetType = "Scheduler";
]

/*
 * ---- Submitters ----
 * For each job state there are two ads built from the submitter ad's Name:
 * a "<Name>-Total..." ad with Aggregate = "SUM", and a plain "<Name>..." ad
 * without aggregation.
 */

[
  Name = strcat(Name,"-TotalRunningJobs");
  Title = strcat(Name, " Total Running Jobs");
  Aggregate = "SUM";
  Value = RunningJobs;
  Verbosity = 2;
  Desc = strcat("Total number of running jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]
[
  Name = strcat(Name, "RunningJobs");
  Title = strcat(Name, " Running Jobs");
  Value = RunningJobs;
  Verbosity = 2;
  Desc = strcat("Number of running jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]
[
  Name = strcat(Name,"-TotalIdleJobs");
  Title = strcat(Name, " Total Idle Jobs");
  Aggregate = "SUM";
  Value = IdleJobs;
  Verbosity = 2;
  Desc = strcat("Total number of idle jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]
[
  Name = strcat(Name, "IdleJobs");
  Title = strcat(Name, " Idle Jobs");
  Value = IdleJobs;
  Verbosity = 2;
  Desc = strcat("Number of idle jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]
[
  Name = strcat(Name,"-TotalHeldJobs");
  Title = strcat(Name, " Total Held Jobs");
  Aggregate = "SUM";
  Value = HeldJobs;
  Verbosity = 2;
  Desc = strcat("Total number of held jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]
[
  Name = strcat(Name, "HeldJobs");
  Title = strcat(Name, " Held Jobs");
  Value = HeldJobs;
  Verbosity = 2;
  Desc = strcat("Number of held jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]
[
  Name = strcat(Name,"-TotalFlockedJobs");
  Title = strcat(Name, " Total Flocked Jobs");
  Aggregate = "SUM";
  Value = FlockedJobs;
  Verbosity = 2;
  Desc = strcat("Total number of flocked jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]
[
  Name = strcat(Name, "FlockedJobs");
  Title = strcat(Name, " Flocked Jobs");
  Value = FlockedJobs;
  Verbosity = 2;
  Desc = strcat("Number of flocked jobs from user ", Name);
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Submitters";
]

/*
 * ---- Accounting groups ----
 * The group name is the dotted prefix captured from the first element of
 * splitUserName(Name); when the pattern does not match, the metric is
 * reported under "nogroup" / "no group". The regexp() guard and the
 * regexps() substitution must use the same pattern so that the guard
 * succeeds exactly when the substitution can produce a group name.
 */

[
  Name = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    "-TotalRunningJobs");
  Title = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    " Total Running Jobs");
  Aggregate = "SUM";
  Value = RunningJobs;
  Verbosity = 2;
  Desc = strcat(
    "Total number of running jobs from ",
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),
      "no group"));
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Accounting Groups";
]
[
  Name = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    "-TotalIdleJobs");
  Title = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    " Total Idle Jobs");
  Aggregate = "SUM";
  Value = IdleJobs;
  Verbosity = 2;
  Desc = strcat(
    "Total number of idle jobs from ",
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),
      "no group"));
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Accounting Groups";
]
[
  Name = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    "-TotalHeldJobs");
  Title = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    " Total Held Jobs");
  Aggregate = "SUM";
  Value = HeldJobs;
  Verbosity = 2;
  Desc = strcat(
    "Total number of held jobs from ",
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),
      "no group"));
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Accounting Groups";
]
[
  Name = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    "-TotalFlockedJobs");
  Title = strcat(
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "\\1"),
      "nogroup"),
    " Total Flocked Jobs");
  Aggregate = "SUM";
  Value = FlockedJobs;
  Verbosity = 2;
  Desc = strcat(
    "Total number of flocked jobs from ",
    ifThenElse(
      regexp("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0]),
      regexps("([a-zA-Z0-9.]+)\\.[a-zA-Z0-9]+", splitUserName(Name)[0], "group \\1"),
      "no group"));
  Units = "jobs";
  TargetType = "Submitter";
  Group = "HTCondor Accounting Groups";
]

/* ---- Scheduler: file transfer ---- */

[
  Name = "FileTransferDownloadBytes";
  Verbosity = 1;
  Derivative = true;
  Title = "File Transfer Download Bandwidth";
  Desc = "Output transfers from jobs";
  Units = "bytes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "FileTransferDownloadBytesPerSecond_5m";
  Verbosity = 1;
  Desc = "Rate of output transfers from jobs";
  Units = "bytes/s";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Regex = "Owner_([^_]*)_FileTransferDownloadBytesPerSecond_5m";
  Title = "\\1 Download Bytes Per Second";
  Verbosity = 2;
  Desc = "Rate of output transfers from jobs by user \\1";
  Units = "bytes/s";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "FileTransferFileReadLoad_5m";
  Verbosity = 1;
  Desc = "Number of file transfer processes reading input data from files";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
/* This looks like a mismatch of stuff */
[
  Name = "FileTransferFileReadSeconds";
  Verbosity = 99;
  Derivative = true;
  Title = "File Transfer File Read Load";
  Desc = "Number of file transfer processes reading input data from files";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "FileTransferFileWriteLoad_5m";
  Verbosity = 1;
  Desc = "Number of file transfer processes writing output data to files";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
/* This looks like a mismatch of stuff */
[
  Name = "FileTransferFileWriteSeconds";
  Verbosity = 99;
  Derivative = true;
  Title = "File Transfer File Write Load";
  Desc = "Number of file transfer processes writing output data to files";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "FileTransferNetReadLoad_5m";
  Verbosity = 1;
  Desc = "Number of file transfer processes reading output data from the network";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
/* This looks like a mismatch of stuff */
[
  Name = "FileTransferNetReadSeconds";
  Verbosity = 99;
  Derivative = true;
  Title = "File Transfer Net Read Load";
  Desc = "Number of file transfer processes reading output data from the network";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "FileTransferNetWriteLoad_5m";
  Verbosity = 1;
  Desc = "Number of file transfer processes writing input data to the network";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
/* This looks like a mismatch of stuff */
[
  Name = "FileTransferNetWriteSeconds";
  Verbosity = 99;
  Derivative = true;
  Title = "File Transfer Net Write Load";
  Desc = "Number of file transfer processes writing input data to the network";
  Units = "processes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "FileTransferUploadBytes";
  Derivative = true;
  Title = "File Transfer Upload Bandwidth";
  Desc = "Input transfers to jobs";
  Units = "bytes";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "FileTransferUploadBytesPerSecond_5m";
  Verbosity = 1;
  Desc = "Rate of input transfers to jobs";
  Units = "bytes/s";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Regex = "Owner_([^_]*)_FileTransferUploadBytesPerSecond_5m";
  Title = "\\1 Upload Bytes Per Second";
  Verbosity = 2;
  Desc = "Rate of input transfers to jobs by user \\1";
  Units = "bytes/s";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "TransferQueueDownloadWaitTime";
  Desc = "Oldest output file transfer waiting in the transfer queue";
  Units = "seconds";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Aggregate = "MAX";
  Name = "Pool Max TransferQueueDownloadWaitTime";
  Value = TransferQueueDownloadWaitTime;
  Desc = "Oldest output file transfer waiting in the transfer queues reporting to this pool";
  Units = "seconds";
  TargetType = "Scheduler";
]
[
  Name = "TransferQueueNumDownloading";
  Desc = "Number of jobs actively transferring output";
  Units = "jobs";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "TransferQueueNumUploading";
  Desc = "Number of jobs actively transferring input";
  Units = "jobs";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "TransferQueueNumWaitingToDownload";
  Desc = "Number of jobs waiting in the transfer queue to transfer output";
  Units = "jobs";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "TransferQueueNumWaitingToUpload";
  Desc = "Number of jobs waiting in the transfer queue to transfer input";
  Units = "jobs";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Name = "TransferQueueUploadWaitTime";
  Desc = "Oldest input file transfer waiting in the transfer queue";
  Units = "seconds";
  TargetType = "Scheduler";
  Group = "HTCondor File Transfer";
]
[
  Aggregate = "MAX";
  Name = "Pool Max TransferQueueUploadWaitTime";
  Value = TransferQueueUploadWaitTime;
  Desc = "Oldest input file transfer waiting in the transfer queues reporting to this pool";
  Units = "seconds";
  TargetType = "Scheduler";
]

/* ---- Negotiator: statistics of the last negotiation cycle ---- */

[
  Name = "LastNegotiationCycleActiveSubmitterCount0";
  Verbosity = 1;
  Desc = "The number of job submitters considered in the negotiation cycle";
  Units = "submitters";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleCandidateSlots0";
  Verbosity = 2;
  Desc = "The number of slot ClassAds considered for matchmaking (reduced by NEGOTIATOR_SLOT_POOLSIZE_CONSTRAINT if applicable)";
  Units = "slots";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleDuration0";
  Desc = "The number of seconds that it took to complete the negotiation cycle";
  Units = "seconds";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleMatches0";
  Verbosity = 1;
  Desc = "The number of successful matches that were made in the negotiation cycle";
  Units = "matches";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleMatchRate0";
  Verbosity = 1;
  Desc = "Matches made per second during negotiation cycle";
  Units = "matches/s";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleMatchRateSustained0";
  Verbosity = 1;
  Desc = "Matches made per second, including waiting time between negotiation cycles";
  Units = "matches/s";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleNumIdleJobs0";
  Verbosity = 1;
  Desc = "The number of idle jobs belonging to job submitters";
  Units = "jobs";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleNumJobsConsidered0";
  Verbosity = 1;
  Desc = "The number of jobs considered for matchmaking (may be much lower than idle jobs due to auto-cluster optimizations)";
  Units = "jobs";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleNumSchedulers0";
  Verbosity = 2;
  Desc = "The number of schedds involved in negotiation for resources";
  Units = "schedds";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCyclePeriod0";
  Verbosity = 1;
  Desc = "Seconds between the end of one cycle and the end of the next";
  Units = "seconds";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCyclePhase1Duration0";
  Verbosity = 2;
  Desc = "Duration of Phase 1: getting submitter and machine ClassAds";
  Units = "seconds";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCyclePhase2Duration0";
  Verbosity = 2;
  Desc = "Duration of Phase 2: filtering slots and processing accounting group configuration";
  Units = "seconds";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCyclePhase3Duration0";
  Verbosity = 2;
  Desc = "Phase 3 of the negotiation cycle: sorting submitters by priority";
  Units = "seconds";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCyclePhase4Duration0";
  Verbosity = 2;
  Desc = "Phase 4 of the negotiation cycle: matching slots to jobs";
  Units = "seconds";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleRejections0";
  Verbosity = 1;
  Desc = "The number of rejections that occurred in the negotiation cycle (only one per auto-cluster)";
  Units = "jobs";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleSlotShareIter0";
  Verbosity = 2;
  Desc = "The number of iterations in the negotiation cycle";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleTotalSlots0";
  Verbosity = 1;
  Desc = "The total number of slot ClassAds that matched NEGOTIATOR_SLOT_CONSTRAINT";
  Units = "slots";
  TargetType = "Negotiator";
]
[
  Name = "LastNegotiationCycleTrimmedSlots0";
  Verbosity = 2;
  Desc = "The number of slot ClassAds considered for matchmaking, after filtering by NEGOTIATOR_CONSIDER_PREEMPTION, if applicable";
  Units = "slots";
  TargetType = "Negotiator";
]

/* ---- Machine (TargetType Machine_slot1) ---- */

[
  Name = "ExpectedMachineGracefulDrainingBadput";
  Verbosity = 2;
  Desc = "Job runtime that would be lost if graceful draining were initiated now.";
  Units = "cpus*seconds";
  TargetType = "Machine_slot1";
]
/* Published as a duration: the attribute minus the current time. */
[
  Name = "ExpectedMachineGracefulDrainingCompletion";
  Value = ExpectedMachineGracefulDrainingCompletion - time();
  Verbosity = 2;
  Desc = "Time graceful draining could take to complete, assuming jobs take full retirement and vacate time and there is no suspension";
  Units = "seconds";
  TargetType = "Machine_slot1";
]
[
  Name = "ExpectedMachineQuickDrainingBadput";
  Verbosity = 2;
  Desc = "Job runtime that would be lost if quick draining were initiated now.";
  Units = "cpus*seconds";
  TargetType = "Machine_slot1";
]
/*
 * Same conversion as the graceful variant above, so that both completion
 * metrics are reported as a number of seconds from now.
 * NOTE(review): assumes ExpectedMachineQuickDrainingCompletion is an
 * absolute timestamp like its graceful counterpart - confirm.
 */
[
  Name = "ExpectedMachineQuickDrainingCompletion";
  Value = ExpectedMachineQuickDrainingCompletion - time();
  Verbosity = 2;
  Desc = "Time quick draining could take to complete, assuming jobs take full retirement and vacate time and there is no suspension";
  Units = "seconds";
  TargetType = "Machine_slot1";
]
[
  Name = "Linpack";
  Value = KFlops;
  Verbosity = 2;
  Desc = "Linpack floating point benchmark";
  Units = "FLOPS";
  Scale = 1000;
  Type = "float";
  TargetType = "Machine_slot1";
]
[
  Name = "Dhrystone";
  Value = Mips;
  Verbosity = 2;
  Desc = "Dhrystone integer benchmark";
  Units = "Iterations/sec";
  Scale = 1000000;
  Type = "float";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalCondorLoadAvg";
  Verbosity = 1;
  Desc = "The CPU load attributed to jobs";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalCpus";
  Verbosity = 2;
  Desc = "Number of cores";
  Units = "cores";
  TargetType = "Machine_slot1";
]
[
  Aggregate = "SUM";
  Name = "Cpus in Pool";
  Value = TotalCpus;
  Verbosity = 2;
  Desc = "Number of cores in the pool";
  Units = "cores";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalDisk";
  Verbosity = 2;
  Desc = "Disk space in the job execute directory";
  Units = "bytes";
  Scale = 1024;
  Type = "float";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalLoadAvg";
  Verbosity = 2;
  Desc = "System load average";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalMemory";
  Verbosity = 2;
  Desc = "RAM";
  Units = "bytes";
  Scale = 1048576;
  Type = "float";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalSlots";
  Verbosity = 2;
  Desc = "Number of slots";
  Units = "slots";
  TargetType = "Machine_slot1";
]
[
  Aggregate = "SUM";
  Name = "Pool Slot Count";
  Value = TotalSlots;
  Desc = "Number of slots in the pool";
  Units = "slots";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalMachineDrainingBadput";
  Verbosity = 1;
  Desc = "Job runtime that has been lost due to job evictions caused by draining";
  Units = "cpus*seconds";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalMachineDrainingUnclaimedTime";
  Verbosity = 1;
  Desc = "Time that has not been used due to draining";
  Units = "cpus*seconds";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalVirtualMemory";
  Verbosity = 2;
  Desc = "Addressable memory (RAM plus swap)";
  Units = "bytes";
  Scale = 1024;
  Type = "float";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalPreemptions";
  Verbosity = 2;
  Desc = "Total number of preempted jobs on this startd";
  Units = "preemptions";
  TargetType = "Machine_slot1";
]
[
  Name = "TotalJobStarts";
  Verbosity = 2;
  Desc = "Total number of jobs started on this startd since boot";
  Units = "jobs";
  TargetType = "Machine_slot1";
]
[
  Aggregate = "SUM";
  Name = "Poolwide Preemptions";
  Value = TotalPreemptions;
  Verbosity = 2;
  Desc = "Poolwide Preemptions";
  Units = "preemptions";
  TargetType = "Machine_slot1";
]
[
  Aggregate = "SUM";
  Name = "Poolwide Job Starts";
  Value = TotalJobStarts;
  Verbosity = 2;
  Desc = "Poolwide Job Starts";
  Units = "jobs";
  TargetType = "Machine_slot1";
]

/* ---- Scheduler: autoclusters ---- */

[
  Name = "AutoClusters";
  Desc = "Number of active AutoClusters in the schedd";
  Units = "autoclusters";
  TargetType = "Scheduler";
]
[
  Aggregate = "SUM";
  Name = "AutoClusters in Pool";
  Value = AutoClusters;
  Desc = "Number of active AutoClusters in schedds reporting to this pool";
  Units = "autoclusters";
  TargetType = "Scheduler";
]

/* ---- Defrag daemon (names are prefixed with the ad's MyType) ---- */

[
  Name = strcat(MyType,"WholeMachines");
  Value = WholeMachines;
  Verbosity = 2;
  Desc = "Number of machines that were observed to be defragmented in the last polling interval";
  TargetType = "Defrag";
]
[
  Name = strcat(MyType,"MachinesDraining");
  Value = MachinesDraining;
  Verbosity = 2;
  Desc = "Number of machines that were observed to be draining in the last polling interval";
  TargetType = "Defrag";
]
[
  Name = strcat(MyType,"RecentDrainSuccesses");
  Value = RecentDrainSuccesses;
  Verbosity = 2;
  Desc = "Count of successful attempts to initiate draining during the past RecentStatsLifetime seconds";
  TargetType = "Defrag";
]
[
  Name = strcat(MyType,"RecentDrainFailures");
  Value = RecentDrainFailures;
  Verbosity = 2;
  Desc = "Count of failed attempts to initiate draining during the past RecentStatsLifetime seconds";
  TargetType = "Defrag";
]
[
  Name = strcat(MyType,"AvgDrainingUnclaimed");
  Value = AvgDrainingUnclaimed;
  Verbosity = 2;
  Desc = "Fraction of time CPUs in the pool have spent unclaimed by a user during draining of the machine";
  TargetType = "Defrag";
]
[
  Name = strcat(MyType,"WholeMachinesPeak");
  Value = WholeMachinesPeak;
  Verbosity = 2;
  Desc = "Largest number of machines that were ever observed to be simultaneously defragmented";
  TargetType = "Defrag";
]
[
  Name = strcat(MyType,"AvgDrainingBadput");
  Value = AvgDrainingBadput;
  Verbosity = 2;
  Desc = "Fraction of time CPUs in the pool have spent on jobs that were killed during draining of the machine";
  TargetType = "Defrag";
]
[
  Name = strcat(MyType,"MachinesDrainingPeak");
  Value = MachinesDrainingPeak;
  Verbosity = 2;
  Desc = "Largest number of machines that were ever observed to be draining";
  TargetType = "Defrag";
]