\documentclass[english,serif,mathserif,xcolor=pdftex,dvipsnames,table]{beamer}
\usepackage[utf8]{inputenc}
%\usepackage[T1]{fontenc}
\usepackage{babel}
\usepackage{fixltx2e}
\usepackage{graphicx}
\usepackage{colortbl}%
  %\newcommand{\cellcolor}[2]{\multicolumn{1}{>{\columncolor{#1}}c}{#2}}
\usepackage{listings}%
  \lstloadlanguages{sh}%
  \lstset{
    language=sh,%
    % --- basic appearance ---
    basicstyle=\ttfamily,%
    %columns=fullflexible,% best results for proportional fonts
    commentstyle=\small,%
    keywordstyle=\bfseries,% or \normalfont
    %identifierstyle=\itshape,%
    %procnamestyle=\bfseries\slshape, %\scshape,%
    %procnamekeys={def},%
    % --- escaping and special displays ---
    escapechar=@,% text between "@" will be rendered as normal TeX
    %moredelim=[il][\small\itshape]{\#},% ditto for text between "#" and end-of-line
    texcl,%
    mathescape=false,%
    %literate={*{=}{{$\gets$ }}1 {==}{{$=$ }}1 {<=}{{$\leq$ }}1 {>=}{{$\geq$ }}1 {!=}{{$\neq$ }}1},%
    % --- display ---
    %showspaces=false,%
    %showstringspaces=false,%
    %xleftmargin=2em,%
    % --- line numbers ---
    %numbers=left,%
    %numberstyle=\tiny,%
    %stepnumber=1,%
    %firstnumber=1%
  }%
  \lstMakeShortInline{@}%
\usepackage{longtable}
\usepackage{mdwtab}
\usepackage{multirow}
\usepackage{float}
\usepackage{wrapfig}
\usepackage{relsize}
\usepackage{soul}
\usepackage{textcomp}
\usepackage{tikz}%
  \usetikzlibrary{arrows,shapes}%
  % For every picture that defines or uses external nodes, you'll have to
  % apply the 'remember picture' style. To avoid some typing, we'll apply
  % the style to all pictures.
  \tikzstyle{every picture}+=[remember picture]%
\usepackage{marvosym}
\usepackage{wasysym}
\usepackage{latexsym}
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{url}
\tolerance=1000
\providecommand{\alert}[1]{\textbf{#1}}

\usetheme{uzhneu-en-informal}


\title[Introduction to ARC]{%
  Introduction to the \\
  ARC Grid middleware
}
\author[R. Murri]{%
  \textbf{Riccardo Murri} \\
  Grid Computing Competence Center, \\
  Organisch-Chemisches Institut, \\
  University of Zurich
}
\date{Oct.~20,~2011}

%% Use `\largeskip` to get a larger vertical white space between two
%% lines/paragraphs:
\newcommand{\largeskip}{\vspace{1em}}
\def\+{\largeskip}
\setlength{\parsep}{1.0em}

\begin{document}

% title frame
\maketitle


\section{Introduction}

\begin{frame}
  \frametitle{Grid vs. Clusters}

  \begin{tabular}{>{\raggedright}p{0.45\linewidth}@{\hspace{2ex}}>{\raggedright}p{0.55\linewidth}}
    \textbf{Cluster}
    & \textbf{Grid}
    \\[1ex]
    Local accounts
    & Global identity (DN)
    \\[1ex]
    Shared filesystem(s)
    & \emph{No sharing}; explicit movement of data
    \\[1ex]
    Scheduling policy controlled by local sysadmin
    & User chooses meta-scheduling policy; no control at all over
    \emph{local} scheduling.
    \\[1ex]
    Can inspect/debug the running system
    & Little or no debugging information: interface to remote system
    mainly through sysadmins.
  \end{tabular}

\end{frame}


\begin{frame}
  \frametitle{Today's lab session}
  An introduction to the user-level ARC commands.
  \begin{itemize}
  \item authentication (certificate/proxy commands)
  \item job submission
  \item job control
  \end{itemize}
  
  \+
  {\small These slides are available for download from: 
    \url{http://www.gc3.uzh.ch/teaching/lsci2011/lab05.pdf}}
\end{frame}


\section{Authentication}

\begin{frame}[fragile]
  \frametitle{Obtaining a certificate}
  Use the \texttt{slcs-init} command.

  \+
  Usage:
\begin{semiverbatim}
{slcs-init --idp uzh.ch --user} {\small\it your-UZH-shortname}
\end{semiverbatim}

  \+
  Example:
\begin{semiverbatim}
\$ slcs-init --idp uzh.ch --user rmurri
Shibboleth Password: {\small\it (your UZH password)}
New Key Password: {\small\it (press `Enter' to re-use UZH passwd)}
Key password is empty, using Shibboleth password.
\end{semiverbatim}
\end{frame}


\begin{frame}[fragile]
  \frametitle{Certificate files}
  Certificates are stored in the \texttt{\$HOME/.globus} directory:
\begin{semiverbatim}\scriptsize
\$ ls -l \$HOME/.globus
-rw-r----- 1 rmurri rmurri  4392 2011-10-13 10:10 usercert.pem
-rw------- 1 rmurri rmurri   963 2011-10-13 10:10 userkey.pem
\end{semiverbatim}

  % \+ 
  % If you want to store them elsewhere, set the environment
  % variables \texttt{X509\_USER\_CERT} and \texttt{X509\_USER\_KEY} to
  % point to the full path of the certificate and key files.
\end{frame}


\begin{frame}[fragile]
  \frametitle{Creating a Grid proxy}
  Use the command \texttt{arcproxy}.

  \+
  Usage:
\begin{semiverbatim}\footnotesize
\$ \textbf{arcproxy --voms=smscg} {\smaller\it (You should use \texttt{tutor} instead)}
Your identity: /DC=ch/DC=switch/DC=slcs/O=Universitaet Zuerich/CN=Riccardo Murri 94ADB37F
Enter pass phrase for /home/murri/.globus/userkey.pem:
..........++++++
...............................................++++++
Contacting VOMS server (named smscg): voms.smscg.ch on port: 15020
Proxy generation succeeded
Your proxy is valid until: 2011-10-14 02:16:07
\end{semiverbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Display proxy information}
  Use the command \texttt{arcproxy --info}.

  \+
  Example: \emph{(lines wrapped as in 80-column terminal)}
\begin{semiverbatim}\tiny
\$ \textbf{arcproxy --info}
\alert<2>{Subject: /DC=ch/DC=switch/DC=slcs/O=Universitaet Zuerich/CN=Riccardo Murri 94ADB
37F/CN=1409327809}
\alert<3>{Issuer: /DC=ch/DC=switch/DC=slcs/O=Universitaet Zuerich/CN=Riccardo Murri 94ADB3
7F}
\alert<4>{Identity: /DC=ch/DC=switch/DC=slcs/O=Universitaet Zuerich/CN=Riccardo Murri 94AD
B37F}
\alert<5>{Time left for proxy: 11 hours 59 minutes 38 seconds}
Proxy path: /tmp/x509up_u1162
Proxy type: X.509 Proxy Certificate Profile RFC compliant impersonation proxy - 
RFC inheritAll proxy
====== AC extension information for VO smscg ======
\alert<6>{VO        : smscg}
subject   : /DC=ch/DC=switch/DC=slcs/O=Universitaet Zuerich/CN=Riccardo Murri 94
ADB37F
issuer    : /DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=hosts/C=CH/ST=Zuerich
/L=Zuerich/O=SWITCH/CN=voms.smscg.ch
uri       : voms.smscg.ch:15020
\alert<7>{attribute : /smscg}
\alert<5>{Time left for AC: 11 hours 59 minutes 43 seconds}
\end{semiverbatim}
\end{frame}


\section{Grid Cluster Architecture}

\begin{frame}[fragile]
  \frametitle{Grid Interface to \texttt{ocikbnor.uzh.ch}}
  \begin{center}
    \includegraphics[width=0.9\linewidth]{lab05/ocikbnor}
  \end{center}
\end{frame}


\begin{frame}[fragile]
  \frametitle{Checking cluster availability}
  The \texttt{arcinfo} command prints out the cluster's ``health state'':
\begin{semiverbatim}\scriptsize
\$ \textbf{arcinfo ARC0:ocikbnor.uzh.ch}
Execution Service: ocikbnor.uzh.ch
 URL: ARC0:ldap://ocikbnor.uzh.ch:2135/Mds-Vo-name=local,o=Grid
 Queue: all.q
 Health State: ok
\end{semiverbatim}

  \+
  Add the \texttt{--long} option to print out \emph{all} the
  information available in the information system.

  \+ 
  {\em Quiz: what CPU type and architecture are reported by the cluster
    to the information system?}
\end{frame}


\section{Job submission}

\begin{frame}[fragile]
  \frametitle{Job submission, I}
  What is needed to submit a job to a remote system?

  \+
  What do we need to \emph{specify}?
\end{frame}


\begin{frame}
  \frametitle{Job submission, II}
  An ARC job description specifies the following:
  \begin{itemize}
  \item A script or binary application to run (the \texttt{executable} element).
  \item Command-line arguments to be passed to it (the \texttt{arguments} element).
  \item A list of files to copy \emph{to} the remote compute node (the \texttt{inputFiles} element).
  \item A list of files to copy \emph{back from} the remote compute node (the \texttt{outputFiles} element).
  \item Redirections for STDIN and STDOUT (the \texttt{stdin} and \texttt{stdout} elements).
  \end{itemize}
\end{frame}


\begin{frame}[fragile]
  \frametitle{XRSL examples, I}
  Compress the matrix file \texttt{M0,5-D5.sms} remotely, and get
  back the result. \emph{(Yes, it begins with an ampersand!)}
\begin{semiverbatim}
\$ cat gzip.xrsl
&
(executable="/bin/gzip")
(arguments="-v" "M0,5-D5.sms")
(inputFiles=("M0,5-D5.sms" ""))
(outputFiles=("M0,5-D5.sms.gz" ""))
\end{semiverbatim}

  The file format used in ARC is called ``Extended Resource
  Specification Language'' (XRSL).  
  
  Full specification available at:
  {\smaller \url{http://www.nordugrid.org/documents/xrsl.pdf}}
\end{frame}


\begin{frame}[fragile]
  \frametitle{Job submission, III}
  Use the command \texttt{arcsub}:
\begin{semiverbatim}\tiny
\$ \textbf{arcsub -f gzip.xrsl}
Job submitted with jobid: gsiftp://arctest.unige.ch:2811/jobs/118391318501997184
2804036
\end{semiverbatim}

  Upon successful submission, \texttt{arcsub} prints a ``job ID'' to
  standard output; \emph{save it} because it's the only way to operate on a submitted job!

  \+
  The \texttt{-o}/\texttt{--jobids-to-file} option can save the job IDs to a text file:
\begin{semiverbatim}\tiny
\$ \textbf{arcsub} -f gzip.xrsl \textbf{--jobids-to-file}=jobids.txt
Job submitted with jobid: gsiftp://gordias.unige.ch:2811/jobs/185111318502147132
1545456
\$ cat jobids.txt 
gsiftp://gordias.unige.ch:2811/jobs/1851113185021471321545456
\end{semiverbatim}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Monitoring jobs, I}
  The \texttt{arcstat} command is used to monitor jobs submitted to ARC.

  \+
  Example:
\begin{semiverbatim}\tiny
\$ \textbf{arcstat} gsiftp://arctest.unige.ch:2811/jobs/1183913185019971842804036
Job: gsiftp://arctest.unige.ch:2811/jobs/1183913185019971842804036
 State: Finished (FINISHED)
\end{semiverbatim}

  It's always better to use the
  \texttt{-i}/\texttt{--jobids-from-file} option to read the job
  IDs from a text file:
\begin{semiverbatim}\tiny
\$ \textbf{arcstat --jobids-from-file}=jobids.txt 
Job: gsiftp://gordias.unige.ch:2811/jobs/1851113185021471321545456
 State: Finished (FINISHED)
 Exit Code: 0
\end{semiverbatim}
\end{frame}


\begin{frame}
  \frametitle{Monitoring jobs, II}
  Job status in ARC 1.0 and 0.8 (deployed in SMSCG) can be:
  \begin{description}\smaller
  \item[Accepted] The job description has been validated by the Grid frontend server, but not yet processed.
  \item[Preparing] The Grid server backend is ``setting the stage'' for the job, e.g., downloading required files.
  \item[Queuing] The job is currently waiting in the batch system's queue.
  \item[Running] The job is currently executing on a compute node in the batch system.
  \item[Finishing] The job is finished \emph{but} output is \emph{not yet} ready to be retrieved.
  \item[Finished] The job is finished and output is ready to be retrieved.
  \end{description}

  \+
  \emph{Different sites may report different job states, depending on the installed grid middleware version.} 
\end{frame}


\begin{frame}[fragile]
  \frametitle{Retrieving output}
  Use the \texttt{arcget} command to download the output of a finished job.

  \+
  Example:
\begin{semiverbatim}\tiny
\$ \textbf{arcget} gsiftp://arctest.unige.ch:2811/jobs/1183913185019971842804036
\$ ls 1183913185019971842804036/
M0,5-D5.sms.gz
\end{semiverbatim}
\end{frame}


\begin{frame}
  \frametitle{Job submission exercises}
  Exercises:
  \begin{enumerate}
  \item Write an XRSL file \texttt{gunzip.xrsl} to \emph{decompress} a matrix file.

    What happens if you run \texttt{arcget} on the job while it's running?

    What happens if you run \texttt{arcget} twice on the same job ID?
    And after it's \emph{finished}?

  \item Write an XRSL file \texttt{rank1.xrsl} to run the command
    ``\texttt{rank-int.i386 M0,6-D5.sms}'', then run it on the Grid.
    Any errors in the output?

  \item Using \texttt{man arcsub}, find out what option you need to
    submit the \texttt{rank1.xrsl} job to \texttt{ocikbnor.uzh.ch}
    (and not to an arbitrary cluster).
  \end{enumerate}
\end{frame}


\begin{frame}[fragile]
  \frametitle{Std streams redirection}
  The \texttt{stdin}, \texttt{stdout} and \texttt{stderr} xRSL
  elements enable redirection of the standard input, output and error
  streams to a file.

\begin{semiverbatim}
\$ \textbf{cat cpuinfo.xrsl}
\&
(executable="/bin/cat")
(arguments="/proc/cpuinfo")
(stdout="stdout.txt")
(stderr="stderr.txt")
(outputFiles=("stdout.txt" ""))
\end{semiverbatim}  

  \begin{itemize}
  \item What command-line invocation does this xRSL describe?
  \item Why is there no \texttt{inputFiles} directive?
  \item What happens if you omit the \texttt{outputFiles} directive?
  \end{itemize}
\end{frame}


\begin{frame}
  \frametitle{Resource usage}
Exercises:
\begin{enumerate}
\item How can you measure the wall-clock and CPU time used by a
  program at the remote compute node?

\item Write an xRSL that compresses the file \texttt{M0,6-D8.sms} with
  \texttt{gzip} and reports on the CPU- and wall-clock time consumed
  at the execution node.

\item How can you measure the time taken for the network transfer of
  the uncompressed source file \texttt{M0,6-D8.sms} and the compressed
  result?
\end{enumerate}
\end{frame}


\begin{frame}
  \frametitle{Resource usage reporting in ARC}
  Within the directory containing retrieved output files, you will
  find a directory \texttt{gmlog} that contains ARC diagnostic
  information. 

  \+
  Two files are especially important:
  \begin{description}
    \item[diag] Contains resource usage information.
    \item[errors] Contains the entire output of the backend submission
      script.
  \end{description}

\end{frame}


\begin{frame}[fragile]
  \frametitle{Example ``diag'' file}
\emph{(Wrapped as in 80-column terminal.)}
\begin{semiverbatim}\tiny
frontend_system="Linux arc-0-28.local 2.6.18-164.6.1.el5 #1 SMP Tue Nov 3 16:18:
27 EST 2009 i686 i686 i386 GNU/Linux"
frontend_subject="/DC=com/DC=quovadisglobal/DC=grid/DC=switch/DC=hosts/C=CH/ST=Z
uerich/L=Zuerich/O=Universitaet Zuerich/CN=ocikbnor.uzh.ch"
frontend_ca="/C=BM/O=QuoVadis Limited/OU=Issuing Certification Authority/CN=QuoV
adis Grid ICA"
frontend_middleware="nordugrid-arc-0.8.2.2"
frontend_lrms="sge"
frontend_processor=" GenuineIntel QEMU Virtual CPU version 0.12.5 2992.664"
frontend_memory="1035184 kB"
ExecutionUnits=1
{\small\em [...]}
exitcode=0
LRMSStartTime=20111020123921Z
LRMSEndTime=20111020123921Z
nodename=compute-0-4.local
CPUTime=0.0s
WallTime=0.0s
KernelTime=0.0s
UserTime=0.0s
AverageTotalMemory=0kB
failed=0
\end{semiverbatim}
Do you see any similarity with the output of \texttt{time}?
\end{frame}

\begin{frame}
  \frametitle{Resource requirements}
  You can specify resource requirements (which also act as limits!) in the xRSL file:
  \begin{description}
  \item[count] Number of CPU cores to allocate to job.
  \item[cpuTime] Required (and maximum allowed) CPU time \emph{in minutes}.
  \item[wallTime] Required (and maximum allowed) wall-clock time \emph{in minutes}.
  \item[memory] Required (and maximum allowed) memory \emph{in MB}.
  \item[disk] Required disk space \emph{in MB}.
  \end{description}

  \+
  See the xRSL manual for a precise description.  No, really, \emph{read it!}
\end{frame}


\begin{frame}
  \frametitle{Final exercise}
  Write a program to compress files:
  \begin{itemize}
  \item The list of files to compress is given as command-line
    arguments to the program.
  \item Each file should be compressed by a separate Grid job.
  \item The program should wait until all jobs are done.
  \item When the program ends normally, all the compressed matrices
    should be found in the current directory.
  \end{itemize}

  Test it using the ``M*.sms'' matrix files that you downloaded in the
  \texttt{lab2} directory.
\end{frame}


\section{Further reading}

\begin{frame}
  \frametitle{References}
  \begin{enumerate}[{[1]}]
    \item ARC Clients User Manual (commands reference): \url{http://www.nordugrid.org/documents/arc-ui.pdf}
    \item ARC xRSL reference: \url{http://www.nordugrid.org/documents/xrsl.pdf}
  \end{enumerate}
\end{frame}

\end{document}

