\documentclass[english,serif,mathserif,xcolor=pdftex,dvipsnames,table]{beamer}
\usepackage[utf8]{inputenc}
%\usepackage[T1]{fontenc}
\usepackage{babel}
\usepackage{fixltx2e}
\usepackage{graphicx}
\usepackage{colortbl}%
  %\newcommand{\cellcolor}[2]{\multicolumn{1}{>{\columncolor{#1}}c}{#2}}
% Source-code listings: load the `listings` package, preload the `sh`
% language definition and set the defaults applied to every listing in
% this deck (individual listings override `basicstyle` locally).
\usepackage{listings}%
  \lstloadlanguages{sh}%
  \lstset{
    language=sh,%
    % --- basic appearance ---
    basicstyle=\ttfamily,%
    %columns=fullflexible,% best results for proportional fonts
    commentstyle=\small,%
    keywordstyle=\bfseries,% or \normalfont
    %identifierstyle=\itshape,%
    %procnamestyle=\bfseries\slshape, %\scshape,%
    %procnamekeys={def},%
    % --- escaping and special displays ---
    escapechar=@,% text between "@" will be rendered as normal TeX
    %moredelim=[il][\small\itshape]{\#},% ditto for text between "#" and end-of-line
    % `texcl`: comment lines inside a listing are typeset as TeX text
    texcl,%
    % `$` is NOT treated as a math-mode escape inside listings
    mathescape=false,%
    %literate={*{=}{{$\gets$ }}1 {==}{{$=$ }}1 {<=}{{$\leq$ }}1 {>=}{{$\geq$ }}1 {!=}{{$\neq$ }}1},%
    % --- display ---
    %showspaces=false,%
    %showstringspaces=false,%
    %xleftmargin=2em,%
    % --- line numbers ---
    %numbers=left,%
    %numberstyle=\tiny,%
    %stepnumber=1,%
    %firstnumber=1%
  }%
  % Make "@...@" a shorthand for an inline listing in running text.
  % NOTE(review): "@" is also the `escapechar` set above -- confirm the two
  % uses do not clash, and remember that a literal "@" in slide text
  % (e.g. an e-mail address) will now start an inline listing.
  \lstMakeShortInline{@}%
\usepackage{longtable}
\usepackage{multirow}
\usepackage{float}
\usepackage{wrapfig}
\usepackage{soul}
\usepackage{textcomp}
\usepackage{tikz}%
  \usetikzlibrary{arrows,shapes}%
  % For every picture that defines or uses external nodes, you'll have to
  % apply the 'remember picture' style. To avoid some typing, we'll apply
  % the style to all pictures.
  \tikzstyle{every picture}+=[remember picture]%
\usepackage{marvosym}
\usepackage{wasysym}
\usepackage{latexsym}
\usepackage{amssymb}
\usepackage{hyperref}
\usepackage{url}
\tolerance=1000
\providecommand{\alert}[1]{\textbf{#1}}

\usetheme{uzhneu-en-informal}

%% \title[Large Scale Computing Infrastructures]{%
%%   \textbf{Large Scale Computing Infrastructures}\\
%%   \small{(MINF 4526 HS2011)}\\
%%   \small{Lecture 2: from clusters to distributed systems}
%% }

%% \author[S.\ Maffioletti] {%
%%   Sergio Maffioletti \\ 
%%   \texttt{<sergio.maffioletti@gc3.uzh.ch>} \\
%%   \institute[GC3, Univ. of Zurich]% will appear on the bottom line
%%             {\href{http://www.gc3.uzh.ch/}{Grid Computing Competence Centre}, 
%%               \href{http://www.uzh.ch/}{University of Zurich}
%%               \\ \url{http://www.gc3.uzh.ch/}}
%% }

\title[Large Scale Computing Infrastructures]{%
   Large Scale Computing Infrastructures \\
   \small{MINF 4526 HS2011} \\
   \small{Lecture 3: the Swiss Multi Science Computing Grid (SMSCG)}
 }
\author[S.\ Maffioletti]{%
   \textbf{Sergio Maffioletti} \\
   Grid Computing Competence Center, \\
   Organisch-Chemisches Institut, \\
   University of Zurich
 }
% \date{Sept.~27,~2011}

%% Use `\largeskip` to get a larger vertical white space between two
%% lines/paragraphs (it expands to `\vspace{1em}`):
\newcommand{\largeskip}{\vspace{1em}}
%% `\+` is a one-character shorthand that expands to `\largeskip`.
%% NOTE(review): it is defined with `\def`, so any earlier meaning of `\+`
%% is silently replaced -- confirm nothing in the deck relies on it.
\def\+{\largeskip}
%% Set the `\parsep` length to 1em.
\setlength{\parsep}{1.0em}

\begin{document}

% title frame
\maketitle

% The goal of this class is to present SMSCG as one possible instance
% of a generic distributed infrastructure for scientific computing
% Note: stress this once again before starting.
% Present the overall architecture (basically ARC):
% AuthZ, AuthN
% GridFTP
% Computing Service
% Storage Service
% from both system perspective and usage perspective 
% for usage perspective better to find few examples
% this should be seen also in preparation for the next class
% where scientific use cases will be presented
% students will use the SMSCG as a base for implementing some of the problematics
% that will be discussed around the scientific usecases

% TOC
\begin{frame}
  
  \frametitle{Table of contents}
  \begin{enumerate}
  \item{} What is SMSCG
  \item{} Basic components
  \item{} from system perspective
  \item{} from usage perspective
  \end{enumerate}
  
\end{frame}

%%%%%%%%%%% What is SMSCG

% Intro

%% what is an infrastructure (more than a middleware)
% if they cannot perceive it, at least mention the overhead of organizing 
% a distributed group of individuals with different policies and expectations
% list: monitoring, accounting, SLAs, and all the specific agreements that have to be reached when
% operating and supporting such an infrastructure
% (infrastructure is a collection of resources, people, services, policies and knowledge)
% what is an infrastructure: middleware as we described in previous
% lecture but also additional services to make it work
% as a production system.
% monitor, accounting, website, probes, operational activities, ...
% Note: maybe place it towards the end of the talk ?

\begin{frame}
\frametitle{before we start}
\url{https://rt.smscg.ch/registration.php}
\end{frame}

\begin{frame}
\frametitle{The Swiss Multi Science Computing Grid}
\includegraphics[height=0.2\textheight]{lecture03/smscg_logo.jpg} \url{http://smscg.ch}\\

{\bf Objective:}\\
Build and operate a distributed computing infrastructure dedicated to scientific applications.


{\bf Main goal:} Build a platform that can accommodate
requirements from different scientific domains


{\bf What does this mean:}\\
Create a network of resources, services, 
knowledge, people, and procedures.

\end{frame}

\begin{frame}
  \frametitle{an Infrastructure}
  Harmonize existing computing resources that belong to different academic institutions so as to provide a uniform scientific computing infrastructure for scientists.
  \begin{itemize}
  \item {\it Coordinated} effort between partners 
  \item {\it Computing} and {\it data services} through well established and mature technologies
  \item {\it Access} through AAI infrastructure
  \item {\it Monitoring} (monitor.smscg.ch and nagios)
  \item {\it Accounting} information collected from all partners
  \item {\it Support}: ticketing system, mailing list, personalized user support
  \item {\it Application} integration / user enabling
  \item {\it Policies} agreed among partners and users
  \item {\it Procedures} agreed among partners and users
  \end{itemize}
\end{frame}

%% Describe geographical layout (who are the partners)
\begin{frame}
\frametitle{SMSCG partners}
  \includegraphics[height=0.2\textheight]{lecture03/smscg_logo.jpg}\\
  \includegraphics[height=0.6\textheight]{lecture03/SMSCG-MAP.png}  
\end{frame}

%%%%%%%%%%%% Basic components

%% Recap the layout discussed in previous class: 
% . Uniform access to resources
% . Global resource management
% . Uniform resource description
% . Resource scheduling
% . Data management
% . Application management
% 
% and explain what and how it implements them

\begin{frame}
  \frametitle{end-user abstraction of services}
  \begin{itemize}
  \item User interacts with {\it services}
  \item No real perception of {\it servers} and {\it physical location}
  \item unless {\it needed} for specific purposes
  \item physical knowledge is needed when the infrastructure is {\it not behaving} as expected or {\it not providing} the required level of control
  \item thus it is also necessary to expose {\it fine-grained} control
  \end{itemize}
\end{frame}

%% ARC Approach
% image: from distributed clusters to unified cluster abstraction
% different clusters with LRMS on top of it
% then higher level box with: 
% . Uniform access to resources (Auth)
% . Global resource management (LRMS interface)
% . Uniform resource description (xrsl and GLUE)
% . Resource scheduling (broker and infosys)
% . Data management (GridFTP)
% . Application management (...)

%% ARC deployment
% image: what goes on each frontend
% main components: Auth + Interface with LRMS + GridFTP + Infosys
\begin{frame}
  \frametitle{Building a distributed system abstraction}
  \includegraphics[height=0.5\textheight]{lecture03/smscg_base01.png}  
\end{frame}

\begin{frame}
  \frametitle{Building a distributed system abstraction}
  \includegraphics[height=0.55\textheight]{lecture03/smscg_base02.png}  
\end{frame}

\begin{frame}
  \frametitle{Building a distributed system abstraction}
  \includegraphics[height=0.55\textheight]{lecture03/smscg_base03.png}  
\end{frame}

\begin{frame}
  \frametitle{Advanced Resource Connector (ARC)}
  \begin{itemize}
  \item middleware to connect distributed cluster and storage resources
  \item batch system model abstraction (job management)
  \item built on de-facto standard components and protocols
  \item is the foundation of the Swiss National Distributed Computing Infrastructure
  \end{itemize}
  \url{http://www.nordugrid.org}
\end{frame}

\begin{frame}
  \frametitle{ARC approach}
  \includegraphics[height=0.6\textheight]{lecture03/smscg_base04.png}  
\end{frame}


%% SMSCG infrastructure layout
% image: UIs + CEs + SEs + ISs

\begin{frame}
  \frametitle{ARC\_CE}
  Computing Interface
  \begin{itemize}
  \item Provides uniform access to diverse and heterogeneous cluster resources
  \item Allows jobs to be managed uniformly
  \item All requests to a given ARC\_CE are {\it translated} into local requests
  \end{itemize}
\end{frame}

\begin{frame} [fragile]
  \frametitle{ARC\_SE}
  Storage Interface
  \begin{itemize}
  \item Provides an Interface to a Storage 
  \item All storage resources accessible through GridFTP protocol
  \item Data are {\it visible} in the system
  \item Data can be used as Input/Output references
  \item \url{gsiftp://storage01.uzh.ch/GAMESS/Validation/inputData01.inp}
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{ARC\_UI}
  User Interface\\
  Provides the end user with the command-line interfaces and the client programming libraries to
  access and control computing and storage resources
  \begin{itemize}
  \item manage X509 certificates and proxies (global identity)
  \item job management (resource requirements, job submission and monitor)
  \item data staging (copy and move data to/from storage and/or computing resources)
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{ARC\_Index}
  Index Service
  \begin{itemize}
  \item Yellow Pages service
  \item Provides an index of services and resources available in a given infrastructure
  \item Needed in the brokering process
  \item Every resource registers to an Index Service
  \end{itemize}
  In this way, client components only have to know how to reach the Index Service (e.g.\ Google Search)
\end{frame}

% Note: every component explained is contextualized with the reference image
% for example circle in red what components enter into the game

%%% Uniform access to resources
% AuthZ and AuthN based on globus GSI
% Note: find proper references
% not too detailed as Riccardo will explain it afterwards
\begin{frame}
  \frametitle{Uniform Authentication and Authorization}
  \begin{itemize}
  \item Access to resources is authenticated and authorized through the Grid Security Infrastructure (GSI)
  \item Core of this infrastructure is the X509 digital certificate
  \item This provides a unique identity within the infrastructure
  \item An X509 certificate (proxy) is attached to every service request
  \item Every service authenticates and authorizes the requests by inspecting the X509 certificate (proxy)
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Uniform Resource Management}
  \includegraphics[height=0.8\textheight]{lecture03/SMSCG-computing-architecture.jpg}  
\end{frame}

%%%  Global resource management
% LRMS interfaces
\begin{frame}
  \frametitle{Uniform Resource Management}
  Each Computing Resource hosts a few specific ARC\_CE services:
  \begin{itemize}
  \item GridFTP (Data transfer and job control server)
  \item BDII (Information system)
  \item a-rex (interface with LRMS)
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Interface with LRMS}
  a-rex is the service responsible for processing the jobs, moving them through states, and collecting reporting information
\end{frame}

%%% Uniform resource description
% Infosys and GLUE schema (briefly)
\begin{frame}
  \frametitle{Uniform resource description}
  \begin{itemize}
  \item Several services (e.g. Resource broker) need information about the current status of services and resources
  \item Resource and service information is collected at local sites and published by an {\it Information System}
  \item in SMSCG, the local information provider is called BDII
  \end{itemize}
  \url{https://twiki.cern.ch/twiki/bin/view/EGEE/BDII}
\end{frame}

\begin{frame}
  \frametitle{BDII}
  \includegraphics[height=0.8\textheight]{lecture03/bdii.png}  
\end{frame}

\begin{frame}
  \frametitle{Information Indexes}
  \includegraphics[height=0.8\textheight]{lecture03/Overview.png}  
\end{frame}

\begin{frame}
  \frametitle{BDII}
  % few words about BDII
  \begin{itemize}
  \item Runs on the local resource where service and/or resource information needs to be collected
  \item Periodically checks the status of the system
  \item Uses local system information to build a representation of services and/or resources status (e.g. /proc/cpuinfo)
  \end{itemize}
\end{frame}

\begin{frame} [fragile]
  \frametitle{Information Index}
  Local information is publicly available through the LDAP protocol (ldapsearch)
  \begin{lstlisting}[basicstyle=\tiny]
ldapsearch -x -H ldap://giis.smscg.ch:2135 -b mds-vo-name=Switzerland,o=grid
  \end{lstlisting}
  \begin{itemize}
  \item Every resource or service provider publishes its information to an {\it Information Index}
  \item Information Index works as a {\it yellow pages} service (e.g. www.directories.ch)
  \end{itemize}
  \url{http://www.openldap.org/doc/admin24/guide.html}
\end{frame}

\begin{frame}
  \frametitle{Information representation: GLUE schema}
  \begin{itemize}
  \item Information on services and resources is organized in an agreed schema
  \item GLUE is an ontology used to classify resources and services
  \item Services can rely on the agreed ontology to have a representation of the status of the system
  \end{itemize}
\end{frame}

\begin{frame} [fragile]
\frametitle{GLUE schema examples}
  \begin{lstlisting}[basicstyle=\small]
Computing Element (objectclass GlueCE)
 GlueCEUniqueID: 
 GlueCEName: 

Cluster (objectclass GlueCluster)
 GlueClusterUniqueID: 
 GlueClusterName: 
  \end{lstlisting}
\end{frame}

% xrsl
\begin{frame}
  \frametitle{eXtended Resource Specification Language}
  \begin{itemize}
  \item Resource requirements are expressed using an agreed syntax (in SMSCG: xrsl)
  \item This syntax is machine and LRMS independent
  \item It only provides a way to express some resource requirements (the most common/relevant)
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{eXtended Resource Specification Language}
  \begin{lstlisting}
(executable="gms")
(stdout="log.out")
(InputFiles=("" "/data/20110721/data01.inp"))
(OutputFiles=("" "results/20110721/data01.out"))
(memory="500M")
  \end{lstlisting}
\end{frame}

\begin{frame}
  \frametitle{eXtended Resource Specification Language}
  \begin{itemize}
  \item Resource requirements are matched against resource and service descriptions by the Resource Broker ({\it matchmaking})
  \item The result is a ranked list of selected resources
  \item Each resource/service is contacted until one agrees to serve the request
  \end{itemize}
\end{frame}

%%%  Resource scheduling
% explain the broker and show some examples
\begin{frame}
  \frametitle{Resource Brokering}
  In ARC, brokering of resources is done by the client
  \begin{itemize}
  \item starting from a given request (e.g. job submit)
  \item client contacts {\it Information Indexes} to collect resources and service information
  \item resource requirements from {\it xrsl} are matched with resource and service information in {\it GLUE} schema
  \item Scheduling policies are applied to rank the list of selected resources
  \item The list is used (following the ranking) to search for a resource or service to fulfill the request
  \end{itemize}
\end{frame}

%%% Data management
% GridFTP
% Note: same as for AuthZ: intro, explain what it does but
% leave comparison and low level technical details for a dedicated lecture
\begin{frame}
  \frametitle{Data management}
  ARC uses GridFTP as a reference data transfer protocol\\
  \begin{itemize}
  \item It provides an {\it interface} for secure job submission and retrieval 
  \item with authorization and authentication based on the {\it Grid Security Infrastructure}, 
  \item consists of a standard GridFTP server with extensions to handle job manipulation requests.
  \item These jobs are then processed and sent to the local batch system by the a-rex.
  \end{itemize}
\end{frame}

%%% Application management
% Do we ? maybe not

%%% Usage examples
% single job submission
% what is the problem when dealing with large data analysis (still the same:
% how to move data, how to allocate resources, how to describe requirements
% how to submit and monitor, how to cope with failures
% how to adapt with what the system is providing (adaptive)

\begin{frame}
  \frametitle{Jobflow}
  \begin{itemize}
  \item A user prepares a job description using the extended Globus Resource Specification Language (XRSL). 
  \item This description may include application specific requirements, such as input and output data description, as well as other options used in resource matching, such as architecture or an explicit cluster.
  \item {\bf arcsub}: The job description is interpreted by the ARC\_UI, which performs resource brokering using the Information System and forwards the job to the chosen cluster, uploading any specified accompanying files.
  \end{itemize}
\end{frame}

\begin{frame}
  \frametitle{Jobflow}
  \begin{itemize}
  \item The job request is received by the {\bf a-rex} through the {\bf GridFTP} process, both residing on the selected ARC\_CE.
  \item {\bf a-rex} handles pre-processing, job submission to the local system, and post-processing, depending on the job specifications.
  \item {\bf Input and output data} are also controlled by the a-rex.
  \item {\bf arcstat}: to check the status of a submitted request (job)
  \item Upon job completion, the files specified in the job description can be retrieved by the user with {\bf arcget}.
  \end{itemize}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Usage example: single job}
  \begin{lstlisting}[basicstyle=\tiny]
arcsub -f rosetta.xrsl
Job submitted with jobid: gsiftp://smscg.inf.usi.ch:2811/jobs/270221318408731455290050

arcstat gsiftp://smscg.inf.usi.ch:2811/jobs/270221318408731455290050
Job: gsiftp://smscg.inf.usi.ch:2811/jobs/270221318408731455290050
 State: Finished (FINISHED)
 Exit Code: 0

arcget gsiftp://smscg.inf.usi.ch:2811/jobs/270221318408731455290050

ls 270221318408731455290050/
stdout

  \end{lstlisting}
\end{frame}

%% Usage examples
% Multiple job submission
%
\begin{frame}
  \frametitle{Usage example: multiple jobs}
\end{frame}

\end{document}

