%Template article for preprint document class `elsart'
% SP 2001/01/05
% Modified 2005/10/20 for CTW 2006
% Please find the original file at http://www.elsevier.com/
\documentclass{ctwart}
% Use the option doublespacing or reviewcopy to obtain double line spacing
%\documentclass[doublespacing]{elsart}
% if you use PostScript figures in your article
% use the graphics package for simple commands
% \usepackage{graphics}
% or use the graphicx package for more complicated commands
% \usepackage{graphicx}
% or use the epsfig package if you prefer to use the old commands
% \usepackage{epsfig}
% The amssymb package provides various useful mathematical symbols
\usepackage{amssymb}
\newcommand{\ov}{\overline}
\begin{document}
\setcounter{page}{115}
\begin{frontmatter}
% Title, authors and addresses
% use the thanksref command within \title, \author or \address for footnotes;
% use the corauthref command within \author for corresponding author footnotes;
% use the ead command for the email address,
% and the form \ead[url] for the home page:
% \title{Title\thanksref{label1}}
% \thanks[label1]{}
% \author{Name\corauthref{cor1}\thanksref{label2}}
% \ead{email address}
% \ead[url]{home page}
% \thanks[label2]{}
% \corauth[cor1]{}
% \address{Address\thanksref{label3}}
% \thanks[label3]{}
\title{Determining Optimal Stationary Strategies for Discounted Stochastic Optimal Control Problem on Networks}
% use optional labels to link authors explicitly to addresses:
% \author[label1,label2]{}
% \address[label1]{}
% \address[label2]{}
\author[label1]{Dmitrii Lozovanu\corauthref{cor1}},
\author[label2]{Stefan Pickl}
\ead{lozovanu@usm.md, stefan.pickl@unibw.de}
\corauth[cor1]{Dmitrii Lozovanu}
\address[label1]{Institute of Mathematics and Computer Science, Academy of Sciences, \\
Academy str., 5, Chisinau, MD--2028, Moldova}
\address[label2]{Institut f\"ur Theoretische Informatik, Mathematik
und Operations Research, \\ Fakult\"at f\"ur Informatik,
Universit\"at der Bundeswehr, M\"unchen}
\begin{abstract}
The stochastic version of discrete optimal control problem with
infinite time horizon and discounted integral-time cost criterion
is considered. This problem is formulated and
studied on certain networks. A polynomial time algorithm for determining
the optimal stationary strategies for the considered problems is
proposed and some applications of the algorithm for related Markov
decision problems are described.
\end{abstract}
\begin{keyword}
Discounted Stochastic Control Problem \sep Optimal Stationary Strategies \sep Polynomial Time
Algorithm\sep
Discounted Markov Processes\end{keyword}
\end{frontmatter}
% main text
% The Appendices part is started with the command \appendix;
% appendix sections are then done as normal sections
% \appendix
% \section{}
% \label{}
\section{Introduction, Problem Formulation and the Main Concept}
In this paper we consider the stochastic version of the following
discrete optimal control problem with infinite time horizon and a
discounted integral-time cost criterion by trajectory.
Let a time-discrete system $L$ with a finite set of states $X$ be given. Assume that the
dynamics of the system is described by a directed graph of states'
transitions $G=(X,E)$ where the set of vertices $X$ corresponds to
the set of states of the dynamical system; an arbitrary directed
edge $e=(x,y)$ expresses the possibility of the system to pass
from the state $x=x(t)$ to the state $y=x(t+1)$ at every discrete
moment of time $t=0,1,2,\dots $. Hereby, a directed edge $e=(x,y)\in
E$ corresponds to a feasible stationary control of system $L$ in
the state $x$ and the subset of edges $E^+(x)=\{e=(x,y)\in E | y \in
X \}$ corresponds to the set of feasible stationary controls of
the system in the state $x\in X$. We assume that on the edge set $E$ a
cost function $c:E\to R$ is defined which assigns a cost $c_e$ to
each directed edge $e=(x,y)\in E$ when the system makes a transition
from the state $x=x(t)$ to the state $y=x(t+1)$ for every
$t=0,1,2,\dots$, i.e., the cost $c_{x(t),x(t+1)}$ does not depend
on $t$. We define a stationary control of system $L$ in $G$ as a
map
$$s: x\to y\in X^+(x) \quad \mbox{for} \quad x\in X,$$
where $X^+(x)=\{y\in X|(x,y)\in E \}$.
Let $s$ be an arbitrary stationary control. Then the set of edges
of the form $(x,s(x))$ in $G$ generates a subgraph $G_s=(X,E_s)$
where each vertex $x\in X$ contains one leaving directed edge. So,
if the starting state $x_0=x(0)$ is fixed then the system makes
transitions from one state to another through the corresponding
directed edges $e_0^s,e_1^s,e_2^s,\dots,e_t^s,\dots,$ where
$e_t^s=(x(t),x(t+1)),\quad t=0,1,2,\dots$. This sequence of
directed edges generates a trajectory $x_0=x(0),x(1),x(2),\dots $
which leads to a unique directed cycle. For an arbitrary
stationary strategy $s$ and a fixed starting state $x_0$ the
discounted integral-time cost $\sigma_{x_0}^{\lambda}(s)$ is
defined as follows
$\sigma_{x_0}^{\lambda}(s)=\sum_{t=0}^{\infty}\lambda^t
c_{e_t^s},$ where $\lambda,\quad 0\leq \lambda <1,$ is a given
(so-called) discount factor. Based on the results from
\cite{how,put} it is easy to show that for an arbitrary stationary
strategy $s$ there exists $\sigma_{x_0}^{\lambda}(s)$. If we
denote by $\sigma^{\lambda}(s)$ the column vector with components
$\sigma_x^{\lambda}(s)$ for $x\in X$ then
$\sigma_{x_0}^{\lambda}(s)$ can be found by solving the system of
linear equations $(I-\lambda P^s)\sigma^{\lambda}(s)=\ov{c}^s,$
where $\ov{c}^s$ is the vector with corresponding components
$c_{(x,s(x))}$ for $x\in X, \quad I$ is the identity matrix and
$P^s$ the matrix with elements $p_{x,y}^s$ for $x,y\in X$ defined
as follows
$$
p_{x,y}^s=\left\{%
\begin{array}{ll}
1, & \hbox{if $y=s(x)$;} \\
0, & \hbox{if $y\neq s(x)$.} \\
\end{array}
\right.
$$
We are seeking a stationary control $s^*$ such that
$\sigma_{x_0}^{\lambda}(s^*)=\min_{s}\sigma_{x_0}^{\lambda}(s)$.
In this paper we consider the stochastic version of the problem
formulated above. We assume that the dynamical system may admit
states in which the vector of control parameters is changed in a
random way. So, the set of states $X$ is divided into two subsets
$X=X_1\cup X_2, \quad X_1\cap X_2=\emptyset$, where $X_1$
represents the set of states in which the decision maker is able to
control the dynamical system and $X_2$ represents the set of
states in which the dynamical system makes transition to the next
state in a random way. So, for every $x\in X$ on the set of
feasible transitions $E^+(x)$ the distribution function
$p:E^+(x)\to R$ is defined such that $\sum_{e\in E^+(x)}p_e=1,\quad p_e\geq
0, \forall e\in E^+(x)$ and the transitions from the states $x\in
X_2$ to the next states are made according to these
distribution functions. Here, in a similar way as in the previous case
of the problem, we assume that to each directed edge $e=(x,y)\in E$
a cost $c_e$ is associated when the system makes a transition from
the state $x=x(t)$ to the state $y=x(t+1)$ for every
$t=0,1,2,\dots $. In addition we assume that the discount factor
$\lambda,\quad 0\leq \lambda <1,$ and the starting state $x_0$ are
given. We define a stationary control for the considered problem
as a map
$$s:x\to y\in X^+(x)\quad \mbox{for} \quad x\in X_1.$$ For an arbitrary
stationary strategy $s$ we define the graph $G_s=(X,E_s\cup
E_{X_2})$, where $E_s=\{e=(x,y)\in E | x\in X_1, y=s(x)\},\quad
E_{X_2}=\{e=(x,y) | x\in X_2, y\in X\}.$ This graph corresponds to a
Markov process with the probability matrix $P^s=(p_{x,y}^s),$
where
$$
p_{x,y}^s=\left\{%
\begin{array}{lll}
p_{x,y}, &\hbox{if $x\in X_2$ and $y\in X$;}\\
1, & \hbox{if $x\in X_1$ and $y=s(x)$;} \\
0, & \hbox{if $x\in X_1$ and $y\neq s(x)$.} \\
\end{array}
\right.
$$
For this Markov process with associated costs $c_e,\quad e\in E $
we can define the expected discounted integral-time cost
$\sigma_{x_0}^{\lambda} (s)$ in the same way as for discounted
Markov processes with rewards (if we treat the rewards as the
costs). In this paper we consider the problem of determining the
strategy $s^*$ for which
$\sigma_{x_0}^{\lambda}(s^*)=\min_{s}\sigma_{x_0}^{\lambda}(s).$
\section{The Main Results}
The stationary case of the considered discounted stochastic
control problem can be studied and solved using the general
concept of Markov decision processes and the linear programming
approach to corresponding problems (see \cite{how,loz1,put}).
Here we develop a new technique and we will formulate a new linear
programming problem which is more suitable to the specific context.
To obtain
our linear model we shall use the following condition:
\begin{equation}\label{eq2}
\left \{
\begin{array}{l}
\quad\sigma_x-\lambda \sum_{y\in X^+(x)}
p_{x,y}^s\sigma_y=\sum_{y\in X^+(x)}c_{(x,y)}p_{x,y}^s,\quad\forall x\in X_1;\\
\quad\sigma_x-\lambda \sum_{y\in X^+(x)}
p_{x,y}\sigma_y=\sum_{y\in X^+(x)}c_{(x,y)}p_{x,y},\quad\forall
x\in X_2,
\end{array}
\right.
\end{equation}
for an arbitrary stationary strategy $s$. For fixed $s$ the
probabilities $p_{x,y}^s,\quad x\in X,y\in X^+(x),$ satisfy the
conditions: $\sum_{y \in X^+(x)} p_{x,y}^s=1,\forall x\in
X_1;\quad p_{x,y}^s\in \{0,1\},\forall x\in X_1,y\in X^+(x).$ The
system (\ref{eq2}) has a unique solution with respect to
$\sigma_x$ for $x\in X$ and therefore we uniquely determine
$\sigma_{x_0}^{\lambda}(s)$. Thus we can consider the linear programming
problem: Maximize
\begin{equation}\label{eq1}
\quad \quad \quad\quad\psi_{p^s}(\sigma )= \sigma_{x_0}
\end{equation}
subject to (\ref{eq2}). This problem has a unique feasible
solution which is the optimal one. The dual program for this problem
is: Minimize
\begin{equation}\label{eq3}
\quad \varphi_{p^s}(\alpha)= \sum_{x\in X_1}\sum_{y\in
X^+(x)}c_{(x,y)}p_{x,y}^s\alpha_x+\sum_{x\in X_2}\sum_{y\in
X^+(x)}c_{(x,y)}p_{x,y}\alpha_x
\end{equation}
subject to
\begin{equation}\label{eq4}
\left \{
\begin{array}{l}
\alpha_y-\lambda \sum_{x\in X_1^-(y)}
p_{x,y}^s\alpha_x-\lambda\sum_{x\in X_2^-(y)}p_{x,y}\alpha_x\geq 1,\ \ y=x_0;\\
\alpha_y-\lambda \sum_{x\in X_1^-(y)}
p_{x,y}^s\alpha_x-\lambda\sum_{x\in X_2^-(y)}p_{x,y}\alpha_x\geq 0,\ \ \forall \ y\in X\setminus \{x_0\}.\\
\end{array}
\right.
\end{equation}
If we take here the minimum with respect to $s$ then we obtain a
bilinear programming problem with respect to $\alpha_x$ and
$p_{x,y}^s$, where $p_{x,y}^s$ satisfy the conditions: $\sum_{y\in
X^+(x)}p_{x,y}^s=1; p_{x,y}^s\in \{0,1\},\forall x\in X_1, y\in
X^+(x)$. We have proved that the optimal solution is preserved if
these conditions are changed by conditions: $\sum_{y\in
X^+(x)}p_{x,y}^s\alpha_x=\alpha_x,\forall x\in X_1,\forall y\in
X^+(x); \alpha_x\geq 0,\beta_{x,y}\geq 0,\forall x\in X,y\in
X^+(x).$ If, after that operation, we substitute
$\beta_{x,y}=p_{x,y}^s\alpha_x$, then the bilinear programming
problem obtained on the basis of (\ref{eq3}),(\ref{eq4}) with
the conditions mentioned above is reduced to the linear programming
problem: Minimize
\begin{equation}\label{eq5}
\quad \quad \varphi(\alpha,\beta)= \sum_{x\in X_1}\sum_{y\in
X^+(x)}c_{(x,y)}\beta_{x,y}+\sum_{x\in X_2}\sum_{y\in
X^+(x)}c_{(x,y)}p_{x,y}\alpha_x
\end{equation}
subject to
\begin{equation}\label{eq6}
\left \{
\begin{array}{l}
\alpha_y-\lambda \sum_{x\in X_1^-(y)}
\beta_{x,y}-\lambda\sum_{x\in X_2^-(y)}p_{x,y}\alpha_x\geq 1,\ \ y=x_0;\\
\alpha_y-\lambda \sum_{x\in X_1^-(y)}
\beta_{x,y}-\lambda\sum_{x\in X_2^-(y)}p_{x,y}\alpha_x\geq 0,\ \ \forall \ y\in X\setminus \{x_0\};\\
\sum_{y\in X^+(x)}\beta_{x,y}=\alpha_x,\ \ \forall x\in X_1;\
\beta_{x,y}\geq 0,\alpha_x\geq 0, \forall x\in X,y\in X^+(x),
\end{array}
\right.
\end{equation}
where $X_1^-(y)=\{x\in X_1 |(x,y)\in E\},\ X_2^-(y)=\{x\in
X_2|(x,y)\in E\}$. The following result holds: If
$\alpha_x^*,\beta_{x,y}^*$ is a basic optimal solution of the
problem (\ref{eq5}),(\ref{eq6}) and $\alpha_x^*\neq 0$ for $x\in
X_1$ then $p_{x,y}^{s^*}=\beta_{x,y}^*/\alpha_x^* \in \{0,1\},
x\in X_1,y\in X^+(x);$ the optimal stationary strategy $s^*:x\to
y$ for $y\in X^+(x)$ corresponds to $p_{x,y}^{s^*}=1$ for $x\in
X_1,y\in X^+(x)$.
\par \vspace*{-6mm}
\begin{thebibliography}{18}
\par \vspace*{-6mm}
\bibitem{how}
Howard R.A., Dynamic Programming and Markov Processes. Wiley
(1960)
\bibitem{loz1}
Lozovanu D., Pickl S., Optimization and Multiobjective Control of
Time-Discrete Systems. Springer Verlag (2009)
\bibitem{put} Puterman M., Markov Decision Processes. Wiley (1993)
\end{thebibliography}
\end{document}