summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <peter@lekensteyn.nl>2017-01-16 07:35:02 +0100
committerPeter Wu <peter@lekensteyn.nl>2017-01-16 07:35:02 +0100
commitb9070693776c448b542c5f7d7f4a0882a89fbab0 (patch)
tree74a2fe9cae9c94548f17486e0e623455e8347a69
parent995b06c4e9617c44429cc4d0ae3d0518384d6471 (diff)
download2IMF25-AR-b9070693776c448b542c5f7d7f4a0882a89fbab0.tar.gz
NFA: finish 2
-rw-r--r--part2.tex184
1 files changed, 182 insertions, 2 deletions
diff --git a/part2.tex b/part2.tex
index 9d925ed..4c5d6de 100644
--- a/part2.tex
+++ b/part2.tex
@@ -290,10 +290,189 @@ for which the only non-empty words in $L(M)$ of length $<$ 5 are $aa$, $bb$,
$aba$, $baa$, $abab$, $babb$ and $bbba$.
\subsection*{Solution:}
+Model the NFA as $M = \{Q, \Sigma, \Delta, q_0, F\}$ where $Q=\{q_0, \ldots,
+q_{m-1}\}$ is the set of states, $\Sigma$ is the alphabet, $\Delta$ is the
+transition function $\Delta: Q \times \Sigma \to P(Q)$, $q_0 \in Q$ is the
+initial state and $F \subseteq Q$ is the set of final (accepting) states. The
+list of accepted words is $L(M)$.
+Let $S_{i,j}$ be the mapping to a state in $Q$ from character index $0 \leq j$
+of word $w_i \in L(M)$.
+
+For the given scenario, we also define \emph{bad} words (words with a length
+smaller than five, but not in the accepted language):
+\begin{align}
+ W' = \{ w |
+ w \in \Sigma^*
+ \land 1 \leq \#w \leq 4
+ \land w \not\in L(M)
+ \}
+\end{align}
+
+To solve the problem, we first try to find a NFA for a $m$ states and then
+gradually decrement it until we obtain \texttt{unsat}. An overview of our
+approach (a more detailed description will follow):
+\begin{enumerate}
+\item For each valid word $w_i$ in $L(M)$:
+ \begin{enumerate}
+ \item Add new states for each letter $j$ of the word $w_i$: $S_{i,j}$.
+ Note that these states use different variables ($S$) than the states
+ of the NFA ($Q$).
+ \item Add new transitions to the formula, starting from the common
+ initial state $q_0$. The next states are $S_{i,0}$, $S_{i,1}$, etc.
+ This expresses that the input $w_i$ is indeed accepted by $M$.
+ \item Add a constraint to the formula to express that the last letter
+ ends up in a final state.
+ \end{enumerate}
+\item For every \emph{bad} word, add an implication to the formula: if
+ the word exists, then the last state of this word must not be accepting.
+\item Finally map each of the states $S_{i,j}$ to a state in $Q$ via
+ conjunctions (for each word) consisting of disjunctions (for each state
+ $S_{i,j}$) that try to match with any state in $Q$.
+\end{enumerate}
+
+First we require that every non-empty word $w_i$ is indeed accepted as input: a
+path exists from the initial state to the final state, following each letter one
+by one:
+\begin{align}
+ \bigwedge_{w_i \in L(M)}
+ \Delta(q_0, w_{i,0}) = S_{i,0}
+ \land
+ (\bigwedge_{j=1}^{\#w_i}
+ \Delta(S_{i,j-1}, w_{i,j-1}) = S_{i,j})
+ \land
+ S_{i,\#w_i} \in F
+\end{align}
+
+The above expression assigns values to $S_{i,j}$, making it easier to put
+restrictions on the ``characters'' of each word (for example, requiring that the
+word originates from the initial state $q_0$ and that the last state is
+accepting). To make it more useful, it should be mapped somehow to $Q$ so we can
+obtain the desired NFA. For simplicity, let's assume that each character can be
+assigned to any state in $Q$:
+\begin{align}
+ \bigwedge_{w_i \in L(M)}
+ \bigwedge_{j=0}^{\#w_i}
+ \bigvee_{q \in Q} S_{i,j} = q
+\end{align}
+Note that this includes all possible states in $Q$, including the initial state
+$q_0$. Being the initial state does not exclude the state from receiving
+incoming transitions.
+
+Finally we have to exclude transitions, otherwise even a single, initial and
+final state would be permitted (with transitions to itself). Since loops are
+possible and transitions to any earlier state can occur, it is not sufficient
+to (for example) just exclude input that differ only in the trailing character.
+Therefore we check all possible \emph{bad words} $W'$.
+\begin{align}
+ \bigwedge_{w_i \in W'}
+ \Delta(q_0, w_{i,0}) = T_{i,0}
+ \land
+ (\bigwedge_{j=1}^{\#w_i}
+ \Delta(T_{i,j-1}, w_{i,j-1}) = T_{i,j})
+ \implies
+ T_{i,\#w_i} \not\in F
+\end{align}
+
+\textbf{Implementation notes}:
+we combined the above three ingredients into a conjunction and implemented it
+via a Python script that generates a SMT. To allow for concise SMT without
+exploding in the number of variables, we used functions. These have no limits in
+their domain or range so we have to add manual checks for these. For the states
+$S_{i,j}$ that match good words and for states $T_{i,j}$ that match bad words,
+we implemented range checks like $T_{i,j} \in Q$.
+(Actually, we set $q_i = i$ for $0 \leq i < m$, so the above range check simply
+checks whether the values are within that range.)
+These checks are either added to the global formula (for the good words) or
+before the implication (for the bad words).
+
+The generated SMT program has a shape which is shown in Listing~\ref{lst:nfa}.
+It runs within a second, even for $m=1000$. By gradually reducing it as stated
+before, we found that for $m=7$ no automaton was found (\texttt{unsat}) but for
+$m=8$, a valid result was obtained (see Figure~\ref{fig:nfa}). Note that the Z3
+output (Listing~\ref{lst:nfaZ3}) contains out-of-range and out-of-domain values
+because the $trans$ ($\Delta$) function for example is used with arbitrary
+values before the implication. When an implication evaluates to false, then the
+solver still keeps the garbage values even if it remains unused (its exact value
+does not influence the result of the evaluation).
+
+\begin{lstlisting}[
+ caption={Generated SMT program for NFA with $m=8$.},
+ label={lst:nfa},basicstyle=\scriptsize,language=lisp
+]
+(benchmark test.smt
+:logic QF_UFLIA
+:extrafuns (
+(q0 Int) (q1 Int) (q2 Int) (q3 Int) (q4 Int) (q5 Int) (q6 Int) (q7 Int)
+(s0_0 Int) (s0_1 Int) (s1_0 Int) (s1_1 Int) (s2_0 Int) (s2_1 Int) (s2_2 Int)
+(s3_0 Int) (s3_1 Int) (s3_2 Int) (s4_0 Int) (s4_1 Int) (s4_2 Int) (s4_3 Int)
+(s5_0 Int) (s5_1 Int) (s5_2 Int) (s5_3 Int) (s6_0 Int) (s6_1 Int) (s6_2 Int)
+(s6_3 Int)
+(trans Int Int Int)
+(final Int Bool)
+)
+:formula (and
+(= q0 0) (= q1 1) (= q2 2) (= q3 3) (= q4 4) (= q5 5) (= q6 6) (= q7 7)
+(= (trans q0 0) s0_0)
+(= (trans s0_0 0) s0_1)
+(final s0_1)
+...
+(= (trans q0 1) s6_0)
+(= (trans s6_0 1) s6_1)
+(= (trans s6_1 1) s6_2)
+(= (trans s6_2 0) s6_3)
+(final s6_3)
+(=> (and (>= (trans q0 0) 0) (< (trans q0 0) 8)) (not (final (trans q0 0))))
+(=> (and (>= (trans q0 1) 0) (< (trans q0 1) 8)) (not (final (trans q0 1))))
+(=> (and (>= (trans q0 0) 0) (< (trans q0 0) 8)
+ (>= (trans (trans q0 0) 1) 0) (< (trans (trans q0 0) 1) 8))
+ (not (final (trans (trans q0 0) 1))))
+...
+(or (= q0 s0_0) (= q1 s0_0) ... (= q7 s0_0))
+...
+(or (= q0 s6_3) (= q1 s6_3) ... (= q7 s6_3))
+))
+\end{lstlisting}
+\begin{minipage}{.35\textwidth}
+\begin{lstlisting}[
+ caption={Output of Z3 for the generated SMT in Listing~\ref{lst:nfa}.},
+ label={lst:nfaZ3},basicstyle=\scriptsize,language=lisp
+]
+sat
+s6_3 -> 5
+s6_2 -> 1
+...
+
+s0_0 -> 4
+q7 -> 7
+q6 -> 6
+...
+q0 -> 0
+trans -> {
+ 0 0 -> 4
+ 4 0 -> 5
+...
+ 3 0 -> (- 5921)
+ (- 1) 0 -> (- 8946)
+...
+ else -> 5
+}
+final -> {
+ 2 -> true
+ 5 -> true
+...
+ 1 -> false
+...
+ (- 1324) -> false
+ else -> false
+}
+\end{lstlisting}
+\end{minipage}
+\hfill
+\begin{minipage}{.55\textwidth}
\begin{figure}[H]
\centering
-\begin{tikzpicture}[>=latex,line join=bevel,scale=.8]
+\begin{tikzpicture}[>=latex,line join=bevel,scale=.6]
%%
\node (q1) at (349.84bp,107.75bp) [draw,circle] {q1};
\node (q0) at (63.348bp,142.75bp) [draw,circle] {q0};
@@ -331,8 +510,9 @@ $aba$, $baa$, $abab$, $babb$ and $bbba$.
\draw (200.59bp,68.248bp) node {b};
%
\end{tikzpicture}
+\caption{\label{fig:nfa}Resulting NFA for $m=8$.}
\end{figure}
-
+\end{minipage}
\section*{Problem: }
The goal of this problem is to exploit the power of the recommended tools rather