Theory Words (Isabelle2013-2: December 2013)

theory Words
imports Preliminaries
begin

subsection {* $\omega$-words *}

text {*
  Automata recognize languages, which are sets of words. For the
  theory of $\omega$-automata, we are mostly interested in
  $\omega$-words, but it is sometimes useful to reason about
  finite words, too. We are modeling finite words as lists; this
  lets us benefit from the existing library. Other formalizations
  could be investigated, such as representing words as functions
  whose domains are initial intervals of the natural numbers.
*}

subsubsection {* Type declaration and elementary operations *}

text {*
  We represent $\omega$-words as functions from the natural numbers
  to the alphabet type. Other possible formalizations include
  a coinductive definition or a uniform encoding of finite and
  infinite words, as studied by M\"uller et al.
*}

type_synonym
  'a word = "nat => 'a"

definition
  suffix :: "[nat, 'a word] => 'a word"
  where "suffix k x ≡ λn. x (k+n)"

lemma suffix_nth [simp]:
  "(suffix k x) n = x (k+n)"
by (simp add: suffix_def)

lemma suffix_0 [simp]:
  "suffix 0 x = x"
by (simp add: suffix_def)

lemma suffix_suffix [simp]:
  "suffix m (suffix k x) = suffix (k+m) x"
by (rule ext, simp add: suffix_def add_assoc)

text {*
  A finite part of an infinite word can be obtained as the
  result of mapping the word over the desired index interval.
  For now, we do not define a separate constant for this
  construction, as it does not occur very often, and we prefer
  to rely on the proof machinery provided by the standard library.
  However, the standard rewriting rules include the theorem @{text upt_Suc},
  which tends to convert, say, $[i..j]$ into $[i..j(] @ [j]$.
  Since this is (almost) never what we want, we remove the theorem
  from the rule base in effect for this session.
*}
declare upt_Suc [simp del]


text {*
  We can prefix a finite word to an $\omega$-word, and a way
  to obtain an $\omega$-word from a finite, non-empty word is by
  $\omega$-iteration.
*}

definition
  conc :: "['a list, 'a word] => 'a word"    ("_/ conc _" [66,65] 65)
  where "w conc x == λn. if n < length w then w!n else x (n - length w)"

definition
  iter :: "'a list => 'a word"
  where "iter w == if w = [] then undefined else (λn. w!(n mod (length w)))"

syntax (xsymbols)
  conc :: "['a list, 'a word] => 'a word"    ("_/ \<frown> _" [66,65] 65)
  iter :: "'a list => 'a word"               ("(_⇧^ω)" [1000])

lemma conc_empty[simp]: "[] \<frown> w = w"
  unfolding conc_def by auto

lemma conc_fst:
  "n < length w ==> (w \<frown> x) n = w!n"
by (simp add: conc_def)

lemma conc_snd:
  "¬(n < length w) ==> (w \<frown> x) n = x (n - length w)"
by (simp add: conc_def)

lemma iter_nth [simp]:
  "0 < length w ==> w⇧^ω n = w!(n mod (length w))"
by (simp add: iter_def)

lemma conc_conc:
  "u \<frown> v \<frown> w = (u @ v) \<frown> w" (is "?lhs = ?rhs")
proof
  fix n
  have u: "n < length u ==> ?lhs n = ?rhs n"
    by (simp add: conc_def nth_append)
  have v: "[| ¬(n < length u); n < length u + length v |] ==> ?lhs n = ?rhs n"
    by (simp add: conc_def nth_append, arith)
  have w: "¬(n < length u + length v) ==> ?lhs n = ?rhs n"
    by (simp add: conc_def nth_append, arith)
  from u v w show "?lhs n = ?rhs n" by auto
qed

lemma prefix_suffix: 
  "x = (map x [0..<n] ) \<frown> (suffix n x)"
by (rule ext, simp add: conc_def)

lemma iter_unroll:
  "0 < length w ==> w⇧^ω = w \<frown> w⇧^ω"
by (rule ext, simp add: conc_def mod_geq)

subsubsection {* The limit set of an $\omega$-word *}

text {*
  The limit set (also called infinity set) of an $\omega$-word
  is the set of letters that appear infinitely often in the word.
  This set plays an important role in defining acceptance conditions
  of $\omega$-automata.
*}

definition
  limit :: "'a word => 'a set"
  where "limit x ≡ { a . ∃⇩_∞n . x n = a }"

lemma limit_iff_frequent:
  "(a ∈ limit x) = (∃⇩_∞n . x n = a)"
by (simp add: limit_def)

text {*
  The following is a different way to define the limit,
  using the reverse image, making the laws about reverse
  image applicable to the limit set. 
  (Might want to change the definition above?)
*}

lemma limit_vimage:
  "(a ∈ limit x) = infinite (x -` {a})"
by (simp add: limit_def Inf_many_def vimage_def)

lemma two_in_limit_iff:
  "({a,b} ⊆ limit x) = 
   ((∃n. x n =a ) ∧ (∀n. x n = a --> (∃m>n. x m = b)) ∧ (∀m. x m = b --> (∃n>m. x n = a)))"
  (is "?lhs = (?r1 ∧ ?r2 ∧ ?r3)")
proof
  assume lhs: "?lhs"
  hence 1: "?r1" by (auto simp: limit_def elim: INFM_EX)
  from lhs have "∀n. ∃m>n. x m = b" by (auto simp: limit_def INFM_nat)
  hence 2: "?r2" by simp
  from lhs have "∀m. ∃n>m. x n = a" by (auto simp: limit_def INFM_nat)
  hence 3: "?r3" by simp
  from 1 2 3 show "?r1 ∧ ?r2 ∧ ?r3" by simp
next
  assume "?r1 ∧ ?r2 ∧ ?r3"
  hence 1: "?r1" and 2: "?r2" and 3: "?r3" by simp+
  have infa: "∀m. ∃n≥m. x n = a"
  proof
    fix m
    show "∃n≥m. x n = a" (is "?A m")
    proof (induct m)
      from 1 show "?A 0" by simp
    next
      fix m
      assume ih: "?A m"
      then obtain n where n: "n ≥ m" "x n = a" by auto
      with 2 obtain k where k: "k>n" "x k = b" by auto
      with 3 obtain l where l: "l>k" "x l = a" by auto
      from n k l have "l ≥ Suc m" by auto
      with l show "?A (Suc m)" by auto
    qed
  qed
  hence infa': "∃⇩_∞n. x n = a" by (simp add: INFM_nat_le)
  have "∀n. ∃m>n. x m = b"
  proof
    fix n
    from infa obtain k where k1: "k≥n" and k2: "x k = a" by auto
    from 2 k2 obtain l where l1: "l>k" and l2: "x l = b" by auto
    from k1 l1 have "l > n" by auto
    with l2 show "∃m>n. x m = b" by auto
  qed
  hence "∃⇩_∞m. x m = b" by (simp add: INFM_nat)
  with infa' show "?lhs" by (auto simp: limit_def)
qed

text {*
  For $\omega$-words over a finite alphabet, the limit set is
  non-empty. Moreover, from some position onward, any such word
  contains only letters from its limit set.
*}

lemma limit_nonempty:
  assumes fin: "finite (range x)"
  shows "∃a. a ∈ limit x"
proof -
  from fin obtain a where "a ∈ range x ∧ infinite (x -` {a})"
    by (rule inf_img_fin_domE, auto)
  hence "a ∈ limit x"
    by (auto simp add: limit_vimage)
  thus ?thesis ..
qed

lemmas limit_nonemptyE = limit_nonempty[THEN exE]

lemma limit_inter_INF:
  assumes hyp: "limit w ∩ S ≠ {}"
  shows "∃⇩_∞ n. w n ∈ S"
proof -
  from hyp obtain x where "∃⇩_∞ n. w n = x" and "x ∈ S"
    by (auto simp add: limit_def)
  thus ?thesis
    by (auto elim: INFM_mono)
qed

text {*
  The reverse implication is true only if $S$ is finite.
*}

lemma INF_limit_inter:
  assumes hyp: "∃⇩_∞ n. w n ∈  S" and fin: "finite (S ∩ range w)"
  shows  "∃a. a ∈ limit w ∩ S"
proof (rule ccontr)
  assume contra: "¬(∃a. a ∈ limit w ∩ S)"
  hence "∀a∈S. finite {n. w n = a}"
    by (auto simp add: limit_def Inf_many_def)
  with fin have "finite (UN a:S ∩ range w. {n. w n = a})"
    by auto
  moreover
  have "(UN a:S ∩ range w. {n. w n = a}) = {n. w n ∈ S}"
    by auto
  moreover
  note hyp
  ultimately show "False"
    by (simp add: Inf_many_def)
qed

lemma fin_ex_inf_eq_limit: "finite A ==> (∃⇩_∞i. w i ∈ A) <-> limit w ∩ A ≠ {}"
  by (metis INF_limit_inter equals0D finite_Int limit_inter_INF)

lemma limit_in_range_suffix:
  "limit x ⊆ range (suffix k x)"
proof
  fix a
  assume "a ∈ limit x"
  then obtain l where
    kl: "k < l" and xl: "x l = a"
    by (auto simp add: limit_def INFM_nat)
  from kl obtain m where "l = k+m"
    by (auto simp add:  less_iff_Suc_add)
  with xl show "a ∈ range (suffix k x)"
    by auto
qed

lemma limit_in_range: "limit r ⊆ range r"
  using limit_in_range_suffix[of r 0] by simp

lemmas limit_in_range_suffixD = limit_in_range_suffix[THEN subsetD]

theorem limit_is_suffix:
  assumes fin: "finite (range x)"
  shows "∃k. limit x = range (suffix k x)"
proof -
  have "∃k. range (suffix k x) ⊆ limit x"
  proof -
    -- "The set of letters that are not in the limit is certainly finite."
    from fin have "finite (range x - limit x)"
      by simp
    -- "Moreover, any such letter occurs only finitely often"
    moreover
    have "∀a ∈ range x - limit x. finite (x -` {a})"
      by (auto simp add: limit_vimage)
    -- "Thus, there are only finitely many occurrences of such letters."
    ultimately have "finite (UN a : range x - limit x. x -` {a})"
      by (blast intro: finite_UN_I)
    -- "Therefore these occurrences are within some initial interval."
    then obtain k where "(UN a : range x - limit x. x -` {a}) ⊆ {..<k}"
      by (blast dest: finite_nat_bounded)
    -- "This is just the bound we are looking for."
    hence "∀m. k ≤ m --> x m ∈ limit x"
      by (auto simp add: limit_vimage)
    hence "range (suffix k x) ⊆ limit x"
      by auto
    thus ?thesis ..
  qed
  then obtain k where "range (suffix k x) ⊆ limit x" ..
  with limit_in_range_suffix
  have "limit x = range (suffix k x)"
    by (rule subset_antisym)
  thus ?thesis ..
qed

theorems limit_is_suffixE = limit_is_suffix[THEN exE]


text {*
  The limit set enjoys some simple algebraic laws with respect
  to concatenation, suffixes, iteration, and renaming.
*}

theorem limit_conc [simp]:
  "limit (w \<frown> x) = limit x"
proof (auto)
  fix a assume a: "a ∈ limit (w \<frown> x)"
  have "∀m. ∃n. m<n ∧ x n = a"
  proof
    fix m
    from a obtain n where "m + length w < n ∧ (w \<frown> x) n = a"
      by (auto simp add: limit_def Inf_many_def infinite_nat_iff_unbounded)
    hence "m < n - length w ∧ x (n - length w) = a"
      by (auto simp add: conc_def)
    thus "∃n. m<n ∧ x n = a" ..
  qed
  hence "infinite {n . x n = a}"
    by (simp add: infinite_nat_iff_unbounded)
  thus "a ∈ limit x"
    by (simp add: limit_def Inf_many_def)
next
  fix a assume a: "a ∈ limit x"
  have "∀m. length w < m --> (∃n. m<n ∧ (w \<frown> x) n = a)"
  proof (clarify)
    fix m
    assume m: "length w < m"
    with a obtain n where "m - length w < n ∧ x n = a"
      by (auto simp add: limit_def Inf_many_def infinite_nat_iff_unbounded)
    with m have "m < n + length w ∧ (w \<frown> x) (n + length w) = a"
      by (simp add: conc_def, arith)
    thus "∃n. m<n ∧ (w \<frown> x) n = a" ..
  qed
  hence "infinite {n . (w \<frown> x) n = a}"
    by (simp add: unbounded_k_infinite)
  thus "a ∈ limit (w \<frown> x)"
    by (simp add: limit_def Inf_many_def)
qed

theorem limit_suffix [simp]: 
  "limit (suffix n x) = limit x"
proof -
  have "x = (map x [0..<n]) \<frown> (suffix n x)"
    by (simp add: prefix_suffix)
  hence "limit x = limit ((map x [0..<n]) \<frown> suffix n x)"
    by simp
  also have "… = limit (suffix n x)"
    by (rule limit_conc)
  finally show ?thesis
    by (rule sym)
qed

theorem limit_iter [simp]:
  assumes nempty: "0 < length w"
  shows "limit w⇧^ω = set w"
proof
  have "limit w⇧^ω ⊆ range w⇧^ω"
    by (auto simp add: limit_def dest: INFM_EX)
  also from nempty have "… ⊆ set w"
    by auto
  finally show "limit w⇧^ω ⊆ set w" .
next
  {
    fix a assume a: "a ∈ set w"
    then obtain k where k: "k < length w ∧ w!k = a"
      by (auto simp add: set_conv_nth)
    -- "the following bound is terrible, but it simplifies the proof"
    from nempty k
    have "∀m. w⇧^ω ((Suc m)*(length w) + k) = a"
      by (simp add: mod_add_left_eq)
    moreover
    -- "why is the following so hard to prove??"
    have "∀m. m < (Suc m)*(length w) + k"
    proof
      fix m
      from nempty have "1 ≤ length w" by arith
      hence "m*1 ≤ m*length w" by simp
      hence "m ≤ m*length w" by simp
      with nempty have "m < length w + (m*length w) + k" by arith
      thus "m < (Suc m)*(length w) + k" by simp
    qed
    moreover note nempty
    ultimately have "a ∈ limit w⇧^ω"
      by (auto simp add: limit_iff_frequent INFM_nat)
  }
  then show "set w ⊆ limit w⇧^ω" by auto
qed

lemma limit_o [simp]:
  assumes a: "a ∈ limit w"
  shows "f a ∈ limit (f o w)"
proof -
  from a
  have "∃⇩_∞n. w n = a"
    by (simp add: limit_iff_frequent)
  hence "∃⇩_∞n. f (w n) = f a"
    by (rule INFM_mono, simp)
  thus "f a ∈ limit (f o w)"
    by (simp add: limit_iff_frequent)
qed

text {*
  The converse relation is not true in general: $f(a)$ can be in the
  limit of $f \circ w$ even though $a$ is not in the limit of $w$.
  However, @{text limit} commutes with renaming if the function is
  injective. More generally, if $f(a)$ is the image of only finitely
  many elements, some of these must be in the limit of $w$.
*}

lemma limit_o_inv:
  assumes fin: "finite (f -` {x})" and x: "x ∈ limit (f o w)"
  shows "∃a ∈ (f -` {x}). a ∈ limit w"
proof (rule ccontr)
  assume contra: "¬(∃a ∈ (f -` {x}). a ∈ limit w)"
  -- "hence, every element in the pre-image occurs only finitely often"
  then have "∀a ∈ (f -` {x}). finite {n. w n = a}"
    by (simp add: limit_def Inf_many_def)
  -- "so there are only finitely many occurrences of any such element"
  with fin have "finite (\<Union> a ∈ (f -` {x}). {n. w n = a})"
    by auto
  -- {* these are precisely those positions where $x$ occurs in $f \circ w$ *}
  moreover
  have "(\<Union> a ∈ (f -` {x}). {n. w n = a}) = {n. f(w n) = x}"
    by auto
  ultimately
  -- "so $x$ can occur only finitely often in the translated word"
  have "finite {n. f(w n) = x}"
    by simp
  -- {* \ldots\ which yields a contradiction *}
  with x show "False"
    by (simp add: limit_def Inf_many_def)
qed

theorem limit_inj [simp]:
  assumes inj: "inj f"
  shows "limit (f o w) = f ` (limit w)"
proof
  show "f ` limit w ⊆ limit (f o w)"
    by auto
next
  show "limit (f o w) ⊆ f ` limit w"
  proof
    fix x
    assume x: "x ∈ limit (f o w)"
    from inj have "finite (f -` {x})"
      by (blast intro: finite_vimageI)
    with x obtain a where a: "a ∈ (f -` {x}) ∧ a ∈ limit w"
      by (blast dest: limit_o_inv)
    thus "x ∈ f ` (limit w)"
      by auto
  qed
qed

subsubsection {* Index sequences and piecewise definitions *}

text {*
  A word can be defined piecewise: given a sequence of words $w_0, w_1, \ldots$
  and a strictly increasing sequence of integers $i_0, i_1, \ldots$ where $i_0=0$,
  a single word is obtained by concatenating subwords of the $w_n$ as given by
  the integers: the resulting word is
  \[
    (w_0)_{i_0} \ldots (w_0)_{i_1-1} (w_1)_{i_1} \ldots (w_1)_{i_2-1} \ldots
  \]
  We prepare the field by proving some trivial facts about such sequences of 
  indexes.
*}

definition
  idx_sequence :: "nat word => bool"
  where "idx_sequence idx ≡ (idx 0 = 0) ∧ (∀n. idx n < idx (Suc n))"

lemma idx_sequence_less:
  assumes iseq: "idx_sequence idx"
  shows "idx n < idx (Suc(n+k))"
proof (induct k)
  from iseq show "idx n < idx (Suc (n + 0))"
    by (simp add: idx_sequence_def)
next
  fix k
  assume ih: "idx n < idx (Suc(n+k))"
  from iseq have "idx (Suc(n+k)) < idx (Suc(n + Suc k))"
    by (simp add: idx_sequence_def)
  with ih show "idx n < idx (Suc(n + Suc k))"
    by (rule less_trans)
qed

lemma idx_sequence_inj:
  assumes iseq: "idx_sequence idx"
  and eq: "idx m = idx n"
  shows "m = n"
proof (rule nat_less_cases)
  assume "n<m"
  then obtain k where "m = Suc(n+k)"
    by (auto simp add: less_iff_Suc_add)
  with iseq have "idx n < idx m"
    by (simp add: idx_sequence_less)
  with eq show ?thesis
    by simp
next
  assume "m<n"
  then obtain k where "n = Suc(m+k)"
    by (auto simp add: less_iff_Suc_add)
  with iseq have "idx m < idx n"
    by (simp add: idx_sequence_less)
  with eq show ?thesis
    by simp
qed (simp)

lemma idx_sequence_mono:
  assumes iseq: "idx_sequence idx"
  and m: "m ≤ n"
  shows "idx m ≤ idx n"
proof (cases "m=n")
  case True
  thus ?thesis by simp
next
  case False
  with m have "m < n" by simp
  then obtain k where "n = Suc(m+k)"
    by (auto simp add: less_iff_Suc_add)
  with iseq have "idx m < idx n"
    by (simp add: idx_sequence_less)
  thus ?thesis by simp
qed

text {*
  Given an index sequence, every natural number is contained in the
  interval defined by two adjacent indexes, and in fact this interval
  is determined uniquely.
*}

lemma idx_sequence_idx:
  assumes "idx_sequence idx"
  shows "idx k ∈ {idx k ..< idx (Suc k)}"
using assms by (auto simp add: idx_sequence_def)

lemma idx_sequence_interval:
  assumes iseq: "idx_sequence idx"
  shows "∃k. n ∈ {idx k ..< idx (Suc k) }"
    (is "?P n" is "∃k. ?in n k")
proof (induct n)
  from iseq have "0 = idx 0"
    by (simp add: idx_sequence_def)
  moreover
  from iseq have "idx 0 ∈ {idx 0 ..< idx (Suc 0) }"
    by (rule idx_sequence_idx)
  ultimately
  show "?P 0" by auto
next
  fix n
  assume "?P n"
  then obtain k where k: "?in n k" ..
  show "?P (Suc n)"
  proof (cases "Suc n < idx (Suc k)")
    case True
    with k have "?in (Suc n) k"
      by simp
    thus ?thesis ..
  next
    case False
    with k have "Suc n = idx (Suc k)"
      by auto
    with iseq have "?in (Suc n) (Suc k)"
      by (simp add: idx_sequence_def)
    thus ?thesis ..
  qed
qed

lemma idx_sequence_interval_unique:
  assumes iseq: "idx_sequence idx"
  and k: "n ∈ {idx k ..< idx (Suc k) }"
  and m: "n ∈ {idx m ..< idx (Suc m) }"
  shows "k = m"
proof (rule nat_less_cases)
  assume "k < m"
  hence "Suc k ≤ m" by simp
  with iseq have "idx (Suc k) ≤ idx m"
    by (rule idx_sequence_mono)
  with m have "idx (Suc k) ≤ n"
    by auto
  with k have "False"
    by simp
  thus ?thesis ..
next
  assume "m < k"
  hence "Suc m ≤ k" by simp
  with iseq have "idx (Suc m) ≤ idx k"
    by (rule idx_sequence_mono)
  with k have "idx (Suc m) ≤ n"
    by auto
  with m have "False"
    by simp
  thus ?thesis ..
qed (simp)

lemma idx_sequence_unique_interval:
  assumes iseq: "idx_sequence idx"
  shows "∃! k. n ∈ {idx k ..< idx (Suc k) }"
proof (rule ex_ex1I)
  from iseq show "∃k. n ∈ {idx k ..< idx (Suc k)}"
    by (rule idx_sequence_interval)
next
  fix k y
  assume "n ∈ {idx k..<idx (Suc k)}" and "n ∈ {idx y..<idx (Suc y)}"
  with iseq show "k = y" by (auto elim: idx_sequence_interval_unique)
qed

text {*
  Now we can define the piecewise construction of a word using
  an index sequence.
*}

definition
  merge :: "['a word word, nat word] => 'a word"
  where "merge ws idx ≡
           λ n. let i = THE i. n ∈ {idx i ..< idx (Suc i) } in ws i n"

lemma merge:
  assumes idx: "idx_sequence idx"
  and n: "n ∈ {idx i ..< idx (Suc i) }"
  shows "merge ws idx n = ws i n"
proof -
  from n have "(THE k. n ∈ {idx k ..< idx (Suc k) }) = i"
    by (rule the_equality[OF _ sym[OF idx_sequence_interval_unique[OF idx n]]]) simp
  thus ?thesis
    by (simp add: merge_def Let_def)
qed

lemma merge0:
  assumes idx: "idx_sequence idx"
  shows "merge ws idx 0 = ws 0 0"
proof (rule merge[OF idx])
  from idx have "idx 0 < idx (Suc 0)"
    by (unfold idx_sequence_def, blast)
  with idx show "0 ∈ {idx 0 ..< idx (Suc 0)}"
    by (simp add: idx_sequence_def)
qed

lemma merge_Suc:
  assumes idx: "idx_sequence idx"
  and n: "n ∈ {idx i ..< idx (Suc i) }"
  shows "merge ws idx (Suc n) = 
         (if Suc n = idx (Suc i) then ws (Suc i) else ws i) (Suc n)"
proof (auto)
  assume eq: "Suc n = idx (Suc i)"
  from idx have "idx (Suc i) < idx (Suc(Suc i))"
    by (unfold idx_sequence_def, blast)
  with eq idx show "merge ws idx (idx (Suc i)) = ws (Suc i) (idx (Suc i))"
    by (simp add: merge)
next
  assume neq: "Suc n ≠ idx (Suc i)"
  with n have "Suc n ∈ {idx i ..< idx (Suc i) }"
    by auto
  with idx show "merge ws idx (Suc n) = ws i (Suc n)"
    by (rule merge)
qed

end