Theory KMP
theory KMP
imports "Isabelle_LLVM.IICF"
"HOL-Library.Sublist"
begin
declare len_greater_imp_nonempty[simp del] min_absorb2[simp]
section‹Specification›text_raw‹\label{sec:spec}›
subsection‹Sublist-predicate with a position check›
subsubsection‹Definition›
text ‹One could define›
definition "sublist_at' xs ys i ≡ take (length xs) (drop i ys) = xs"
text‹However, this doesn't handle out-of-bound indexes uniformly:›
value[nbe] "sublist_at' [] [a] 5"
value[nbe] "sublist_at' [a] [a] 5"
value[nbe] "sublist_at' [] [] 5"
text‹Instead, we use a recursive definition:›
fun sublist_at :: "'a list ⇒ 'a list ⇒ nat ⇒ bool" where
"sublist_at (x#xs) (y#ys) 0 ⟷ x=y ∧ sublist_at xs ys 0" |
"sublist_at xs (y#ys) (Suc i) ⟷ sublist_at xs ys i" |
"sublist_at [] ys 0 ⟷ True" |
"sublist_at _ [] _ ⟷ False"
text‹In the relevant cases, both definitions agree:›
lemma "i ≤ length ys ⟹ sublist_at xs ys i ⟷ sublist_at' xs ys i"
unfolding sublist_at'_def
by (induction xs ys i rule: sublist_at.induct) auto
text‹However, the new definition has some reasonable properties:›
subsubsection‹Properties›
lemma sublist_lengths: "sublist_at xs ys i ⟹ i + length xs ≤ length ys"
by (induction xs ys i rule: sublist_at.induct) auto
lemma Nil_is_sublist: "sublist_at ([] :: 'x list) ys i ⟷ i ≤ length ys"
by (induction "[] :: 'x list" ys i rule: sublist_at.induct) auto
text‹Furthermore, we need:›
lemma sublist_step[intro]:
"⟦i + length xs < length ys; sublist_at xs ys i; ys!(i + length xs) = x⟧ ⟹ sublist_at (xs@[x]) ys i"
apply (induction xs ys i rule: sublist_at.induct)
apply auto
using sublist_at.elims(3) by fastforce
lemma all_positions_sublist:
"⟦i + length xs ≤ length ys; ∀jj<length xs. ys!(i+jj) = xs!jj⟧ ⟹ sublist_at xs ys i"
proof (induction xs rule: rev_induct)
case Nil
then show ?case by (simp add: Nil_is_sublist)
next
case (snoc x xs)
from ‹i + length (xs @ [x]) ≤ length ys› have "i + length xs ≤ length ys" by simp
moreover have "∀jj<length xs. ys!(i + jj) = xs!jj"
by (simp add: nth_append snoc.prems(2))
ultimately have "sublist_at xs ys i"
using snoc.IH by simp
then show ?case
using snoc.prems by auto
qed
lemma sublist_all_positions: "sublist_at xs ys i ⟹ ∀jj<length xs. ys!(i+jj) = xs!jj"
by (induction xs ys i rule: sublist_at.induct) (auto simp: nth_Cons')
text‹It also connects well to theory @{theory "HOL-Library.Sublist"} (compare @{thm[source] sublist_def}):›
lemma sublist_at_altdef:
"sublist_at xs ys i ⟷ (∃ps ss. ys = ps@xs@ss ∧ i = length ps)"
proof (induction xs ys i rule: sublist_at.induct)
case (2 ss t ts i)
show "sublist_at ss (t#ts) (Suc i) ⟷ (∃xs ys. t#ts = xs@ss@ys ∧ Suc i = length xs)"
(is "?lhs ⟷ ?rhs")
proof
assume ?lhs
then have "sublist_at ss ts i" by simp
with "2.IH" obtain xs where "∃ys. ts = xs@ss@ys ∧ i = length xs" by auto
then have "∃ys. t#ts = (t#xs)@ss@ys ∧ Suc i = length (t#xs)" by simp
then show ?rhs by blast
next
assume ?rhs
then obtain xs where "∃ys. t#ts = xs@ss@ys ∧ Suc i = length xs" by blast
then have "∃ys. ts = (tl xs)@ss@ys ∧ i = length (tl xs)"
by (metis hd_Cons_tl length_0_conv list.sel(3) nat.simps(3) size_Cons_lem_eq tl_append2)
then have "∃xs ys. ts = xs@ss@ys ∧ i = length xs" by blast
with "2.IH" show ?lhs by simp
qed
qed auto
corollary sublist_iff_sublist_at: "Sublist.sublist xs ys ⟷ (∃i. sublist_at xs ys i)"
by (simp add: sublist_at_altdef Sublist.sublist_def)
subsection‹Sublist-check algorithms›
text‹
We use the Isabelle Refinement Framework to
phrase the specification and the algorithm.
›
text‹@{term s} for "searchword" / "searchlist", @{term t} for "text"›
definition "kmp_SPEC s t = SPEC (λ
None ⇒ ∄i. sublist_at s t i |
Some i ⇒ sublist_at s t i ∧ (∀ii<i. ¬sublist_at s t ii))"
lemma is_arg_min_id: "is_arg_min id P i ⟷ P i ∧ (∀ii<i. ¬P ii)"
unfolding is_arg_min_def by auto
lemma kmp_result: "kmp_SPEC s t =
RETURN (if sublist s t then Some (LEAST i. sublist_at s t i) else None)"
unfolding kmp_SPEC_def sublist_iff_sublist_at
apply (auto intro: LeastI dest: not_less_Least split: option.splits)
by (meson LeastI nat_neq_iff not_less_Least)
corollary weak_kmp_SPEC: "kmp_SPEC s t ≤ SPEC (λpos. pos≠None ⟷ Sublist.sublist s t)"
by (simp add: kmp_result)
lemmas kmp_SPEC_altdefs =
kmp_SPEC_def[folded is_arg_min_id]
kmp_SPEC_def[folded sublist_iff_sublist_at]
kmp_result
section‹Naive algorithm›
text‹Since KMP is a direct advancement of the naive "test-all-starting-positions" approach, we provide it here for comparison:›
subsection‹Invariants›
definition "I_out_na s t ≡ λ(i,j,pos).
(∀ii<i. ¬sublist_at s t ii) ∧
(case pos of None ⇒ j = 0
| Some p ⇒ p=i ∧ sublist_at s t i)"
definition "I_in_na s t i ≡ λ(j,pos).
case pos of None ⇒ j < length s ∧ (∀jj<j. t!(i+jj) = s!(jj))
| Some p ⇒ sublist_at s t i"
subsection‹Algorithm›
text‹The following definition is taken from Helmut Seidl's lecture on algorithms and data structures@{cite GAD} except that we
▪ output the identified position @{term ‹pos :: nat option›} instead of just @{const True}
▪ use @{term ‹pos :: nat option›} as break-flag to support the abort within the loops
▪ rewrite @{prop ‹i ≤ length t - length s›} in the first while-condition to @{prop ‹i + length s ≤ length t›} to avoid having to use @{typ int} for list indexes (or the additional precondition @{prop ‹length s ≤ length t›})
›
definition "naive_algorithm s t ≡ do {
let i=0;
let j=0;
let pos=None;
(_,_,pos) ← WHILEIT (I_out_na s t) (λ(i,_,pos). i + length s ≤ length t ∧ pos=None) (λ(i,j,pos). do {
(_,pos) ← WHILEIT (I_in_na s t i) (λ(j,pos). t!(i+j) = s!j ∧ pos=None) (λ(j,_). do {
let j=j+1;
if j=length s then RETURN (j,Some i) else RETURN (j,None)
}) (j,pos);
if pos=None then do {
let i = i + 1;
let j = 0;
RETURN (i,j,None)
} else RETURN (i,j,Some i)
}) (i,j,pos);
RETURN pos
}"
subsection‹Correctness›
text‹The basic lemmas on @{const sublist_at} from the previous chapter together
with the Refinement Framework's verification condition generator / solver suffice:›
lemma "s ≠ [] ⟹ naive_algorithm s t ≤ kmp_SPEC s t"
unfolding naive_algorithm_def kmp_SPEC_def I_out_na_def I_in_na_def
apply (refine_vcg
WHILEIT_rule[where R="measure (λ(i,_,pos). length t - i + (if pos = None then 1 else 0))"]
WHILEIT_rule[where R="measure (λ(j,_::nat option). length s - j)"]
)
apply (all ‹clarsimp;(auto; fail)?›)
subgoal by (metis add_Suc_right all_positions_sublist less_antisym)
subgoal using less_Suc_eq by blast
subgoal by (metis less_SucE sublist_all_positions)
subgoal by (auto split: option.splits) (metis sublist_lengths add_less_cancel_right leI le_less_trans)
done
text‹Note that the precondition cannot be removed without an extra branch: If @{prop ‹s = []›}, the inner while-condition accesses out-of-bound memory. This will apply to KMP, too.›
section‹Knuth--Morris--Pratt algorithm›
text‹Just like our templates@{cite KMP77}@{cite GAD}, we first verify the main routine and discuss the computation of the auxiliary values @{term ‹𝔣 s›} only in a later section.›
subsection‹Preliminaries: Borders of lists›
definition "border xs ys ⟷ prefix xs ys ∧ suffix xs ys"
definition "strict_border xs ys ⟷ border xs ys ∧ length xs < length ys"
definition "intrinsic_border ls ≡ ARG_MAX length b. strict_border b ls"
subsubsection‹Properties›