# Library Coq.Strings.String

Contributed by Laurent ThÃ©ry (INRIA); Adapted to Coq V8 by the Coq Development Team

Require Import Arith.
Require Import Ascii.

### Definition of strings

Implementation of string as list of ascii characters

Inductive string : Set :=
| EmptyString : string
| String : ascii -> string -> string.

Delimit Scope string_scope with string.
Open Local Scope string_scope.

Equality is decidable

Definition string_dec : forall s1 s2 : string, {s1 = s2} + {s1 <> s2}.
Defined.

### Concatenation of strings

Reserved Notation "x ++ y" (right associativity, at level 60).

Fixpoint append (s1 s2 : string) : string :=
match s1 with
| EmptyString => s2
| String c s1' => String c (s1' ++ s2)
end

where "s1 ++ s2" := (append s1 s2) : string_scope.

Length

Fixpoint length (s : string) : nat :=
match s with
| EmptyString => 0
| String c s' => S (length s')
end.

Nth character of a string

Fixpoint get (n : nat) (s : string) {struct s} : option ascii :=
match s with
| EmptyString => None
| String c s' => match n with
| O => Some c
| S n' => get n' s'
end
end.

Two lists that are identical through get are syntactically equal

Theorem get_correct :
forall s1 s2 : string, (forall n : nat, get n s1 = get n s2) <-> s1 = s2.

The first elements of s1 ++ s2 are the ones of s1

Theorem append_correct1 :
forall (s1 s2 : string) (n : nat),
n < length s1 -> get n s1 = get n (s1 ++ s2).

The last elements of s1 ++ s2 are the ones of s2

Theorem append_correct2 :
forall (s1 s2 : string) (n : nat),
get n s2 = get (n + length s1) (s1 ++ s2).

### Substrings

substring n m s returns the substring of s that starts at position n and of length m; if this does not make sense it returns ""

Fixpoint substring (n m : nat) (s : string) : string :=
match n, m, s with
| 0, 0, _ => EmptyString
| 0, S m', EmptyString => s
| 0, S m', String c s' => String c (substring 0 m' s')
| S n', _, EmptyString => s
| S n', _, String c s' => substring n' m s'
end.

The substring is included in the initial string

Theorem substring_correct1 :
forall (s : string) (n m p : nat),
p < m -> get p (substring n m s) = get (p + n) s.

The substring has at most m elements

Theorem substring_correct2 :
forall (s : string) (n m p : nat), m <= p -> get p (substring n m s) = None.

### Test functions

Test if s1 is a prefix of s2

Fixpoint prefix (s1 s2 : string) {struct s2} : bool :=
match s1 with
| EmptyString => true
| String a s1' =>
match s2 with
| EmptyString => false
| String b s2' =>
match ascii_dec a b with
| left _ => prefix s1' s2'
| right _ => false
end
end
end.

If s1 is a prefix of s2, it is the substring of length length s1 starting at position O of s2

Theorem prefix_correct :
forall s1 s2 : string,
prefix s1 s2 = true <-> substring 0 (length s1) s2 = s1.

Test if, starting at position n, s1 occurs in s2; if so it returns the position

Fixpoint index (n : nat) (s1 s2 : string) : option nat :=
match s2, n with
| EmptyString, 0 =>
match s1 with
| EmptyString => Some 0
| String a s1' => None
end
| EmptyString, S n' => None
| String b s2', 0 =>
if prefix s1 s2 then Some 0
else
match index 0 s1 s2' with
| Some n => Some (S n)
| None => None
end
| String b s2', S n' =>
match index n' s1 s2' with
| Some n => Some (S n)
| None => None
end
end.

Opaque prefix.

If the result of index is Some m, s1 in s2 at position m

Theorem index_correct1 :
forall (n m : nat) (s1 s2 : string),
index n s1 s2 = Some m -> substring m (length s1) s2 = s1.

If the result of index is Some m, s1 does not occur in s2 before m

Theorem index_correct2 :
forall (n m : nat) (s1 s2 : string),
index n s1 s2 = Some m ->
forall p : nat, n <= p -> p < m -> substring p (length s1) s2 <> s1.

If the result of index is None, s1 does not occur in s2 after n

Theorem index_correct3 :
forall (n m : nat) (s1 s2 : string),
index n s1 s2 = None ->
s1 <> EmptyString -> n <= m -> substring m (length s1) s2 <> s1.

Transparent prefix.

If we are searching for the Empty string and the answer is no this means that n is greater than the size of s

Theorem index_correct4 :
forall (n : nat) (s : string),
index n EmptyString s = None -> length s < n.

Same as index but with no optional type, we return 0 when it does not occur

Definition findex n s1 s2 :=
match index n s1 s2 with
| Some n => n
| None => 0
end.

### Concrete syntax

The concrete syntax for strings in scope string_scope follows the Coq convention for strings: all ascii characters of code less than 128 are litteral to the exception of the character `double quote' which must be doubled.

Strings that involve ascii characters of code >= 128 which are not part of a valid utf8 sequence of characters are not representable using the Coq string notation (use explicitly the String constructor with the ascii codes of the characters).

Example HelloWorld := " ""Hello world!"" ".