#+options: ':t *:t -:t ::t <:t H:4 \n:nil ^:t arch:headline author:t #+options: broken-links:nil c:nil creator:nil d:(not "LOGBOOK") date:t e:t #+options: email:nil f:t inline:t num:t p:nil pri:nil prop:nil stat:t tags:t #+options: tasks:t tex:t timestamp:t title:t toc:nil todo:t |:t #+title: Semantics of an embedded vector architecture for formal verification of software #+date: May 2022 #+author: Greg Brown #+latex_header: \newcommand{\candidatenumber}{2487C} #+latex_header: \newcommand{\college}{Queens' College} #+latex_header: \newcommand{\course}{Computer Science Tripos, Part III} #+email: greg.brown@cl.cam.ac.uk #+language: en-GB #+select_tags: export #+exclude_tags: noexport #+creator: Emacs 27.2 (Org mode 9.6) #+cite_export: biblatex #+bibliography: ./thesis.bib #+latex_class: thesis #+latex_class_options: [12pt,a4paper,twoside] #+latex_header: \usepackage[hyperref=true,url=true,backend=biber,natbib=true]{biblatex} % citations #+latex_header: \usepackage[vmargin=20mm,hmargin=25mm]{geometry} % page margins #+latex_header: \usepackage[chapter]{minted} % code snippets #+latex_header: \usepackage{parskip} % vertical space for paragraphs #+latex_header: \usepackage{setspace} % line spacing #+latex_header: \usepackage{newunicodechar} % unicode in code snippets #+latex_header: \usepackage{ebproof} % Hoare logic rules #+latex_header: \usepackage{mathtools} % a math character? #+latex_header: \usepackage{stmaryrd} % some math characters #+latex_header: \usepackage{refcount} % for counting pages #+latex_header: \usepackage{upquote} % for correct quotation marks in verbatim text #+latex_header: \usepackage{caption} % not sure why this one [[https://www.overleaf.com/learn/latex/How_to_Write_a_Thesis_in_LaTeX_(Part_3)%3A_Figures%2C_Subfigures_and_Tables#Subfigures]]... #+latex_header: \usepackage{subcaption} % add subfigures #+latex_compiler: pdflatex #+latex_header: \newunicodechar{ʳ}{\ensuremath{^\texttt{r}}} #+latex_header: \newunicodechar{ˡ}{\ensuremath{^\texttt{l}}} #+latex_header: \newunicodechar{Γ}{\ensuremath{\Gamma}} #+latex_header: \newunicodechar{Δ}{\ensuremath{\Delta}} #+latex_header: \newunicodechar{Κ}{\ensuremath{K}} #+latex_header: \newunicodechar{Σ}{\ensuremath{\Sigma}} #+latex_header: \newunicodechar{γ}{\ensuremath{\gamma}} #+latex_header: \newunicodechar{δ}{\ensuremath{\delta}} #+latex_header: \newunicodechar{ε}{\ensuremath{\epsilon}} #+latex_header: \newunicodechar{λ}{\ensuremath{\lambda}} #+latex_header: \newunicodechar{σ}{\ensuremath{\sigma}} #+latex_header: \newunicodechar{ᵗ}{\ensuremath{^\texttt{t}}} #+latex_header: \newunicodechar{′}{\ensuremath{'}} #+latex_header: \newunicodechar{ⁱ}{\ensuremath{^\texttt{i}}} #+latex_header: \newunicodechar{⁺}{\ensuremath{^{+}}} #+latex_header: \newunicodechar{₁}{\ensuremath{_1}} #+latex_header: \newunicodechar{₂}{\ensuremath{_2}} #+latex_header: \newunicodechar{₃}{\ensuremath{_3}} #+latex_header: \newunicodechar{ₚ}{\ensuremath{_\texttt{p}}} #+latex_header: \newunicodechar{ₛ}{\ensuremath{_\texttt{s}}} #+latex_header: \newunicodechar{ₜ}{\ensuremath{_\texttt{t}}} #+latex_header: \newunicodechar{ℓ}{l} #+latex_header: \newunicodechar{ℕ}{\ensuremath{\mathbb{N}}} #+latex_header: \newunicodechar{ℚ}{\ensuremath{\mathbb{Q}}} #+latex_header: \newunicodechar{ℝ}{\ensuremath{\mathbb{R}}} #+latex_header: \newunicodechar{ℤ}{\ensuremath{\mathbb{Z}}} #+latex_header: \newunicodechar{⇒}{\ensuremath{\rightarrow}} #+latex_header: \newunicodechar{∀}{\ensuremath{\forall}} #+latex_header: \newunicodechar{∃}{\ensuremath{\exists}} #+latex_header: \newunicodechar{∎}{\ensuremath{\blacksquare}} #+latex_header: \newunicodechar{∘}{\ensuremath{\circ}} #+latex_header: \newunicodechar{∙}{\ensuremath{\cdot}} #+latex_header: \newunicodechar{∧}{\ensuremath{\wedge}} #+latex_header: \newunicodechar{∨}{\ensuremath{\vee}} #+latex_header: \newunicodechar{∷}{\texttt{::}} #+latex_header: \newunicodechar{≈}{\ensuremath{\approx}} #+latex_header: \newunicodechar{≉}{\ensuremath{\not\approx}} #+latex_header: \newunicodechar{≔}{\ensuremath{\coloneqq}} #+latex_header: \newunicodechar{≟}{\ensuremath{\buildrel ?\over =}} #+latex_header: \newunicodechar{≡}{\ensuremath{\equiv}} #+latex_header: \newunicodechar{≢}{\ensuremath{\not\equiv}} #+latex_header: \newunicodechar{≤}{\ensuremath{\le}} #+latex_header: \newunicodechar{≥}{\ensuremath{\ge}} #+latex_header: \newunicodechar{⊆}{\ensuremath{\subseteq}} #+latex_header: \newunicodechar{⊎}{\ensuremath{\uplus}} #+latex_header: \newunicodechar{⊔}{\ensuremath{\sqcup}} #+latex_header: \newunicodechar{⊢}{\ensuremath{\vdash}} #+latex_header: \newunicodechar{⊤}{\ensuremath{\top}} #+latex_header: \newunicodechar{⊥}{\ensuremath{\bot}} #+latex_header: \newunicodechar{⌊}{\ensuremath{\lfloor}} #+latex_header: \newunicodechar{⌋}{\ensuremath{\rfloor}} #+latex_header: \newunicodechar{⟦}{\ensuremath{\llbracket}} #+latex_header: \newunicodechar{⟧}{\ensuremath{\rrbracket}} #+latex_header: \newunicodechar{⟶}{\ensuremath{\rightarrow}} #+latex_header: \newunicodechar{⦃}{\{\{} #+latex_header: \newunicodechar{⦄}{\}\}} #+latex_header: \newunicodechar{𝕀}{\ensuremath{\mathbb{I}}} #+latex_header: \newtheorem{theorem}{Theorem} #+latex_header: %TC:envir minted 1 ignore #+latex_header: \newif\ifsubmission # Uncomment when anonymous # #+latex_header: \submissiontrue #+begin_src elisp :exports results :results none :eval export (make-variable-buffer-local 'org-latex-title-command) (setq org-latex-title-command " %%TC:ignore \\begin{sffamily} \\begin{titlepage} \\makeatletter \\hspace*{-14mm}\\includegraphics[width=65mm]{logo-dcst-colour} \\ifsubmission %% submission proforma cover page for blind marking \\begin{Large} \\vspace{20mm} Research project report title page \\vspace{35mm} Candidate \\candidatenumber \\vspace{42mm} \\textsl{\`\`\\@title\'\'} \\end{Large} \\else %% regular cover page \\begin{center} \\Huge \\vspace{\\fill} \\@title \\vspace{\\fill} \\@author \\vspace{10mm} \\Large \\college \\vspace{\\fill} \\@date \\vspace{\\fill} \\end{center} \\fi \\vspace{\\fill} \\begin{center} Submitted in partial fulfillment of the requirements for the\\\\ \\course \\end{center} \\end{titlepage} \\end{sffamily} \\makeatother \\newpage %%TC:endignore ") #+end_src #+begin_export latex %TC:ignore \begin{sffamily} Total page count: \pageref{lastpage} % calculate number of pages from % \label{firstcontentpage} to \label{lastcontentpage} inclusive \makeatletter \@tempcnta=\getpagerefnumber{lastcontentpage}\relax% \advance\@tempcnta by -\getpagerefnumber{firstcontentpage}% \advance\@tempcnta by 1% \xdef\contentpages{\the\@tempcnta}% \makeatother Main chapters (excluding front-matter, references and appendix): \contentpages~pages (pp~\pageref{firstcontentpage}--\pageref{lastcontentpage}) #+end_export #+name: wordcount #+begin_src elisp :exports none (if (not (boundp 'squid-eval)) (setq squid-eval nil)) (if (not squid-eval) (progn (setq squid-eval t) (org-latex-export-to-latex))) (setq squid-eval nil) (let* ((outfile (org-export-output-file-name ".tex"))) (shell-command-to-string (concat "texcount -0 -sum \'" outfile "\'"))) #+end_src #+RESULTS: wordcount : 11800 Main chapters word count: call_wordcount() #+begin_export latex Methodology used to generate that word count: \begin{quote} \begin{verbatim} $ texcount -0 -sum report.tex 11800 \end{verbatim} \end{quote} \end{sffamily} \onehalfspacing #+end_export * Abstract :PROPERTIES: :unnumbered: t :END: The ultimate goal of this work is to formally verify an implementation [cite:@10.46586/tches.v2022.i1.482-505] of the number-theoretic transform (NTT) for the Armv8.1-M architecture. This report focuses on producing the first formalisation of the semantics of the Armv8-M architecture and its M-profile vector extension. The pseudocode used to describe the instructions within the manual by [cite/t:@arm/DDI0553B.s] did not have a formal semantic description. For this project I created AMPSL, to mock the Arm specification language, within Agda. The syntax closely follows that of ASL, save some minor modifications due to limitations within Agda and adjustments to simplify the semantics. This report describes both a denotational semantics and Hoare logic for AMPSL. The denotational semantics interprets AMPSL statements and expressions as Agda functions on a variable context. The Hoare logic instead provides a syntax-directed derivation of AMPSL's action on assertions about the execution state. I also use Agda to formally verify a variant of Barrett reduction. Barrett reduction is an important subroutine used by the NTT, to efficiently find a \ldquo{}small\rdquo{} representable of a value modulo some base [cite:@10.1007/3-540-47721-7_24]. Formalising Barrett reduction is a significant step in formalising the NTT. #+latex: \ifsubmission\else * Acknowledgements :PROPERTIES: :unnumbered: t :END: I would like to thank Dominic Mulligan, Hanno Becker and Gustavo Petri at Arm for the initial idea and producing valuable feedback on my project and report throughout the year. I would also like to thank Jeremy Yallop for supervising the project and providing support throughout the project. They also provided useful and actionable feedback on the various drafts of my report. #+latex: \fi #+latex: \cleardoublepage #+toc: headlines 2 # #+toc: listings # #+toc: tables #+latex: %TC:endignore * Introduction #+latex: \label{firstcontentpage} The ultimate goal of this work is to formally verify an implementation [cite:@10.46586/tches.v2022.i1.482-505] of the number-theoretic transform (NTT) for the Armv8.1-M architecture. The NTT is a vital procedure for lattice-based post-quantum cryptography. PQC is a type of cryptography immune to rapid attack by large-scale quantum computers. Armv8-M is a common architecture in embedded devices. Due to the resource-constrained nature of an embedded device, and the PQC's huge computational demands, algorithms like the NTT are presented using hand-written, highly-optimised assembly code. To ensure these cryptographic algorithms are correct, and thus embedded devices are secure, formal verification of these algorithms is necessary. This report focuses on producing the first formalisation of the semantics of the Armv8-M architecture, in particular its M-profile vector extension. [cite/t:@arm/DDI0553B.s] provides a pseudocode description of the operation of Armv8-M instructions using the Arm pseudocode (henceforth ASL). Unfortunately this language is primarily designed for describing instructions [cite:@arm/DDI0553B.s §E1.1.1] and not proving semantic properties. To remedy this, I designed AMPSL (Arm M-profile Pseudocode Specification Language, or AMPSL Mocks Pseudocode Specification Language). AMPSL is written in Agda, a dependently-typed proof assistant [cite:@10.1007/978-3-642-03359-9_6]. The syntax mirrors that of ASL, save some minor modifications due to limitations within Agda and adjustments to simplify the semantics. AMPSL is given semantics in two different forms. The first is a denotational semantics, which converts the various program elements into functions within Agda. This enables explicitly computing the effect of AMPSL on the processor state. AMPSL also has a set of Hoare logic rules, which form an axiomatic, syntax-directed approach to describing how a statement in AMPSL modifies assertions on the processor state. Due to AMPSL's similarity to ASL, I can convert the ASL description of Armv8-M instructions into AMPSL. From this I can derive the semantics of Armv8-M instructions using AMPSL's semantics. I also use Agda to formally verify a variant of Barrett reduction. Barrett reduction is an important subroutine used by the NTT, to efficiently find a \ldquo{}small\rdquo{} representable of a value modulo some base [cite:@10.1007/3-540-47721-7_24]. Much like how a formalisation of the NTT is an important step in formalising the behaviour of many PQC algorithms, formalising Barrett reduction is an important step in formalising the NTT. #+name: raw_progress #+begin_src dot :file progress.pdf :exports none digraph { node [shape=box,width=2.5,height=0.6]; A [label="Functional correctness\nof NTT",style=dotted]; B [label="Functional correctness\nof Barrett reduction",style=dotted]; C [label="NTT properties",style=dotted]; D [label="Armv8-M Instruction\nsemantics",style=dashed]; E [label="Barrett reduction\nproperties"]; F [label="AMPSL semantics"]; G [label="AMPSL properties",style=dashed]; H [label="Model of Armv8-M\nin AMPSL"]; H -> D; G -> B; F -> G; F -> D; E -> C [style=dashed]; E -> B; D -> B; C -> A; B -> A [style=dashed]; } #+end_src #+name: progress #+caption: Progress made towards formalising an implementation of NTT for the #+caption: Armv8.1-M architecture. Work completed in this report has a solid #+caption: outline. Items where only trivial, time-consuming work is left have a #+caption: dashed border. Significant outstanding work has a dotted #+caption: border. call_raw_progress() [[progress]] shows the progress this work has made to verifying an NTT implementation for Armv8.1-M vector extension. Whilst it does not achieve the final goal, it forms solid foundations towards it. The main contributions of this report are as follows: - In [[*AMPSL Syntax]], I introduce the syntax of the AMPSL programming language. Its primary goal is to facilitate easy translation of programs from ASL, detailed in [[*Arm Pseudocode]] into AMPSL. - AMPSL's semantics are described in [[*AMPSL Semantics]]. AMPSL has a simple semantics which facilitates straight-forward proofs about various AMPSL programs. I detail both a denotational semantics and a Hoare logic for AMPSL. The Hoare logic used by AMPSL somewhat varies from the traditional presentation, given in [[*Hoare Logic]], to eliminate adaptation rules. - In [[*Soundness of AMPSL's Hoare Logic]], I prove that the set of Hoare logic rules for AMPSL are sound with respect to the denotational semantics. This proof is possible due to Agda's foundation of Martin-Löf's type theory, the significance of which is given in [[*Agda]]. As AMPSL's Hoare logic is sound, the behaviour of the computationally-intensive denotational semantics can instead be specified using syntax-directed Hoare logic. - A number of proof outlines for AMPSL programs are given in [[*Using AMPSL for Proofs]]. This describes how AMPSL is used to give the semantics of Armv8.1-M instructions. It also demonstrates that AMPSL is viable for the formal verification of various programs, and lays the groundwork for the formally verifying the NTT implementation given by [cite/t:@10.46586/tches.v2022.i1.482-505]. - Finally, a formal proof of a Barrett reduction variant is given in [[*Proof of Barrett Reduction]]. To my knowledge this is the first such machine-verified correctness proof for Barrett reduction. Further, it is the first proof for a domain other than integers and rationals. * Background ** Arm Pseudocode ASL is a strongly-typed imperative programming language [cite:@arm/DDI0553B.s §E1.2.1]. It has a first-order type system, a small set of operators and basic control flow. Its primary purpose is to explain how executing an Armv8-M instruction modifies the processor state. As it is a descriptive aid, ASL features some design choices atypical for other imperative programming languages making it difficult to execute. Something common to ASL and nearly all imperative languages are primitive types for Booleans, tuples, structs, enumerations and fixed-length arrays. Two interesting primitive types used by ASL are mathematical integers and real numbers. Other imperative languages typically use fixed-width integers and floating point rationals as efficient approximations for these values, with the downside of having overflow and precision loss errors. As ASL is for specification over execution, efficiency is of no concern so using the mathematical types prevents a whole class of errors. The final primitive type used by ASL is the bitstring; a fixed-length sequence of 0s and 1s. Some readers may wonder what the difference is between this type and Boolean arrays. The justification given by [cite/t:@arm/DDI0553B.s §E1.2.2] is more philosophical than practical: \ldquo{}bitstrings are the only concrete data type in pseudocode\rdquo{}. In some places, bitstrings can be used in arithmetic operations, by first converting them to an unsigned integer. ASL types have all of the associated standard operations, including equality, ordering, Boolean connectives and arithmetic. The most interesting operation in ASL is bitstring slicing. First, there is no type for a bit outside a bitstring, so slicing always returns a bitstring. Slicing then works in much the same way as array slicing in languages like Python and Rust; slicing an integer range from a bitstring returns a new bitstring with values corresponding to the indexed bits. The other special feature of bitstring slicing is that it can also slice integers. In that case, ASL \ldquo{}treats an integer as equivalent to a sufficiently long [\ldots] bitstring\rdquo{} [cite:@arm/DDI0553B.s §E1.3.3]. The final interesting difference between ASL and most imperative languages is the variety of top-level items. ASL has three item forms: procedures, functions and array-like functions. Procedures and functions behave like procedures and functions of other imperative languages. Their arguments are passed by value, and the only difference between the two is that procedures do not return values whilst functions do [cite:@arm/DDI0553B.s §E1.4.2]. Array-like functions act as getters and setters for machine state. Every array-like function has a reader form, and most have a writer form. This distinction exists because \ldquo{}reading from and writing to an array element require different functions\rdquo{}, [cite:@arm/DDI0553B.s §E1.4.2], due to some machine registers being read-only instead of read-writeable. The writer form acts as one of the targets of assignment expressions, along with variables and the result of bitstring concatenation and slicing [cite:@arm/DDI0553B.s §E1.3.5]. An example of ASL is given in [[*Example Armv8-M Instruction Models in AMPSL]], alongside an AMPSL model of it. ** M-profile Vector Extension The M-profile vector extension adds vector instructions to the Armv8-M architecture. A vector in this case is a 128-bit register, logically split into four 32-bit beats. Each beat is then divided into one, two or four lanes each of 32, 16 or 8 bits respectively [cite:@arm/DDI0553B.s §B5.3]. A processor can execute either one, two or four instruction beats in an \ldquo{}architecture tick\rdquo{}, an atomic unit of processor time [cite:@arm/DDI0553B.s §\(\texttt{I}_\texttt{PCBB}\)]. The number of beats executed per instruction can also change throughout execution. Processors are allowed but not required to execute two vector instructions concurrently [cite:@arm/DDI0553B.s §B5.3]. To summarise the overlap rules, at least the first two beats of the current instruction must be completed before executing the next one. Then, at most the first two beats of that instruction can be executed before the current instruction is finished. ** Hoare Logic Hoare logic is a proof system for programs written in imperative programming languages. At its core, the logic describes how to build partial correctness triples, which describe how program statements affect assertions about machine state. The bulk of a Hoare logic derivation is dependent only on the program syntax the proof targets. A partial correctness triple is a relation between a precondition \(P\), a program statement \(s\) and a postcondition \(Q\). If \(\{P\} s \{Q\}\) is a partial correctness triple, then whenever \(P\) holds for some machine state, then when executing \(s\), \(Q\) holds for the state after it terminates [cite:@10.1145/363235.363259]. This is a /partial/ correctness triple because the postcondition only holds if \(s\) terminates. When all statements terminate, this relation is called a correctness total triple. #+name: WHILE-Hoare-logic #+caption: Hoare logic rules for the WHILE language. #+begin_figure \begin{center} \begin{prooftree} \infer0[SKIP]{\{P\}\;\texttt{s}\;\{P\}} \infer[rule style=no rule,rule margin=3ex]1{\{P\}\;\texttt{s₁}\;\{Q\}\qquad\{Q\}\;\texttt{s₂}\;\{R\}} \infer1[SEQ]{\{P\}\;\texttt{s₁;s₂}\;\{Q\}} \infer0[ASSIGN]{\{P[\texttt{x}/\texttt{v}]\}\;\texttt{x:=v}\;\{P\}} \infer[rule style=no rule,rule margin=3ex]1{\{P \wedge \texttt{e}\}\;\texttt{s₁}\;\{Q\}\qquad\{P \wedge \neg \texttt{e}\}\;\texttt{s₂}\;\{Q\}} \infer1[IF]{\{P\}\;\texttt{if e then s₁ else s₂}\;\{Q\}} \infer[rule style=no rule,rule margin=3ex]2{\{P \wedge \texttt{e}\}\;\texttt{s}\;\{P\}} \infer1[WHILE]{\{P\}\;\texttt{while e do s}\;\{P \wedge \neg \texttt{e}\}} \infer[rule style=no rule,rule margin=3ex]1{\models P_1 \rightarrow P_2\qquad\{P_2\}\;\texttt{s}\;\{Q_2\}\qquad\models Q_2 \rightarrow Q_1} \infer1[CSQ]{\{P_1\}\;\texttt{s}\;\{Q_1\}} \end{prooftree} \end{center} #+end_figure [[WHILE-Hoare-logic]] shows the rules [cite/t:@10.1145/363235.363259] introduced for the WHILE language. There are two broad classes of Hoare logic rules. The structural rules SKIP, SEQ, IF, ASSIGN and WHILE reflect how program syntax affects program execution, and thus how to modify the precondition and postcondition assertions accordingly. The adaptation rule CSQ uses the same statement in the premise and conclusion changing only the preconditions and postconditions used. The SKIP and SEQ rules reflect the idea that the skip statement has no effect on state, and sequencing statements composes their effects. The IF rule is also uncomplicated. No matter which branch is taken, the postcondition remains the same; an if statement does no computation after executing a branch. The branch choice depends on the value of ~e~. Because this value is known before executing a branch, it is added to the preconditions in the premises. The ASSIGN rule is the most unintuitive structural rule. In the postcondition, any use of ~x~ can be replaced by ~v~ and, due to the assignment, the assertion maintains its truth value. In the precondition ~x~ could have any value. By applying the substitution of ~v~ for ~x~ to the precondition, the fact that ~x~ changes value is irrelevant. The final structural Hoare logic rule for the WHILE language is the WHILE rule. This rule can be derived by observing that a while statement is a fixed-point. As ~while e do s~ is equivalent to ~if e then (s ; while e do s) else skip~, the IF, SEQ and SKIP rules can be used to solve the recursion equations for the precondition and postcondition of the while statement. The only adaptation rule in WHILE's Hoare logic is the rule of consequence, CSQ. CSQ is necessary in this Hoare logic so that the assertions can be manipulated into forms suitable for use by each structural rule. Other forms of Hoare logic, like the one for AMPSL given in [[*Hoare Logic Semantics]], make adaptation rules redundant. [cite/t:@10.1145/363235.363259] does not specify the logic used to evaluate the implications in rule premises. Regular choices are first-order logic and higher-order logic [cite:@10.1007/s00165-019-00501-3;@10.1007/s001650050057]. For specifying program behaviour, having auxiliary variables in the logic is vital [cite:@10.1007/s001650050057]. Auxiliary variables are a set of variables that cannot be used within a program, so remain constant between the precondition and postcondition. This allows for proofs about arbitrary values without requiring them to appear in the program. ** Agda Agda is a dependently-typed proof assistant and functional programming language, based on Martin-Löf's type theory [cite:@10.1007/978-3-642-03359-9_6]. [cite/t:@10.1007/978-3-642-03359-9_6] provide an excellent introduction to the language. I will now summarise the most important Agda features for the implementation of AMPSL. *Inductive families*. Agda generalises ML-style datatypes allowing them to be indexed by values as well as types. This is best illustrated by an example. Fixed-length vectors can be defined by the following snippet: #+begin_src agda2 data Vec (A : Set) : (n : ℕ) → Set where [] : Vec A 0 _∷_ : ∀ {n} → A → Vec A n → Vec A (suc n) #+end_src Note the type of ~Vec~. It is a function that accepts a type (or ~Set~) ~A~ and a natural number, and returns a type. ~A~ is a /parameter/ of ~Vec~ whilst ~n~ is an /index/. Parameters are required to be the same for all constructors, whilst indices can vary between constructors [cite:@agda.readthedocs.io p. \texttt{language/data-types.html}]. Whilst parameter values must be constant across the constructor return values, they can vary within the constructor arguments, even for the same type. One example of this is the ~Assertion~ type given in [[*Hoare Logic Assertions]]. The ~all~ and ~some~ constructors both accept an ~Assertion Σ Γ (t ∷ Δ)~, but because they return an ~Assertion Σ Γ Δ~ the definition is valid. *Propositional equality*. One very important datatype in Agda is propositional equality, shown in the following snippet: #+begin_src agda2 data _≡_ {A : Set} : A → A → Set where refl : ∀ {x} → x ≡ x #+end_src The only constructor, ~refl~, requires that the two values in the type are identical. Hence whenever there is a value of type ~x ≡ y~, ~x~ and ~y~ have the same value, even when Agda cannot compute that value. One useful propositional-equality eliminator is in the ~subst~ function: #+begin_src agda2 subst : (B : A → Set) → x ≡ y → B x → B y subst _ refl Bx = Bx #+end_src Given a proof ~x ≡ y~ that ~x~ and ~y~ are equal, this function makes uses of ~x~ and ~y~ within types interchangeable. *Parameterised modules and records*. Agda modules can accept parameters, which can be used anywhere in the module. This works well with Agda's record types, whose fields are able to depend on values of other fields. The following snippet shows how records can be used to define a monoid: #+begin_src agda2 record Monoid ℓ : Set (ℓsuc ℓ) where infixl 5 _∙_ field Carrier : Set ℓ _∙_ : Op₂ Carrier ε : Carrier ∙-cong : ∀ {x y u v} → x ≡ y → u ≡ v → x ∙ y ≡ u ∙ v ∙-assoc : ∀ {x y z} → (x ∙ y) ∙ z ≡ x ∙ (y ∙ z) ∙-idˡ : ∀ {x} → ε ∙ x ≡ x ∙-idʳ : ∀ {x} → x ∙ ε ≡ x #+end_src This record bundles together an underlying ~Carrier~ type with a binary operator ~_∙_~ and identity element ~ε~. It also contains all the proofs necessary to show that ~_∙_~ and ~ε~ form a monoid. Note that unlike the previous ~Vec~ example, ~Set~ has now been given a parameter. This is to encode different universe levels within Agda. As ~Set~ is a type within Agda, it must itself have a type. If ~Set~ was within ~Set~, Agda would be subject to a logical inconsistency known as Girard's paradox [cite:@cs.cmu.edu/girard-72-thesis]. Thus, ~Set~ has type ~Set 1ℓ~, and ~Set i~ has type ~Set (ℓsuc i)~. When a module is parameterised by a ~Monoid~, then the module has an abstract monoid. It can use the structure and laws given in the record freely, but it cannot use additional laws (e.g. commutativity) without an additional argument. This is useful when a type's operations and properties are well-defined, but a good representation is unknown. *Instance arguments* Instance arguments are analogous to the type class constraints found in Haskell [cite:@agda.readthedocs.io p. \texttt{language/instance-arguments.html}]. They are a special form of implicit argument that are solved via /instance resolution/ over unification. Instance arguments are a good solution for cases where Agda tries \ldquo{}too hard\rdquo{} to find a solution for implicit arguments, and needs the implicit arguments to be specified explicitly. Using instance arguments instead can force a particular solution onto Agda, reducing the volume of explicit code. * Related Work There exist many formal verification tools designed to describe either ISA instruction semantics or prove algorithms correct. This section describes some significant work in the field and how the AMPSL's design improves upon it. ** Sail Sail [cite:@10.1145/3290384] is a language for describing processor instruction-set architecture semantics. It has a syntax similar to the pseudocode specification of most architectures and a first-order type system with dependent bitvector and numeric types. It is officially used by [cite/t:@riscv/spec-20191213] to specify the RISC-V concurrent memory semantics. Sail has many different backends available, including sequential emulators, concurrency models and theorem-prover definitions. Further, there are tools to automatically translate documents from the Arm Specification Language into Sail [cite:@10.1145/3290384]. Despite Sail's advantages over other solutions, including AMPSL, using Sail in this project is not suitable for a few reasons. First is a lack of documentation for the Sail theorem-proving backends. Without prior knowledge in using Sail, deciphering the automatically-generated statements from Sail would likely consume more time than creating AMPSL. Another reason to avoid Sail is the unnecessary complexity in modelling the ISA semantics. One of Sail's highlights is in its description of architectural memory models. However, this work attempts to verify the functional correctness of NTT, an algorithm with very little memory usage. Creating a simpler language like AMPSL removes the need to reason about these complex memory interactions and focus on the computation itself. ** Other Functional Correctness Tools Various tools exist for proving functional correctness of programs. These include tools that target C such as CryptoLine [cite:@10.1145/3319535.3354199], Fiat Crypto [cite:@10.1109/SP.2019.00005], Frama-C [cite:@10.1007/s00165-014-0326-7] and VST [cite:@10.1007/978-3-642-19718-5_1], as well as tools that target assembly directly such as Jasmin [cite:@10.1145/3133956.3134078] and Vale [cite:@10.1145/3290376]. There are two distinct problems with using these tools to verify that a pre-existing NTT implementation for Armv8-M is functionally correct: - These tools do not accept assembly as an input language. This means they are unable to verify an existing assembly algorithm. - These tools do not target Armv8-M assembly as output. Jasmin and Vale, whilst targeting assembly, do not currently target the Armv8-M architecture, let alone the M-profile vector extension. The other tools target C and require using verified compiler, of which none currently target Armv8-M. The most similar tool to what this project is trying to achieve is a formal verification tool by [cite/t:@10.1145/3391900], which targets the REDFIN instruction set. REDFIN has much less complex semantics than Armv8-M, to the point where the semantics can be expressed directly without the need for a specification language, making using REDFIN in proofs much easier than Armv8-M instructions. * Design of AMPSL and its Semantics In this chapter I introduce AMPSL, an Agda embedding of ASL. I also describe AMPSL's semantics via a denotational semantics that interprets AMPSL expressions and statements as Agda functions. One downside of AMPSL's simple denotational semantics is that control flow for looping constructs is fully evaluated. This is inefficient for loops that undergo many iterations. This can be resolved by a syntax-directed Hoare logic for AMPSL. ** AMPSL Syntax ASL has some small features that make it difficult to work with in Agda directly. AMPSL makes minor changes to ASL to better facilitate this embedding, typically generalising existing ASL features. *** AMPSL Types #+name: AMPSL-types #+caption: The Agda datatype representing the primitive AMPSL types. #+attr_latex: :float t #+begin_src agda2 data Type : Set where bool : Type int : Type fin : (n : ℕ) → Type real : Type tuple : Vec Type n → Type array : Type → (n : ℕ) → Type #+end_src [[AMPSL-types]] gives the Agda datatype representing the AMPSL types. Most of these have a direct analogue to ASL types. Instead of an enumeration construct, AMPSL uses the ~fin n~ type, representing a set of ~n~ elements. Similarly, structs are represented by ~tuple~ types. The most significant difference between ASL and AMPSL is the representation of bitstrings. Instead of a separate bitstring type, AMPSL uses Boolean arrays. This lets AMPSL generalise some ASL operations to other array types and makes AMPSL more expressive. ASL specifies three different type properties: equality comparisons, order comparisons and arithmetic operations. Whilst using the relevant operations in AMPSL requires a proof that the types have the required property, using instance arguments allows for these proofs to be elided in almost all cases. AMPSL maintains the ASL type properties with two additions. First, array types have equality if the enumerated type also has equality. This is a natural generalisation of equality and allows for AMPSL's formulation of bitstrings as Boolean arrays to have equality. Second, finite sets also have ordering. This change is primarily a convenience feature for comparing finite sets representing an integer subset. The final interesting type feature in AMPSL is implicit coercion for arithmetic. As ASL arithmetic is polymorphic for integers and reals, AMPSL needs a function to decide the result type. By describing the output type as a function on the input types, the same constructor can be used for all combinations of numeric inputs. *** AMPSL Expressions #+name: AMPSL-literalType #+caption: Mappings from AMPSL types into Agda types for specifying literals. #+begin_src agda literalType : Type → Set literalType bool = Bool literalType int = ℤ literalType (fin n) = Fin n literalType real = ℤ literalType (tuple ts) = literalTypes ts literalType (array t n) = Vec (literalType t) n #+end_src Unlike ASL, where only a few types have literal expressions, every type in AMPSL has a literal form. This mapping is given by the ~literalType~ function, shown in [[AMPSL-literalType]]. Most AMPSL literals accept the corresponding Agda type as a value. The only exception to this rule is for ~real~ values. As Agda does not have a type representing mathematical reals, integers are used instead. This is sufficient as any literal real value in ASL is represented as a decimal; a rational value. #+name: AMPSL-expr-prototypes #+caption: Agda declarations of the AMPSL program elements. #+attr_latex: :float t #+begin_src agda data Expression (Σ : Vec Type o) (Γ : Vec Type n) : Type → Set data Reference (Σ : Vec Type o) (Γ : Vec Type n) : Type → Set data LocalReference (Σ : Vec Type o) (Γ : Vec Type n) : Type → Set data Statement (Σ : Vec Type o) (Γ : Vec Type n) : Set data LocalStatement (Σ : Vec Type o) (Γ : Vec Type n) : Set data Function (Σ : Vec Type o) (Γ : Vec Type n) (ret : Type) : Set data Procedure (Σ : Vec Type o) (Γ : Vec Type n) : Set #+end_src #+name: AMPSL-grammar #+caption: AMPSL's grammar. The formal Agda definition is in the provided #+caption: source code. #+begin_figure \begin{align*} \mathrm{Natural}\;{}n = & \texttt{0} \mid \texttt{suc}\;{}n \\ \mathrm{Fin}\;{}i = & \texttt{0F} \mid \texttt{suc}\;{}i \\ \mathrm{LocalReference}\;{}R = & \texttt{var}\;{}i \mid \\ & \texttt{[}\;{}R\;{}\texttt{]} \mid \texttt{unbox}\;{}R \mid \texttt{cast eq}\;{}e \\ & \texttt{merge}\;{}R\;{}R\;{}R \mid \texttt{slice}\;{}R\;{}R \mid \texttt{cut}\;{}R\;{}R \mid \\ & \texttt{head}\;{}R \mid \texttt{tail}\;{}R \\ \mathrm{Reference}\;{}r = & \textrm{Like LocalReference} \mid \texttt{state}\;{}i\\ \mathrm{Expression}\;{}e = & \textrm{Like Reference} \mid \texttt{lit}\;{}x \mid e\;{}\texttt{≟}\;{}e \mid e\;{}\texttt{>}\;{}n \mid e\;{}\texttt{\textasciicircum}\;{}n \mid \texttt{rnd}\;{}e \mid \\ & \texttt{fin}\;{}f\;{}e \mid \texttt{asInt}\;{}e \mid \\ & \texttt{nil} \mid \texttt{cons}\;{}e\;{}e \mid \\ & \texttt{call}\;{}F\;{}es \mid \texttt{if}\;{}e\;{}\texttt{then}\;{}e\;{}\texttt{else}\;{}e \\ \mathrm{LocalStatement}\;{}S = & \texttt{skip} \mid S\;{}\texttt{∙}\;{}S \mid R\;{}\texttt{≔}\;{}e \mid \\ & \texttt{declare}\;{}e\;{}S \mid \texttt{for}\;{}n\;{}S \mid \\ & \texttt{if}\;{}e\;{}\texttt{then}\;{}S \mid \texttt{if}\;{}e\;{}\texttt{then}\;{}S\;{}\texttt{else}\;{}S \\ \mathrm{Statement}\;{}s = & \textrm{Like LocalStatement} \mid \\ & r\;{}\texttt{≔}\;{}e \mid \texttt{invoke}\;{}P\;{}es \\ \mathrm{Function}\;{}F = & \texttt{init}\;{}e\;{}\texttt{∙}\;{}S\;{}\texttt{end} \\ \mathrm{Procedure}\;{}P = & s\;{}\texttt{∙end} \end{align*} #+end_figure [[AMPSL-expr-prototypes]] lists the Agda data type declarations corresponding to the AMPSL program elements. The grammar is summarised in [[AMPSL-grammar]]. Each type is parameterised by two variable contexts: Σ for global variables and Γ for local variables. The two variable contexts are split to simplify the types for function calls and procedure invocations. As the set of global variables does not change across a program, functions and procedures keep the same value of parameter Σ in their types. As functions and procedures have different local variables than the calling context, having the local variable context as a separate parameter makes the change simple. An ~Expression~ in AMPSL corresponds to expressions in ASL. Many operators are identical to those in ASL, and others are simple renamings (like ~≟~ for ~==~ for equality comparisons). Unlike ASL, where literals can appear unqualified, AMPSL literals are introduced by the ~lit~ constructor. The most immediate change for programming in AMPSL versus ASL is how variables are handled. Because the ~Expression~ type depends on the values of variable contexts, a variable is referred to by its index into the context. For example, a variable context \(\{x \mapsto \mathrm{int}, y \mapsto \mathrm{real}\}\) is represented in AMPSL as the context ~int ∷ real ∷ []~. The variable \(x\) is then represented by ~var 0F~ in AMPSL, where the ~F~ indicates the index is from a finite set. Because the global and local variable contexts are disjoint for the ~Expression~ type, variables are constructed using ~state~ or ~var~ respectively. Whilst this decision makes programming using AMPSL more complex, it greatly simplifies the language for use in constructing proofs. AMPSL expressions also add a number of useful constructs to ASL. One such pair are ~[_]~ and ~unbox~, which construct and destruct a length-one array respectively. Others are ~fin~, which allows for arbitrary computations on elements of finite sets, and ~asInt~, which converts a finite value into an integer. The final three noteworthy operators are ~merge~, ~slice~ and ~cut~. These all perform operations on arrays, by either merging two together, taking out a slice, or dropping a slice. Unlike ASL where bitstring slicing requires a range, these three operators use Agda's dependent types and type inference so that only a base offset is necessary. ~slice xs i~, like bitstring slicing, extracts a contiguous subset from an array ~xs~, such that the first element in ~slice xs i~ is in ~xs~ at position ~i~. ~cut xs i~ returns the remainder of ~slice xs i~; the two ends of ~xs~ not in the slice, concatenated. Finally, ~merge xs ys i~ joins ~xs~ and ~ys~ to form a product-projection triple with ~slice~ and ~cut~. The ~Reference~ type is the name AMPSL gives to assignable expressions from ASL. The ~LocalReference~ type is identical to ~Reference~, except it does not include global variables. Due to complications in the semantics of multiple assignments to one location, \ldquo{}product\rdquo operations like ~merge~ and ~cons~ are excluded from being references, despite concatenated bitstrings and tuples being assignable expressions in ASL. Whilst [cite/t:@arm/DDI0553B.s §E1.3.3] requires that no position in a bitstring is referenced twice, enforcing this in AMPSL for ~merge~ and ~cons~ would make their use unergonomic in practice for writing code or proofs. **** Example AMPSL Expressions One arithmetic operator used in ASL is left shift. [cite/t:@arm/DDI0553B.s §E1.3.4] explains how this can be encoded using other arithmetic operators in AMPSL, as shown below: #+begin_src agda2 _<<_ : Expression Σ Γ int → (n : ℕ) → Expression Σ Γ int e << n = e * lit (ℤ.+ (2 ℕ.^ n)) #+end_src There is a lot of hidden complexity here. First, consider the literal statement's type. The unary plus operation indicates the literal is an Agda integer. However, there are two AMPSL types with Agda integers for literal values: ~int~ and ~real~. The multiplication result must be an ~int~, and the first argument is also an ~int~. Because multiplication's type is determined by implicit coercion, Agda can unwind the function defining the coercion and finds that the literal must be an ~int~. Another pseudocode operation not yet described in AMPSL is integer slicing. Here is an expression that slices a single bit from an integer, following the procedure by [cite/t:@arm/DDI0553B.s §E1.3.3]: #+begin_src agda2 getBit : ℕ → Expression Σ Γ int → Expression Σ Γ bit getBit i x = inv (x - ((x >> suc i) << suc i) > toℕ esize in let result′,sat = call (SignedSatQ (toℕ esize-1) (value ∷ [])) in let sat = head (tail result′,sat) in let result′ = head result′,sat in ,*index-32 size result i ≔ result′ ∙ if sat && hasBit elmtMask (fin e*esize>>3 (tup (i ∷ []))) then FPSCR-QC ≔ lit true )) ∙ invoke copyMasked (lit acc ∷ result ∷ curBeat ∷ elmtMask ∷ []) ))) ∙end where open Instr.VQRDMULH d op₂ = let curBeat = head (var 3F) in let i = var 0F in [ (λ src₂ → index-32 size (index (! R) (lit src₂)) i) , (λ src₂ → index-32 size (! Q[ lit src₂ , curBeat ]) i) ]′ src₂ rVal = lit 1ℤ << toℕ esize-1 #+end_src \newpage ~VMLA~ in AMPSL: #+begin_src agda2 vmla : Instr.VMLA → Procedure State [] vmla = declare (call GetCurInstrBeat []) ( let curBeat = head (var 0F) in declare (! Q[ lit src₁ , curBeat ]) ( declare (lit (Vec.replicate false)) ( let elmtMask = head (tail (var 2F)) in let curBeat = head (var 2F) in let result = var 0F in for (toℕ elements) ( let curBeat = head (var 3F) in let op₁ = var 2F in let element₁ = call sint (index-32 size op₁ i ∷ []) in let result = var 1F in let i = var 0F in let op₂ = ! Q[ lit acc , curBeat ] in let element₃ = call sint (index-32 size op₂ i ∷ []) in ,*index-32 size result i ≔ call (sliceⁱ 0) ((element₁ * element₂ + element₃) ∷ []) )) ∙ invoke copyMasked (lit acc ∷ result ∷ curBeat ∷ elmtMask ∷ []) )) ∙end where open Instr.VMLA d element₂ = call sint (index-32 size (index (! R) (lit src₂)) (lit 0F) ∷ []) #+end_src #+latex: \label{lastpage} #+latex: %TC:endignore # LocalWords: AMPSL Hoare NTT PQC structs bitstring bitstrings