From bd1f6b5eefe15c8f5fa73da2d1fc4b36705bfe0e Mon Sep 17 00:00:00 2001 From: JJ Date: Sun, 29 Sep 2024 15:20:09 -0700 Subject: meow --- 404.html | 6 +- computation/index.md | 11 - cpsc/index.md | 11 + index.html | 6 +- ling/english-specifier.png | Bin 0 -> 8373 bytes ling/glossary.md | 4 + ling/head-final.png | Bin 0 -> 4644 bytes ling/head-initial.png | Bin 0 -> 4848 bytes ling/index.md | 12 + ling/left-adjunct.png | Bin 0 -> 4733 bytes ling/no-subject-movement.png | Bin 0 -> 14696 bytes ling/pragmatics.md | 40 ++ ling/right-adjunct.png | Bin 0 -> 5163 bytes ling/semantics.md | 89 +++ ling/subject-movement.png | Bin 0 -> 19203 bytes ling/syntax.md | 556 +++++++++++++++++++ ling/tense-ed.png | Bin 0 -> 13876 bytes ling/tense-no-tense.png | Bin 0 -> 10820 bytes ling/tense-null.png | Bin 0 -> 13530 bytes ling/tense-s.png | Bin 0 -> 14158 bytes ling/tense-will.png | Bin 0 -> 12780 bytes ling/types.md | 4 + linguistics/english-specifier.png | Bin 8373 -> 0 bytes linguistics/glossary.md | 4 - linguistics/head-final.png | Bin 4644 -> 0 bytes linguistics/head-initial.png | Bin 4848 -> 0 bytes linguistics/index.md | 12 - linguistics/left-adjunct.png | Bin 4733 -> 0 bytes linguistics/no-subject-movement.png | Bin 14696 -> 0 bytes linguistics/pragmatics.md | 40 -- linguistics/right-adjunct.png | Bin 5163 -> 0 bytes linguistics/semantics.md | 89 --- linguistics/subject-movement.png | Bin 19203 -> 0 bytes linguistics/syntax.md | 556 ------------------- linguistics/tense-ed.png | Bin 13876 -> 0 bytes linguistics/tense-no-tense.png | Bin 10820 -> 0 bytes linguistics/tense-null.png | Bin 13530 -> 0 bytes linguistics/tense-s.png | Bin 14158 -> 0 bytes linguistics/tense-will.png | Bin 12780 -> 0 bytes linguistics/types.md | 4 - math/algebra.md | 69 +++ math/analysis.md | 6 + math/category-theory.md | 5 + math/coding-theory.md | 1049 +++++++++++++++++++++++++++++++++++ math/foundations.md | 6 + math/index.md | 12 + math/information-theory.md | 6 + math/linear-algebra.md | 666 ++++++++++++++++++++++ math/logic.md | 154 +++++ math/number-theory.md | 441 +++++++++++++++ math/proof.md | 5 + mathematics/algebra.md | 69 --- mathematics/analysis.md | 6 - mathematics/category-theory.md | 5 - mathematics/coding-theory.md | 1049 ----------------------------------- mathematics/foundations.md | 6 - mathematics/index.md | 12 - mathematics/information-theory.md | 6 - mathematics/linear-algebra.md | 666 ---------------------- mathematics/logic.md | 154 ----- mathematics/number-theory.md | 441 --------------- mathematics/proof.md | 5 - 62 files changed, 3141 insertions(+), 3141 deletions(-) delete mode 100644 computation/index.md create mode 100644 cpsc/index.md create mode 100644 ling/english-specifier.png create mode 100644 ling/glossary.md create mode 100644 ling/head-final.png create mode 100644 ling/head-initial.png create mode 100644 ling/index.md create mode 100644 ling/left-adjunct.png create mode 100644 ling/no-subject-movement.png create mode 100644 ling/pragmatics.md create mode 100644 ling/right-adjunct.png create mode 100644 ling/semantics.md create mode 100644 ling/subject-movement.png create mode 100644 ling/syntax.md create mode 100644 ling/tense-ed.png create mode 100644 ling/tense-no-tense.png create mode 100644 ling/tense-null.png create mode 100644 ling/tense-s.png create mode 100644 ling/tense-will.png create mode 100644 ling/types.md delete mode 100644 linguistics/english-specifier.png delete mode 100644 linguistics/glossary.md delete mode 100644 linguistics/head-final.png delete mode 100644 
linguistics/head-initial.png delete mode 100644 linguistics/index.md delete mode 100644 linguistics/left-adjunct.png delete mode 100644 linguistics/no-subject-movement.png delete mode 100644 linguistics/pragmatics.md delete mode 100644 linguistics/right-adjunct.png delete mode 100644 linguistics/semantics.md delete mode 100644 linguistics/subject-movement.png delete mode 100644 linguistics/syntax.md delete mode 100644 linguistics/tense-ed.png delete mode 100644 linguistics/tense-no-tense.png delete mode 100644 linguistics/tense-null.png delete mode 100644 linguistics/tense-s.png delete mode 100644 linguistics/tense-will.png delete mode 100644 linguistics/types.md create mode 100644 math/algebra.md create mode 100644 math/analysis.md create mode 100644 math/category-theory.md create mode 100644 math/coding-theory.md create mode 100644 math/foundations.md create mode 100644 math/index.md create mode 100644 math/information-theory.md create mode 100644 math/linear-algebra.md create mode 100644 math/logic.md create mode 100644 math/number-theory.md create mode 100644 math/proof.md delete mode 100644 mathematics/algebra.md delete mode 100644 mathematics/analysis.md delete mode 100644 mathematics/category-theory.md delete mode 100644 mathematics/coding-theory.md delete mode 100644 mathematics/foundations.md delete mode 100644 mathematics/index.md delete mode 100644 mathematics/information-theory.md delete mode 100644 mathematics/linear-algebra.md delete mode 100644 mathematics/logic.md delete mode 100644 mathematics/number-theory.md delete mode 100644 mathematics/proof.md diff --git a/404.html b/404.html index e710c9f..e046371 100644 --- a/404.html +++ b/404.html @@ -8,9 +8,9 @@ title: 404
diff --git a/computation/index.md b/computation/index.md deleted file mode 100644 index ce13a58..0000000 --- a/computation/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -layout: computation -title: computation ---- - -
-
-
-
- -# computation is *well-defined* execution diff --git a/cpsc/index.md b/cpsc/index.md new file mode 100644 index 0000000..ce13a58 --- /dev/null +++ b/cpsc/index.md @@ -0,0 +1,11 @@ +--- +layout: computation +title: computation +--- + +
+
+
+
+ +# computation is *well-defined* execution diff --git a/index.html b/index.html index e4982d3..37e0131 100644 --- a/index.html +++ b/index.html @@ -46,9 +46,9 @@

scattered thoughts

diff --git a/ling/english-specifier.png b/ling/english-specifier.png new file mode 100644 index 0000000..4500211 Binary files /dev/null and b/ling/english-specifier.png differ diff --git a/ling/glossary.md b/ling/glossary.md new file mode 100644 index 0000000..5bfb895 --- /dev/null +++ b/ling/glossary.md @@ -0,0 +1,4 @@ +--- +layout: linguistics +title: linguistics/glossary +--- diff --git a/ling/head-final.png b/ling/head-final.png new file mode 100644 index 0000000..bbce198 Binary files /dev/null and b/ling/head-final.png differ diff --git a/ling/head-initial.png b/ling/head-initial.png new file mode 100644 index 0000000..920f94e Binary files /dev/null and b/ling/head-initial.png differ diff --git a/ling/index.md b/ling/index.md new file mode 100644 index 0000000..074db20 --- /dev/null +++ b/ling/index.md @@ -0,0 +1,12 @@ +--- +layout: linguistics +title: linguistics +--- + +
+
+
+
+
+ +# linguistics is the study of *language* diff --git a/ling/left-adjunct.png b/ling/left-adjunct.png new file mode 100644 index 0000000..306483d Binary files /dev/null and b/ling/left-adjunct.png differ diff --git a/ling/no-subject-movement.png b/ling/no-subject-movement.png new file mode 100644 index 0000000..3f87211 Binary files /dev/null and b/ling/no-subject-movement.png differ diff --git a/ling/pragmatics.md b/ling/pragmatics.md new file mode 100644 index 0000000..d1b181a --- /dev/null +++ b/ling/pragmatics.md @@ -0,0 +1,40 @@ +--- +layout: linguistics +title: linguistics/pragmatics +--- + +# pragmatics + +Pragmatics is the study of **context**. + +How do we convey meaning not explicitly expressed?
+How can we unambiguously understand ambiguous sentences?
+How does the listener *reconstruct* the speaker's intended meaning?
+
+## Basic Principles
+
+### Cooperation
+
+The *Principle of Cooperation* states that **language is a fundamentally collaborative action**. When we speak to one another, we attempt to convey the pure notion of meaning through the fallible medium of words. The success of such speech requires both the speaker and the listener to operate under shared assumptions, in order to reconstruct intentions not explicitly communicated.
+
+The Principle of Cooperation is composed of several sub-principles.
+- **quantity**: Say as much as necessary. Do not say more than necessary.
+- **quality**: Do not say that which you believe false. Do not say that for which you lack evidence.
+- **relation**: Be relevant.
+- **manner**: Avoid obscurity. Avoid ambiguity. Be brief. Be orderly.
+
+These principles are also referred to as *Grice's Maxims* (after Paul Grice, a strong candidate for the single most influential person in the field of pragmatics), and can arguably all be bundled up into one core notion of **relevance**. (This is a hot topic of debate on the philosophy side of pragmatics, though.)
+
+As in all of linguistics: the Cooperative Principle is an *observation*, meant to *model* language's implicatures. It is not a hard and fast rule (insofar as hard and fast rules even exist in a descriptive framework), but even as rules go, it's not particularly strict: it can be violated, and indeed, often is.
+
+The ways in which speakers can behave in a conversation, relative to the Principle of Cooperation, fall into the following categories:
+- One can **observe** the principle, directly conforming to it.
+- One can **violate** the principle, subtly and misleadingly breaking it.
+- One can **flout** the principle, clearly and straightforwardly breaking it.
+- One can **opt out** of the principle, directly expressing an intention to not play by the rules of conversation.
+
+## References
+
+- Introduction to Pragmatics
+- [Analyzing Meaning](https://langsci-press.org/catalog/book/231)
+- [Invitation to Formal Semantics](https://eecoppock.info/bootcamp/semantics-boot-camp.pdf)
diff --git a/ling/right-adjunct.png b/ling/right-adjunct.png
new file mode 100644
index 0000000..12f3ba0
Binary files /dev/null and b/ling/right-adjunct.png differ
diff --git a/ling/semantics.md b/ling/semantics.md
new file mode 100644
index 0000000..747d9c3
--- /dev/null
+++ b/ling/semantics.md
@@ -0,0 +1,89 @@
+---
+layout: linguistics
+title: linguistics/semantics
+---
+
+# notes on semantics
+
+Semantics is the study of **meaning**.
+
+How do we know which sentences are true and which are false?
+What does it *mean* for a sentence to be true?
+What conditions must hold for a sentence to be true?
+
+Formal semantics attempts to answer those questions by providing a *framework* for determining what *conditions* must hold for a sentence to be true.
+
+This framework is [first-order/predicate logic](../math/logic) and the [simply-typed lambda calculus](../plt/lambda-calculus). On top of this, we often build set theory, relying on *characteristic functions* of the lambda calculus as denotations of *set membership*.
+
+
+## Basic Principles
+
+### Compositionality
+
+The *Principle of Compositionality* states that the meaning of a *constituent* is determined entirely by its *components*. This is *the* fundamental underlying principle behind formal logic and subsequently semantics. It holds not just for sentence composition (syntax), but also for *word formation* (morphology), and - what's of interest to us here - meaning (semantics).
+
+### Substitution
+
+The *Principle of Substitution* states that substituting one part of an expression with something else of the same meaning *preserves* the meaning of the expression as a whole. This might be thought of as a given, but semantics has its roots in philosophy, and philosophers care very much about enumerating their givens.
+
+### Predicate Logic & The Lambda Calculus
+
+Formal semantics begets a formal system for such semantics, and *first-order logic* and *the lambda calculus* are a natural fit. Semantics is the study of meaning - and what is logic but a system for expressing meaning? As discussed above, language functions by composition - and what are functions but the very model of composition?
+
+[*An Invitation to Formal Semantics*](https://eecoppock.info/bootcamp/semantics-boot-camp.pdf) covers basic logic and the lambda calculus well in its first six chapters. Otherwise, for a worse introduction, see [logic](../math/logic) and [the lambda calculus](../plt/lambda-calculus).
+
+## Denotational Semantics
+
+With basic logic and the lambda calculus under our belt, we may get straight to assigning *meaning* to language. We consider two *basic types* to start: the type of entities, $e$, and the type of truth values, $t$. Our function types we denote by ordered pairs: that is, a function from $e$ to $t$ is of type $⟨e,t⟩$. This is perhaps clunkier notation than the type-theoretic $e→t$, but it is what it is. (And it does avoid issues of precedence.)
+
+### Entities and Functions
+
+> *I am Alice.*
+> *Alice is bad.*
+> *The blue pigeon flew away.*
+
+- Noun: $⟨e,t⟩ ↝ λx.Noun(x)$
+- Verb (intransitive): $⟨e,t⟩ ↝ λx.Verb(x)$
+- Verb (transitive): $⟨e,⟨e,t⟩⟩ ↝ λy.λx.Verb(x, y)$
+- Verb (meaningless): $⟨⟨e,t⟩,⟨e,t⟩⟩ ↝ λP.λx.P(x)$
+- Adj: $⟨⟨e,t⟩,⟨e,t⟩⟩ ↝ λNoun.λx.[Adj(x) ∧ Noun(x)]$
+
+- or (clausal): $⟨t,⟨t,t⟩⟩ ↝ λq.λp.[p ∨ q]$
+- and (clausal): $⟨t,⟨t,t⟩⟩ ↝ λq.λp.[p ∧ q]$
+- or (verbal): $⟨⟨e,t⟩,⟨⟨e,t⟩,⟨e,t⟩⟩⟩ ↝ λQ.λP.λx.[P(x) ∨ Q(x)]$
+- and (verbal): $⟨⟨e,t⟩,⟨⟨e,t⟩,⟨e,t⟩⟩⟩ ↝ λQ.λP.λx.[P(x) ∧ Q(x)]$
+- or (quantifiers): $⟨⟨e,⟨e,t⟩⟩,⟨⟨e,⟨e,t⟩⟩,⟨e,⟨e,t⟩⟩⟩⟩ ↝ λQ.λP.λy.λx.[P(x,y) ∨ Q(x,y)]$
+- and (quantifiers): $⟨⟨e,⟨e,t⟩⟩,⟨⟨e,⟨e,t⟩⟩,⟨e,⟨e,t⟩⟩⟩⟩ ↝ λQ.λP.λy.λx.[P(x,y) ∧ Q(x,y)]$
+
+- not: $⟨⟨e,t⟩,⟨e,t⟩⟩ ↝ λP.λx.¬P(x)$
+
+For example, treating predicative *bad* as a plain $⟨e,t⟩$ predicate, *Alice is bad* composes by function application: $[λP.λx.P(x)](λx.Bad(x)) = λx.Bad(x)$, which applied to $Alice$ yields $Bad(Alice)$, of type $t$.
+
+### Quantification
+
+- every: $⟨⟨e,t⟩,⟨⟨e,t⟩,t⟩⟩ ↝ λQ.λP.∀x.[P(x) → Q(x)]$
+  - everything: $⟨⟨e,t⟩,t⟩ ↝ λP.∀x.P(x)$
+- some: $⟨⟨e,t⟩,⟨⟨e,t⟩,t⟩⟩ ↝ λQ.λP.∃x.[P(x) ∧ Q(x)]$
+  - something: $⟨⟨e,t⟩,t⟩ ↝ λP.∃x.P(x)$
+- no: $⟨⟨e,t⟩,⟨⟨e,t⟩,t⟩⟩ ↝ λQ.λP.∀x.[P(x) → ¬Q(x)]$ (or $λQ.λP.¬∃x.[P(x) ∧ Q(x)]$)
+  - nothing: $⟨⟨e,t⟩,t⟩ ↝ λP.¬∃x.P(x)$ (or $λP.∀x.¬P(x)$)
+
+### Reference
+
+### Numbers and Plurality
+
+### Event Semantics
+
+### Tense and Aspect
+
+## Beyond Truth
+
+### Necessity and Possibility
+
+### Command, Request, Obligation
+
+> *Alice, run!*
+> *Alice, please run.*
+> *Alice should run.* + +### Questions +## Resources +- ✨ [Invitation to Formal Semantics](https://eecoppock.info/bootcamp/semantics-boot-camp.pdf) diff --git a/ling/subject-movement.png b/ling/subject-movement.png new file mode 100644 index 0000000..1136f5e Binary files /dev/null and b/ling/subject-movement.png differ diff --git a/ling/syntax.md b/ling/syntax.md new file mode 100644 index 0000000..46708fa --- /dev/null +++ b/ling/syntax.md @@ -0,0 +1,556 @@ +--- +layout: linguistics +title: linguistics/syntax +--- + +# morphosyntax + +Morphology is the study of word formation. Syntax is the study of sentence formation.
+Specifically, both morphology and syntax focus on **structure**. + +The distinction between syntax and morphology varies cross-linguistically.
+They can be considered to form an overarching **morphosyntactic** theory.
+
+These notes are ordered in the way that I feel builds upon itself best. This is not the order in which topics were covered in my syntax class, nor in my textbook. My syntax class covered Agree before Move, and my textbook deeply intertwined Merge with X'-theory and Move with Agree: and I think both of them suffered a little bit pedagogically for that.
+
+Certainly, all of syntax cannot be taught at once. Yet the desire to generalize and apply what one has learned to real-world examples is strong, and it is extraordinarily difficult to teach syntax in a way that builds upon itself naturally. This is my best attempt, but it will fall flat in places: when it does, I do recommend either skipping ahead or being content with (hopefully temporarily) not knowing what's going on.
+
+
+Table of Contents + +- History of Syntax + - A wrong approach: Phrase Structure Rules +- Morphology [SKS 2] + - Syntactic Categories [SKS 2.1] + - Compositionality [SKS 2.3] + - Headedness [SKS 2.4] +- [Merge, Part I](#merge-part-i) + - Binary Branching + - Constituency [SKS 3] + - Heads, Specifiers and Complements + - [Silent Heads](#silent-heads) +- [Notation](#notation) + - X'-theory [SKS 6] + - [Bare Phrase Structure](#bare-phrase-structure) [n/a] + - Lexical Entries [SKS 6.8] +- [Minimalism](#minimalism) [n/a] +- [Merge, Part II](#merge-part-ii) + - [Projection](#projection) [SKS 5] + - Selection [SKS 8] +- [Move, Part I](#move-part-i) [SKS 8] + - [Affix Hopping](#affix-hopping) + - Verb Raising + - Subject-Auxiliary Inversion + - Head Movement [SKS 8.3] +- Move, Part II + - Wh- Movement [SKS 10] + - Topicalization + - Phrasal Movement + - [Subject Raising](#subject-raising) [SKS 12.4] +- Agree + - Theta Roles [SKS 6.8.1] + - Locality + - [Binding](#binding) [SKS 7] + - Raising & Control [SKS 9] +- Advanced Syntax + - [On languages other than English](#on-languages-other-than-english) + - Negation + - Ellipsis + - Shifting + - Scrambling + - Parsing +- References + +
+
+> Be warned! These notes are incomplete and almost certainly somewhat inaccurate. Proceed at your own risk.
+
+## history
+
+### a wrong approach: phrase structure rules
+
+## morphology
+
+### syntactic categories
+
+### compositionality
+
+### headedness
+
+## Merge, Part I
+
+We concluded the following from our excursion into morphology:
+- words are composed of *morphemes*
+- morphemes come in *categories*
+- morphemes *combine in a regular fashion*
+- morphemes can be *silent*
+
+Surprisingly (or unsurprisingly), we shall see that these ideas generalize to sentence structure as a whole.
+
+### constituency
+
+### heads, specifiers, and complements
+
+### silent heads
+
+Why are proper names $D$s? Why is it possible to say both *I moved the couches* and *I moved couches*, but only possible to say *I moved the couch* and not *I moved couch*? Why is the infinitive form of a verb identical to the present, in some cases?
+
+These inconsistencies can all be addressed by one (strange) concept: the idea of *silent morphemes*, invisible in writing and unpronounceable in speech. We represent such morphemes as ∅, and so may write the earlier strange sentence as *I moved ∅-couches*.
+
+These silent morphemes are extremely useful to our syntactic analyses. Consider our examples above. With silent morphemes, we can describe the verb *moved* as taking in two $D$s as arguments: *I* and *couches*. Without silent morphemes, we would have to account for both $D$ phrases and $N$ phrases as arguments, duplicating our lexical entry, destroying the structural similarity to *I moved the couches*, and ultimately duplicating our work.
+
+```forest
+\begin{forest}
+[V
+  [D [I, roof]]
+  [V_D
+    [V_{D,D} [moved]]
+    [D
+      [D_N [∅]]
+      [N [couches]]]]]
+\end{forest}
+```
+
+So silent morphemes are extremely handy. But: what is stopping us from using an *excess* of silent morphemes? After all, if they're just morphemes, they can go anywhere. And if they're silent, we can't observe *where* they go. We will revisit this topic, but for now, we shall consider the list of silent morphemes to be very small and closed, and their use regular and rare.
+
+p-features | f-features | s-features
+-----------|------------|-----------
+the | $D_{N}$ | definite
+a | $D_{N (-plural)}$ | indefinite
+∅ | $D_{N (+plural)}$ | indefinite
+
+p-features | f-features | s-features
+-----------|------------|-----------
+will | $T_{D,V}$ | future
+-ed | $T_{D,V}$ | past
+∅ | $T_{D,V}$ | present
+to | $T_{D,V}$ | infinitive
+
+These tables use notation and language formally introduced at the end of the next section. Ignore them for now.
+
+## notation
+
+So far, we've been discussing syntax and giving examples using somewhat informal notation. We now formalize this notation.
+
+### X'-theory
+
+...
+
+### Bare Phrase Structure
+
+**Bare Phrase Structure** (BPS) is a more modern notation that does away with much of the notational cruft of X'-theory. Instead of bar levels and distinctions between bar levels and "phrases", we simply put the *formal features* of our lexicon in the chart itself and only indicate the *types* of phrases. Whether a phrase has closed yet or not (previously indicated by a bar) is now indicated by whether there are any unsatisfied selectional requirements on the phrase label.
+
+**Head-Initial Phrases**
+
+![`[X [X_Y (head)] [Y (complement)]]`](head-initial.png)
+
+LaTeX + +```forest +\begin{forest} +[$X$ + [$X_Y$ [(head)]] + [$Y$ [(complement)]]] +\end{forest} +``` + +
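+
+For a concrete instance of this schema, consider the $P$ *to the school*, exactly as it will appear inside the affix-hopping trees below: the head $P_D$ *to* selects a $D$ complement and projects its type. A sketch in the same notation:
+
+```forest
+\begin{forest}
+[$P$
+  [$P_D$ [to]]
+  [$D$
+    [$D_N$ [the]]
+    [$N$ [school]]]]
+\end{forest}
+```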
+ +**Head-Final Phrases** + +![`[X [Y (complement)] [X_Y (head)]]`](head-final.png) +
+LaTeX + +```forest +\begin{forest} +[$X$ + [$Y$ [(complement)]] + [$X_Y$ [(head)]]] +\end{forest} +``` + +
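+
+English $P$s are head-initial, so a head-final instance has to come from another language. A sketch, assuming the standard analysis of Japanese as head-final, with the postposition *kara* 'from' as the $P_D$ head:
+
+```forest
+\begin{forest}
+[$P$
+  [$D$ [Tokyo, roof]]
+  [$P_D$ [kara]]]
+\end{forest}
+```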
+ +Recall that adjuncts are able to appear on either side of their head. Also recall that adjuncts *select* for their head. We indicate this in our labeling: adjuncts, like heads, have their selectional requirements marked, but do not propagate their type. While certain constructions may lead to notational ambiguity - an adjunct and a head of the same type, specifically - this is rare enough (only really occurring with adverbs) that we take the convenience of BPS notation regardless. + +**Left Adjuncts** + +![`[X [Y_X (left adjunct)] [X (head)]]`](left-adjunct.png) +
+LaTeX + +```forest +\begin{forest} +[$X$ + [$Y_X$ [(left adjunct)]] + [$X$ [(head)]]] +\end{forest} +``` + +
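+
+For instance, an attributive adjective can be treated as a left adjunct to $N$: in *blue pigeon* (borrowing the example from the semantics notes), *blue* selects for the $N$ *pigeon* without projecting its own type. A sketch, assuming adjectives adjoin to $N$ and labeling them $A$:
+
+```forest
+\begin{forest}
+[$N$
+  [$A_N$ [blue]]
+  [$N$ [pigeon]]]
+\end{forest}
+```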
+ +**Right Adjuncts** + +![`[X [X (head)] [Y_X (right adjunct)]]`](right-adjunct.png) +
+LaTeX + +```forest +\begin{forest} +[$X$ + [$X$ [(head)]] + [$Y_X$ [(right adjunct)]]] +\end{forest} +``` + +
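+
+Likewise, an adverb can be treated as a right adjunct to $V$, as in *sleep often*. A sketch, assuming the adverb *often* is an $Adv$ selecting (adjoining to) a $V$:
+
+```forest
+\begin{forest}
+[$V$
+  [$V$ [sleep]]
+  [$Adv_V$ [often]]]
+\end{forest}
+```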
+ +As a reminder, English is not consistently head-initial. Subjects ("specifiers") in English appear before the verb, breaking this convention. This common structure is represented as the following: + +![`[X [Y (specifier)] [X_Y [X_{Y,Z} (head)] [Z (complement)]]]`](english-specifier.png) +
+LaTeX + +```forest +\begin{forest} +[$X$ + [$Y$ [(specifier)]] + [$X_Y$ + [$X_{Y,Z}$ [(head)]] + [$Z$ [(complement)]]]] +\end{forest} +``` + +
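+
+Instantiating the schema with $X = T$, $Y = D$, $Z = V$ gives the familiar shape of an English clause. A sketch of *Alice will sleep*, anticipating the tense trees below:
+
+```forest
+\begin{forest}
+[$T$
+  [$D$ [Alice, roof]]
+  [$T_D$
+    [$T_{D,V}$ [will]]
+    [$V$ [sleep]]]]
+\end{forest}
+```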
+
+The lexicon and structure are blended in bare phrase structure. This is useful, and allows us to indicate more specific selectional requirements on the tree itself.
+
+...
+
+It cannot be emphasized enough that notational conventions are *just that*: notational conventions. There's nothing stopping us from exclusively using X'-notation or exclusively using BPS, and the syntactic concepts they describe are *not* tied to any specific notation. I will pretty much exclusively use BPS going forward, as I like it a whole lot more.
+
+### lexical entries
+
+We have stated that Bare Phrase Structure pulls aspects of the lexicon directly into the syntax tree. But what is a lexicon?
+
+A **lexicon** is a language speaker's internal collection of lexical entries. But what is a lexical entry?
+
+What exactly a lexical entry contains is up to some debate. The English language consists of (significantly) upwards of 400,000 words. How can humans hold that much information in our minds, and retrieve it so quickly? This is biologically interesting, and there are arguments from such fields for what these entries should and should not contain. For our purposes, we will focus entirely on syntactic analysis, and ignore biological motivations. We treat a **lexical entry** as containing the following information about an associated morpheme:
+- phonetic features (**p-features**): how the word is pronounced
+  - With our focus on syntax, we shall simply consider this the standard written representation of the morpheme. But it should really be written in IPA.
+- formal features (**f-features**): the type of the morpheme and what types it selects, if any
+  - These are often written directly on our tree in BPS. Most often they are simply the types of the arguments, but heads can select for much more granular features, e.g. -tense, +animacy, etc.
+- semantic features (**s-features**): the role of the entry and its arguments in the sentence
+  - Not all lexical entries have s-features. For tense/aspect/etc, these are their appropriate tense/aspect/etc. For verbs, these are typically *theta roles* (which we shall address later).
+
+This is the formalism expressed in our tables earlier.
+Heads select for the features of their complements and project their own features.
+Adjuncts select for the features of their heads but do not project their features.
+
+## Minimalism
+
+[Minimalism](https://en.wikipedia.org/wiki/Minimalist_program) is a *program* that aims to reduce much of the complexity surrounding syntactic analysis. While our theories may end up providing adequate analyses of natural languages, this is not enough. Phrase structure rules, too, were *adequate*: yet we rejected them for their sheer complexity. If we can explain what we observe in a simpler framework, *we should adopt that framework*. Many of the modern advancements in syntactic analysis have come out of Minimalism: the notation of bare phrase structure, in particular.
+
+As with most Chomskyan theories: Minimalism has a *strong* focus on natural language facilities. A core thesis is that *"language is an optimal solution to legibility conditions"*. I don't find this all too interesting, so I won't get much into it, and instead will focus on the definitions and usage of the basic operations rather than the motivation for them.
+
+Modern Minimalism divides syntax into three *basic operations*: Merge, Move, and Agree. All that we will discuss falls into one of these basic camps.
+
+## Merge, Part II
+
+Merge(α, β) is a function that takes in two arguments of type α and β and outputs a single node of either type α or β.
+
+Merge is *the* fundamental underlying aspect of syntax and arguably language as a whole. Compositionality, headedness, movement (in some camps), and a whole lot more can be considered to be rolled into it.
+
+### projection
+
+We have talked casually about the idea of heads "projecting" their type, and this informing the syntactic structure of parsed sentences. We now formally discuss this.
+
+The **projection principle** states that *the properties of lexical items must be satisfied* (chief among lexical properties being selectional properties). This is a simple statement, but it has profound implications: in particular, when we observe that properties of lexical items appear to *not* be satisfied, there is likely something deeper going on.
+
+...
+
+### selection
+
+## Move, Part I
+
+Move(α, β)
+
+All movement falls into one of the following categories:
+- Head Movement
+  - T-to-V: affix hopping
+  - V-to-T: verb raising (was / be)
+  - T-to-C: subject-auxiliary inversion
+- Phrasal Movement
+  - A-movement (argument movement)
+    - subject raising
+  - A'-movement (non-argument movement)
+    - topicalization
+    - wh-movement
+
+We shall visit each of these in depth.
+
+### affix hopping
+
+![(no tense)](tense-no-tense.png)
+
+LaTeX
+
+```forest
+\begin{forest}
+[$V$
+  [$D$ [I, roof]]
+  [$V_D$
+    [$V_{D,P}$ [walk]]
+    [$P$
+      [$P_D$ [to]]
+      [$D$
+        [$D_N$ [the]]
+        [$N$ [school]]]]]]
+\end{forest}
+```
+
+
+ +So far, we have not dealt with tense. We have diagrammed sentences with verbs in present and past forms by entirely ignoring their *-s* and *-ed* affixes. But tense is an aspect of grammar just like anything else, and writing it off as purely semantic does no good to anyone. Indeed, the English future having its tense marker *will* as a free-standing morpheme strongly suggests that we have to treat tense as a syntactic category in its own right, and not just as an inflectional property of verbs. + +A tense needs a *subject*. This is often stated as the **extended projection principle**, for how fundamentally it influences the structure of sentences. For now, we'll consider the verb to no longer be in charge of selecting the subject, and leave it to the tense. This is not in fact accurate - as we will see at the end of this section - but it is a simplification we shall make for the time being. + +![will](tense-will.png) +
+LaTeX + +```forest +\begin{forest} +[$T$ + [$D$ [I, roof]] + [$T_D$ + [$T_{D,V}$ [will]] + [$V$ + [$V_P$ [walk]] + [$P$ + [$P_D$ [to]] + [$D$ + [$D_N$ [the]] + [$N$ [school]]]]]]] +\end{forest} +``` + +
+
+While *will* is straightforward, what about *-ed* and *-s*? These markers are *suffixes* (bound morphemes) and are directly attached to the end of the verb. Attempts to diagram these with our current knowledge of syntactic structure begin and end with nonsense. However, on analogy with *will*, it should follow that the tense markers *-ed* and *-s* actually occupy the same position, given that our diagramming of *will* works without issue. But how can this be?
+
+In this section, we introduce the idea of *movement*: that certain portions of sentences can *move* in certain *constrained* fashions around the rest of the sentence, almost (but not quite) *copying* themselves to other places. We saw hints of this with our constituency tests earlier producing grammatically valid but unanalyzable sentences. For now, we shall simply assert that tense suffixes move on analogy with *will*, without claiming anything about movement in general. We shall cover constraints on movement shortly.
+
+![(-ed)](tense-ed.png)
+
+LaTeX + +```forest +\begin{forest} +[$T$ + [$D$ [I, roof]] + [$T_D$ + [$T_{D,V}$ [(-ed), name=tense]] + [$V$ + [$V_P$ [walked, name=walk]] + [$P$ + [$P_D$ [to]] + [$D$ + [$D_N$ [the]] + [$N$ [school]]]]]]] + \draw[->,dotted] (tense) to[out=south,in=west] (walk); +\end{forest} +``` + +
+ +(we say that *-ed* leaves a **trace** when moving to *walk*. we denote this here with *(-ed)*, but another common notation is to write *t*.) + +English's first-person present does not inflect the verb, and so we must introduce a null $T$. A similar example is given for the present tense in the third person, which does have an explicit tense marker. + +![()](tense-null.png) +
+LaTeX + +```forest +\begin{forest} +[$T$ + [$D$ [I, roof]] + [$T_D$ + [$T_{D,V}$ [∅, name=tense]] + [$V$ + [$V_P$ [walk, name=walk]] + [$P$ + [$P_D$ [to]] + [$D$ + [$D_N$ [the]] + [$N$ [school]]]]]]] + \draw[->,dotted] (tense) to[out=south,in=west] (walk); +\end{forest} +``` + +
+ +![(-s)](tense-s.png) +
+LaTeX + +```forest +\begin{forest} +[$T$ + [$D$ [Alice, roof]] + [$T_D$ + [$T_{D,V}$ [(-s), name=tense]] + [$V$ + [$V_P$ [walk, name=walk]] + [$P$ + [$P_D$ [to]] + [$D$ + [$D_N$ [the]] + [$N$ [school]]]]]]] + \draw[->,dotted] (tense) to[out=south,in=west] (walk); +\end{forest} +``` + +
+ +This now makes our top-level phrase type $T$ instead of $V$. It will not remain so for very long, as we shall see in Agree. + +### verb raising + +### subject-auxiliary inversion + +### head movement + +## Move, Part II + +### wh-movement + +### subject raising + +Consider the following sentence: *Alice will speak to the assembly*. With our current knowledge of syntax, we would diagram it as so: + +![`[T [D Alice] [T_D [T_{D,V} will] [V [V_P speak] [P [P_D to] [D [D_N the] [N assembly]]]]]]`](no-subject-movement.png) +
+LaTeX + +```forest +\begin{forest} + [$T$ + [$D$ [Alice, roof]] + [$T_D$ + [$T_{D,V}$ [will]] + [$V$ + [$V_P$ [speak]] + [$P$ + [$P_D$ [to]] + [$D$ + [$D_N$ [the]] + [$N$ [assembly]]]]]]] +\end{forest} +``` + +
+
+The $D$ *Alice* here is the subject. While replacing it with some $D$s produces grammatical sentences (ex. *The prime minister will speak to the assembly*), this is not true of all $D$s. Slotting in inanimate $D$s, as in *Time will speak to the assembly* and *Knowledge will speak to the assembly*, produces grammatically unacceptable sentences. So there is some *selection* occurring somewhere in the sentence that wants a particular *feature set* (f-features) from the subject $D$: specifically, animacy.
+
+Observe, however, that our tree structure suggests that $T$ - and only $T$ - is involved in the selection of $Alice$ as the subject, given locality of selection and the extended projection principle. But this can't be quite right. Plenty of other sentences involving the $T$ *will* are just fine with inanimate subjects: *Time will pass*, *Knowledge will be passed on*, etc. (Notice that *Alice will pass* and *Alice will be passed on* are similarly ungrammatical.) How do we reconcile this?
+
+We now introduce the idea of **subject raising** / $vP$ shells. Our observations above point towards the $V$ of the sentence rather than the $T$ selecting for the subject $D$ - somehow. This selection would break our guiding principle of locality of selection. But this behavior *does* occur, and as an empirical science we must adjust our theory accordingly. Can we extend our model to explain this, *without* modifying the locality of selection that has been so useful thus far? We can indeed, with movement, as illustrated in the following tree.
+
+![`[T [D Alice] [T_D [T_{D,V} will] [V [D (subj)] [V_D [V_{D,P} speak] [P [P_D to] [D [D_N the] [N assembly]]]]]]]`](subject-movement.png)
+
+LaTeX + +```forest +\begin{forest} + [$T$ + [$D$ [Alice, roof, name=Alice]] + [$T_D$ + [$T_{D,V}$ [will]] + [$V$ + [$D$ [(subj), roof, name=subj]] + [$V_D$ + [$V_{D,P}$ [speak]] + [$P$ + [$P_D$ [to]] + [$D$ + [$D_N$ [the]] + [$N$ [assembly]]]]]]]] + \draw[->,draw opacity=0.5] (subj) to[out=west,in=south] (Alice); +\end{forest} +``` + +
+
+So we say that *Alice* is originally selected by the $V$ and *moves* to its surface position in the $T$. *Alice* satisfies the projection principle by being selected by the $V$, satisfies the extended projection principle by fulfilling the role of the subject for the $T$, and satisfies locality of selection by being in complement and specifier position for the $V$ and the $T$, respectively. Our concept of movement allows *Alice* to play **both** roles at the same time. This does mean that every tree diagram we have drawn up until now is inaccurate, and that almost every tree we draw going forward will have to have this somewhat redundant subject raising. This is a fine tradeoff to make in exchange for accurately describing previously-unclear syntactic behavior.
+
+This subject raising is an example of **A-movement** (argument movement). A-movement exists in contrast to **A'-movement** (movement to a non-argument position), which is responsible for wh-movement and topicalization: two topics that we shall touch on shortly.
+
+
+### small clauses
+
+## Agree
+
+### theta roles
+
+### locality
+
+### binding
+
+How do pronouns work?
+
+First, some definitions. We distinguish several classes of pronouns:
+- **anaphors**: *reflexive* and *reciprocal* pronouns, e.g. *herself*, *each other*, ...
+- **personal pronouns**: *her*, *him*, *they*, *it*, ...
+- **possessive pronouns**: *ours*, *theirs*, *hers*, *his*, ...
+- ...
+
+Every pronoun (pro-form, really) has an **antecedent**: that is, the phrase or concept it is in *reference* to. In contrast to pronouns, we also have **r-expressions**: **independently referential** expressions. These are names, proper nouns, descriptions, epithets, and the like: ex. *Alice*, *British Columbia*, *the man on the corner*, *the idiot*, etc.; these have no antecedent.
+
+We say that two nodes are **coreferential** (or **co-indexed**) if they refer to the same concept or entity. On tree diagrams, we often refer to this with numerical ($_0$, $_1$, ...) or alphabetical ($_i$, $_j$, $_k$) subscripts. (Though we could also indicate this with arrows, we prefer to reserve those for movement, so as to not clutter our diagrams too much.) This is a useful notion when it comes to pronouns.
+
+...
+
+The theory of binding operates under three fundamental principles.
+- **Principle A**: an anaphor must be bound in its domain.
+- **Principle B**: a (personal) pronoun must be free in its domain.
+- **Principle C**: an r-expression may never be bound.
+
+Our principles imply various things. Principle A implies that:
+- a reflexive must be *coreferential* with its antecedent
+  - (agreeing in person, number, and gender)
+- the antecedent of a reflexive must *c-command* the reflexive
+- the reflexive and its antecedent must be *in all the same nodes* that have a subject
+
+(Hence *Alice admires herself* is fine, while \**Herself admires Alice* is not: there, the antecedent fails to c-command the reflexive.)
+
+
+### raising and control
+
+Consider the following sentences:
+- *Alice seems to sleep often.*
+- *Alice hopes to sleep often.*
+
+With our current knowledge, we would diagram these sentences near-identically. Yet a closer investigation reveals that they are in fact deeply structurally different.
+- *It seems Alice sleeps a lot.*
+- \* *It hopes Alice sleeps a lot.*
+
+## Advanced Syntax
+
+### on languages other than english
+
+We have so far approached our discussion of syntax entirely from the point of view of the English language. All of our motivations - our rationales, our counterexamples - have been drawn from and centred around English. This raises the question: just how much of this *holds*, cross-linguistically? What of all the other wide and varied languages of the world - which, clearly, our frameworks must have been built not only to accommodate but to represent *well*, given we are discussing them now, more than fifty years after the Chomskyan revolution and more than a century after the field of linguistics recognized its Indo-European biases?
+
+We have discussed some principles that, clearly, cannot be a feature of all natural languages: like do-support. However, other concepts - like the Subjacency Condition - seem possibly broad enough to be applied across a variety of languages. Is that true? Is *anything* broad enough? (Alternatively: does a [universal grammar](https://en.wikipedia.org/wiki/Universal_grammar) exist?)
+
+This notion of *principles* that occur for some languages and do not occur for others forms what is either the framework of *Principles and Parameters*, or *Government and Binding Theory*. I do not understand the difference between them, and suspect what is above to be a mixture of both, as neither was explicitly mentioned. Nevertheless, everything given here is for English, not some cross-linguistic model of the mind. English remains useful by virtue of being an L1 language for me and for many - and by being such a *mess* of a language that its structure cannot be explained away trivially.
+
+### negation
+
+### ellipsis
+
+## References
+
+- ✨ [An Introduction to Syntactic Analysis and Theory](https://annas-archive.org/md5/11bbf70ff9259025bc6985ba3fa4083b)
+- ✨ [The Science of Syntax](https://pressbooks.pub/syntax/)
+- MIT 24.902: [2017](https://web.mit.edu/norvin/www/24.902/24902.html), [2015](https://ocw.mit.edu/courses/24-902-language-and-its-structure-ii-syntax-fall-2015/), [2003](https://ocw.mit.edu/courses/24-902-language-and-its-structure-ii-syntax-fall-2003/)
diff --git a/ling/tense-ed.png b/ling/tense-ed.png
new file mode 100644
index 0000000..cac54d6
Binary files /dev/null and b/ling/tense-ed.png differ
diff --git a/ling/tense-no-tense.png b/ling/tense-no-tense.png
new file mode 100644
index 0000000..1ea1c6d
Binary files /dev/null and b/ling/tense-no-tense.png differ
diff --git a/ling/tense-null.png b/ling/tense-null.png
new file mode 100644
index 0000000..106e0b9
Binary files /dev/null and b/ling/tense-null.png differ
diff --git a/ling/tense-s.png b/ling/tense-s.png
new file mode 100644
index 0000000..9f8120f
Binary files /dev/null and b/ling/tense-s.png differ
diff --git a/ling/tense-will.png b/ling/tense-will.png
new file mode 100644
index 0000000..0d10a69
Binary files /dev/null and b/ling/tense-will.png differ
diff --git a/ling/types.md b/ling/types.md
new file mode 100644
index 0000000..437ab50
--- /dev/null
+++ b/ling/types.md
@@ -0,0 +1,4 @@
+---
+layout: linguistics
+title: linguistics/types
+---
diff --git a/linguistics/english-specifier.png b/linguistics/english-specifier.png
deleted file mode 100644
index 4500211..0000000
Binary files a/linguistics/english-specifier.png and /dev/null differ
diff --git a/linguistics/glossary.md b/linguistics/glossary.md
deleted file mode 100644
index 5bfb895..0000000
--- a/linguistics/glossary.md
+++ /dev/null
@@ -1,4 +0,0 @@
----
-layout: linguistics
-title: linguistics/glossary
----
diff --git a/linguistics/head-final.png b/linguistics/head-final.png
deleted file mode 100644
index bbce198..0000000
Binary files a/linguistics/head-final.png and /dev/null differ
diff --git a/linguistics/head-initial.png b/linguistics/head-initial.png
deleted file
mode 100644 index 920f94e..0000000 Binary files a/linguistics/head-initial.png and /dev/null differ diff --git a/linguistics/index.md b/linguistics/index.md deleted file mode 100644 index 074db20..0000000 --- a/linguistics/index.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -layout: linguistics -title: linguistics ---- - -
-
-
-
-
- -# linguistics is the study of *language* diff --git a/linguistics/left-adjunct.png b/linguistics/left-adjunct.png deleted file mode 100644 index 306483d..0000000 Binary files a/linguistics/left-adjunct.png and /dev/null differ diff --git a/linguistics/no-subject-movement.png b/linguistics/no-subject-movement.png deleted file mode 100644 index 3f87211..0000000 Binary files a/linguistics/no-subject-movement.png and /dev/null differ diff --git a/linguistics/pragmatics.md b/linguistics/pragmatics.md deleted file mode 100644 index d1b181a..0000000 --- a/linguistics/pragmatics.md +++ /dev/null @@ -1,40 +0,0 @@ ---- -layout: linguistics -title: linguistics/pragmatics ---- - -# pragmatics - -Pragmatics is the study of **context**. - -How do we convey meaning not explicitly expressed?
-How can we unambiguously understand ambiguous sentences?
-How does the listener *reconstruct* the speaker's intended meaning? - -## Basic Principles - -### Cooperation - -The *Principle of Cooperation* states that **language is a fundamentally collaborative action**. When we speak to one another, we attempt to convey the pure notion of meaning through the fallible medium of words. Success of such speech requires both the speaker and the listener to operate under shared assumptions, in order to reconstruct intentions not explicitly communicated. - -The Principle of Cooperation is composed entirely of several sub-principles. -- **quantity**: Say as much as necessary. Do not say more than necessary. -- **quality**: Do not say that which you believe false. Do not say that which you lack evidence. -- **relation**: Be relevant. -- **manner**: Avoid obscurity. Avoid ambiguity. Be brief. Be orderly. - -These principles are also referred to as *Grice's Maxims* (named such after Paul Grice, a strong candidate for being the single most influential person on the field of pragmatics) can be arguably all bundled up into one core notion of **relevance**. (This is a hot topic of debate in the philosophy side of pragmatics, though). - -As in all of linguistics: The Cooperative Principle is an *observation*, meant to *model* language's implicatures. It is not a hard and fast rule (in so much as hard and fast rules even exist in a descriptive framework), but even as rules go, it's not particularly strict: it can be violated, and indeed, often is. - -The ways in which speakers can behave in a conversation, relative to the Principle of Cooperation, fall into the following categories: -- One can **observe** the principle, directly conforming to it -- One can **violate** the principle, subtly and misleadingly violating it -- One can **flout** the principle, clearly and straightforwardly violating it -- One can **opt out** of the principle, directly expressing an intention to not play by the rules of conversation. - -## References - -- Introduction to Pragmatics -- [Analyzing Meaning](https://langsci-press.org/catalog/book/231) -- [Invitation to Formal Semantics](https://eecoppock.info/bootcamp/semantics-boot-camp.pdf) diff --git a/linguistics/right-adjunct.png b/linguistics/right-adjunct.png deleted file mode 100644 index 12f3ba0..0000000 Binary files a/linguistics/right-adjunct.png and /dev/null differ diff --git a/linguistics/semantics.md b/linguistics/semantics.md deleted file mode 100644 index a6cb615..0000000 --- a/linguistics/semantics.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -layout: linguistics -title: linguistics/semantics ---- - -# notes on semantics - -Semantics is the study of **meaning**. - -How do we know what sentences are true and which are false?
-What does it *mean* for a sentence to be true?
-What conditions must hold for a sentence to be true? - -Formal semantics attempts to answer those questions by providing a *framework* for determining what *conditions* must hold for a sentence to be true. - -This framework is [first-order/predicate logic](../mathematics/logic) and the [simply-typed lambda calculus](../plt/lambda-calculus). On top of this, we often build set theory, relying on *characteristic functions* of the lambda calculus as denotations of *set membership*. - - -## Basic Principles - -### Compositionality - -The *Principle of Compositionality* states that the meaning of a *constituent* is determined entirely by its *components*. This is *the* fundamental underlying principle behind formal logic and subsequently semantics. It holds for not just sentence composition (syntax), but also *word formation* (morphology), and what's of interest to us here - meaning (semantics). - -### Substitution - -The *Principle of Substitution* states that substituting one part of an expression with something else of the same meaning *preserves* the meaning of the expression as a whole. This might be thought of as a given, but semantics has its roots in philosophy, and philosophers care very much about enumerating their givens. - -### Predicate Logic & The Lambda Calculus - -Formal semantics begets a formal system for such semantics, and *first-order logic* and *the lambda calculus* are a natural fit. Semantics is the study of meaning - and what is logic but a system for expressing meaning? As discussed above, language functions by composition - and what are functions but their property of composition? - -[*An Invitation to Formal Semantics*](https://eecoppock.info/bootcamp/semantics-boot-camp.pdf) covers basic logic and the lambda calculus well in its first six chapters. Otherwise, for a worse introduction, see [logic](../mathematics/logic), and [the lambda calculus](../plt/lambda-calculus). - -## Denotational Semantics - -With basic logic and the lambda calculus under our belt, we may simply get straight to assigning *meaning* to language. We consider two *basic types* to start: the type of entities, $e$, and the type of truth values, $t$. Our function types we denote by ordered pairs: that is, a function from $e$ to $t$ is of type $⟨e,t⟩$. This is perhaps clunkier notation than the type-theoretic $e→t$, but it is what it is. (And does avoid issues of precedence.) - -### Entities and Functions - -> *I am Alice.*
-> *Alice is bad.*
-> *The blue pigeon flew away.* - -- Noun: $⟨e,t⟩ ↝ λx.Noun(x)$ -- Verb (intransitive): $⟨e,t⟩ ↝ λx.Verb(x)$ -- Verb (transitive): $⟨e,⟨e,t⟩⟩ ↝ λy.λx.Verb(x, y)$ -- Verb (meaningless): $⟨⟨e,t⟩,⟨e,t⟩⟩ ↝ λP.λx.P(x)$ -- Adj: $⟨⟨e,t⟩,⟨e,t⟩⟩ ↝ λNoun.λx.[Adj(x) ∧ Noun(x)]$ - -- or (clausal): $⟨t,⟨t,t⟩⟩ ↝ λq.λp.[p ∨ q]$ -- and (clausal): $⟨t,⟨t,t⟩⟩ ↝ λq.λp.[p ∧ q]$ -- or (verbal): $⟨⟨e,t⟩,⟨⟨e,t⟩,⟨e,t⟩⟩⟩ ↝ λQ.λP.λx.[P(x) ∨ Q(x)]$ -- and (verbal): $⟨⟨e,t⟩,⟨⟨e,t⟩,⟨e,t⟩⟩⟩ ↝ λQ.λP.λx.[P(x) ∧ Q(x)]$ -- or (quantifiers): $⟨⟨e,⟨e,t⟩⟩,⟨⟨e,⟨e,t⟩⟩,⟨e,⟨e,t⟩⟩⟩⟩ ↝ λQ.λP.λy.λx.[P(x,y) ∨ Q(x,y)]$ -- and (quantifiers): $⟨⟨e,⟨e,t⟩⟩,⟨⟨e,⟨e,t⟩⟩,⟨e,⟨e,t⟩⟩⟩⟩ ↝ λQ.λP.λy.λx.[P(x,y) ∧ Q(x,y)]$ - -- not: $⟨⟨e,t⟩,⟨e,t⟩⟩ ↝ λP.λx.¬P(x)$ - -### Quantification - -- every: $⟨⟨e,t⟩,⟨⟨e,t⟩,t⟩⟩ ↝ λQ.λP.∀x.[P(x) → Q(x)]$ - - everything: $⟨⟨e,t⟩,t⟩ ↝ λP.∀x.P(x)$ -- some: $⟨⟨e,t⟩,⟨⟨e,t⟩,t⟩⟩ ↝ λQ.λP.∃x.[P(x) ∧ Q(x)]$ - - something: $⟨⟨e,t⟩,t⟩ ↝ λP.∃x.P(x)$ -- no: $⟨⟨e,t⟩,⟨⟨e,t⟩,t⟩⟩ ↝ λQ.λP.∀x.[P(x) → ¬Q(x)] (or λQ.λP.¬∃x.[P(x) ∧ Q(x)])$ - - nothing: $⟨⟨e,t⟩,t⟩ ↝ λP.¬∃x.P(x)$ (or $λP.∀x.¬P(x)$) - -### Reference - -### Numbers and Plurality - -### Event Semantics - -### Tense and Aspect - -## Beyond Truth - -### Necessity and Possibility - -### Command, Request, Obligation - -> *Alice, run!*
-> *Alice, please run.*
-> *Alice should run.* - -### Questions -## Resources -- ✨ [Invitation to Formal Semantics](https://eecoppock.info/bootcamp/semantics-boot-camp.pdf) diff --git a/linguistics/subject-movement.png b/linguistics/subject-movement.png deleted file mode 100644 index 1136f5e..0000000 Binary files a/linguistics/subject-movement.png and /dev/null differ diff --git a/linguistics/syntax.md b/linguistics/syntax.md deleted file mode 100644 index 46708fa..0000000 --- a/linguistics/syntax.md +++ /dev/null @@ -1,556 +0,0 @@ ---- -layout: linguistics -title: linguistics/syntax ---- - -# morphosyntax - -Morphology is the study of word formation. Syntax is the study of sentence formation.
-Specifically, both morphology and syntax focus on **structure**. - -The distinction between syntax and morphology varies cross-linguistically.
-They can be considered to form an overarching **morphosyntactic** theory. - -These notes are ordered in a way that I feel builds upon itself the best. This is not the order in which topics were covered in my syntax class, nor in my textbook. My syntax class covered Agree before Move, and my textbook deeply intertwined Merge with X'-theory and Move with Agree: and I think the both of them suffered a little bit pedagogically for that. - -Certainly, all of syntax cannot be taught at once. Yet the desire to generalize and apply what one has learned to real-world examples is strong, and it is extraordinarily difficult to teach syntax in a way that builds upon itself naturally. This is my best attempt, but it will fall flat in places: when it does, I do recommend either skipping ahead or being content with temporarily (hopefully temporarily) not knowing what's going on. - -
-Table of Contents - -- History of Syntax - - A wrong approach: Phrase Structure Rules -- Morphology [SKS 2] - - Syntactic Categories [SKS 2.1] - - Compositionality [SKS 2.3] - - Headedness [SKS 2.4] -- [Merge, Part I](#merge-part-i) - - Binary Branching - - Constituency [SKS 3] - - Heads, Specifiers and Complements - - [Silent Heads](#silent-heads) -- [Notation](#notation) - - X'-theory [SKS 6] - - [Bare Phrase Structure](#bare-phrase-structure) [n/a] - - Lexical Entries [SKS 6.8] -- [Minimalism](#minimalism) [n/a] -- [Merge, Part II](#merge-part-ii) - - [Projection](#projection) [SKS 5] - - Selection [SKS 8] -- [Move, Part I](#move-part-i) [SKS 8] - - [Affix Hopping](#affix-hopping) - - Verb Raising - - Subject-Auxiliary Inversion - - Head Movement [SKS 8.3] -- Move, Part II - - Wh- Movement [SKS 10] - - Topicalization - - Phrasal Movement - - [Subject Raising](#subject-raising) [SKS 12.4] -- Agree - - Theta Roles [SKS 6.8.1] - - Locality - - [Binding](#binding) [SKS 7] - - Raising & Control [SKS 9] -- Advanced Syntax - - [On languages other than English](#on-languages-other-than-english) - - Negation - - Ellipsis - - Shifting - - Scrambling - - Parsing -- References - -
- -> Be warned! These notes are incomplete and almost certainly somewhat inaccurate. Proceed at your own risk. - -## history - -### a wrong approach: phrase structure rules - -## morphology - -### syntactic categories - -### compositionality - -### headedness - -## Merge, Part I - -We concluded the following from our excursion into morphology: -- words are composed of *morphemes* -- morphemes come in *categories* -- morphemes *combine in a regular fashion* -- morphemes can be *silent* - -Surprisingly (or unsurprisingly), we shall see that these ideas generalize to sentence structure as a whole. - -### constituency - -### heads, specifiers, and complements - -### silent heads - -Why are proper names $D$s? Why is it possible to say either *I moved the couches* and *I moved couches*, but only possible to say *I moved the couch* and not *I moved couch*? Why is the infinitive form of a verb identical to the present, in some cases? - -These inconsistencies can be all addressed by one (strange) concept: the idea of *silent morphemes*, invisible in writing and unpronounceable in speech. We represent such morphemes as ∅, and so may write the earlier strange sentence as *I moved ∅-couches*. - -These silent morphemes are extremely useful to our syntactic analyses. Consider our examples above. With silent morphemes, we can describe the verb *moved* as taking in two $D$s as arguments: *I* and *couches*. Without silent morphemes, we would have to account for both $D$ phrases and $N$ phrases as arguments, duplicating our lexical entry, destroying the structural similarity to *I moved the couches*, and ultimately duplicating our work. - -```latex -\begin{forest} -[V - [D [I, roof]] - [V_D - [V_{D,D} [moved]] - [D - [D_N [∅]] - [N [couches]]]]] -\end{forest} -``` - -So silent morphemes are extremely handy. But: what is stopping us from using an *excess* of silent morphemes? After all, if they're just morphemes, they can go anywhere. And if they're silent, we can't observe *where* they go. We will revisit this topic, but for know, we shall consider the list of silent morphemes to be very finite, and their use regular and a rarity. - -p-features | f-features | s-features ------------|------------|----------- -the | $D_{N}$ | definite -a | $D_{N (-plural)}$ | indefinite -∅ | $D_{N (+plural)}$ | indefinite - -p-features | f-features | s-features ------------|------------|----------- -will | $T_{D,V}$ | future --ed | $T_{D,V}$ | past -∅ | $T_{D,V}$ | present -to | $T_{D,V}$ | infinitive - -These tables are using notation and language formally introduced at the end of the next section. Ignore them for now. - -## notation - -So far, we've been discussing syntax and giving examples using somewhat informal notation. We now formalize this notation. - -### X'-theory - -... - -### Bare Phrase Structure - -**Bare Phrase Structure** (BPS) is a more modern notation that does away with much of the notational cruft of X'-theory. Instead of bar levels and distinctions between bar levels and "phrases", we simply put the *formal features* of our lexicon in the chart itself and only indicate the *types* of phrases. Whether a phrase has yet to close yet or not (previously indicated by a 'bar) is now indicated by whether there are any unsatisfied selectional requirements on the phrase label. - -**Head-Initial Phrases** - -![`[X [X_Y (head)] [Y (complement)]]`](head-initial.png) -
-LaTeX - -```forest -\begin{forest} -[$X$ - [$X_Y$ [(head)]] - [$Y$ [(complement)]]] -\end{forest} -``` - -
- -**Head-Final Phrases** - -![`[X [Y (complement)] [X_Y (head)]]`](head-final.png) -
-LaTeX - -```forest -\begin{forest} -[$X$ - [$Y$ [(complement)]] - [$X_Y$ [(head)]]] -\end{forest} -``` - -
- -Recall that adjuncts are able to appear on either side of their head. Also recall that adjuncts *select* for their head. We indicate this in our labeling: adjuncts, like heads, have their selectional requirements marked, but do not propagate their type. While certain constructions may lead to notational ambiguity - an adjunct and a head of the same type, specifically - this is rare enough (only really occurring with adverbs) that we take the convenience of BPS notation regardless. - -**Left Adjuncts** - -![`[X [Y_X (left adjunct)] [X (head)]]`](left-adjunct.png) -
-LaTeX - -```forest -\begin{forest} -[$X$ - [$Y_X$ [(left adjunct)]] - [$X$ [(head)]]] -\end{forest} -``` - -
- -**Right Adjuncts** - -![`[X [X (head)] [Y_X (right adjunct)]]`](right-adjunct.png) -
-LaTeX - -```forest -\begin{forest} -[$X$ - [$X$ [(head)]] - [$Y_X$ [(right adjunct)]]] -\end{forest} -``` - -
- -As a reminder, English is not consistently head-initial. Subjects ("specifiers") in English appear before the verb, breaking this convention. This common structure is represented as the following: - -![`[X [Y (specifier)] [X_Y [X_{Y,Z} (head)] [Z (complement)]]]`](english-specifier.png) -
-LaTeX - -```forest -\begin{forest} -[$X$ - [$Y$ [(specifier)]] - [$X_Y$ - [$X_{Y,Z}$ [(head)]] - [$Z$ [(complement)]]]] -\end{forest} -``` - -
- -The lexicon and structure are blended in bare phrase structure. This is useful, and allows us to indicate more specific selectional requirements on the tree itself. - -... - -It cannot be emphasized enough that notational conventions are *just that*: notational conventions. There's nothing stopping us from exclusively using X'-notation or exclusively using BPS, and the syntactic concepts they describe are *not* tied to any specific notation. I will pretty much exclusively use BPS going forth as I like it a whole lot more. - -### lexical entries - -We have stated that Bare Phrase Structure pulls aspects of the lexicon directly into the syntax tree. But what is a lexicon? - -A **lexicon** is a language speaker's internal collection of lexical entries. But what is a lexical entry? - -What exactly a lexical entry contains is up to some debate. The English language consists of (significantly) upwards of 400,000 words. How humans can hold that much information in our mind, and retrieve it so quickly? This is biologically interesting, and there are arguments for what such entries should and should not contain that come from such fields. For our purposes, we will focus entirely on syntactic analysis, and ignore biological motivations. We treat a **lexical entry** as containing the following information about an associated morpheme: -- phonetic features (**p-features**): how the word is pronounced - - With our focus on syntax, we shall simply consider this the standard written representation of the morpheme. But it should really be written in IPA. -- formal features (**f-features**): the type of the morpheme and what types it selects, if any - - These are often written directly on our tree in BPS. While most often they are simply the types of the arguments - heads can select for much more granular features, i.e. -tense, +animacy, etc. -- semantic features (**s-features**): the role of the entry and its arguments in the sentence - - Not all lexical entries have s-features. For tense/aspect/etc, these are their appropriate tense/aspect/etc. For verbs, these are typically *theta roles* (which we shall address later). - -This is the formalism expressed in our tables earlier. -Heads select for the features of their complements and project their own features. -Adjuncts select for the features of their heads but do not project their features. - -## Minimalism - -[Minimalism](https://en.wikipedia.org/wiki/Minimalist_program) is a *program* that aims to reduce much of the complexity surrounding syntactic analysis. While our theories may end up providing for adequate analyses of natural languages, this is not enough. Phrase structure rules, too, were *adequate*: yet we rejected them for their sheer complexity. If we can explain what we observe in a simpler framework, *we should adopt that framework*. Much of modern advancements in syntactic analysis have come out of Minimalism: the notation of bare phrase structure, in particular. - -As with most Chomskyan theories: Minimalism has a *strong* focus on natural language facilities. A core thesis is that *"language is an optimal solution to legibility conditions"*. I don't find this all too interesting, so I won't get much into it, and instead will focus on the definitions and usage of the basic operations rather than the motivation for them. - -Modern Minimalism considers into three *basic operations*: Merge, Move, and Agree. All that we will discuss can fall into one of these basic camps. 
-
-## Merge, Part II
-
-Merge(α, β) is a function that takes in two arguments of type α and β and outputs a single node of either type α or β.
-
-Merge is *the* fundamental underlying aspect of syntax and arguably language as a whole. Compositionality, headedness, movement (in some camps), and a whole lot more can be considered to be rolled into it.
-
-### projection
-
-We have talked casually about the idea of heads "projecting" their type, and this informing the syntactic structure of parsed sentences. We now formally discuss this.
-
-The **projection principle** states that *the properties of lexical items must be satisfied* (chief among lexical properties being selectional properties). This is a simple statement, but it has profound implications: in particular, when we observe that properties of lexical items appear to *not* be satisfied, there is likely something deeper going on.
-
-...
-
-### selection
-
-## Move, Part I
-
-Move(α, β)
-
-All movement falls into one of the following categories:
-- Head Movement
-  - T-to-V: affix hopping
-  - V-to-T: verb raising (was / be)
-  - T-to-C: subject-auxiliary inversion
-- Phrasal Movement
-  - A-movement (argument movement)
-    - subject raising
-  - A'-movement (non-argument movement)
-    - topicalization
-    - wh-movement
-
-We shall visit each of these in depth.
-
-### affix hopping
-
-![(no tense)](tense-no-tense.png)
-
-LaTeX
-
-```forest
-\begin{forest}
-[$V$
-  [$D$ [I, roof]]
-  [$V_D$
-    [$V_{D,P}$ [walk]]
-    [$P$
-      [$P_D$ [to]]
-      [$D$
-        [$D_N$ [the]]
-        [$N$ [school]]]]]]
-\end{forest}
-```
-
-
- -So far, we have not dealt with tense. We have diagrammed sentences with verbs in present and past forms by entirely ignoring their *-s* and *-ed* affixes. But tense is an aspect of grammar just like anything else, and writing it off as purely semantic does no good to anyone. Indeed, the English future having its tense marker *will* as a free-standing morpheme strongly suggests that we have to treat tense as a syntactic category in its own right, and not just as an inflectional property of verbs. - -A tense needs a *subject*. This is often stated as the **extended projection principle**, for how fundamentally it influences the structure of sentences. For now, we'll consider the verb to no longer be in charge of selecting the subject, and leave it to the tense. This is not in fact accurate - as we will see at the end of this section - but it is a simplification we shall make for the time being. - -![will](tense-will.png) -
-LaTeX - -```forest -\begin{forest} -[$T$ - [$D$ [I, roof]] - [$T_D$ - [$T_{D,V}$ [will]] - [$V$ - [$V_P$ [walk]] - [$P$ - [$P_D$ [to]] - [$D$ - [$D_N$ [the]] - [$N$ [school]]]]]]] -\end{forest} -``` - -
-
-While *will* is straightforward, what about *-ed* and *-s*? These markers are *suffixes* (bound morphemes) and are directly attached to the end of the verb. Attempts to diagram these with our current knowledge of syntactic structure begin and end with nonsense. However, on analogy with *will*, it should follow that the tense markers *-ed* and *-s* actually occupy the same position, given that our diagramming of *will* works without issue. But how can this be?
-
-In this section, we introduce the idea of *movement*: that certain portions of sentences can *move* in certain *constrained* fashions around the rest of the sentence, almost (but not quite) *copying* themselves to other places. We saw hints of this with our constituency tests earlier producing grammatically valid but unanalyzable sentences. For now, we shall simply assert that tense suffixes move on analogy with *will*, without asserting anything about movement in general. We shall cover constraints on movement shortly.
-
-![(-ed)](tense-ed.png)
-
-LaTeX - -```forest -\begin{forest} -[$T$ - [$D$ [I, roof]] - [$T_D$ - [$T_{D,V}$ [(-ed), name=tense]] - [$V$ - [$V_P$ [walked, name=walk]] - [$P$ - [$P_D$ [to]] - [$D$ - [$D_N$ [the]] - [$N$ [school]]]]]]] - \draw[->,dotted] (tense) to[out=south,in=west] (walk); -\end{forest} -``` - -
- -(we say that *-ed* leaves a **trace** when moving to *walk*. we denote this here with *(-ed)*, but another common notation is to write *t*.) - -English's first-person present does not inflect the verb, and so we must introduce a null $T$. A similar example is given for the present tense in the third person, which does have an explicit tense marker. - -![()](tense-null.png) -
-LaTeX - -```forest -\begin{forest} -[$T$ - [$D$ [I, roof]] - [$T_D$ - [$T_{D,V}$ [∅, name=tense]] - [$V$ - [$V_P$ [walk, name=walk]] - [$P$ - [$P_D$ [to]] - [$D$ - [$D_N$ [the]] - [$N$ [school]]]]]]] - \draw[->,dotted] (tense) to[out=south,in=west] (walk); -\end{forest} -``` - -
- -![(-s)](tense-s.png) -
-LaTeX - -```forest -\begin{forest} -[$T$ - [$D$ [Alice, roof]] - [$T_D$ - [$T_{D,V}$ [(-s), name=tense]] - [$V$ - [$V_P$ [walk, name=walk]] - [$P$ - [$P_D$ [to]] - [$D$ - [$D_N$ [the]] - [$N$ [school]]]]]]] - \draw[->,dotted] (tense) to[out=south,in=west] (walk); -\end{forest} -``` - -
-
-This now makes our top-level phrase type $T$ instead of $V$. It will not remain so for very long, as we shall see in Agree.
-
-### verb raising
-
-### subject-auxiliary inversion
-
-### head movement
-
-## Move, Part II
-
-### wh-movement
-
-### subject raising
-
-Consider the following sentence: *Alice will speak to the assembly*. With our current knowledge of syntax, we would diagram it like so:
-
-![`[T [D Alice] [T_D [T_{D,V} will] [V [V_P speak] [P [P_D to] [D [D_N the] [N assembly]]]]]]`](no-subject-movement.png)
-
-LaTeX - -```forest -\begin{forest} - [$T$ - [$D$ [Alice, roof]] - [$T_D$ - [$T_{D,V}$ [will]] - [$V$ - [$V_P$ [speak]] - [$P$ - [$P_D$ [to]] - [$D$ - [$D_N$ [the]] - [$N$ [assembly]]]]]]] -\end{forest} -``` - -
-
-The $D$ *Alice* here is the subject. While replacing it with some $D$s produces grammatical sentences (e.g. *The prime minister will speak to the assembly*), this is not true of all $D$s. Slotting in inanimate $D$s, as in *Time will speak to the assembly* and *Knowledge will speak to the assembly*, produces grammatically unacceptable sentences. So there is some *selection* occurring somewhere in the sentence that wants a particular *feature set* (f-features) from the subject $D$: specifically, animacy.
-
-Observe, however, that our tree structure suggests that $T$ - and only $T$ - is involved in the selection of *Alice* as the subject, given locality of selection and the extended projection principle. But this can't be quite right. Plenty of other sentences involving the $T$ *will* are just fine with inanimate subjects: *Time will pass*, *Knowledge will be passed on*, etc. (Notice that *Alice will pass* and *Alice will be passed on* are similarly ungrammatical.) How do we reconcile this?
-
-We now introduce the idea of **subject raising** / $vP$ shells. Our observations above point towards the $V$ of the sentence - rather than the $T$ - somehow selecting for the subject $D$. This selection would break our guiding principle of locality of selection. But this behavior *does* occur, and as an empirical science we must adjust our theory accordingly. Can we extend our model to explain this, *without* modifying the locality of selection that has been so useful thus far? We can indeed, with movement, as illustrated in the following tree.
-
-![`[T [D Alice] [T_D [T_{D,V} will] [V [D (subj)] [V_D [V_{D,P} speak] [P [P_D to] [D [D_N the] [N assembly]]]]]]]`](subject-movement.png)
-
-LaTeX - -```forest -\begin{forest} - [$T$ - [$D$ [Alice, roof, name=Alice]] - [$T_D$ - [$T_{D,V}$ [will]] - [$V$ - [$D$ [(subj), roof, name=subj]] - [$V_D$ - [$V_{D,P}$ [speak]] - [$P$ - [$P_D$ [to]] - [$D$ - [$D_N$ [the]] - [$N$ [assembly]]]]]]]] - \draw[->,draw opacity=0.5] (subj) to[out=west,in=south] (Alice); -\end{forest} -``` - -
-
-So we say that *Alice* is originally selected by the $V$ and *moves* to its surface position in the $T$. *Alice* satisfies the projection principle by being selected by the $V$, satisfies the extended projection principle by fulfilling the role of the subject for the $T$, and satisfies locality of selection by sitting in specifier position for both the $V$ and the $T$. Our concept of movement allows *Alice* to play **both** roles at the same time. This does mean that every tree diagram we have drawn up until now is inaccurate, and that almost every tree we draw going forward will have to include this somewhat redundant subject raising. This is a fine tradeoff to make in exchange for accurately describing previously-unclear syntactic behavior.
-
-This subject raising is an example of **A-movement** (argument movement). A-movement exists in contrast to **A'-movement** (movement to a non-argument position), which is responsible for wh-movement and topicalization: two topics that we shall touch on shortly.
-
-
-### small clauses
-
-## Agree
-
-### theta roles
-
-### locality
-
-### binding
-
-How do pronouns work?
-
-First, some definitions. We distinguish several classes of pronouns:
-- **anaphors**: *reflexive* and *reciprocal* pronouns i.e. *herself*, *each other*, ...
-- **personal pronouns**: *her*, *him*, *they*, *it*, ...
-- **possessive pronouns**: *ours*, *theirs*, *hers*, *his*, ...
-- ...
-
-Every pronoun (pro-form, really) has an **antecedent**: that is, the phrase or concept it is in *reference* to. In contrast to pronouns, we also have **r-expressions**: **independently referential** expressions. These are names, proper nouns, descriptions, epithets, and the like - ex. *Alice*, *British Columbia*, *the man on the corner*, *the idiot* - and they have no antecedent.
-
-We say that two nodes are **coreferential** (or **co-indexed**) if they refer to the same concept or entity. On tree diagrams, we often denote this with numerical ($_0$, $_1$, ...) or alphabetical ($_i$, $_j$, $_k$) subscripts. (Though we could also indicate this with arrows, we prefer to reserve those for movement, so as to not clutter our diagrams too much.) This is a useful notion when it comes to pronouns.
-
-...
-
-The theory of binding operates under three fundamental principles.
-- **Principle A**: an anaphor must be bound in its domain.
-- **Principle B**: a (personal) pronoun must be free in its domain.
-- **Principle C**: an r-expression may never be bound.
-
-Our principles imply various things. Principle A implies that:
-- a reflexive must be *coreferential* with its antecedent
-  - (agreeing in person, number, and gender)
-- the antecedent of a reflexive must *c-command* the reflexive
-- the reflexive and its antecedent must be *in all the same nodes* that have a subject
-
-
-### raising and control
-
-Consider the following sentences:
-- *Alice seems to sleep often.*
-- *Alice hopes to sleep often.*
-
-With our current knowledge, we would diagram these sentences near-identically. Yet a closer investigation reveals that they are in fact deeply structurally different.
-- *It seems Alice sleeps a lot.*
-- \* *It hopes Alice sleeps a lot.*
-
-## Advanced Syntax
-
-### on languages other than english
-
-We have so far approached our discussion of syntax entirely from the point of view of the English language. All of our motivations - our rationales, our counterexamples - have been drawn from and centred around English.
This raises the question: just how much of this *holds*, cross-linguistically? What of all the other wide and varied languages of the world - which, clearly, our frameworks must have been built not only to accommodate but to represent *well*, given we are discussing them now, more than fifty years after the Chomskyan revolution and more than a century after the field of linguistics recognized its Indo-European biases?
-
-We have discussed some principles that, clearly, cannot be a feature of all natural languages: like do-support. However, other concepts - like the Subjacency Condition - seem possibly broad enough to be applied across a variety of languages. Is that true? Is *anything* broad enough? (Alternatively: does a [universal grammar](https://en.wikipedia.org/wiki/Universal_grammar) exist?)
-
-This notion of *principles* that occur for some languages and do not occur for others forms what is either the framework of *Principles and Parameters*, or *Government and Binding Theory*. I do not understand the difference between them, and suspect what is above to be a mixture of both, as neither was explicitly mentioned. Nevertheless, everything given here is for English, not some cross-linguistic model of the mind. English remains useful by virtue of being the L1 of myself and many others - and by being such a *mess* of a language that its structure cannot be explained away trivially.
-
-### negation
-
-### ellipsis
-
-## References
-
-- ✨ [An Introduction to Syntactic Analysis and Theory](https://annas-archive.org/md5/11bbf70ff9259025bc6985ba3fa4083b)
-- ✨ [The Science of Syntax](https://pressbooks.pub/syntax/)
-- MIT 24.902: [2017](https://web.mit.edu/norvin/www/24.902/24902.html), [2015](https://ocw.mit.edu/courses/24-902-language-and-its-structure-ii-syntax-fall-2015/), [2003](https://ocw.mit.edu/courses/24-902-language-and-its-structure-ii-syntax-fall-2003/)
diff --git a/linguistics/tense-ed.png b/linguistics/tense-ed.png
deleted file mode 100644
index cac54d6..0000000
Binary files a/linguistics/tense-ed.png and /dev/null differ
diff --git a/linguistics/tense-no-tense.png b/linguistics/tense-no-tense.png
deleted file mode 100644
index 1ea1c6d..0000000
Binary files a/linguistics/tense-no-tense.png and /dev/null differ
diff --git a/linguistics/tense-null.png b/linguistics/tense-null.png
deleted file mode 100644
index 106e0b9..0000000
Binary files a/linguistics/tense-null.png and /dev/null differ
diff --git a/linguistics/tense-s.png b/linguistics/tense-s.png
deleted file mode 100644
index 9f8120f..0000000
Binary files a/linguistics/tense-s.png and /dev/null differ
diff --git a/linguistics/tense-will.png b/linguistics/tense-will.png
deleted file mode 100644
index 0d10a69..0000000
Binary files a/linguistics/tense-will.png and /dev/null differ
diff --git a/linguistics/types.md b/linguistics/types.md
deleted file mode 100644
index 437ab50..0000000
--- a/linguistics/types.md
+++ /dev/null
@@ -1,4 +0,0 @@
----
-layout: linguistics
-title: linguistics/types
----
diff --git a/math/algebra.md b/math/algebra.md
new file mode 100644
index 0000000..3717445
--- /dev/null
+++ b/math/algebra.md
@@ -0,0 +1,69 @@
+---
+layout: algebra
+title: mathematics/algebra
+---
+
+# algebra
+
+Modern algebra is the study of **algebraic structures**: groups, rings, fields, modules, and the like. These structures are very abstract, and so results about them can be applied to a wide variety of situations.
+
+## structures
+
+An **algebraic structure** is a set with a collection of *operations* and a finite set of *axioms* those operations must satisfy.
+
+A [**group**](group-theory) $G$ is a set with a single binary operation $⋆$ satisfying the following axioms:
+- associativity: $∀a,b,c : (a⋆b)⋆c = a⋆(b⋆c)$
+- identity: $∃e, ∀a : e⋆a = a⋆e = a$
+- inverse: $∀a, ∃a^{-1} : a⋆a^{-1} = e$
+- An Abelian or **commutative group** satisfies an additional axiom:
+  - commutativity: $∀a,b : a⋆b=b⋆a$
+
+A **monoid** is a group without the inverse axiom.
+
+A [**ring**](ring-theory) $R$ is a set with two binary operations $+$ and $×$ satisfying the following axioms:
+- $(R, +)$ is a *commutative group*:
+  - associativity: $∀a,b,c : (a+b)+c = a+(b+c)$
+  - additive identity: $∃0, ∀a : 0+a = a+0 = a$
+  - additive inverse: $∀a, ∃-a : a+(-a) = 0$
+  - commutativity: $∀a,b : a+b=b+a$
+- $(R, ×)$ is a *monoid*
+  - associativity: $∀a,b,c : (a×b)×c = a×(b×c)$
+  - multiplicative identity: $∃1, ∀a : 1×a = a×1 = a$
+- The *distributive laws* hold for + and ×:
+  - $∀a,b,c : (a+b) × c = (a×c)+(b×c)$
+  - $∀a,b,c : a × (b+c) = (a×b) + (a×c)$
+- An Abelian or **commutative ring** satisfies an additional axiom:
+  - commutativity (of $×$): $∀a,b : a×b=b×a$
+
+A **field** is a *commutative ring* in which all elements sans $0$ have an inverse $a^{-1}$ under multiplication, and in which $0 ≠ 1$. A field may also be thought of as a set on which addition, subtraction, multiplication, and division are defined and behave as they do on $ℝ$.
+
+A [**vector space**](linear-algebra) $V$ over a field $F$ of scalars is a set with a binary operation $+$ and a binary function $· : F × V → V$ satisfying the following axioms:
+- $(V, +)$ is a *commutative group*:
+  - associativity: $∀u,v,w : (u+v)+w = u+(v+w)$
+  - additive identity: $∃0, ∀v: 0+v = v+0 = v$
+  - additive inverse: $∀v, ∃-v: v+(-v) = 0$
+  - commutativity: $∀u,v : u+v=v+u$
+- $(V, ·)$ is a *scalar operation*:
+  - scalar identity: $∃1 ∈ F, ∀v ∈ V : 1v = v$
+  - compatibility: $∀a,b ∈ F, ∀v ∈ V : (ab)v = a(bv)$
+- The *distributive laws* hold:
+  - $∀a ∈ F, ∀u,v ∈ V : a(u+v) = au+av$
+  - $∀a,b ∈ F, ∀v ∈ V : (a+b)v = av + bv$
+
+A **module** $M$ is a generalization of a *vector space* that works over a ring $R$ instead of a field.
+
+A [**lattice**](order-theory) $L$ is a set with two binary operations $∧$ and $∨$ satisfying the following axioms:
+- commutativity:
+  - $∀a,b : a ∧ b = b ∧ a$
+  - $∀a,b : a ∨ b = b ∨ a$
+- associativity:
+  - $∀a,b,c : a ∧ (b ∧ c) = (a ∧ b) ∧ c$
+  - $∀a,b,c : a ∨ (b ∨ c) = (a ∨ b) ∨ c$
+- absorption:
+  - $∀a,b : a ∧ (a ∨ b) = a$
+  - $∀a,b : a ∨ (a ∧ b) = a$
+- idempotence:
+  - $∀a : a ∧ a = a$
+  - $∀a : a ∨ a = a$
+
+An **algebra** $A$ over a field $F$ is a *vector space* equipped with an additional *bilinear product*. It is also common to consider algebras over a *ring* (and thus $A$ as a *module* with an additional product).
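+
+These axiom lists are finite and quantify over the whole set, so for *small* structures they can be checked mechanically. Below is a minimal illustrative sketch (the function name and structure are my own, not from any reference) that brute-forces the group axioms for $(ℤ_5, +)$ and $(ℤ_5 ∖ \{0\}, ×)$:
+
+```python
+# Brute-force check of the group axioms on a finite set with operation `op`.
+def is_group(elements, op):
+    # closure
+    if any(op(a, b) not in elements for a in elements for b in elements):
+        return False
+    # associativity
+    if any(op(op(a, b), c) != op(a, op(b, c))
+           for a in elements for b in elements for c in elements):
+        return False
+    # identity: some e with e*a = a = a*e for all a
+    identity = next((e for e in elements
+                     if all(op(e, a) == a == op(a, e) for a in elements)), None)
+    if identity is None:
+        return False
+    # inverses: every a has some b with a*b = e
+    return all(any(op(a, b) == identity for b in elements) for a in elements)
+
+Z5 = set(range(5))
+print(is_group(Z5, lambda a, b: (a + b) % 5))        # True
+print(is_group(Z5 - {0}, lambda a, b: (a * b) % 5))  # True: so Z_5 is a field
+```
+
+Both checks pass, which is one way of seeing that $ℤ_5$ is a field; rerunning the second check on $ℤ_6 ∖ \{0\}$ fails closure, since $2 × 3 ≡ 0$.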
diff --git a/math/analysis.md b/math/analysis.md
new file mode 100644
index 0000000..7bfef62
--- /dev/null
+++ b/math/analysis.md
@@ -0,0 +1,6 @@
+---
+layout: analysis
+title: mathematics/analysis
+---
+
+# analysis
diff --git a/math/category-theory.md b/math/category-theory.md
new file mode 100644
index 0000000..19100a7
--- /dev/null
+++ b/math/category-theory.md
@@ -0,0 +1,5 @@
+---
+layout: foundations
+title: mathematics/foundations/category theory
+---
+
diff --git a/math/coding-theory.md b/math/coding-theory.md
new file mode 100644
index 0000000..b080ed6
--- /dev/null
+++ b/math/coding-theory.md
@@ -0,0 +1,1049 @@
+---
+layout: algebra
+title: mathematics/algebra/coding theory
+---
+
+# MATH 342: Coding Theory
+
+
+
+Sections 1-9, 11-12, 14
+
+## Things I forget
+- $M$ = number of codewords. the sphere packing bound
+- $n$ = length of codewords
+- $d$ = distance between codewords, or the minimal distance of a code
+- $k$ = the dimension of the code; the number of row vectors in a generator matrix
+- $q$ = the alphabet
+- $V(n, q) = F_q^n$
+- (n, ?, ?)-code
+- [?, ?, ?]-code
+- r = ???
+- $C$: a code.
+- $G$: the generator matrix.
+- $H$: the parity-check matrix.
+
+## What is coding theory?
+
+
+
+Coding theory deals with **codes**: ways to represent data robustly & securely.
+MATH 342 will mostly cover the theory behind *detecting & correcting* errors.
+
+
+
+Definition: A **code** $C$ is a set of codewords, for example:
+
+```
+C = {
+  w1 = 00000
+  w2 = 01011
+  w3 = 10101
+  w4 = 11110
+}
+```
+
+A goal of codes is to *detect and correct errors*. How can we do this? We will come back to this later.
+
+Definition: A **q-ary** $(n, M, d)$ code $C$ is a set of codewords such that:
+- $q$ = size of alphabet
+- $n$ = length of codewords (when consistent)
+- $M$ = number of codewords
+- $d$ = *minimal distance* of the code
+
+A *2-ary code* is a **binary code**, a *10-ary code* is a **decimal code**, etc.
+
+Let $(F_q)^n$ denote the set of all ordered $n$-tuples $a = a_1 a_2 ... a_n$, where each $a_i \in F_q$.
+The elements of $(F_q)^n$ are called *vectors* or *words*.
+The order of the set $(F_q)^n$ is $q^n$.
+A $q$-ary code of length $n$ is a subset of $(F_q)^n$.
+
+### Distance functions & Hamming distance
+
+Consider the distance:
+- $d(x, y) = \text{# of places where they differ}$
+- $d(C) = min(d(x, y))$ such that $x ≠ y$
+
+Definition: The **Hamming distance** of two words $a, b$ is the number of *places* where $a$ and $b$ differ.
+
+The **distance function** $dist(x, y)$ on a set $S$ satisfies:
+- $dist(x, y) = dist(y, x)$
+- $dist(x, y) ≥ 0$, and $dist(x, y) = 0 ⟺ x = y$
+- $dist(x, z) ≤ dist(x, y) + dist(y, z)$
+
+The last one is known as the *triangle inequality*.
+
+Aside: A set $S$ plus a distance function $dist$ is known as a **metric space**.
+
+### Error detection & correction
+
+Definition: The **minimum distance** between distinct codewords is denoted $d(C)$, and given to be $d(C) = min \{dist(x, y) | x, y ∈ C, x ≠ y\}$, for some code $C$.
+
+Given a code $C$ with $d(C) = d$:
+1. The code can *detect* up to $d-1$ errors.
+2. The code can *correct* up to $\lfloor (d-1)/2 \rfloor$ errors.
+
+### Nearest neighbour decoding
+
+Just how should we go about *decoding* these codes, particularly when there is error correction involved? The obvious answer is the right one:
+**nearest neighbour decoding** decodes a received word to a codeword whose distance from the received word is as *small* as possible.
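+
+As a concrete illustration, here is a small sketch (helper names are my own) computing Hamming distances and doing nearest neighbour decoding for the example code $C$ above:
+
+```python
+C = ["00000", "01011", "10101", "11110"]
+
+def hamming(x, y):
+    # number of places where x and y differ
+    return sum(a != b for a, b in zip(x, y))
+
+def nearest_neighbour(received):
+    # decode to a codeword at minimal distance from the received word
+    return min(C, key=lambda w: hamming(w, received))
+
+print(min(hamming(x, y) for x in C for y in C if x != y))  # 3, i.e. d(C) = 3
+print(nearest_neighbour("01010"))  # "01011": a single error, corrected
+```
+
+Since $d(C) = 3$, this code detects up to 2 errors and corrects 1, matching the formulas above.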
+
+Nearest neighbour decoding *maximizes* the likelihood of correcting errors given the following assumptions:
+- Every symbol is equally likely to have been received in error
+- If a symbol is received in error, then each of the $q-1$ possible errors is equally likely.
+
+This is also known as a $q$-ary **symmetric channel**.
+
+### What makes a good code?
+
+**The main coding theory problem.** A good $(n, M, d)$-code has:
+- a small $n$: for compactness & subsequently speed
+- a large $M$: to fit many codewords & subsequently a variety of messages
+- a large $d$: to correct many errors
+
+What is known as **the main coding theory problem** is a solution of how to balance these conflicting aims, by way of *optimizing* one of the parameters for *given values* of the other two.
+
+The **usual version of the main coding problem** is to find the largest possible $M$, i.e. the code with the most codewords, given a fixed $n, d, q$.
+
+We denote by $A_q(n, d)$ the largest $M$ such that there exists a $q$-ary $(n, M, d)$-code.
+- This is known for $d=1$: $A_q(n, 1) = q^n$
+- This is known for $d=n$: $A_q(n, n) = q$
+
+### Equivalence of codes
+
+**Definition**: Two codes are *equivalent* if we can go from one to the other using elementary operations. Elementary operations are:
+1. Permute rows.
+2. Permute columns.
+3. Permute all the symbols in a column.
+
+Any $q$-ary $(n,M,d)$-code over an alphabet $\{0,1,...,q-1\}$ is also equivalent to an $(n,M,d)$-code which contains the zero vector.
+- Proof: this is trivial: pick some codeword, and in each column permute the symbols so that that codeword's symbol becomes zero.
+
+### Weight of a vector
+
+The weight of a vector in $(F_2)^n$ is simply the count of $1$ bits.
+- If $x, y \in (F_2)^n$, then $d(x,y) = w(x+y)$.
+- If $x, y \in (F_2)^n$, then $d(x,y) = w(x) + w(y) - 2w(x ∩ y)$.
+
+Some theorems about oddness and evenness:
+- If $d$ is odd, then a binary $(n,M,d)$-code exists if and only if a binary $(n+1, M, d+1)$-code exists.
+- If $d$ is odd, then $A_2(n+1, d+1) = A_2(n,d)$. Equivalently, if $d$ is even, then $A_2(n,d) = A_2(n-1,d-1)$.
+
+### Binomial coefficients
+
+Note the definition of the binomial:
+$$\binom{n}{m} = \frac{n!}{m!(n-m)!}$$
+
+An ordered selection of $m$ distinct objects from a set of $n$ distinct objects can be made in $\frac{n!}{(n-m)!}$ ways.
+
+An unordered selection of $m$ distinct objects from a set of $n$ distinct objects can be made in $\binom{n}{m} = \frac{n!}{m!(n-m)!}$ ways.
+
+todo: binomial coefficients
+sphere packing bounds
+perfect codes
+balanced blocks
+baby finite fields
+
+
+
+### Sphere Packing Bound
+
+Consider $(n, M, d)$ for some $q$-ary code $C$.
+- n: length of words
+- M: number of words
+- d: minimum distance
+- q: size of the alphabet
+
+Recall the bracket conventions:
+- $(n, M, d)$-codes: general codes, not necessarily linear.
+- $[n, k, d]$-codes: the same parameters as $(n, q^k, d)$-codes, but *linear*, so $M = q^k$.
+- $[n, k]$-codes: also linear, with $d$ left implied.
+
+Ham(r, q)
+
+Sphere packing is an application of the main coding theory problem. Fix $q, n, d$. Then $A_q(n, d) = max(M)$.
+
+Assume $d$ is odd, and $d = 2t + 1$.
+
+$F_q^n$ = all words of length $n$.
+
+Consider the idea of fitting codewords into this space: around each codeword we can draw a *sphere*, and a minimum distance of $2t+1$ guarantees that spheres of radius $t$ are disjoint.
+
+$S(w_i, t)$ = all words of distance ≤ $t$ from $w_i$
+- where $w_i$ = the center
+- $t$ = the radius
+
+$A_q(n, d)$ = the maximum number of disjoint spheres of radius $t$ that fit into $F_q^n$
+
+$N_t$ = the number of points in a sphere of radius $t$.
+$F_q^n$ has $q^n$ points.
+
+**Sphere packing bound**
+$M$ = number of spheres ≤ (# points in $F_q^n$) / (# points in a sphere) = $q^n/N_t$
+
+The **sphere-packing bound** or **Hamming bound** states that a $q$-ary $(n, M, 2t+1)$-code satisfies $M · N_t ≤ q^n$, i.e.
+$$M \left( \sum_{i=0}^{t} \binom{n}{i} (q-1)^i \right) ≤ q^n$$
+
+A code that achieves the sphere-packing bound is called a **perfect code**. Intuitively, for a perfect code, the $M$ spheres of radius $t$ centred on the codewords "fill" the whole space $(F_q)^n$ without overlapping.
+
+Example: binary repetition codes: $n=3$, $d=3$, $q=2$: i.e. $C = \{000, 111\}$.
+- $M ≤ 2^3 / N_t = 8/4 = 2$
+
+All binary repetition codes of odd length are perfect. (These are the codes with $n = d$.)
+
+Finding $N_t$ for a general $n, q, t$: to land exactly $i$ errors away from the centre, choose which $i$ of the $n$ symbols differ ($\binom{n}{i}$ ways), then choose a new symbol for each ($q-1$ choices). So one error contributes $n(q-1)$ points, and in general
+$$N_t = \sum_{i=0}^{t} \binom{n}{i}(q-1)^i$$
+
+Example: finding the bound for $q=3$, $n=7$, $d=5$. How big of a code can we find for these parameters? $d=5$, so $t=2$ (from $d=2t+1$).
+
+$N_t = 1 + \binom{7}{1}(q-1) + \binom{7}{2}(q-1)^2 = 1 + 14 + 84 = 99$
+$M ≤ 3^7/99 = 2187/99 ≈ 22.09$
+$A_3(n=7, d=5) ≤ 22$
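+
+The arithmetic above is easy to mechanize. A small sketch (standard library only; the function names are mine):
+
+```python
+from math import comb, floor
+
+def sphere_size(n, q, t):
+    # number of points within Hamming distance t of a fixed word in F_q^n
+    return sum(comb(n, i) * (q - 1) ** i for i in range(t + 1))
+
+def sphere_packing_bound(n, q, d):
+    t = (d - 1) // 2
+    return floor(q ** n / sphere_size(n, q, t))
+
+print(sphere_size(7, 3, 2))           # 99
+print(sphere_packing_bound(7, 3, 5))  # 22, matching A_3(7, 5) ≤ 22
+```
+
+Taking the floor is justified because $M$ is an integer.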
+
+## A Taste of Abstract Algebra
+
+
+
+Recall sphere packing: in $ℝ^2$, it's easy, because the sphere centres form a lattice.
+If you fix the zero point at a centre, the set of centres is closed under addition.
+A **lattice** is a set of points closed under addition.
+
+$ℤ^n ⊆ ℝ^n$
+
+We want to do the same for any code $C$ that is closed under addition, with $C ⊆ F_q^n$ in analogy with $ℤ^n ⊆ ℝ^n$.
+But how do we add two vectors over an arbitrary alphabet?
+
+### Rings and Fields
+
+A **ring** $R$ is a set of elements with *two* operations + (addition) and • (multiplication) satisfying:
+- $R$ is *closed* under $+$ and $•$
+- + is commutative and associative; • is associative; • distributes over +
+  - commutative on addition: $a+b = b+a$
+  - associative: $(a+b)+c = a+(b+c)$, $a•(b•c) = (a•b)•c$
+  - distributive: $a•(b+c) = a•b + a•c$
+- The additive identity element $0$ must exist
+  - $∀a ∈ R : a + 0 = a$
+- The multiplicative identity element $1$ must exist
+- The additive inverse element $(-a)$ must exist
+  - $∀a ∈ R : a + (-a) = 0$
+
+We will take "ring" to refer to **abelian/commutative rings**. This adds commutativity on multiplication ($a•b = b•a$). A ring with a multiplicative identity $1$ ($∀a ∈ R : a • 1 = a$) is also called *unital*: in some conventions rings need not have a $1$ at all - the even integers $2ℤ$, for instance, have no $1$ - but all of ours will.
+
+In every ring, $0$ times anything is $0$: from $(0 + a)•b = a•b$ we get $0•b + a•b = a•b$, and so $0•b = 0$.
+$R = \{0\}$ is a ring, but not a field.
+
+A **field** $F$ is a set of elements with *two* operations + (addition) and • (multiplication) satisfying:
+- $F$ is *closed* under + and •
+- + and • are commutative, associative, and distributive
+  - commutative: $a+b = b+a$, $a•b = b•a$
+  - associative: $(a+b)+c = a+(b+c)$, $a•(b•c) = (a•b)•c$
+  - distributive: $a•(b+c) = a•b + a•c$
+- The additive identity element $0$ must exist
+  - $∀a ∈ F : a + 0 = a$
+- The multiplicative identity element $1$ must exist
+  - $∀a ∈ F : a • 1 = a$
+- The additive inverse element $(-a)$ must exist
+  - $∀a ∈ F : a + (-a) = 0$
+- The multiplicative inverse element $a^{-1}$ must exist, when $a ≠ 0$
+  - $∀a ≠ 0 ∈ F : a • a^{-1} = 1$
+  - (this is *not* true for $0$: the axiom explicitly excludes it)
+
+Field operations behave as the standard operations on the rationals and reals do. The only difference between a field and an (abelian) ring is the existence of a *multiplicative inverse*.
+
+A ring can also be regarded as having *three* operations, with $-$ given by its status as an additive inverse, i.e. $a-b = a+(-b)$.
+
+A field can also be regarded as having *four* operations, with $-$ and $÷$ given by their definitions as inverses, i.e. $a-b = a+(-b)$ and $a÷b = a(b^{-1})$ for $b ≠ 0$.
+
+The operation $a•b$ can be more succinctly written as $ab$. The operation $a÷b$ can be alternatively written as $a/b$.
+
+### Euclidean Arithmetic
+
+For integers $n$ and $m > 0$, write $n/m = q + r/m$ (so $n = qm + r$ with $0 ≤ r < m$); $r$ is the **principal remainder**.
+
+$m$ **divides** $n$ (written $m|n$) if $n/m$ is an integer.
+
+A number $n$ is **prime** if its divisors are $1, n$ only.
+
+Theorem: $gcd(m, n) = gcd(m, n-q•m)$
+
+### The modular integers
+
+Consider $ℤ_m = (\{0, 1, ..., m-1\}, +, •)$, with both operations taken mod $m$.
+For example: $ℤ_2 = \{0,1\}$.
+- $0+1=1$
+- $1+1=0$
+- $0•1=0$
+- $1•1=1$
+
+Another example: $ℤ_4 = \{0,1,2,3\}$
+
+`+` | 0 | 1 | 2 | 3
+----|---|---|---|---
+0 | 0 | 1 | 2 | 3
+1 | 1 | 2 | 3 | 0
+2 | 2 | 3 | 0 | 1
+3 | 3 | 0 | 1 | 2
+
+`•` | 0 | 1 | 2 | 3
+----|---|---|---|---
+0 | 0 | 0 | 0 | 0
+1 | 0 | 1 | 2 | 3
+2 | 0 | 2 | 0 | 2
+3 | 0 | 3 | 2 | 1
+
+Note the difference between the integers and the reals. In ℤ, $a/b$ may not exist - ℤ is a *ring*. In ℝ, $a/b$ always exists if $b ≠ 0$ - ℝ is a *field*.
+
+### Division in $ℤ_m$
+
+When does $1/n$ exist in $ℤ_m$?
+- $1/n = a ⟺ a•n ≡ 1 (mod m)$
+- $⟺ a•n + b•m = 1$ for some b
+- $⟺ gcd(n,m) = 1$
+
+An example of modular math: $a•5 + b•7 = 1$ is solved by $3•5 + (-2)•7 = 1$, so $1/5 = 3$ in $ℤ_7$.
+
+Ex. in $ℤ_4$, $1/2$ does not exist:
+- $gcd(2, 4) ≠ 1$
+
+Theorem: $ℤ_m$ is a field if and only if $m$ is prime.
+Proof:
+- Assume $m$ is prime. We must show every $n ∈ \{1, 2, ..., m-1\}$ is invertible: and indeed each such $n$ has $gcd(m, n) = 1$, since $m$ is prime.
+- Conversely, if $m$ is not prime, then $m = m_1 • m_2$ for some $m_1, m_2 > 1$. Then $m_1 ≠ 0$ has no inverse, as $gcd(m_1, m) = m_1 > 1$.
+
+The **invertible elements** of $ℤ_m$ (the units of $ℤ_m$) are written $ℤ_m^×$. It is the **group of units**.
+- ex. $ℤ_7^× = \{1, 2, 3, 4, 5, 6\}$: every non-zero element
+- ex. $ℤ_8^× = \{1, 3, 5, 7\} ⊂ ℤ_8$
+
+A **finite or Galois field** $𝔽_p$, $GF(p)$ is a field that, well, is finite. Every $ℤ_p$ for prime $p$ is a finite field. A finite field of order $p^n$ also exists for every $n ≥ 1$ - but for $n > 1$ it is *not* $ℤ_{p^n}$!
+
+$𝔽_4$ is a four element field. It is *not* $ℤ_4$: we construct it later out of polynomials.
+
+Consider $ℤ_6$: $2•3 = 0$, a pair of *zero divisors*.
+A field has *no* zero divisors. Why? Suppose $a•b = 0$ with $a ≠ 0$. Multiplying by $1/a$ gives $b = 0/a = 0$.
+
+The **cancellation property** of a field states that if $a•b = a•c$ and $a ≠ 0$, then $b=c$.
+
+### Back to coding theory
+
+We can now bridge our knowledge of fields back to coding theory: consider *alphabets* to be *finite fields*, $𝔽_q^n$ to be a *vector space*, with *codewords* as vectors $w = (a_1, a_2, ..., a_n)$.
+
+An example is an ISBN code.
+- textbook: $0-19-853803-0$
+- language, publisher, book number, checksum
+- codewords: all vectors $(x_1, ..., x_{10})$ over $ℤ_{11}$ that satisfy the linear equation
+- $1x_1 + 2x_2 + ... + 10x_{10} ≡ 0 \pmod{11}$
+- the checksum digit is set to whatever value satisfies that equation
+
+Why is the distance between valid ISBNs at least two? Changing any single digit changes the sum $\sum i·x_i$ by a non-zero amount mod 11, breaking the equation: so two valid ISBNs must differ in at least two places.
+
+The ISBN code can:
+- Detect a single error
+- Detect if we switch two numbers
+- Correct a single error *if we know where it is*
+
+Given the equation $1x_1 + 2x_2 + ... + 10x_{10} ≡ 0 \pmod{11}$, we can solve for any one digit in terms of the others: for example, $x_7 = -\frac{1}{7}(1x_1 + 2x_2 + ... + 10x_{10} - 7x_7)$.
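+
+Here is an illustrative sketch of that checksum (the helper names are mine; in a real ISBN the digit 10 is written as 'X', which this sketch glosses over):
+
+```python
+def isbn_check(digits):
+    # valid iff 1*x1 + 2*x2 + ... + 10*x10 ≡ 0 (mod 11)
+    return sum(i * x for i, x in enumerate(digits, start=1)) % 11 == 0
+
+isbn = [0, 1, 9, 8, 5, 3, 8, 0, 3, 0]  # the textbook's ISBN, 0-19-853803-0
+print(isbn_check(isbn))  # True
+
+def correct_known_position(digits, i):
+    # if we know position i (1-indexed) is wrong, solve for it mod 11
+    rest = sum(j * x for j, x in enumerate(digits, start=1) if j != i)
+    digits[i - 1] = (-rest * pow(i, -1, 11)) % 11
+    return digits
+
+damaged = isbn.copy()
+damaged[4] = 7  # corrupt position 5
+print(correct_known_position(damaged, 5) == isbn)  # True: error repaired
+```
+
+The correction step works because every position weight $i$ is invertible mod the prime 11.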
+
+These equations are linear, so linear algebra comes in handy...
+
+... missed a bunch ...
+
+
+
+### Vector Spaces on Finite Fields
+
+Our finite fields can be viewed as **vector spaces**, with the *elements* as **scalars** and the ordered $n$-tuples over $GF(q)$ as **vectors**. We denote the set of all vectors of length $n$ as $V(n,q)$. We must define two additional operations within $V(n,q)$:
+- *addition* of vectors: if $x = (x_1, x_2, ..., x_n)$ and $y = (y_1, y_2, ..., y_n)$ and $x,y \in V(n, q)$, then $x+y = (x_1 + y_1, x_2 + y_2, ..., x_n + y_n)$.
+- *multiplication* of a vector by a scalar: if $x = (x_1, x_2, ..., x_n) \in V(n, q)$, $a \in GF(q)$, then $ax = (ax_1, ax_2, ..., ax_n)$.
+
+The formal axioms for a vector space are as follows. Given $\textbf{u} = (u_1, u_2, ..., u_n)$, $\textbf{v} = (v_1, v_2, ..., v_n)$, $u,v \in V(n, q)$:
+- $\textbf{u}+\textbf{v} \in V(n, q)$
+- $(\textbf{u}+\textbf{v}) + \textbf{w} = \textbf{u} + (\textbf{v}+\textbf{w})$
+- $\textbf{0} = (0, 0, ..., 0) \in V(n, q)$ satisfying $\textbf{u} + \textbf{0} = \textbf{0} + \textbf{u} = \textbf{u}$.
+- $-\textbf{u} = (-u_1, -u_2, ..., -u_n) \in V(n, q)$
+- $\textbf{u} + \textbf{v} = \textbf{v} + \textbf{u}$
+- (note that the above show $V(n, q)$ forms a commutative group under addition)
+- $a\textbf{v} \in V(n, q)$
+- $a(\textbf{u} + \textbf{v}) = a\textbf{u} + a\textbf{v}$, $(a+b)\textbf{u} = a\textbf{u} + b\textbf{u}$
+- $(ab)\textbf{u} = a(b\textbf{u})$
+- $1\textbf{u} = \textbf{u}$
+
+A (non-empty) *subset* $C$ of $V(n,q)$ is called a **subspace** of $V(n,q)$ if and only if $C$ is closed under addition and scalar multiplication. i.e:
+- if $x,y \in C$, then $x+y \in C$
+- if $a \in GF(q)$ and $x \in C$, then $ax \in C$.
+
+The set $\{0\}$ and the set $V(n, q)$ are **trivial subspaces** and present for every vector space. A subspace is *non-trivial* if it contains at least one vector other than 0.
+- (so the whole space $V(n, q)$ counts as "trivial" by convention, even though it contains nonzero vectors: "non-trivial" is really aimed at subspaces strictly between $\{0\}$ and $V(n,q)$)
+
+A **linear combination** of $r$ vectors $\vec{v_1}, \vec{v_2}, ..., \vec{v_r}$ in $V(n, q)$ is a vector of the form $a_1\vec{v_1} + a_2\vec{v_2} + ... + a_r\vec{v_r}$ where $a_i$ are scalars.
+
+A set of vectors is **linearly dependent** if they can form the zero vector as a linear combination, without all scalars being 0.
+
+A set of vectors is **linearly independent** if they *cannot* form the zero vector as a linear combination without all scalars 0.
+
+Let $C$ be a subspace of $V(n,q)$. Then a *subset* of $C$ is called a **generating set** (or spanning set) of $C$ if every vector in $C$ can be expressed as a linear combination of the vectors in the subset. A generating set which is linearly independent is also called a **basis**.
+
+For a (non-trivial) subspace $C$ of $V(n,q)$, any generating set of $C$ contains a basis of $C$.
+
+For a basis of a subspace $C$ of $V(n,q)$, every vector of $C$ can be expressed *uniquely* as a linear combination of the basis vectors, and $C$ contains exactly $q^k$ vectors. It follows that any two bases of $C$ contain the same number $k$ of vectors: this is called the **dimension** of $C$ and denoted $dim(C)$.
+
+chapter 4 done
+
+## Linear Codes
+
+
+
+A **linear code** over $GF(q)$ is a subspace of $V(n,q)$ for some positive integer $n$. Thus, a *subset* $C$ of $V(n,q)$ is a linear code if and only if it is:
+- closed under addition: $u + v \in C$ for all $u,v \in C$
+- closed under scalar multiplication: $au \in C$, for all $u \in C$, $a \in GF(q)$
+- (why a *subset*? a code is a set of codewords, i.e. a subset of $V(n,q)$; linearity is the extra demand that this subset be a subspace)
+
+If $C$ is a k-dimensional subspace of $V(n,q)$, then the linear code $C$ is also called an **[n,k]-code** or an **[n,k,d]-code**.
+- n: length of codewords
+- k: dimension of $C$
+- d: minimum distance of $C$
+
+Note: a $q$-ary $[n,k,d]$-code is also a $q$-ary $(n,q^k,d)$-code, but not every $(n,q^k,d)$-code is an $[n,k,d]$-code: the round-bracket code need not be *linear*, i.e. need not be a subspace.
+- linear codes are also known as group codes
+
+The **weight** $w(\vec{x})$ of a vector $\vec{x} \in V(n,q)$ is the number of non-zero entries of $\vec{x}$.
+
+For $x,y \in V(n,q)$, $d(x,y) = w(x-y)$. From this, the **minimum distance** of a linear code is equal to the smallest non-zero weight of the codewords.
+
+Linear codes are nice because we can determine the minimum distance by simply examining the weights of all $M$ codewords, rather than comparing all pairs. They're also nice because we can describe them by simply giving a basis.
+
+A $k×n$ matrix whose rows form a basis of a linear $[n,k]$-code is called a **generator matrix** of the code.
+- ex. the generator matrix $\begin{bmatrix} 0 & 1 & 1 \\ 1 & 0 & 1 \end{bmatrix}$ describes a certain $[3,2,2]$-code
+
+Linear $q$-ary codes *cannot* be defined unless $q$ is a prime power. However, it's pretty easy to transform non-prime-power $q$-ary codes into prime-power $q$-ary codes.
+- ($q$ = the size of the alphabet; a prime power is $p^h$ for a prime $p$ and integer $h ≥ 1$: exactly the sizes for which a finite field $GF(q)$ exists)
+
+### Equivalence of Linear Codes
+
+Two linear codes over $GF(q)$ are called **equivalent** if one can be obtained from the other by a combination of operations being:
+- permutation of the positions of the code
+- multiplication of the symbols in a fixed position by a (non-zero) scalar
+  - (i.e. rescaling one coordinate of every codeword at once)
+
+Two $k×n$ matrices generate equivalent linear $[n,k]$-codes over $GF(q)$ if one matrix can be obtained from the other by a sequence of operations:
+- (R1) Permutation of rows
+- (R2) Multiplication of a row by a (non-zero) scalar
+- (R3) Addition of a scalar multiple of one row to another
+- (C1) Permutation of columns
+- (C2) Multiplication of any column by a (non-zero) scalar
+
+Let $G$ be a generator matrix of an $[n,k]$-code. Then by performing the above operations, $G$ can be transformed to the **standard form** $[I_k | A]$, where $I_k$ is the $k×k$ identity matrix, and $A$ is a $k×(n-k)$ matrix.
+
+If two generator matrices can be transformed to the same standard form by *row operations* only, they generate the **same** code. If they must use the column operations to reach the same standard form, they are **equivalent**, but not necessarily the *same* code.
+- (and if column operations are genuinely unavoidable, the codes differ as sets: row operations preserve the row space exactly, so two generator matrices with the same row space are always connected by row operations alone)
+
+chapter 5 done
+
+### Encoding with linear codes
+
+
+
+Suppose that $C$ is an $[n,k]$-code over $GF(q)$ and that $\vec{a}$ is any vector in $V(n, q)$. Then the set $\vec{a} + C$ defined by $\vec{a} + C = \{\vec{a}+\vec{x} | \vec{x} \in C\}$ is called a **coset** of $C$.
+
+For an $[n,k]$-code $C$ over $GF(q)$:
+- *every vector* of $V(n,q)$ is in some coset of $C$
+- every coset contains *exactly* $q^k$ vectors
+- any two cosets are either disjoint or identical (no partial overlap)
+  - Proof: todo...
+
+The **coset leader** is the vector having minimum weight in a coset. (there may be multiple. it is generally fine to pick one randomly.)
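+
+Cosets and their leaders are easy to enumerate by brute force. A small sketch (helpers are my own), using the same binary $[4,2]$-code that appears in the standard array below:
+
+```python
+from itertools import product
+
+C = ["0000", "1011", "0101", "1110"]
+
+def add(x, y):
+    # componentwise addition in V(4, 2)
+    return "".join(str((int(a) + int(b)) % 2) for a, b in zip(x, y))
+
+seen, cosets = set(), []
+for a in ("".join(bits) for bits in product("01", repeat=4)):
+    if a not in seen:               # a starts a new coset
+        coset = {add(a, w) for w in C}
+        seen |= coset
+        cosets.append(coset)
+
+for coset in cosets:
+    leader = min(coset, key=lambda v: v.count("1"))  # a minimum-weight vector
+    print(leader, sorted(coset))
+```
+
+This prints 4 disjoint cosets of 4 vectors each, together covering all $2^4 = 16$ vectors, exactly as the facts above predict ($q^{n-k} = 4$ cosets of $q^k = 4$ vectors).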
+
+A (Slepian) **standard array** for an $[n,k]$-code $C$ is a $q^{n-k} × q^k$ matrix of *all* of the vectors in $V(n,q)$, with the first row consisting of the code $C$, and the following rows consisting of the cosets of $C$, each arranged with the coset leader first ($\vec{0}$ for $C$) and the codewords in consistent order, so that they line up by columns.
+
+$$\begin{bmatrix}
+    0000 & 1011 & 0101 & 1110\\
+    1000 & 0011 & 1101 & 0110\\
+    0100 & 1111 & 0001 & 1010\\
+    0010 & 1001 & 0111 & 1100\\
+\end{bmatrix}$$
+
+Simple decoding of linear codes rests on the observation that all vectors in a coset differ from the code by the same *error pattern*: subtracting the coset leader (the minimum-weight, most probable error) from a received vector yields a codeword. By choosing coset leaders to be minimum weight vectors, standard array decoding is a nearest neighbour decoding scheme. A received word may be looked up in the standard array, and the corresponding codeword found by looking at the *top entry* of its column.
+
+### Probability of error correction
+
+
+### Probability of error detection
+
+
+### cosets
+
+### Parity-check matrices
+
+... missed a bunch ...
+
+
+## Chapter 7: The dual code etc
+
+### Dual Codes
+
+The **inner product** $u ⋅ v$ of vectors $u$ and $v$ is the *dot product*. (todo: learn relation) If $u ⋅ v = 0$, $u$ and $v$ are **orthogonal**.
+- $u ⋅ v = v ⋅ u$
+- $(λu + μv) ⋅ w = λ(u ⋅ w) + μ(v ⋅ w)$
+
+For some linear $[n,k]$-code $C$, the **dual code** of $C$ (denoted $C^⊥$) is the set of vectors in $V(n,q)$ which are *orthogonal to every codeword of $C$*:
+- $C^⊥ = \{v ∈ V(n,q) : v ⋅ u = 0 \text{ for all } u ∈ C\}$
+
+Equivalently, a vector $v ∈ V(n,q)$ belongs to $C^⊥$ if and only if $v$ is orthogonal to every row of $G$, the generator matrix for $C$.
+
+$C^⊥$ has dimension $n-k$: in other words, if $C$ is an $[n,k]$-code over $GF(q)$, the dual code $C^⊥$ is a linear $[n,n-k]$-code.
+
+We know that $(C^⊥)^⊥ = C$.
+
+A **parity-check matrix** $H$ for an $[n,k]$-code $C$ is a generator matrix of $C^⊥$. Note that this means any linear code is completely specified by a parity-check matrix. If $G = [I_k | A]$ is the standard form generator matrix of an $[n,k]$-code $C$, then a parity-check matrix in **standard form** is $H = [-A^T | I_{n-k}]$.
+
+## Syndrome Decoding
+
+For a parity-check matrix $H$, a **syndrome** of a given row vector is...
+
+## Hamming Codes
+
+
+
+The Hamming codes are an important family of single-error-correcting codes which are easy to encode and decode. They are linear and definable over any finite field $GF(q)$.
+
+Let $r$ be a positive integer and let $H$ be an $r×(2^r - 1)$ matrix whose columns are the distinct non-zero vectors of $V(r, 2)$. Then the code having $H$ as its parity-check matrix is called a **binary Hamming code** and is denoted by $Ham(r,2)$.
+
+To find a Hamming code, you start with the parity-check matrix $H$, then find the generator matrix $G$, then find the code $C$.
+
+$$H = [-A^⊤ | I_{n-k}]$$
+$$G = [I_k | A]$$
+
+For a binary Hamming code of $r ≥ 2$:
+- it is a $[2^r - 1, 2^r - 1 - r]$-code
+- it has minimum distance $d=3$
+- it is a perfect code.
+
+
+
+### How to find $dist(C)$ from H
+
+$C = Nul(H)$, with codewords $[a_1 a_2 ... a_n]$:
+$$H·\begin{bmatrix}a_1\\ a_2\\ ...\\ a_n\end{bmatrix} = \vec{0} \iff [a_1 a_2 ... a_n]·H^⊤ = \vec{0}$$
+
+Write $H$ in terms of its columns $H_i$: $H = \begin{bmatrix}H_1 & H_2 & ... & H_n\end{bmatrix}$. Then
+
+$$H·\begin{bmatrix}a_1\\ a_2\\ ...\\ a_n\end{bmatrix} = a_1 H_1 + a_2 H_2 + ... + a_n H_n = \vec{0}$$
+
+A codeword = a linear relation among the columns $H_i$.
+The smallest weight of a codeword = the shortest linear relation among the $H_i$.
+
+ex. Consider a codeword of weight 1, i.e. $\vec{w} = [0 ... 0 \; a_i \; 0 ... 0]$.
+Relation: $a_i H_i = \vec{0} \iff H_i = \vec{0}$.
+There is a codeword of weight 1 iff some $H_i = \vec{0}$.
+
+ex. Consider a codeword of weight 2, i.e. $\vec{w} = [0 ... a_i ... a_j ... 0]$.
+Relation: $a_i H_i + a_j H_j = \vec{0} \iff H_i = -\frac{a_j}{a_i} H_j$.
+There is a codeword of weight 2 iff some column is a scalar multiple of another.
+
+### Theorems about distance
+
+For an $[n, k]$ $q$-ary code of distance $d$, any set of $d-1$ columns in the matrix $H$ (the parity check matrix) has to be linearly independent.
+
+A Hamming code has to have distance 3.
+
+Suppose $C$ is a linear $[n, k]$-code over $GF(q)$ with parity-check matrix $H$. Then the minimum distance of $C$ is $d$ if and only if any $d-1$ columns of $H$ are linearly independent but some $d$ columns are linearly dependent.
+
+... missed a bunch...
+
+## Polynomials
+
+We will denote the field $GF(q)$ by $F_q$, or frequently $F$ (with $q$ implicit). Then, $F[x]$ is the set of polynomials in $x$ with **coefficients** in $F$.
+
+Polynomials in $F[x]$ form a **ring**. They:
+- Contain the zero polynomial $\vec{0}$
+- Are closed under addition: $(5x^2 + x) + (x^3 + 2x^2) = x^3 + 7x^2 + x$
+- Are closed under subtraction: $(5x^2 + x) - (x^3 + 2x^2) = -x^3 + 3x^2 + x$
+- Are closed under multiplication: $(5x^2 + x) · (x^3 + 2x^2) = 5x^5 + 11x^4 + 2x^3$
+- *Do not* have multiplicative inverses! So they are not a field.
+
+Just as $Z_m$ is a ring, so also is $F[x] / f(x)$: it is called a **quotient ring**.
+
+For some polynomial $f(x) = f_m x^m + f_{m-1} x^{m-1} + ... + f_1 x + f_0$, $m$ is called the **degree** of $f(x)$, and $f_m$ is called the **leading coefficient** of $f(x)$. By convention, the degree of the zero polynomial is $-∞$. A polynomial is called **monic** if its leading coefficient is 1. Note that if $f(x), g(x) \in F[x]$, $deg(f(x)g(x)) = deg(f(x)) + deg(g(x))$ (the $-∞$ convention is chosen precisely so this holds even when a factor is zero).
+
+The division algorithm exists for polynomials just as it exists for the integers. For any pair of polynomials $a(x), b(x) \in F[x]$ with $b(x) ≠ 0$, there exists a unique pair of polynomials $q(x), r(x)$ such that $a(x) = q(x)b(x) + r(x)$ and $deg(r(x)) < deg(b(x))$.
+
+### The division algorithm for polynomials
+
+An element $a \in Z_p$ is a **root** of $f(x) \in Z_p [x]$ if $f(a) = 0$.
+$a$ is a root of $f(x)$ if and only if $x-a$ divides $f(x)$.
+
+A polynomial $f(x) \in F[x]$ is said to be **reducible** if $f(x) = a(x)b(x)$, where $a(x), b(x) \in F[x]$ and $deg(a(x)) < deg(f(x))$ and $deg(b(x)) < deg(f(x))$, i.e. it is a product of smaller degree polynomials. If $f(x)$ is not reducible, it is called **irreducible**. Just as any positive integer can be factorized uniquely into a product of prime numbers, any *monic* polynomial in $F[x]$ can be factorized uniquely into a product of irreducible monic polynomials. Some useful lemmas:
+- A polynomial $f(x)$ has a linear factor $x-a$ if and only if $f(a) = 0$.
+- A polynomial $f(x) \in F[x]$ of degree 2 or 3 is irreducible if and only if $f(a) ≠ 0$ for all $a \in F$.
+- Over any field, $x^n - 1 = (x-1)(x^{n-1} + x^{n-2} + ... + x + 1)$.
+
+Proof for a polynomial of degree 2 or 3 being irreducible: ...
+
+The property of a polynomial being irreducible corresponds directly to the property in $ℤ$ of a number being prime.
+- The ring $Z_m$ is a field if and only if $m$ is prime.
+- The ring $F[x]/f(x)$ is a field if and only if $f(x)$ is irreducible in $F[x]$.
+- For any prime number $p$ and any positive integer $h$, there exists an irreducible polynomial over $GF(p)$ of degree $h$. This corresponds to the existence of fields $GF(p^h)$ for all integers $h ≥ 1$.
+
+---
+
+Factoring polynomials:
+
+$f(x) \in Z_p [x]$ factors as $f(x) = C·p_1(x) p_2(x) ... p_m(x)$, where each $p_i(x)$ is monic and irreducible -
+just as $n \in ℤ$ factors as $n = \pm p_1 p_2 ... p_m$.
+
+The analogy with primes, reading binary polynomials in $Z_2[x]$ as binary numbers:
+- $x$ ↔ $10_2 = 2$
+- $x+1$ ↔ $11_2 = 3$
+- $x^2 + x + 1$ ↔ $111_2 = 7$
+- $x^3 + x + 1$ ↔ $1011_2 = 11$
+- $x^3 + x^2 + 1$ ↔ $1101_2 = 13$
+- but there is no 5! $x^2 + 1$ ↔ $101_2 = 5$ is *reducible*.
+
+### Factorization of polynomials
+
+Factorization: a polynomial is fully reduced when it is a product of irreducible polynomials.
+How to factor: just, kinda do it... try dividing by every irreducible polynomial.
+
+All (monic) irreducible polynomials in $Z_2 [x]$:
+
+deg. | all | reducible | irreducible
+-----|-----|-----------|------------
+1 | $x$, $x+1$ | - | $x$, $x+1$
+2 | $x^2$, $x^2 + 1$, $x^2 + x$, $x^2 + x + 1$ | $x^2 = x·x$, $x^2 + x = x(x+1)$, $x^2 + 1 = x^2 + 2x + 1 = (x+1)(x+1)$ | $x^2 + x + 1$
+3 | 8 poly | 6 poly | $x^3 + x + 1$, $x^3 + x^2 + 1$
+
+Note: while these irreducible polynomials can be written as binary numbers, ex. $x+1 ~ 11_2 = 3$, and all of the above examples are prime, there is *not* a correspondence with primes. See $x^2 + 1$: $101_2 = 5$, but $x^2 + 1 ≡ (x+1)(x+1)$ (in $Z_2 [x]$).
+
+ex. Factor
+$$f(x) = x^6 + x^5 + x^4 + x^3 + x^2 + x$$
+
+1. Check whether an irreducible polynomial divides $f(x)$.
+   - Does $f$ have roots? Yes, $f(0) = 0$. So $x-0$ divides $f$.
+   - $f(x) = x g(x) = x(x^5 + x^4 + x^3 + x^2 + x + 1)$
+   - $g(x) = (x^5 + x^4 + x^3 + x^2 + x + 1)$
+
+2. Keep checking whether a linear polynomial divides $f(x)$.
+   - Does $f$ have roots? Yes, $f(1) = 0$. So $x-1$ divides $f$.
+   - Note that $x-1 ≡ x+1$ mod $2$.
+   - $f(x) = x (x+1) h(x) = x(x+1)(x^4 + x^2 + 1)$
+   - $h(x) = (x^4 + x^2 + 1)$
+
+3. Now no linear polynomials divide $f(x)$. What to do?
+   - $h(x)$ may still be factored...
+   - By an identity in $Z_2$, $(a + b + c)^2 ≡ (a^2 + b^2 + c^2)$ (in $Z_2$)
+   - Verify by long division on $(x^4 + x^2 + 1)$: it is $(x^2 + x + 1)^2$
+   - $f(x) = x(x+1)(x^2+x+1)^2$
+
+### Modular polynomials
+
+The idea behind modular polynomials: just modular arithmetic! $Z_p [x] / f(x)$ denotes the set of polynomials in $Z_p [x]$ taken congruent mod $f(x)$, i.e. with $f(x) ≡ 0$.
+
+Definition: $g(x), h(x) \in Z_p [x]$ are congruent (mod $f(x)$), i.e.
+$$g ≡ h \pmod f$$
+if $g(x) = h(x) + t(x) f(x)$ for some $t(x)$.
+
+Every $g(x)$ is congruent to a unique polynomial $r(x)$ with $\deg r(x) < \deg f(x)$.
+- Proof: just divide $g(x)$ by $f(x)$!
+- $\frac{g}{f} = q(x) + \frac{r(x)}{f(x)}$
+- $g = f·q + r$
+- $f·q ≡ 0$
+
+Note: for $ℤ/(m)$, every $n$ is congruent to an element in $\{0, 1, ..., m-1\}$ -
+i.e. $Z_m = ℤ/(m)$. (This is the same quotient construction we are now performing with polynomials.)
+
+Definition: Let $f(x) \in Z_p [x]$, $\deg f = n > 0$.
+Then $Z_p [x] / f(x)$ = the set of all polynomials $r(x)$ with $\deg r < n$.
+
+Operations `+`, `*`:
+- Do the usual operations with polynomials
+- If the result has degree ≥ n, then take its residue mod $f(x)$.
+- This is a *ring*! We have addition, subtraction (additive inverse), multiplication... but no division.
+- It is *also* a vector space, isomorphic to $Z_p^n$!
+  - In a vector space: we need addition and *scalar multiplication*
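+
+Before the hand-worked example below, here is a bit-level sketch of this arithmetic. The encoding (bit $i$ of a Python integer ↔ the coefficient of $x^i$) is my own convention, not anything standard from the course:
+
+```python
+def poly_mul_mod(a, b, f):
+    # multiply a(x)·b(x) in Z_2[x], then reduce the result mod f(x)
+    result = 0
+    while b:
+        if b & 1:
+            result ^= a        # addition of Z_2[x] polynomials is XOR
+        a <<= 1                # a(x) := x·a(x)
+        b >>= 1
+    # long division: repeatedly cancel the leading term with a shift of f(x)
+    while result.bit_length() >= f.bit_length():
+        result ^= f << (result.bit_length() - f.bit_length())
+    return result
+
+f_red = 0b1010   # f(x) = x^3 + x     (reducible)
+f_irr = 0b1011   # f(x) = x^3 + x + 1 (irreducible)
+a, b = 0b101, 0b110   # 1 + x^2  and  x + x^2
+print(bin(poly_mul_mod(a, b, f_red)))  # 0b0:  zero divisors mod a reducible f
+print(bin(poly_mul_mod(a, b, f_irr)))  # 0b11: x + 1, in the field F_8
+```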
+
+ex. $Z_2 [x] / (x^3 + x)$
+(we'll consider $f(x) = x^3 + x$)
+
+Addition: polynomial addition
+- $(1+x^2) + (x+x^2) = 1+x$ (as $2x^2 ≡ 0$ in $Z_2$)
+
+Multiplication:
+- $(1+x^2)(x+x^2) = x+x^2+x^3+x^4 ≡ r(x)$ (mod $f$)
+- by long division, $x^4 + x^3 + x^2 + x = (x+1)(x^3 + x) + 0$
+- So $(1+x^2)(x+x^2) ≡ 0$: the *quotient* is $x+1$, but the residue is $0$. (This ring has zero divisors, since $f(x) = x^3 + x$ is reducible; mod the irreducible $x^3 + x + 1$, the same product instead reduces to $x+1$.)
+
+
+### Quotient polynomial rings
+
+We've previously defined quotient rings: $Z_m = ℤ/(m)$, where:
+- $m ≡ 0$
+- $n ≡ r$ (mod $m$)
+- $r =$ the principal residue
+- $n/m = q + r/m$, for $0 ≤ r < m$
+- $Z_m$ then equals the set of *principal residues*, i.e. $\{0, 1, 2, ..., m-1\}$ for $m$
+
+We can also have quotient polynomial rings: $Z_p [x] / f(x)$, where
+- $f(x) ≡ 0$
+- $g(x) ≡ r(x)$ (mod $f$)
+- $r(x) =$ the principal residue
+- $g(x)/f(x) = q(x) + r(x)/f(x)$ for some $\deg(r) < \deg(f)$
+- $Z_p [x] / f(x)$ then equals the set of *principal residues*, i.e. the $r(x)$ s.t. $\deg(r) < \deg(f)$
+
+Operations on quotient rings are the usual: `+`, `·`. But when can we divide?
+- We can divide when $1/n$ exists, which is equivalent to $\gcd(n, m) = 1$
+- To have a field: $m = p$ prime
+
+Ex. $Z_2 [x] / (x^3 + x + 1)$
+- Note: this is a field! $(x^3 + x + 1)$ is irreducible
+- Goal: find $1/(x+1)$
+- $gcd(x^3 + x + 1, x+1)$
+- $gcd(x^3 + x + 1 - (x+1)(x^2 + x), x+1) = gcd(1, x+1)$
+- $gcd(1, x+1 - 1·(x+1)) = gcd(1, 0)$
+- $gcd(f, g)$
+- $gcd(f - g(x^2 + x), g)$
+- $1 = f-g(x^2 + x)$
+- $1 = -(x^2 + x) g$ (mod $f$)
+- $1/g = x^2 + x$
+
+Ex. $Z_3 [x] / (x^2 + x + 2)$
+- Goal: find $1/x$
+- $gcd(x^2 + x + 2, x)$
+- $gcd(x^2 + x + 2 - (x+1)·x, x)$
+- $gcd(2, x)$
+- $gcd(f, g)$
+- $gcd(f - (x+1)·g, g)$
+- $2 = f - (x+1)·g$
+- $1 = 2(f - (x+1)·g)$
+- $1/g = -2(x+1) = x+1$
+
+So what about fields? $Z_p [x] / f(x)$, where:
+- The characteristic $p$ is prime
+- $f(x)$ (of degree n) is irreducible
+- $\{a_0 + a_1 x + a_2 x^2 + ... + a_{n-1} x^{n-1} | a_i \in Z_p\}$ $\simeq$ $Z_p^n$
+- The field has $q = p^n$ elements
+- Fact: every finite field is of this type, $𝔽_q$
+- Two fields with the same number of elements turn out to be the same field! They're *isomorphic*.
+
+Ex. $Z_2 [x] / (x^3 + x + 1) = 𝔽_8$ and $Z_2 [x] / (x^3 + x^2 + 1) = 𝔽_8$: an isomorphism between them is given by $x ↦ x+1$.
+
+Ex. $F_4 = Z_2 [x] / (x^2 + x + 1) \neq Z_4$
+- $\{0, 1, x, x+1\} \neq \{0, 1, 2, 3\}$
+
+𝔽_4 | 0 | 1 | x | x+1
+----|---|---|---|----
+0 | 0 | 0 | 0 | 0
+1 | 0 | **1** | x | x+1
+x | 0 | x | x+1 | **1**
+x+1 | 0 | x+1 | **1** | x
+
+Z_4 | 0 | 1 | 2 | 3
+----|---|---|---|---
+0 | 0 | 0 | 0 | 0
+1 | 0 | **1** | 2 | 3
+2 | 0 | 2 | 0 | 2
+3 | 0 | 3 | 2 | **1**
+
+Every $𝔽_q$ has a primitive element $a(x)$:
+$$a(x), a^2(x), ..., a^{q-1}(x)$$
+are all distinct nonzero elements of $𝔽_q$
+
+Ex. In $𝔽_4$, is $x$ primitive?
+- $x$, $x^2 = x+1$, $x^3 = x(x+1) = 1$: three distinct nonzero elements, so yes
+
+Fermat's little theorem: $b^{q-1} = 1$ for any $0 \neq b \in 𝔽_q$.
+- Proof via a primitive element: $b = [a(x)]^i$, so $b^{q-1} = [a(x)^{q-1}]^i = 1^i = 1$
+
+Example from first midterm:
+- In $Z_m$, $b^{m-1} = 1$ for all $b ≠ 0$ holds iff $m=p$ is prime
+- ex. in $Z_{123}$, $b^{122} = 1$ fails for some $b ≠ 0$, as $123 = 3·41$ is not prime
+- $Z_m^× = \{n \in Z_m \mid 1/n \text{ exists}\}$
+- $b^{|Z_m^×|} = 1$ if $b \in Z_m^×$
+- $Z_4^× = \{1,3\}$, $b^2 = 1$; note $3^3 \neq 1$
+
+## Cyclic codes
+
+
+
+A code $C$ is **cyclic** if $C$ is a linear code, and any cyclic shift of a codeword is also a codeword. In other words, whenever $a_0 a_1 ... a_{n-1} \in C$, $a_{n-1} a_0 a_1 ... a_{n-2} \in C$.
+- ex. the binary code $\{0000, 0101, 1010, 1111\}$ is cyclic.
+
+Consider the *ring* $R_n = 𝔽_q [x] / (x^n - 1)$: as a vector space, $R_n \simeq 𝔽_q^n$
+
+A cyclic code $C$ is a (nonempty) $C \subseteq R_n$, where:
+- $C$ is a subspace closed under `+`
+- $C$ is closed under scalar multiplication
+- $C$ is closed under `·`
+- (the first two closures already make $C$ a *linear code*; closure under multiplication is what adds cyclicity)
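+
+The next paragraph makes the shift-as-multiplication idea precise; first, a tiny sketch of it (my own helper, nothing standard):
+
+```python
+def shift_via_x(word):
+    # word = (a_0, ..., a_{n-1}) stands for a_0 + a_1 x + ... + a_{n-1} x^{n-1};
+    # multiplying by x sends x^{n-1} to x^n ≡ 1, wrapping the last coefficient around
+    return word[-1:] + word[:-1]
+
+print(shift_via_x([0, 1, 0, 1]))  # [1, 0, 1, 0]
+
+C = {(0, 0, 0, 0), (0, 1, 0, 1), (1, 0, 1, 0), (1, 1, 1, 1)}
+print(all(tuple(shift_via_x(list(w))) in C for w in C))  # True: C is cyclic
+```
+
+The code checked here is the cyclic example code from above.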
+
+We can consider a cyclic shift to be a *multiplication* by $x$.
+- ex. $a_0 + a_1 x + a_2 x^2 + ... + a_{n-1} x^{n-1}$ = $(a_0, a_1, ..., a_{n-1})$
+- $x·(a_0 + a_1 x + a_2 x^2 + ... + a_{n-1} x^{n-1})$
+- $a_0 x + a_1 x^2 + a_2 x^3 + ... + a_{n-1} x^n$
+- $x^n ≡ 1$, so...
+- $a_{n-1} + a_0 x + a_1 x^2 + ... + a_{n-2} x^{n-1}$
+
+$C$ is a **module** on the ring $R_n$, where a **module** is a vector space working over a ring, not a field
+...
+
+missed some stuff...
+
+Every cyclic code is of the form $⟨g(x)⟩$. The polynomial $g(x)$ is uniquely determined by:
+- $g(x)$ is monic
+- $g(x)$ divides $x^n - 1$ in $𝔽_q [x]$
+
+As a corollary, cyclic codes correspond with monic divisors of $x^n - 1$.
+
+Ex. find all binary cyclic codes of length $n=3$.
+$$x^3 - 1 = (x+1)(x^2 + x + 1)$$
+So our two non-trivial codes are:
+- $⟨x+1⟩ = \{\text{all even-weight words}\}$
+- $⟨x^2 + x + 1⟩ = \{000, 111\}$
+
+Ex. find all binary cyclic codes of length 4
+- $x^4 - 1 = (x-1)^4$
+So our codes are:
+- $⟨1⟩ = R_n$
+- $⟨x-1⟩ = \{\text{even-weight words}\}$
+- $⟨(x-1)^2⟩ = \{0000, 0101, 1010, 1111\}$
+- $⟨(x-1)^3⟩ = \{0000, 1111\}$
+- $⟨(x-1)^4⟩ = 0$
+
+Let $C \subseteq R_n$ be a cyclic code. Let $g(x) \in C$ be the lowest degree polynomial.
+Then, $⟨g⟩ = C$. Proof:
+- To show $⟨g⟩ \subseteq C$: $g(x) \in C \implies f·g \in C$
+- To show $⟨g⟩ \supseteq C$: we must show that any $h(x) \in C$ equals $f(x)·g(x)$ for some $f(x)$.
+  - $\frac{h}{g} = f + \frac{r}{g}$, so $h = f·g + r$, and $r = h-fg \in C$.
+  - $\deg(r) < \deg(g)$ by the definition of division.
+  - $r=0$, as the degree of $r$ is less than that of the lowest degree polynomial in $C$.
+
+Ex. $R_3 = 𝔽_2 [x] / (x^3 - 1)$
+- $g(x) = (x^2 + 1) = (x+1)^2$
+- $x^3 - 1 = (x+1)(x^2 + x + 1)$
+- $⟨g(x)⟩ =$ a cyclic code:
+- $0·(x^2 + 1) = 0$, $1·(x^2 + 1) = x^2 + 1$
+- $x(x^2 + 1) = x^3 + x \equiv 1+x$
+- $(x+1)(x^2 + 1) = x^3 + x^2 + x + 1 \equiv x^2 + x$
+- $x^2 (x^2 + 1) = x^4 + x^2 \equiv x + x^2$
+- $(x^2 + 1)(x^2 + 1) = x^4 + 1 \equiv x + 1$
+- $(x^2 + x + 1)(x^2 + 1) = x^4 + x^3 + x + 1 \equiv x + 1 + x + 1 \equiv 0$
+- $C = \{0, 1+x, x^2 + 1, x^2 + x\} = ⟨1+x⟩$
+
+Let $g(x)$ divide $x^n - 1$ in $𝔽_q [x]$. Then $\dim ⟨g⟩ = \dim(C) = n-\deg(g)$.
+- Write $g(x) = a_0 + a_1 x + a_2 x^2 + ... + a_m x^m$:
+- then $g(x), x·g(x), x^2 g(x), ..., x^{n-m-1} g(x)$ form a basis for the vector space $C \subseteq R_n$.
+
+(the sets $⟨g⟩$ are the *ideals* of $R_n$)
+
+---
+
+### Vandermonde matrix
+
+Let $𝔽$ be a field, and $x_1, ..., x_n ∈ 𝔽$.
+
+$$V = \begin{bmatrix} 1 & 1 & \cdots & 1 \\ x_1 & x_2 & \cdots & x_n \\ x_1^2 & x_2^2 & \cdots & x_n^2 \\ \vdots & \vdots & & \vdots \\ x_1^{n-1} & x_2^{n-1} & \cdots & x_n^{n-1} \end{bmatrix}$$
+
+If the $x_i$ are distinct, then $V$ is invertible (i.e. the determinant is nonzero).
+
+The determinant: $\det V = \prod_{i>j} (x_i - x_j)$
+(examples...)
+A Vandermonde matrix is sometimes instead written as the transpose of the above, which has the same determinant.
+
+### Lagrange interpolation
+
+ex. interpolating a *function* through points: distinct $x$, not necessarily distinct $y$
+
+Theorem: If the $x_i$ are distinct (for any numbers $y_i$), then there is a unique $p(x)$ of degree ≤ $n-1$ that interpolates the points $(x_i, y_i)$.
+
+Proof: $P_{n-1} = \{a_0 + a_1 x + ... + a_{n-1} x^{n-1} | a_i ∈ 𝔽\}$
+$P_{n-1} \simeq 𝔽^n$
+$p(x) \leftrightarrow (a_0, a_1, ..., a_{n-1})$
+
+We want to find a $p(x)$ such that, evaluated at $x_1, x_2, ...$, it equals $y_1, y_2, ...$
+
+The theorem is equivalent to saying that the evaluation map $T : P_{n-1} → 𝔽^n$ is invertible.
+
+Invertible means 1-1 and onto (equivalent here: if you have one property, you have the other).
+
+Rank-Nullity theorem: $\dim P_{n-1} = \dim N(T) + \dim R(T)$
+
+1-1 means that the null space is zero
+- why? a nonzero polynomial of degree ≤ $n-1$ cannot have $n$ distinct roots
+
+Onto means that the dimension of the range is all of $\dim 𝔽^n = n$.
+
+The matrix of $T: P_{n-1} → 𝔽^n$ is $T = [T(1) \; T(x) \; T(x^2) \; ...]$: precisely the Vandermonde matrix $V$.
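+
+A small sketch of Lagrange interpolation over a finite field $Z_p$ (the helper is mine, not from the course; it evaluates the interpolating polynomial rather than expanding its coefficients):
+
+```python
+p = 11  # work in Z_11, the same field as the ISBN discussion
+
+def interpolate(points, x):
+    # evaluate the unique Lagrange interpolating polynomial at x, mod p
+    total = 0
+    for i, (xi, yi) in enumerate(points):
+        term = yi
+        for j, (xj, _) in enumerate(points):
+            if i != j:
+                # multiply by (x - xj) / (xi - xj), using modular inverses
+                term = term * (x - xj) * pow(xi - xj, -1, p) % p
+        total = (total + term) % p
+    return total
+
+pts = [(1, 5), (2, 3), (7, 0)]
+print(all(interpolate(pts, xi) == yi for xi, yi in pts))  # True
+```
+
+The modular inverses exist exactly because the $x_i$ are distinct and $p$ is prime, mirroring the invertibility of the Vandermonde matrix.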
+
+### BCH codes
+
+Fix $x_1, x_2, ..., x_n$ in $𝔽_q$, distinct.
+Fix some redundancy $r ≤ n$.
+
+$$H = \begin{bmatrix} 1 & 1 & \cdots & 1 \\ x_1 & x_2 & \cdots & x_n \\ \vdots & \vdots & & \vdots \\ x_1^{r-1} & x_2^{r-1} & \cdots & x_n^{r-1} \end{bmatrix}$$
+
+$\dim C^⊥ = r$, $\dim C = n - r$.
+
+The data of the codeword: $n - r$ symbols; the redundancy of the codeword: $r$ symbols.
+Distance of $C$: any $r$ columns of $H$ are independent ($r$ columns form an $r×r$ Vandermonde matrix), so $d = r+1$.
+
+ex. construct a length 10 code of distance 5 ($= r + 1$, so $r = 4$; it can correct 2 errors).
+Choose $𝔽_q$ and $x_1, ..., x_{10}$ in $𝔽_q$:
+take $𝔽_{11} = Z_{11}$, with $x_1 = 1, x_2 = 2, ..., x_{10} = 10$.
+
+$$H = \begin{bmatrix} 1 & 1 & \cdots & 1 \\ 1 & 2 & \cdots & 10 \\ 1 & 2^2 & \cdots & 10^2 \\ 1 & 2^3 & \cdots & 10^3 \end{bmatrix}$$
+
+(4 rows)
+
+BCH codes are usually not perfect, but they do achieve the Singleton bound.
+
+## Singleton bound
+
+For $C$ of length $n$, distance $d$, $q$-ary:
+$M ≤ q^{n-d+1}$, which (if $C$ is linear) is equivalent to $\dim C ≤ n-d+1$.
+
+For a BCH code, $d = r+1$, and the Singleton bound gives
+$\dim C ≤ n-(d-1) = n-r$: exactly the dimension achieved.
+
+$M$ ≤ Singleton bound ≤ sphere-packing bound
+- here, $M ≤ 11^{10-5+1} = 11^6$, which is stronger than the sphere-packing estimate.
+
+next time: how to decode BCH codes
+
+
+
+## The main linear coding theory problem
+
+The main linear coding theory problem: what is the "best" linear code?
+
+Answer: we construct $H$ in $𝔽_q$ such that
+
+$$H = [H_1 H_2 \ldots H_n]$$
+
+with $n$ as large as possible for some fixed $r$.
+
+How many vectors $H_1, ..., H_n$ in $𝔽_q^r$ can we choose such that any $d-1$ of them are linearly independent? We want the maximum $n$ for fixed $q$ and $r$.
+
+Last time: for $d=3$, the best codes are $Ham(r, q)$.
+
+Today: for $d=4$, we need to think about projective geometry.
+
+How many vectors $H_1, ..., H_n$ in $𝔽_q^r$ can we choose such that no 3 vectors lie on a plane?
+
+### Projective spaces
+
+$$ℙ^{n-1} = \{\text{all lines through } 0 \text{ in } 𝔽_q^n\}$$
+
+Ex. for $r=3$, $q=2$: we have a cube with its corners as $000, 010, 100, 110, 001, 011, 101, 111$
+
+```
+010
+|
+|  110
+|
+|
+011------111
+|        |
+|        |
+|        |
+|        |
+011------101-----------100
+```
+
+A projective space reduces the dimension by 1.
+- ex. a line $L$ in $𝔽_q^n$ = a point in $ℙ^{n-1}$
+- ex. a plane $W$ in $𝔽_q^n$ = a line in $ℙ^{n-1}$
+- ex. a 3d subspace in $𝔽_q^n$ = a plane in $ℙ^{n-1}$
+
+In $ℙ^2$, any two different lines intersect at a point.
+
+Ex. $ℙ^2$ over $𝔽_2$:
+```
+    010
+
+  011 111 110
+
+001 101 100
+```
+(the triangle with points intersecting: the Fano plane)
+
+### MLCT problem, d=4
+Find the maximum number of lines $L_1, L_2, ..., L_n$ in $𝔽_q^r$ such that no 3 lines lie on a plane.
+This is the same as our earlier problem!
+This is also the same as finding the maximum number of points in the projective space $ℙ^{r-1}$ such that no 3 points are on a line.
+
+Ex. for $r=3$, $𝔽_q = 𝔽_2$:
+- Take all 4 finite points.
+
+For $r > 3$, $𝔽_q = 𝔽_2$:
+- Take all the finite points.
+- This gives $Ham(r, 2)$ plus a parity bit, of length $2^r - 1 + 1 = 2^r$: the extended Hamming code $\widehat{Ham}(r, 2)$,
+- for redundancy $r+1$.
+
+Ex. for the case $q ≠ 2$, e.g. $q = 3$:
+- We cannot take all the finite points.
+- We consider a parabola intersecting a line $M$ in a plane, and take the points on it.
+- For $q > 2$, $r = 3$: the maximum $n$ is $q+1$ if $q$ is odd, and $q+2$ if $q$ is even.
+
+
diff --git a/math/foundations.md b/math/foundations.md
new file mode 100644
index 0000000..9057bea
--- /dev/null
+++ b/math/foundations.md
@@ -0,0 +1,6 @@
+---
+layout: foundations
+title: mathematics/foundations
+---
+
+# foundations
diff --git a/math/index.md b/math/index.md
new file mode 100644
index 0000000..7e03385
--- /dev/null
+++ b/math/index.md
@@ -0,0 +1,12 @@
+---
+layout: mathematics
+title: mathematics
+---
+
+
+
+
+
+
+# mathematics is the art of *proof*
diff --git a/math/information-theory.md b/math/information-theory.md
new file mode 100644
index 0000000..d0cd75c
--- /dev/null
+++ b/math/information-theory.md
@@ -0,0 +1,6 @@
+---
+layout: mathematics
+title: mathematics/information theory
+---
+
+# information theory
diff --git a/math/linear-algebra.md b/math/linear-algebra.md
new file mode 100644
index 0000000..3d58b39
--- /dev/null
+++ b/math/linear-algebra.md
@@ -0,0 +1,666 @@
+---
+layout: algebra
+title: mathematics/linear algebra
+---
+
+# Linear Algebra
+
+$$ℝ^n$$
+
+## Fields and Vector Spaces
+
+A **field** $F$ is a *set* with two *binary operations* $+$ and $×$ satisfying the following axioms:
+- $(F, +)$ is a *commutative group*:
+  - associativity: $∀a,b,c : a + (b + c) = (a + b) + c$
+  - additive identity: $∃0, ∀a : 0 + a = a + 0 = a$
+  - additive inverse: $∀a, ∃-a : a + (-a) = 0$
+  - commutativity: $∀a,b : a + b = b + a$
+- $(F ∖ \\{0\\}, ×)$ is a *commutative group*:
+  - associativity: $∀a,b,c : a(bc) = (ab)c$
+  - multiplicative identity: $∃1, ∀a : 1a = a1 = a$
+  - multiplicative inverse: $∀a ≠ 0, ∃\frac{1}{a} : a\frac{1}{a} = 1$
+  - commutativity: $∀a,b : ab = ba$
+- $×$ is distributive with respect to $+$
+  - distributivity: $∀a,b,c : a(b + c) = (ab) + (ac)$
+
+Intuitively, a field is a set on which addition, subtraction, multiplication and division are defined and behave as they do on $ℝ$. We often omit the multiplication sign, and write $a × a$ as simply $aa$. It can also be thought of as a *commutative ring* with a multiplicative inverse (sans 0).
+
+A **vector space** $V$ *over* a field $F$ is a set with a binary operation $+$ and a *scalar multiplication* function $F × V → V$ satisfying the following axioms:
+- $(V, +)$ is a *commutative group*:
+  - associativity: $∀u,v,w : u + (v + w) = (u + v) + w$
+  - additive identity: $∃0, ∀v : 0 + v = v + 0 = v$
+  - additive inverse: $∀v, ∃-v : v + (-v) = 0$
+  - commutativity: $∀u,v : u + v = v + u$
+- scalar multiplication is compatible with $F$:
+  - scalar identity: $∃1 ∈ F, ∀v : 1v = v1 = v$
+  - compatibility: $∀a,b ∈ F, ∀v : (ab)v = a(bv)$
+- The *distributive laws* hold:
+  - $∀a ∈ F, ∀u,v ∈ V : a(u + v) = au + av$
+  - $∀a,b ∈ F, ∀v ∈ V : (a + b)v = av + bv$
+
+Our definition of our vector space leads us to some facts:
+- The zero vector is *unique* and always present.
+  - Proof: Suppose there were two zero vectors: $0$ and $0'$. Then $0' = 0 + 0' = 0' + 0 = 0$.
+- Vector spaces are *non-empty*.
+  - Proof: By definition, the zero vector exists.
+- The additive inverse for some $x$ is *unique*.
+  - Proof: Suppose there were two inverses: $-x$ and $-x'$. Then $-x = -x + 0 = -x + (x + -x') = (-x + x) + -x' = -x'$.
+- If $V$ is a vector space over an *infinite* field $F$ and $V ≠ \\{0\\}$, then $V$ is an *infinite set*.
+  - Proof: take some $v ≠ 0$; the scalar multiples $cv$ are distinct for distinct $c ∈ F$ (you can just keep adding things)
+Examples
+
+Let $S = \\{(a_1, a_2) : a_1, a_2 ∈ ℝ\\}$.
+For $(a_1, a_2), (b_1, b_2) ∈ S$ and $c ∈ ℝ$, we define:
+- $(a_1, a_2) + (b_1, b_2) = (a_1 + b_1, a_2 - b_2)$
+- $c(a_1, a_2) = (ca_1, ca_2)$.
+
+This fails commutativity! It is thus not a vector space.
+
+Let $S = \\{(a_1, a_2) : a_1, a_2 ∈ ℝ\\}$. We define:
+- $(a_1, a_2) + (b_1, b_2) = (a_1 + b_1, 0)$
+- $c(a_1, a_2) = (ca_1, 0)$
+
+This fails the existence of zero! It is thus not a vector space.
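+
+A quick numeric sanity check of the first example (a sketch; the sample tuples are arbitrary):
+
+```python
+# The first addition defined above is not commutative, so S cannot be a
+# vector space under it (illustrative check).
+def add(u, v):
+    return (u[0] + v[0], u[1] - v[1])
+
+u, v = (1.0, 2.0), (3.0, 5.0)
+assert add(u, v) == (4.0, -3.0)
+assert add(v, u) == (4.0, 3.0)
+assert add(u, v) != add(v, u)  # commutativity fails
+```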
+
+## Subspaces
+
+A subset $W$ of a vector space $V$ over a field 𝔽 is called a **subspace** of $V$ if $W$ is a *vector space* over 𝔽 with the operations of addition and scalar multiplication from $V$.
+
+A subset of $V$ is a **subspace** of $V$ iff:
+- the subset is non-empty (in particular, it contains the zero vector)
+- it is closed under addition and scalar multiplication
+
+Let $V$ be a vector space over $F$ and $S$ a nonempty subset of $V$. A vector $v \in V$ is a **linear combination** of vectors of $S$ if there exists a *finite* number of vectors $u_1, u_2, ..., u_n ∈ S$ and scalars $a_1, a_2, ..., a_n ∈ F$ such that $v = a_1 u_1 + a_2 u_2 + ... + a_n u_n$.
+
+We call $a_1 ... a_n$ the *coefficients* of the linear combination.
+
+---
+
+## Introduction: The Complex Numbers
+
+A **complex number** is of the form $a + b\text{i}$, where $a, b$ are real numbers and $i$ represents the imaginary base.
+
+We denote the set of complex numbers as $ℂ$.
+The complex numbers form a *vector space* over $ℝ$.
+Every element in the vector space can be expressed as a *linear combination* of $1$ and $i$.
+
+- ℂ: the set of complex numbers
+- ℝ: the set of real numbers
+- ℚ: the set of rational numbers
+
+Elementary operations on the complex numbers are as follows:
+- $(a + bi) ± (c + di) = (a ± c) + (b ± d)i$
+- $(a + bi)(c + di) = (ac - bd) + (ad + bc)i$
+- $i^2 = -1$
+
+The **conjugate** of $z = a + bi$ is defined as $\bar{z} = a - bi$.
+As long as $a^2 + b^2 ≠ 0$, the inverse of $z = a + bi$ is given by $z^{-1} = \frac{a - bi}{a^2 + b^2} = \frac{\bar{z}}{a^2 + b^2}$.
+
+**Theorem**: Let $z, w ∈ ℂ$. Then:
+- $\bar{\bar{z}} = z$ (the conjugate of the conjugate)
+- $\bar{z ± w} = \bar{z} ± \bar{w}$
+- $\bar{zw} = \bar{z}\bar{w}$
+- $\bar{\frac{z}{w}} = \frac{\bar{z}}{\bar{w}}$ (if $w ≠ 0$)
+  - (where $\frac{z}{w} = z ⋅ w^{-1} = z \frac{1}{w}$)
+- $z$ is a *real number* iff $\bar{z} = z$
+
+Let $z = a + bi$, where $a, b ∈ ℝ$. The **absolute value** of $z$ is defined as the real number $\sqrt{a^2 + b^2}$.
+
+This absolute value shares many properties. For $z, w ∈ ℂ$:
+- $z \bar{z} = a^2 + b^2 = \|z\|^2$, where $\|z\| = \sqrt{a^2 + b^2}$
+- $\|\frac{z}{w}\| = \frac{\|z\|}{\|w\|}$ where $w ≠ 0$
+- $\|z + w\| ≤ \|z\| + \|w\|$
+- $\|z\| - \|w\| ≤ \|z + w\|$
+
+**The Fundamental Theorem of Algebra**:
+Consider the polynomial $p(z) = a_n z^n + a_{n-1}z^{n-1} + ... + a_1 z + a_0$ where all $a$ are complex numbers.
+If $n ≥ 1$, then $p(z)$ has a zero (*root*), i.e. there exists a complex number $c$ such that $p(c) = 0$.
+
+If the coefficients $a_i$ are all real and $c$ is a root, $\bar{c}$ is also a root!
+
+Let 𝔽 be one of the following sets: ℚ, ℝ, ℂ
+
+https://math.stackexchange.com/questions/3492590/linear-combination-span-independence-and-bases-for-infinite-dimensional-vector
+
+Let $S$ be a nonempty subset of a vector space $V$. The **span** of $S$, denoted $span(S)$, is the set consisting of all linear combinations of vectors in $S$. For convenience, we define $span(∅) = \\{0\\}$.
+
+The span of any subset $S$ of a vector space $V$ is a subspace of $V$.
+
+---
+
+A subset $S$ of a vector space $V$ is **linearly dependent** if there exists a finite number of distinct vectors $u_1, u_2, ..., u_n ∈ S$ and scalars $a_1, a_2, ..., a_n ∈ F$ such that $a_1, a_2, ..., a_n$ are not all zero and $a_1 u_1 + a_2 u_2 + ... + a_n u_n = 0$.
+
+A subset $S$ of a vector space that is not linearly dependent is **linearly independent**.
+
+Example: Consider the following set: $S = \\{(1, 0, 0, -1), (0, 1, 0, -1), (0, 0, 1, -1), (0, 0, 0, 1)\\}$
+Assume that $a_1 v_1 + a_2 v_2 + a_3 v_3 + a_4 v_4 = 0$. Then, as the determinant of the matrix with these vectors as columns is nonzero, all $a_i = 0$ and $S$ is linearly independent.
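+
+A numeric confirmation of the example above (a sketch using NumPy):
+
+```python
+# The four vectors are linearly independent iff the matrix with them as
+# columns has nonzero determinant.
+import numpy as np
+
+S = np.array([
+    [1, 0, 0, -1],
+    [0, 1, 0, -1],
+    [0, 0, 1, -1],
+    [0, 0, 0,  1],
+]).T  # columns are the vectors of S
+
+print(np.linalg.det(S))  # 1.0 (nonzero), so S is linearly independent
+```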
+
+Let $V$ be a vector space, and let $S_1 ⊆ S_2 ⊆ V$. If $S_1$ is linearly dependent, then $S_2$ is linearly dependent. If $S_2$ is linearly independent, then $S_1$ is also linearly independent.
+
+Let $S$ be a linearly independent subset of a vector space $V$, and let $v ∈ V : v ∉ S$. Then $S ∪ \\{v\\}$ is linearly independent iff $v ∉ span(S)$.
+
+A basis $B$ for a vector space $V$ is a *linearly independent* subset of $V$ that *spans* $V$. If $B$ is a basis for $V$, we also say that the vectors of $B$ form a basis for $V$.
+
+Let $V$ be a vector space and $β = \\{v_1, ..., v_n\\}$ be a subset of $V$. Then $β$ is a basis for $V$ iff every $v ∈ V$ can be **uniquely expressed** as a linear combination of vectors of $β$: that is, $v$ can be written in the form $v = a_1 u_1 + a_2 u_2 + ... + a_n u_n$ for unique scalars $a_i$.
+
+If a vector space $V$ is spanned by a finite set $S$, then some subset of $S$ is a basis of $V$. So, $V$ has a finite basis.
+Proof: If $S = ∅$, then $V = span(S) = span(∅) = \\{𝕆\\}$, in which case $∅$ is a basis for $V$.
+If $S ≠ ∅$, then $∃ u_1 ∈ S : u_1 ≠ 𝕆$, and we have two cases: if $span(u_1) = V$ we are done...
+
+(Replacement theorem) Let $V$ be a vector space that is spanned by a set $G$ containing exactly $n$ vectors, and let $L$ be a linearly independent subset of $V$ containing exactly $m$ vectors. Then $m ≤ n$. Moreover, you can find a subset $H$ of $G$ containing exactly $n-m$ vectors such that $L ∪ H$ spans $V$.
+
+Let $V$ be a vector space with dimension $n$.
+- any finite spanning set for $V$ contains at least $n$ vectors, and a spanning set for $V$ that contains exactly $n$ vectors is a basis for $V$.
+
+Theorem 1.4: Let $W$ be a subspace of a finite-dimensional vector space $V$. Then $W$ is also finite-dimensional (and $dim W ≤ dim V$). Moreover, if $dim W = dim V$, then $V = W$.
+
+---
+
+## Linear Transformations
+
+Let $V$ and $W$ be vector spaces (over a field $F$).
+
+A function $T: V → W$ is a **linear transformation** from $V$ into $W$ if $∀x,y ∈ V, c ∈ F$ we have $T(cx + y) = cT(x) + T(y)$. Subsequently:
+- $T(x + y) = T(x) + T(y)$
+- $T(cx) = cT(x)$
+- $T(0) = 0$
+- $T(\sum_{i=1}^n a_i x_i) = \sum_{i=1}^n a_i T(x_i)$
+
+Let $T: V → W$ be a linear transformation.
+
+The **kernel** (or null space) $N(T)$ of $T$ is the set of all vectors in $V$ such that $T(x) = 0$: that is, $N(T) = \\{ x ∈ V : T(x) = 0 \\}$.
+
+The **image** (or range) $R(T)$ of $T$ is the subset of $W$ consisting of all images (under $T$) of elements of $V$: that is, $R(T) = \\{ T(x) : x ∈ V \\}$.
+
+Theorem: The kernel $N(T)$ and image $R(T)$ are subspaces of $V$ and $W$, respectively.
+Proof +We shall denote the zero vector of $V$ and $W$ as $0_v$ and $0_w$, respectively. + +Let $x,y ∈ N(T)$ and $c ∈ F$. As $T(0_v) = 0_w$, $0_v ∈ N(T)$. Then $T(cx + y) = cT(x) + T(y) = 0_w + 0_w = 0_w$, as $x$ and $y$ are in the null space. Hence any linear combination of $x$ and $y$ in the null space is in the null space. So as $N(T) ⊆ V$ by definition, $N(T)$ is a subspace of $V$. ∎ + +Let $x,y ∈ R(T)$ and $c ∈ F$. As $T(0_v) = 0_w$, $0_w ∈ R(T)$. Then there exist $v,w ∈ V$ such that $T(v) = x$ and $T(w) = y$. So $T(v + cw) = T(v) + cT(w) = x + cy$. Hence any linear combination of $x$ and $y$ is in the image. So as $R(T) ⊆ W$ by definition, $R(T)$ is a subspace of $W$. ∎ + +
+ +Theorem: If $β = \\{ v_1, v_2, ... v_n \\}$ is a basis for $V$, then $R(T) = span(\\{ T(v_1), T(v_2), ..., T(v_n) \\})$. +
+Proof
+
+Clearly, $T(v_i) ∈ R(T)$ for all $i$. As $R(T)$ is a subspace of $W$ by the previous theorem, $R(T)$ *contains* $span(\\{ T(v_1), T(v_2), ..., T(v_n) \\}) = span(T(β))$.
+
+Now consider an arbitrary $w ∈ R(T)$. By definition, there exists a $v ∈ V$ such that $w = T(v)$. As $β$ is a basis for $V$, we can consider $v$ as a linear combination of some basis vectors in $β$. As $T$ is linear, it thus must be the case that $T(v) ∈ span(T(β))$. So $R(T) = span(T(β))$. ∎
+
+ +For a finite-dimensional $N(T)$ and $R(T)$: the **nullity** and **rank** of $T$ are the dimensions of $N(T)$ and $R(T)$, respectively. + +**Rank-Nullity Theorem**: If $V$ is *finite-dimensional*, then $dim(V) = nullity(T) + rank(T)$. +
+Proof
+
+Let $dim(V) = n$, let $nullity(T) = k$, and let $β_N = \\{v_1, v_2, ..., v_k\\}$ be a basis for $N(T)$. As $N(T) ⊆ V$, we may extend $β_N$ to a basis $β = \\{v_1, v_2, ..., v_n\\}$ for $V$.
+
+We assert that $S = \\{T(v_{k+1}), T(v_{k+2}), ..., T(v_n) \\}$ is a basis for $R(T)$. We must prove this.
+
+...
+
+So $S$ is linearly independent. As it contains $n-k$ linearly independent vectors in $R(T)$, it is a basis for $R(T)$.
+
+ +Recall that a *function* definitionally maps *each* element of its domain to *exactly* one element of its codomain. +- A function is **injective** (or **one-to-one**) iff each element of its domain maps to a *distinct* element of its codomain, that is, $f(x) = f(y) → x = y$. +- A function is **surjective** (or **onto**) iff each element of the codomain is mapped to by *at least* one element in the domain, that is, $R(T) = W$. +- A function is **bijective** iff it is surjective and injective. Necessarily, a bijective function is invertible, which will be formally stated & proven later. + +Theorem: $T$ is injective iff $N(T) = \\{0\\}$. +
+Proof + +Suppose that $T$ is injective. Consider some $x ∈ N(T)$. Then $T(x) = 0 = T(0)$. As $T$ is injective, $x = 0$, and so $N(T) = \\{0\\}$. ∎ + +Now suppose that $N(T) = \\{0\\}$. Consider some $T(x) = T(y)$. Then $T(x) - T(y) = T(x-y) = 0$. So $x-y ∈ N(T) = \\{0\\}$, and so $x-y = 0$ and $x = y$. So $T$ is injective. ∎ + +
+ +Theorem: For $V$ and $W$ of equal (and finite) dimension: $T$ is injective iff it is surjective. +
+Proof + +We have that $T$ is injective iff $N(T) = \\{0\\}$ and thus iff $nullity(T) = 0$. So $T$ is injective iff $rank(T) = dim(R(T)) = dim(W)$, from the Rank-Nullity Theorem. $dim(R(T)) = dim(W)$ is equivalent to $R(T) = W$, which is the definition of surjectivity. So $T$ is injective iff it is surjective. ∎ + +
+ +Theorem: Suppose that $V$ has a finite basis $\\{ v_1, v_2, ..., v_n \\}$. For any vectors $w_1, w_2, ... w_n$ in $W$, there exists *exactly* one linear transformation such that $T(v_i) = w_i$ for $i = 1, 2, ..., n$. +
+Proof +... +
+ +Theorem: Suppose that $V$ has a finite basis $\\{ v_1, v_2, ..., v_n \\}$. If $U, T : V → W$ are linear and $U(v_i) = T(v_i)$ for all $i$, then $U = T$. +
+Proof +... +
+ +## Composition of Linear Transformations + +Let $V$, $W$, and $Z$ be vector spaces. + +Theorem: Let $T, U : V → W$ be linear. Then their sum and scalar products are linear. +
+Proof +... +
+
+Theorem: The set of all linear transformations $V → W$ (via the definitions of addition and scalar multiplication below) forms a vector space over $F$. We denote this as $\mathcal{L}(V, W)$.
+- If $V = W$, we write $\mathcal{L}(V)$.
+Proof +... +
+ +Let $T, U : V → W$ be arbitrary functions. We define **addition** $T + U : V → W$ as $∀x ∈ V : (T + U)(x) = T(x) + U(x)$, and **scalar multiplication** $aT : V → W$ as $∀x ∈ V : (aT)(x) = aT(x)$ for all $a ∈ F$. + +Theorem: Let $T : V → W$ and $U : W → Z$ be linear. Then their composition $UT : V → Z$ is linear. +
+Proof +Let $x,y ∈ V$ and $c ∈ F$. Then: + +$$UT(cx + y)$$ + +$$= U(T(cx + y)) = U(cT(x) + T(y))$$ + +$$= cU(T(x)) + U(T(y)) = c(UT)(x) + UT(y)$$ + +
+ +Theorem: Let $T, U_1, U_2 ∈ \mathcal{L}(V)$, and $a ∈ F$. Then: +- $T(U_1 + U_2) = TU_1 + TU_2$ +- $(U_1 + U_2)T = U_1 T + U_2 T$ +- $T(U_1 U_2) = (TU_1) U_2$ +- $TI = IT = T$ +- $a(U_1 U_2) = (aU_1) U_2 = U_1 (aU_2)$ +
+Proof +... + +
+ +## Invertibility and Isomorphism + +- Let $V$ and $W$ be vector spaces. +- Let $T: V → W$ be a linear transformation. +- Let $I_V: V → V$ and $I_W: W → W$ denote the identity transformations within $V$ and $W$, respectively. + +A function $U: W → V$ is an **inverse** of $T$ if $TU = I_W$ and $UT = I_V$.
+If $T$ has an inverse, then $T$ is **invertible**.
+
+Theorem: Consider a linear function $T: V → W$.
+- If $T$ is invertible, it has a *unique* inverse $T^{-1}$.
+- If $T$ is invertible, $T^{-1}$ is invertible with the inverse $T$.
+- A function is invertible if and only if it is bijective.
+Proof +... +
+ +Theorem: If $T$ is linear and invertible, $T^{-1}$ is linear and invertible. +
+Proof + +Let $y_1, y_2 ∈ W$ and $c ∈ F$. As $T$ is bijective, there exist unique vectors $x_1$ and $x_2$ such that $T(x_1) = y_1$ and $T(x_2) = y_2$. So $x_1 = T^{-1}(y_1)$ and $x_2 = T^{-1}(y_2)$. So: + +$$T^{-1}(y_1 + cy_2) = T^{-1}[T(x_1) + cT(x_2)] = T^{-1}[T(x_1 + cx_2)] = x_1 + cx_2 = T^{-1}(y_1) + cT^{-1}(y_2)$$ + +Thus $T^{-1}$ is linear. We may see it is invertible by definition. ∎ + +
+ +$V$ is **isomorphic** to $W$ if there exists an *invertible* linear $T : V → W$ (an **isomorphism**). + +Lemma: For finite-dimensional $V$ and $W$: If $T: V → W$ is invertible, then $dim(V) = dim(W)$. +
+Proof + +As $T$ is invertible, it is bijective. So $nullity(T) = 0$, $rank(T) = dim(R(T)) = dim(W)$. So by the Rank-Nullity Theorem $dim(V) = dim(W)$. ∎ + +
+ +Theorem: $V$ is isomorphic to $W$ iff $dim(V) = dim(W)$. Subsequently: +- $V$ is isomorphic to $F^n$ iff $dim(V) = n$. +
+Proof + +Suppose that $V$ is isomorphic to $W$ and that $T : V → W$ is an isomorphism. As $T$ is an isomorphism, it is invertible, and so by our earlier lemma $dim(V) = dim(W)$. + +Now suppose that $dim(V) = dim(W)$. Let $β = \\{v_1, v_2, ..., v_n\\}$ and $γ = \\{w_1, w_2, ..., w_n\\}$ be bases for $V$ and $W$, respectively. Then as $V$ and $W$ are of equal dimension there must exist $T : V → W$ such that $T$ is linear and $T(v_i) = w_i$ for all $i$. Thus $R(T) = span(T(β)) = span(γ) = W$, and $T$ is surjective. As $V$ and $W$ are of equal dimension, $T$ is also injective. So $T$ is bijective, and so $T$ is an isomorphism. ∎ + +
+
+## Linear Transformations as Matrices
+
+- Let $V, W$ be finite-dimensional vector spaces (over a field $F$).
+- Let $T, U : V → W$ be linear transformations from $V$ to $W$.
+- Let $β = \\{v_1, v_2, ..., v_n\\}$ and $γ = \\{w_1, w_2, ..., w_m\\}$ be ordered bases of $V$ and $W$, respectively.
+- Let $a ∈ F$ be a scalar.
+
+An **ordered basis** of a finite-dimensional vector space $V$ is, well, an ordered basis of $V$. We represent this with exactly the same notation as a standard unordered basis, but will call attention to it whenever necessary.
+- For the vector space $F^n$ we call $\\{ e_1, e_2, ..., e_n \\}$ the **standard ordered basis** for $F^n$.
+- For the vector space $P_n(F)$ we call $\\{ 1, x, ..., x^n \\}$ the **standard ordered basis** for $P_n(F)$.
+
+For each $x ∈ V$, let $a_1, a_2, ... a_n$ be the unique scalars such that $x = Σ_{i=1}^n a_i v_i$. The **coordinate vector** of $x$ relative to $β$ is the column vector $\begin{pmatrix} a_1 \\ a_2 \\ ... \\ a_n \end{pmatrix}$, denoted $[x]_β$.
+
+For each $j$, let $a_{1j}, a_{2j}, ..., a_{mj}$ be the unique scalars such that $T(v_j) = Σ_{i=1}^m a_{ij} w_i$. The $m × n$ matrix $A$ defined by $A_{ij} = a_{ij}$ is called the **matrix representation of $T$ in the ordered bases $β$ and $γ$**, and denoted as $A = [T]_β^γ$. If $V = W$ and $β = γ$, we write $A = [T]_β$.
+
+Theorem: $[T + U]_β^γ = [T]_β^γ + [U]_β^γ$ and $[aT]_β^γ = a[T]_β^γ$.
+Proof +... +
+
+---
+
+Let $A$ be an $n × n$ matrix. $A$ is **invertible** iff there exists an $n × n$ matrix $B$ such that $AB = BA = I$.
+
+Theorem: If $A$ is invertible, the matrix $B$ is unique, and denoted $A^{-1}$.
+Proof +Suppose there existed another inverse matrix $C$. Then $C = CI = C(AB) = (CA)B = IB = B$. +
+
+Theorem: For finite-dimensional $V$ and $W$: $T$ is invertible iff $[T]_β^γ$ is invertible, and $[T^{-1}]_γ^β = ([T]_β^γ)^{-1}$. Subsequently:
+- Let $U : V → V$ be linear. $U$ is invertible iff $[U]_β$ is invertible, and $[U^{-1}]_β = ([U]_β)^{-1}$.
+- Let $A$ be an $n × n$ matrix. $A$ is invertible iff $L_A$ is invertible, and $(L_A)^{-1} = L_{A^{-1}}$.
+Proof +... +
+ +Theorem: Let $V$ and $W$ be of finite dimensions $n$ and $m$ with ordered finite bases $β$ and $γ$, respectively. The function $Φ : \mathcal{L}(V,W) → M_{m×n}(F)$ where $Φ(T) = [T]_β^γ$ for all $T ∈ \mathcal{L}(V,W)$ is an isomorphism. +That is, the set (really *vector space*) of all linear transformations between two vector spaces $V$ and $W$ itself is isomorphic to the vector space of all $m × n$ matrices. Subsequently: +- For $V$ and $W$ of finite dimensions $n$ and $m$, $\mathcal{L}(V,W)$ is of finite dimension $mn$. +
+Proof +... +
+ +The **standard representation** of an $n$-dimensional $V$ with respect to its ordered basis $β$ is the function $ϕ_β : V → F^n$ where $ϕ_β(x) = [x]_β$ for all $x ∈ V$. + +Theorem: For any $V$ with ordered basis $β$, $ϕ_β$ is an isomorphism. +
+Proof +... +
+
+## The Change of Coordinate Matrix
+
+- Let $V$ be a finite-dimensional vector space (over a field $F$).
+- Let $β$ and $β'$ be two ordered bases for $V$.
+- Let $T$ be a linear operator on $V$.
+
+Theorem: Let $Q = [I_V]_{β'}^β$. Then $Q$ is invertible, and for any $v ∈ V$, $[v]_β = Q[v]_{β'}$.
+Proof +... +
+ +We call such a matrix $Q = [I_V]_{β'}^β$ a **change of coordinate matrix** and say that $Q$ **changes β'-coordinates into β-coordinates**. + +Let $Q = [I_V]_{β'}^β$. + +Theorem: $Q^{-1}$ changes β-coordinates into β'-coordinates. +
+Proof +... +
+ +Theorem: $[T]_{β'} = Q^{-1} [T]_β Q$ +
+Proof
+...
+ +## Dual Spaces + +The **dual space** of a vector space $V$ is the vector space $\mathcal{L}(V,F)$ and is denoted by $V^*$. + +- Let $V, W$ be finite-dimensional vector spaces (over a field $F$). +- Let $T, U : V → W$ be linear transformations from $V$ to $W$. +- Let $β$ and $γ$ be ordered bases of $V$ and $W$, respectively. + +Theorem: For a finite-dimensional $V$: $dim(V^*) = dim(\mathcal{L}(V,F)) = dim(V) ∙ dim(F) = dim(V)$. + +Corollary: $V$ and $V^*$ are isomorphic. +
+Proof +$dim(V) = dim(V^*)$ and so they are isomorphic. +
+
+The **dual basis** of a basis $β$ (of a vector space $V$) is the ordered basis $β^* = \\{ f_1, f_2, ..., f_n \\}$ of $V^*$ that satisfies the $i$th coordinate function $f_i(x_j) = δ_{ij} (1 ≤ i, j ≤ n)$. Recall that $δ_{ij} = 1$ if $i=j$, and $0$ otherwise.
+
+Theorem: Let $β = \\{ x_1, x_2, ..., x_n \\}$. Let $f_i (1 ≤ i ≤ n)$ be the $i$th coordinate function wrt. $β$, and let $β^* = \\{ f_1, f_2, ..., f_n \\}$. Then $β^*$ is an ordered basis for $V^*$, and for any $f ∈ V^*$, we have $f = \sum_{i=1}^n f(x_i) f_i$.
+
+Theorem: For any linear transformation $T : V → W$, the mapping $T^t : W^\* → V^\*$ defined by $T^t(g) = gT$ for all $g ∈ W^\*$ is a linear transformation, and $[T^t]_{γ^\*}^{β^\*} = ([T]_β^γ)^t$
+Proof +... +
+
+
+## Systems of Linear Equations
+
+This section is mostly review.
+
+---
+
+## Determinants, summarized
+
+Determinants are important for future sections. We state facts here without proof.
+
+Let $A$ be a matrix containing entries from a field $F$.
+
+The **determinant** of an $n×n$ (square) matrix $A$ is a scalar in $F$, and denoted $\|A\|$. The determinant is calculated as follows:
+- For a $1 × 1$ matrix $A = [a]$, $\|A\| = a$
+- For a $2 × 2$ matrix $A = \begin{bmatrix} a & b \\\\ c & d \end{bmatrix}$, $\|A\| = ad - bc$
+- For an $n × n$ matrix $A = \begin{bmatrix} a_{1,1} & a_{1,2} & ... & a_{1,n} \\\\ a_{2,1} & a_{2,2} & ... & a_{2,n} \\\\ ⋮ & ⋮ & ⋱ & ⋮ \\\\ a_{n,1} & a_{n,2} & ... & a_{n,n} \end{bmatrix}$, $\|A\| = Σ^n_{i=1} (-1)^{i+j} A_{i,j} \|\tilde{A}_{i,j}\|$ for any fixed column $j$.
+
+The last one deserves some additional exposition: $\tilde{A}_{i,j}$ denotes the $(n-1)×(n-1)$ matrix obtained by deleting row $i$ and column $j$ of $A$. The sum is a *cofactor expansion* along the $j$th column, and every choice of $j$ yields the same value.
+
+The determinant has a number of nice properties that make it of fair interest.
+1. $\|B\| = -\|A\|$ if $B$ is a matrix obtained by exchanging any two rows or columns of $A$
+2. $\|B\| = k\|A\|$ if $B$ is a matrix obtained by multiplying a single row or column of $A$ by some scalar $k$
+3. $\|B\| = \|A\|$ if $B$ is a matrix obtained by adding a multiple of a column or row to a *different* column or row
+4. $\|A\| = 1 $ if $A$ is the identity matrix
+5. $\|A\| = 0 $ if either the rows or columns of $A$ are not linearly independent
+6. $\|AB\| = \|A\|\|B\|$ if $A$ and $B$ are both $n×n$ matrices
+7. $\|A\| ≠ 0 $ iff $A$ is invertible
+8. $\|A\| = \|A^t\|$
+9. $\|A\| = \|B\|$ if $A$ and $B$ are *similar*
+
+Thus, we can say that the determinant *characterizes* square matrices (and thus linear operations), somewhat. It is a scalar value with a deep relation to the core identity of the matrix, and changes regularly as the matrix changes.
+
+## Eigenvalues and Eigenvectors
+
+- Let $V$ be a finite-dimensional vector space over a field $F$.
+- Let $T: V → V$ be a linear operator on $V$.
+- Let $β$ be an ordered basis on $V$.
+- Let $A$ be in $M_{n×n}(F)$ (a square $n×n$ matrix with entries in $F$).
+
+$T$ is **diagonalizable** if there exists an ordered basis $β$ for $V$ such that $[T]_β$ is a *diagonal matrix*.
+$A$ is **diagonalizable** if $L_A$ is diagonalizable.
+
+A nonzero vector $v ∈ V$ is an **eigenvector** if $∃λ ∈ F$: $T(v) = λv$.
+The corresponding scalar $λ$ is the **eigenvalue** corresponding to the eigenvector $v$.
+A nonzero vector $v ∈ F^n$ is an **eigenvector** of $A$ if $v$ is an eigenvector of $L_A$ (that is, $∃λ ∈ F$: $Av = λv$)
+The corresponding scalar $λ$ is the **eigenvalue** of $A$ corresponding to the eigenvector $v$.
+
+The terms *characteristic vector* and *proper vector* are also used in place of *eigenvector*.
+The terms *characteristic value* and *proper value* are also used in place of *eigenvalue*.
+
+Theorem: $T: V → V$ is diagonalizable if and only if $V$ has an ordered basis $β$ consisting of eigenvectors of $T$.
+Proof +... +
+
+Corollary: If $T$ is diagonalizable, and $β = \\{v_1, v_2, ..., v_n\\}$ is an ordered basis of eigenvectors of $T$, and $D = \[T\]_β$, then $D$ is a diagonal matrix with $D_{i,i}$ being the eigenvalue corresponding to $v_i$ for any $i ≤ n$.
+Proof +... +
+ +To *diagonalize* a matrix (or a linear operator) is to find a basis of eigenvectors and the corresponding eigenvalues. + +Theorem: A scalar $λ$ is an eigenvalue of $A$ if and only if $\|A - λI_n\| = 0$, that is, the eigenvalues of a matrix are exactly the zeros of its characteristic polynomial. +
+Proof
+A scalar $λ$ is an eigenvalue of $A$ iff $∃v ≠ 0 ∈ F^n$: $Av = λv$, that is, $(A - λI_n)(v) = 0$.
+This has a nonzero solution iff $A - λI_n$ is not invertible, which holds iff $\|A - λI_n\| = 0$. ∎
+ +The **characteristic polynomial** of $A$ is the polynomial $f(t) = \|A - tI_n\|$. +The **characteristic polynomial** of $T$ is the characteristic polynomial of $[T]_β$, often denoted $f(t) = \|T - tI\|$. + +Theorem: The characteristic polynomial of $A$ is a polynomial of degree $n$ with leading coefficient $(-1)^n$. +
+Proof +... +
+ +Corollary: $A$ has at most $n$ distinct eigenvalues. +
+Proof +... +
+ +Theorem: A vector $v ∈ V$ is an eigenvector of $T$ corresponding to an eigenvalue $λ$ iff $v ∈ N(T - λI)$ and $v ≠ 0$. +
+Proof +... +
+ + + +## Diagonalizability + +- Let $V$ be a finite-dimensional vector space over a field $F$. +- Let $T: V → V$ be a linear operator on $V$. +- Let $A$ be in $M_{n×n}(F)$ (a square $n×n$ matrix with entries in $F$). +- Let $λ$ be an eigenvalue of $T$. +- Let $λ_1, λ_2, ..., λ_k$ be distinct eigenvalues of $T$. + +Theorem: Let $λ_1, λ_2, ..., λ_k$ be distinct eigenvalues of $T$. If $v_1, v_2, ..., v_k$ are eigenvectors of $T$ such that $λ_i$ corresponds to $v_i$ (for all $i ≤ k$), then $\\{v_1, v_2, ..., v_k\\}$ is linearly independent. In fewer words, eigenvectors with distinct eigenvalues are all linearly independent from one another. +
+Proof +... +
+ + +Corollary: If $T$ has $n$ distinct eigenvalues, where $n = dim(V)$, then $T$ is diagonalizable. +
+Proof +... +
+ +A polynomial $f(t) ∈ P(F)$ **splits over** $F$ if there are scalars $c, a_1, a_2, ..., a_n ∈ F$: $f(t) = c(a_1 - t)(a_2 - t)...(a_n - t)$. + +Theorem: The characteristic polynomial of any diagonalizable linear operator splits. +
+Proof +... +
+
+Let $λ$ be an eigenvalue of a linear operator or matrix with characteristic polynomial $f(t)$. The **(algebraic) multiplicity** of $λ$ is the largest positive integer $k$ for which $(t-λ)^k$ is a factor of $f(t)$.
+
+Let $λ$ be an eigenvalue of $T$. The set $E_λ = \\{x ∈ V: T(x) = λx\\} = N(T - λI_V)$ is called the **eigenspace** of $T$ with respect to the eigenvalue $λ$. Similarly, the **eigenspace** of $A$ is the eigenspace of $L_A$.
+
+Theorem: If $λ$ is an eigenvalue of $T$ with multiplicity $m$, then $1 ≤ dim(E_λ) ≤ m$.
+Proof +... +
+ +Lemma: Let $S_i$ be a finite linearly independent subset of the eigenspace $E_{λ_i}$ (for all $i ≤ k$). Then $S = S_1 ∪ S_2 ∪ ... ∪ S_k$ is a linearly independent subset of $V$. +
+Proof +... +
+ +Theorem: If the characteristic polynomial of $T$ splits, then $T$ is diagonalizable iff the multiplicity of $λ_i$ is equal to $dim(E_{λ_i})$ (for all $i ≤ k$). +
+Proof +... +
+ +Corollary: If the characteristic polynomial of $T$ splits, $T$ is diagonalizable, $β_i$ is an ordered basis for $E_{λ_i}$ (for all $i ≤ k$), then $β = β_1 ∪ β_2 ∪ ... ∪ β_k$ is an ordered basis for $V$ consisting of eigenvectors of $T$. +
+Proof +... +
+
+$T$ is diagonalizable iff both of the following conditions hold:
+1. The characteristic polynomial of $T$ splits.
+2. For each eigenvalue $λ$ of $T$, the multiplicity of $λ$ equals $n - rank(T-λI)$, where $n = dim(V)$.
+
+## Direct Sums
+
+- Let $V$ be a finite-dimensional vector space over a field $F$.
+- Let $T: V → V$ be a linear operator on $V$.
+- Let $W_1, W_2, ..., W_k$ be subspaces of $V$.
+
+The **sum** of some subspaces $W_i$ (for $1 ≤ i ≤ k$) is the set $\\{v_1 + v_2 + ... + v_k : v_i ∈ W_i \\}$, denoted $W_1 + W_2 + ... + W_k$ or $Σ^k_{i=1} W_i$.
+
+The subspaces $W_i$ (for $1 ≤ i ≤ k$) form a **direct sum** of $V$, denoted $W_1 ⊕ W_2 ⊕ ... ⊕ W_k$, if $V = Σ^k_{i=1} W_i$ and $W_j ∩ Σ_{i≠j} W_i = \\{0\\}$ for all $j ≤ k$.
+
+Theorem: The following conditions are equivalent:
+1. $V = W_1 ⊕ W_2 ⊕ ... ⊕ W_k$.
+2. $V = Σ^k_{i=1} W_i$ and, whenever $v_1 + v_2 + ... + v_k = 0$ with $v_i ∈ W_i$, we have $v_i = 0$ for all $i$.
+3. Every vector $v ∈ V$ can be uniquely written as $v = v_1 + v_2 + ... + v_k$ where $v_i ∈ W_i$.
+4. If $γ_i$ is an ordered basis for $W_i$ (for $1 ≤ i ≤ k$), then $γ_1 ∪ γ_2 ∪ ... ∪ γ_k$ is an ordered basis for $V$.
+5. There exists an ordered basis $γ_i$ for $W_i$ for every $1 ≤ i ≤ k$ such that $γ_1 ∪ γ_2 ∪ ... ∪ γ_k$ is an ordered basis for $V$.
+Proof +... +
+ +Theorem: $T: V → V$ is diagonalizable if and only if $V$ is the direct sum of the eigenspaces of $T$. +
+Proof +... +
diff --git a/math/logic.md b/math/logic.md new file mode 100644 index 0000000..b07e867 --- /dev/null +++ b/math/logic.md @@ -0,0 +1,154 @@ +--- +layout: foundations +title: mathematics/logic +--- + +# logic, mathematics, & that around + in between + +## kinds of logic + +there are different types of logic! + +some such systems: +- classical logic +- intuitionistic logic +- paraconsistent logic + +and many others. + +## orders of logic + +### propositional logic + +[**propositional logic**](https://ncatlab.org/nlab/show/propositional+logic) or **zeroth-order logic** deals with raw *propositions*. +**propositions** are statements that *reduce* to a **truth value**. +truth values are classically either true or false. in non-classical logics, this can differ. + +the basic foundations of propositional logic are as follows: + +notation | definition +---------|-------------- +0 | *false* +1 | *true* +p | a *proposition* +¬p | *not* p +p → q | *if* p *then* q, p *implies* q + +several logical connectives are *derivable* from the above: + +notation | derivation | definition +------|---------------------|---- +p ∨ q | ¬p → q | p *or* q, *disjunction* +p ∧ q | ¬(p → ¬q) | p *and* q, *conjunction* +p → q | ¬p ∨ q | p *implies* q, (material) *implication* +p ↔ q | (p → q) ∧ (q → p) | p *if and only if* q, p *iff* q +p ⊕ q | (p ∨ q) ∧ ¬(p ∧ q) | p *exclusively or* q, p *xor* q +p ↑ q | ¬(p ∧ q) | p *not both* q, p *nand* q +p ↓ q | ¬(p ∨ q) | *neither* p *nor* q, p *nor* q + +note that several of these definitions are circular. +our choice in ¬ and → as the primitive connectives is thus arbitrary. +interestingly, ↑ and ↓ are *functionally complete*: we may define all other connectives in terms of them. + +
+aside: nand and nor
+
+notation | definition
+---------|-----------
+¬p | p ↑ p
+p → q | p ↑ ¬q
+p ∨ q | ¬p ↑ ¬q
+p ∧ q | (p ↑ q) ↑ (p ↑ q)
+p ↔ q | (p ↑ q) ↑ (p ∨ q)
+
+notation | definition
+---------|-----------
+¬p | p ↓ p
+p → q | (¬p ↓ q) ↓ (¬p ↓ q)
+p ∨ q | (p ↓ q) ↓ (p ↓ q)
+p ∧ q | ¬p ↓ ¬q
+p ↔ q | ((p ↓ q) ↓ (¬p ↓ ¬q)) ↓ ((p ↓ q) ↓ (¬p ↓ ¬q))
+
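+
+these derivations are easy to verify mechanically; a small sketch brute-forcing every truth assignment:
+
+```python
+# Truth-table check of the nand/nor derivations above.
+from itertools import product
+
+nand = lambda p, q: not (p and q)
+nor = lambda p, q: not (p or q)
+
+for p, q in product([False, True], repeat=2):
+    assert nand(p, p) == (not p)                      # ¬p = p ↑ p
+    assert nand(p, nand(q, q)) == ((not p) or q)      # p → q = p ↑ ¬q
+    assert nand(nand(p, q), nand(p, q)) == (p and q)  # p ∧ q
+    assert nor(nor(p, q), nor(p, q)) == (p or q)      # p ∨ q
+    x = nor(nor(p, q), nor(nor(p, p), nor(q, q)))     # (p↓q) ↓ (¬p↓¬q) = p ⊕ q
+    assert nor(x, x) == (p == q)                      # p ↔ q = ¬(p ⊕ q)
+```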
+
+### [predicate logic](https://ncatlab.org/nlab/show/predicate+logic)
+
+**predicate logic** or **first-order logic** adds variables and quantifiers to propositions:
+* ∀x: for all x
+* ∃y: there exists a y
+
+these additions are enough to serve as a foundation of mathematics...
+
+## philosophy of mathematics
+
+what mathematics *is* is not universally agreed upon.
+
+there are two primary
+
+logicism is widely regarded as correct
+
+these philosophies
+
+there are others, but none that i am interested in.
+
+## foundations of mathematics
+
+### set theory
+
+there have been a number of set theories, of varying consistency
+
+naive set theory was shown inconsistent
+
+### type theory
+
+*type theories* can serve as an alternative (equivalent) foundation of mathematics.
+
+but what is the point in fashioning equivalent foundations when standard ZFC works just fine?
+
+## the curry-howard correspondence
+
+the **curry-howard correspondence** or *propositions-as-types*
+
+symbol | logic | types
+---|-------------|--------------
+⊤ | true | top type
+⊥ | false | bottom type
+→ | implication | function type
+∧ | conjunction | product type
+∨ | disjunction | sum type
+∀ | universal | dependent product type
+∃ | existential | dependent sum type
+¬ | negation | many such representations
+
+a couple of other notes.
+programs as proofs comes into play when
+you MUST have a constructed thing with a type
+
+no negation??
+don't fully understand the distinction between dependent and nondependent types
+implication over types?
+
+## [principle of explosion](https://en.wikipedia.org/wiki/Principle_of_explosion)
+
+what's wrong with inconsistency, anyway?
+
+it may or may not be intuitive that a contradiction can lead to further contradictions.
+either way, an example can be helpful.
+the following logic proves any arbitrary statement B:
+
+```logic
+A = ⊤ and ¬A = ⊤     (assumed contradiction)
+A ∨ B = ⊤            (from A, by disjunction introduction)
+A = ⊥                (from ¬A)
+B = ⊤                (from A ∨ B and A = ⊥, by disjunctive syllogism)
+```
diff --git a/math/number-theory.md b/math/number-theory.md
new file mode 100644
index 0000000..47e63e7
--- /dev/null
+++ b/math/number-theory.md
@@ -0,0 +1,441 @@
+---
+layout: algebra
+title: mathematics/number theory
+---
+
+# MATH 312: Number Theory
+
+## Chapter 1: The Integers
+
+## Chapter 2: Greatest Common Divisors and Prime Factorization
+
+## Greatest Common Divisors
+
+**Definition**: The **greatest common divisor** of two integers $a$ and $b$ not both zero is the largest integer which divides both $a$ and $b$. It is written as $(a,b)$.
+
+**Definition**: The integers $a$ and $b$ are called **relatively prime** if $a$ and $b$ have greatest common divisor $(a,b) = 1$.
+
+**Proposition**: Let $a,b,c$ be integers with $(a,b) = d$. Then
+- $(a/d, b/d) = 1$
+- $(a+cb, b) = (a,b)$.
+
+**Definition**: If $a,b$ are integers, then a **linear combination** of $a$ and $b$ is a sum of the form $ma + nb$, where $m,n$ are integers.
+
+**Theorem**: The greatest common divisor of the integers $a,b$ not both zero is the least positive integer that is a linear combination of $a$ and $b$.
+
+**Definition**: Let $a_1, a_2, ... a_n$ be integers not all zero. The **greatest common divisor** of those integers is the largest integer which is a divisor of all of the integers in the set. It is written as $(a_1, a_2, ..., a_n)$.
+
+**Lemma**: If $a_1, a_2, ... a_n$ are integers not all zero, then $(a_1, a_2, ..., a_{n-1}, a_n)$ = $(a_1, a_2, ..., (a_{n-1}, a_n))$.
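+
+The lemma justifies computing the gcd of a list by folding the two-argument gcd over it; a quick sketch (the numbers are arbitrary):
+
+```python
+# gcd of several integers via the recursive lemma above.
+from functools import reduce
+from math import gcd
+
+print(reduce(gcd, [24, 36, 60]))  # 12, since (24, 36, 60) = (24, (36, 60)) = (24, 12)
+```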
+
+**Definition**: We say that integers $a_1, a_2, ..., a_n$ are **mutually relatively prime** if their greatest common divisor is 1. These integers are **pairwise relatively prime** if for each pair of integers their greatest common divisor is 1.
+
+## The Euclidean Algorithm
+
+**The Euclidean Algorithm**: Let $r_0 = a$ and $r_1 = b$ be nonnegative integers with $b ≠ 0$. If the division algorithm is successively applied to obtain $r_j = r_{j+1} q_{j+1} + r_{j+2}$ with $0 < r_{j+2} < r_{j+1}$ for $j = 0,1,2,...,n-1$ and $r_n = 0$, then $(a,b) = r_{n-1}$, the last nonzero remainder.
+
+**Lemma**: If $c$ and $d$ are integers and $c = dq + r$, then $(c,d) = (d,r)$.
+
+**Definition**: The **Fibonacci numbers**... todo
+
+**Theorem**: Let $n$ be a positive integer and let $α = (1 + \sqrt{5})/2$. Then $u_n > α^{n-2}$ for $n ≥ 3$.
+
+**Theorem**: todo
+
+**Lame's Theorem**: The number of divisions needed to find the greatest common divisor of two positive integers using the Euclidean algorithm does not exceed five times the number of digits in the smaller of the two integers.
+
+**Corollary**: The number of bit operations needed to find the greatest common divisor of two positive integers $a$ and $b$ with $b > a$ is $O((log_2 a)^3)$.
+
+**Theorem**: Let $a$ and $b$ be positive integers. Then $(a,b) = s_n a + t_n b$ for $n = 0,1,2,...$ where $s_n$ and $t_n$ are the $n$th terms of the sequences defined by $s_0 = 1, s_1 = 0, t_0 = 0, t_1 = 1$ and $s_j = s_{j-2} - q_{j-1} s_{j-1}$, $t_j = t_{j-2} - q_{j-1} t_{j-1}$ for $j = 2,3,...,n$, where the $q_j$ are the quotients in the divisions of the Euclidean algorithm when it is used to find $(a,b)$.
+
+## The Fundamental Theorem of Arithmetic
+
+**The Fundamental Theorem of Arithmetic**: Every positive integer can be written uniquely as a product of primes, in order of nondecreasing size.
+
+**Lemma**: If $a,b,c$ are positive integers such that $(a,b) = 1$ and $a | bc$, then $a | c$.
+
+**Corollary**: If $p$ divides $a_1 a_2 ... a_n$ where $p$ is a prime and $a_1, a_2, ..., a_n$ are positive integers, then there is an integer $i$ with $1 ≤ i ≤ n$ such that $p$ divides $a_i$.
+
+**Definition**: The **least common multiple** of two positive integers $a$ and $b$ is the smallest positive integer that is divisible by both $a$ and $b$. It is written as $[a,b]$.
+
+**Lemma**: If $x,y$ are real numbers, then $max(x,y) + min(x,y) = x+y$.
+
+**Theorem**: If $a,b$ are positive integers, then $[a,b] = ab/(a,b)$.
+
+**Lemma**: Let $m,n$ be relatively prime positive integers. Then if $d$ is a positive divisor of $mn$, there is a pair of unique positive divisors $d_1$ of $m$ and $d_2$ of $n$ such that $d = d_1 d_2$. Conversely, if $d_1$ and $d_2$ are positive divisors of $m$ and $n$ respectively, then $d = d_1 d_2$ is a positive divisor of $mn$.
+
+**Dirichlet's Theorem on Primes in Arithmetic Progressions**: Let $a,b$ be relatively prime positive integers. Then the arithmetic progression $an + b$, $n = 1,2,3,...$ contains infinitely many primes.
+
+**Proposition**: There are infinitely many primes of the form $4n + 3$, where $n$ is a positive integer.
+
+**Lemma**: If $a$ and $b$ are integers both of the form $4n + 1$, then the product $ab$ is also of this form.
+
+## Factorization of Integers and the Fermat Numbers
+
+**Lemma**: If $n$ is an odd positive integer, then there is a one-to-one correspondence between factorizations of $n$ into two positive integers and differences of two squares that equal $n$.
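+
+This correspondence ($n = ab$ iff $n = t^2 - s^2$ with $t = (a+b)/2$, $s = (b-a)/2$) underlies *Fermat factorization*; a minimal sketch (the function name is mine, for illustration):
+
+```python
+# Fermat factorization of an odd integer n: search for t with t^2 - n a square.
+from math import isqrt
+
+def fermat_factor(n):
+    t = isqrt(n)
+    if t * t < n:
+        t += 1
+    while True:
+        s2 = t * t - n
+        s = isqrt(s2)
+        if s * s == s2:
+            return t - s, t + s  # n = (t - s)(t + s)
+        t += 1
+
+print(fermat_factor(5959))  # (59, 101)
+```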
+
+**Proof**: The Fermat number $F_5 = 2^{2^5} + 1$ is divisible by 641.
+$641 = 5⋅2^7 + 1 = 2^4 + 5^4$. So $2^{2^5} + 1 = 2^{32} + 1 = 2^4 ⋅ 2^{28} + 1 = (641 - 5^4)2^{28} + 1$. Subsequently, we can rearrange this to pull out 641.
+
+**Proposition**: Every prime divisor of the Fermat number $F_n = 2^{2^n} + 1$ is of the form $2^{n+2}k + 1$.
+
+**Lemma**: Let $F_k = 2^{2^k} + 1$ denote the $k$th Fermat number, where $k$ is a nonnegative integer. Then for all positive integers $n$, we have $F_0 F_1 F_2 ... F_{n-1} = F_n - 2$.
+
+**Theorem**: Let $m,n$ be distinct nonnegative integers. Then the Fermat numbers $F_m$ and $F_n$ are relatively prime.
+
+**Theorem**: A regular polygon of $n$ sides can be constructed using a ruler and compass if and only if $n$ is of the form $n = 2^a p_1 ... p_t$ where the $p_i$, $i = 1, 2, ..., t$ are distinct Fermat primes and $a$ is a nonnegative integer.
+
+## Linear Diophantine Equations
+
+**Theorem**: Let $a$ and $b$ be positive integers with $d = (a,b)$. The equation $ax + by = c$ has no integral solutions if $d ∤ c$. If $d | c$, then there are infinitely many integral solutions. If $x = x_0$, $y = y_0$ is a particular solution, then all solutions are given by $x = x_0 + (b/d) n$, $y = y_0 - (a/d) n$ for some integer $n$.
+
+## Chapter 3: Congruences
+
+## Introduction to Congruences
+
+**Definition**: If $a$ and $b$ are integers, we say that $a$ is **congruent to b modulo m** if $m ∣ (a-b)$.
+
+**Proposition**: If $a$ and $b$ are integers, then $a ≡ b$ (mod $m$) if and only if there is an integer $k$ such that $a = b + km$.
+
+**Proposition**: Let $m$ be a positive integer. Congruences modulo $m$ satisfy the following properties:
+- Reflexivity: $a ≡ a$ (mod $m$)
+- Symmetry: $a ≡ b$ (mod $m$) ⟹ $b ≡ a$ (mod $m$)
+- Transitivity: $a ≡ b$ and $b ≡ c$ (mod $m$) ⟹ $a ≡ c$ (mod $m$)
+
+**Definition**: A **complete system of residues modulo m** is a set of integers such that every integer is congruent modulo $m$ to exactly one integer of the set.
+- ex. $\{0, 1, 2, ..., m-1\}$ is a complete set of residues modulo $m$.
+- ex. $\{0, -1, -2, ... -(m-1)\}$ is a complete set of residues modulo $m$.
+
+**Theorem**: If $a, b, c, m$ are integers with $m > 0$ and $a ≡ b$ (mod $m$), then:
+- $a + c ≡ b + c$ (mod $m$)
+- $a - c ≡ b - c$ (mod $m$)
+- $ac ≡ bc$ (mod $m$)
+
+**Theorem**: If $a,b,c,m$ are integers with $m > 0$, $d = (c,m)$, and $ac ≡ bc$ (mod $m$), then $a ≡ b$ (mod $m/d$).
+
+**Theorem**: If $a,b,c,d,m$ are integers with $m > 0$, $a ≡ b$ (mod $m$), and $c ≡ d$ (mod $m$), then
+- $a + c ≡ b + d$ (mod $m$)
+- $a - c ≡ b - d$ (mod $m$)
+- $ac ≡ bd$ (mod $m$)
+
+**Theorem**: If $r_1, r_2, ..., r_m$ is a complete system of residues modulo $m$ and $a$ is a positive integer satisfying $(a,m) = 1$, then $ar_1 + b, ar_2 + b, ..., ar_m + b$ is a complete system of residues modulo $m$ for any integer $b$.
+
+**Theorem**: If $a,b,k,m$ are integers such that $k > 0$, $m > 0$, $a ≡ b$ (mod $m$), then $a^k ≡ b^k$ (mod $m$).
+- Proof: $a ≡ b$ (mod $m$), so $m ∣ (a - b)$. Since we may pull $(a-b)$ out of $a^k - b^k$, $m ∣ (a^k - b^k)$. So $a^k ≡ b^k$ (mod $m$).
+
+**Theorem**: If $a ≡ b$ (mod $m_1$), $a ≡ b$ (mod $m_2$), ..., $a ≡ b$ (mod $m_k$) for integers and positive $m$, then $a ≡ b$ (mod $[m_1,m_2,...,m_k]$) where $[m_1,m_2,...,m_k]$ is the least common multiple of $m_1,m_2,...,m_k$.
+- Proof: $a ≡ b$ (mod $m_1$), $a ≡ b$ (mod $m_2$), ..., $a ≡ b$ (mod $m_k$). So $m_1 ∣ (a-b)$, $m_2 ∣ (a-b)$, ..., $m_k ∣ (a-b)$. Therefore, $[m_1,m_2,...,m_k] ∣ (a-b)$, and so $a ≡ b$ (mod $[m_1,m_2,...,m_k]$).
+
+**Corollary**: If $a ≡ b$ (mod $m_1$), $a ≡ b$ (mod $m_2$), ..., $a ≡ b$ (mod $m_k$) for integers and positive relatively prime $m$, then $a ≡ b$ (mod $m_1 m_2 ... m_k$).
+
+**Proposition**: Let $b,m,N$ be positive integers with $b < m$. Then the least positive residue of $b^N$ modulo $m$ can be computed using $O((log_2 m)^2 log_2 N)$ bit operations.
+
+## Linear Congruences
+
+A congruence of the form $ax ≡ b$ (mod $m$) for an unknown integer $x$ is called a **linear congruence in one variable**.
+
+**Theorem**: Let $a,b,m$ be integers with $m > 0$ and $(a,m) = d$. If $d ∤ b$, then $ax ≡ b$ (mod $m$) has no solutions. If $d | b$, then $ax ≡ b$ (mod $m$) has exactly $d$ incongruent solutions modulo $m$.
+
+**Proposition**: Let $p$ be prime. The positive integer $a$ is its own inverse modulo $p$ if and only if $a ≡ 1$ (mod $p$) or $a ≡ -1$ (mod $p$).
+
+## The Chinese Remainder Theorem
+
+**The Chinese Remainder Theorem**: Let $m_1, m_2, ..., m_r$ be *pairwise relatively prime* positive integers. Then the system of congruences
+- $x ≡ a_1$ (mod $m_1$)
+- $x ≡ a_2$ (mod $m_2$)
+- $...$
+- $x ≡ a_r$ (mod $m_r$)
+
+has a unique solution modulo $M = m_1 m_2 ... m_r$.
+
+**Lemma**: If $a$ and $b$ are positive integers, then the least positive residue of $2^a - 1$ (mod $2^b - 1$) is $2^r - 1$, where $r$ is the least positive residue of $a$ (mod $b$).
+
+**Lemma**: If $a$ and $b$ are positive integers, then the greatest common divisor of $2^a - 1$ and $2^b - 1$ is $2^{(a,b)} - 1$.
+
+**Proposition**: The positive integers $2^a - 1$ and $2^b - 1$ are relatively prime if and only if $a$ and $b$ are relatively prime.
+
+## Systems of Linear Congruences
+
+**Theorem**: Let $a,b,c,d,e,f,m$ be integers with $m > 0$ such that $(Δ, m) = 1$, where $Δ = ad - bc$. Then, the system of congruences
+- $ax + by ≡ e$ (mod $m$)
+- $cx + dy ≡ f$ (mod $m$)
+
+has a unique solution modulo $m$ given by
+- $x ≡ \hat{Δ} (de - bf)$ (mod $m$)
+- $y ≡ \hat{Δ} (af - ce)$ (mod $m$)
+
+where $\hat{Δ}$ is an inverse of $Δ$ modulo $m$.
+
+**Definition**: Let $A$ and $B$ be $n×k$ matrices with integer entries, with $(i,j)$th entries $a_{ij}$ and $b_{ij}$ respectively. We say that **A is congruent to B modulo m** if $a_{ij} ≡ b_{ij}$ (mod $m$) for all pairs $(i,j)$ with $1 ≤ i ≤ n$ and $1 ≤ j ≤ k$. We write $A ≡ B$ (mod $m$) if $A$ is congruent to $B$ (mod $m$).
+
+**Proposition**: If $A$ and $B$ are $n×k$ matrices with $A ≡ B$ (mod $m$), $C$ is a $k×p$ matrix, and $D$ is a $p×n$ matrix, all with integer entries, then $AC ≡ BC$ (mod $m$) and $DA ≡ DB$ (mod $m$).
+
+**Definition**: Definition of $\hat{A}$ as an inverse. todo
+
+**Proposition**: Let $A = \begin{bmatrix} a & b \\ c & d\end{bmatrix}$ be a matrix of integers such that $Δ = det A = ad - bc$ is relatively prime to the positive integer $m$. Then, the matrix $\hat{A} = \hat{Δ} \begin{bmatrix} d & -b \\ -c & a\end{bmatrix}$ is an inverse of $A$ modulo $m$.
+
+**Definition**: The **adjoint** of an $n×n$ matrix $A$ is the $n×n$ matrix with $(i,j)$th entry $C_{ji}$, where $C_{ij}$ is $(-1)^{i+j}$ times the determinant of the matrix obtained by deleting the $i$th row and $j$th column from $A$. The adjoint of $A$ is denoted by $adj(A)$.
+
+**Theorem**: If $A$ is an $n×n$ matrix with $det(A) ≠ 0$, then $A(adj(A)) = (det A) I$.
+
+**Proposition**: If $A$ is an $n×n$ matrix with integer entries and $m$ is a positive integer such that $(det A, m) = 1$, then the matrix $\hat{A} = \hat{Δ}(adj A)$ is an inverse of $A$ modulo $m$.
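+
+A sketch of the $2×2$ case of these propositions in code (the helper name is mine; it assumes $(det A, m) = 1$):
+
+```python
+# Invert a 2x2 integer matrix modulo m via the adjugate, per the proposition.
+def mat_inv_mod_2x2(A, m):
+    (a, b), (c, d) = A
+    det_inv = pow((a * d - b * c) % m, -1, m)  # requires gcd(det A, m) = 1
+    return [[(d * det_inv) % m, (-b * det_inv) % m],
+            [(-c * det_inv) % m, (a * det_inv) % m]]
+
+A = [[4, 1], [19, 1]]
+print(mat_inv_mod_2x2(A, 26))  # [[19, 7], [3, 24]]; A times this is ≡ I (mod 26)
+```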
+
+# Chapter 4: Applications of Congruences
+
+## Divisibility Tests
+
+**Divisibility Test**: If $d|b$ and $j,k$ are positive integers with $j < k$, then $n = (a_k ... a_1 a_0)_b$ is divisible by $d^j$ iff $(a_{j-1} ... a_1 a_0)_b$ is divisible by $d^j$.
+- ex. todo
+- Proof: Since $d ∣ b$, we have $d^j ∣ b^j$, and so $b^i ≡ 0$ (mod $d^j$) for all $i ≥ j$. Thus $n ≡ (a_{j-1} ... a_1 a_0)_b$ (mod $d^j$), and $d^j ∣ n$ if and only if $d^j ∣ (a_{j-1} ... a_1 a_0)_b$.
+
+**Divisibility Test**: If $d | (b-1)$, then $n = (a_k ... a_1 a_0)_b$ is divisible by $d$ if and only if $a_k + ... + a_1 + a_0$ is divisible by $d$.
+- ex. todo
+
+**Divisibility Test**: If $d | (b+1)$, then $n = (a_k ... a_1 a_0)_b$ is divisible by $d$ if and only if $(-1)^k a_k + ... - a_1 + a_0$ is divisible by $d$.
+- ex. Let $n = (1001001111)_2$. Then $3 ∣ n$ as $n ≡ 1 - 1 + 1 - 1 + 0 - 0 + 1 - 0 + 0 - 1 ≡ 0$ (mod 3) and $3 ∣ 2 + 1$.
+
+# Chapter 5: Some Special Congruences
+
+## Wilson's Theorem and Fermat's Little Theorem
+
+**Wilson's Theorem**: If $p$ is prime, then $(p-1)! ≡ -1$ (mod $p$).
+- Proof: inverses cancel. The only numbers that are their own inverses in a prime field are $1$ and $p-1$. So $(p-1)! ≡ p-1 ≡ -1$ (mod $p$).
+
+**Corollary**: If $n$ is a positive integer satisfying $(n-1)! ≡ -1$ (mod $n$), $n$ is prime.
+- Proof: Assume $n$ is composite. Proof by contradiction. todo
+
+**Fermat's Little Theorem**: If $p$ is prime and $a$ is a positive integer with $p ∤ a$, then $a^{p-1} ≡ 1$ (mod $p$).
+
+**Corollary**: If $p$ is a prime and $a$ is a positive integer with $p ∤ a$, then $a^p ≡ a$ (mod $p$).
+- Proof: trivial.
+
+**Corollary**: If $p$ is prime and $a$ is an integer with $p ∤ a$, then $a^{p-2}$ is an inverse of $a$ modulo $p$.
+- Proof: Fermat's little theorem tells us that $a^{p-1} ≡ 1$ (mod $p$). $a^{p-1} = aa^{p-2}$.
+
+**Corollary**: If $p$ is prime and $a,b$ are positive integers with $p ∤ a$, then the solutions of $ax ≡ b$ (mod $p$) are the integers $x ≡ a^{p-2}b$ (mod $p$).
+- Proof: Take $ax ≡ b$ (mod $p$). Since $p$ does not divide $a$, we know $a^{p-2}$ is an inverse of $a$ (mod $p$). So $x ≡ a^{p-2}b$ (mod $p$).
+
+## Pseudoprimes
+
+**Definition**: Let $b$ be a positive integer. If $n$ is a composite positive integer and $b^n ≡ b$ (mod $n$), then $n$ is a **pseudoprime to the base b**.
+- ex. 341 is a pseudoprime to the base 2: $2^{340} ≡ 1$ (mod 341), and so $2^{341} ≡ 2$ (mod 341).
+
+**Lemma**: If $d$ and $n$ are positive integers with $d ∣ n$, then $2^d - 1 ∣ 2^n - 1$.
+- Proof: Since $d ∣ n$, $dt = n$ for some $t$. We have the identity $x^t - 1 = (x-1)(x^{t-1} + x^{t-2} + ... + 1)$, and so $2^n - 1 = 2^{dt} - 1 = (2^d - 1)(2^{d(t-1)} + 2^{d(t-2)} + ... + 1)$.
+
+**Proof**: There are infinitely many pseudoprimes to the base 2.
+We may prove this by showing that if $n$ is an odd pseudoprime to the base 2, $m = 2^n - 1$ is also an odd pseudoprime to the base 2.
+Let $n$ be an odd pseudoprime. Then $2^{n-1} ≡ 1$ (mod $n$) and $n$ is composite by definition. Let $m = 2^n - 1$. It is composite, as $n$ is composite (by the above lemma). As $2^{n-1} ≡ 1$ (mod $n$), there is an integer $k$ s.t. $2^n - 2 = kn$, and so $2^{m-1} = 2^{2^n - 2} = 2^{kn}$. todo: finish
+
+**Definition**: A composite integer that satisfies $b^{n-1} ≡ 1$ (mod $n$) for all positive integers $b$ with $(b,n)=1$ is called a **Carmichael number**.
+- ex. $561 = (3)(11)(17)$ is a Carmichael number. If $(b, 561) = 1$, then $(b,3)=(b,11)=(b,17)=1$. So $b^2 ≡ 1$ (mod 3), $b^{10} ≡ 1$ (mod 11), $b^{16} ≡ 1$ (mod 17). So $b^{560}$... todo: finish
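+
+These claims are quick to check with fast modular exponentiation (a sketch):
+
+```python
+# 341 = 11*31 and 561 = 3*11*17 are composite, yet behave like primes for
+# Fermat's little theorem in the ways described above.
+from math import gcd
+
+print(pow(2, 340, 341))  # 1: 341 is a pseudoprime to the base 2
+print(pow(3, 340, 341))  # 56: 341 fails for the base 3
+print(all(pow(b, 560, 561) == 1 for b in range(2, 561) if gcd(b, 561) == 1))
+# True: 561 is a Carmichael number
+```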
+
+**Theorem**: If $n = q_1 q_2 ... q_k$, where the $q_j$ are distinct primes satisfying $(q_j - 1) | (n - 1)$ for all $j$, then $n$ is a Carmichael number.
+
+**Definition**: Let $n$ be a positive integer with $n-1 = 2^s t$, where $s$ is a nonnegative integer and $t$ is an odd positive integer. We say that $n$ **passes Miller's test for the base b** if either $b^t ≡ 1$ (mod $n$) or $b^{2^j t} ≡ -1$ (mod $n$) for some $j$ with $0 ≤ j ≤ s - 1$.
+- ex. todo
+
+**Theorem**: If $n$ is prime and $b$ is a positive integer with $n ∤ b$, then $n$ passes Miller's test for the base $b$.
+
+**Definition**: If $n$ is composite and passes Miller's test for the base $b$, then we say $n$ is a **strong pseudoprime to the base b**. Strong pseudoprimes are exceedingly rare.
+- ex. Let $n = 2047 = 23⋅89$. Then $2^{2046} = (2^{11})^{186} = 2048^{186} ≡ 1$ (mod 2047), so 2047 is a pseudoprime to the base 2. Further, $2046 = 2 ⋅ 1023$ and $2^{1023} = (2^{11})^{93} ≡ 1$ (mod 2047), so 2047 passes Miller's test and is a strong pseudoprime to the base 2.
+
+**Proof**: There are infinitely many strong pseudoprimes to the base 2.
+- We may prove this by showing that if $n$ is a pseudoprime to the base 2, $N = 2^n - 1$ is a *strong* pseudoprime to the base 2.
+- todo
+
+**Theorem**: If $n$ is an odd composite positive integer, then $n$ passes Miller's test for at most $(n-1)/4$ bases $b$ with $1 ≤ b ≤ n-1$.
+
+**Rabin's Probabilistic Primality Test**
+- Let $n$ be a positive integer. Pick $k$ different positive integers less than $n$ and perform Miller's test on $n$ for each of those bases. If $n$ is composite the probability that $n$ passes all $k$ tests is less than $(1/4)^k$.
+
+**Conjecture**: For every composite positive integer $n$, there is a base $b$ with $b < 70 (log_2 n)^2$ such that $n$ fails Miller's test for the base $b$. If this conjecture is true, the following proposition provides a rapid primality test:
+
+**Proposition**: If the generalized Riemann hypothesis is valid, then there exists an algorithm to determine whether a positive integer $n$ is prime using $O((log_2 n)^5)$ bit operations.
+- Proof: Let $b$ be a positive integer less than $n$. Note that Miller's test takes $O((log_2 b)^2)$ bit operations. Assuming the generalized Riemann hypothesis is true, if $n$ is composite, there is a base $b$ with $b < 70 (log_2 n)^2$ such that $n$ fails Miller's test for $b$. To discover this $b$, then, requires less than $O((log_2 n)^5)$ bit operations.
+
+## Euler's theorem
+
+**Euler's Totient Function**: Let $n$ be a positive integer. **Euler's totient function** / the **Euler phi-function** $ϕ(n)$ is defined to be the number of positive integers not exceeding $n$ which are relatively prime to $n$.
+
+**Definition**: A **reduced residue system modulo n** is a set of $ϕ(n)$ integers such that each element of the set is relatively prime to $n$, and no two different elements of the set are congruent modulo $n$.
+- ex. $\{1, 3, 5, 7\}$ is a reduced residue system modulo 8.
+
+**Theorem**: If $r_1, r_2, ..., r_{ϕ(n)}$ is a reduced residue system modulo $n$, and if $a$ is a positive integer with $(a, n) = 1$, then the set $ar_1, ar_2, ..., ar_{ϕ(n)}$ is also a reduced residue system modulo $n$.
+- Proof: Consider that $(a,n) = 1$... intuitive. todo.
+
+**Euler's Theorem**: If $m$ is a positive integer and $a$ is an integer with $(a,m) = 1$, then $a^{ϕ(m)} ≡ 1$ (mod $m$).
+- ex. $\{1,3,5,7\}$ (mod 8): $3^{ϕ(8)} = 3^4 ≡ 1$ (mod 8)
+
+**Corollary**: Euler's Theorem is useful for finding inverses modulo $m$. If $a$ and $m$ are relatively prime, $aa^{ϕ(m)-1} = a^{ϕ(m)} ≡ 1$ (mod $m$), so $a^{ϕ(m)-1}$ is an inverse of $a$ modulo $m$.
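+
+A sketch of the corollary in code ($ϕ$ computed naively, for illustration only):
+
+```python
+# Inverse of a mod m via Euler's theorem: a * a^(phi(m)-1) ≡ 1 (mod m).
+from math import gcd
+
+def phi(m):
+    return sum(1 for k in range(1, m + 1) if gcd(k, m) == 1)
+
+a, m = 3, 8
+inv = pow(a, phi(m) - 1, m)
+print(inv, (a * inv) % m)  # 3 1
+```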
+
+# Chapter 6: Multiplicative Functions
+
+## The Euler ϕ-Function
+
+**Definition**: An **arithmetic function** is a function defined for all positive integers.
+
+**Definition**: An arithmetic function $f$ is **multiplicative** if $f(mn) = f(m)f(n)$ whenever $m$ and $n$ are relatively prime.
+
+**Theorem**: If $f$ is a multiplicative function and if $n = p_1^{a_1} p_2^{a_2} ... p_s^{a_s}$ is the prime power factorization of the positive integer $n$, then $f(n) = f(p_1^{a_1})f(p_2^{a_2})...f(p_s^{a_s})$.
+- Proof: trivial. Distinct prime powers are relatively prime to each other.
+
+**Theorem**: If $p$ is prime, then $ϕ(p) = p - 1$. Conversely, if $p$ is a positive integer with $ϕ(p) = p - 1$, then $p$ is prime.
+- Proof: Primes are not factorable. Every positive number less than a prime is relatively prime to the prime.
+
+**Theorem**: Let $p$ be a prime and $a$ a positive integer. Then $ϕ(p^a) = p^a - p^{a-1} = p^{a-1}(p-1)$.
+- Proof: The positive integers not exceeding $p^a$ that are *not* relatively prime to $p^a$ are exactly the $p^{a-1}$ multiples of $p$. So $ϕ(p^a) = p^a - p^{a-1}$.
+
+**Theorem**: The Euler ϕ-function is multiplicative. That is, for some relatively prime positive integers $m$ and $n$, $ϕ(mn) = ϕ(m)ϕ(n)$.
+- Proof: somewhat intuitive. todo
+
+**Theorem**: Let $n = p_1^{a_1} p_2^{a_2} ... p_k^{a_k}$ be the prime power factorization of the positive integer $n$. Then $ϕ(n) = n(1-\frac{1}{p_1})(1-\frac{1}{p_2})...(1-\frac{1}{p_k})$.
+- Proof: ϕ is multiplicative, so $ϕ(n) = ϕ(p_1^{a_1}) ϕ(p_2^{a_2}) ... ϕ(p_k^{a_k})$. From the above theorem, we know $ϕ(p_j^{a_j}) = p_j^{a_j} - p_j^{a_j - 1} = p_j^{a_j}(1-\frac{1}{p_j})$. The product of all $p_j^{a_j}$ is $n$, and so $ϕ(n) = n(1-\frac{1}{p_1})(1-\frac{1}{p_2})...(1-\frac{1}{p_k})$.
+
+**Theorem**: Let $n$ be a positive integer. Then $\sum_{d ∣ n} ϕ(d) = n$.
+- ex. Consider $n=18$: $ϕ(1) + ϕ(2) + ϕ(3) + ϕ(6) + ϕ(9) + ϕ(18) = 1 + 1 + 2 + 2 + 6 + 6 = 18$.
+
+## The Sum and Number of Divisors
+
+**Definition**: The **sum of the divisors function** $σ$ is defined by setting $σ(n)$ equal to the sum of all the positive divisors of $n$.
+
+**Definition**: The **number of the divisors function** $τ$ is defined by setting $τ(n)$ equal to the number of positive divisors of $n$.
+
+**Theorem**: If $f$ is a multiplicative function, then the arithmetic function $F(n) = \sum_{d ∣ n} f(d)$ is also multiplicative.
+- Proof: Trivial.
+
+**Lemma**: For a prime $p$ and positive integer $a$, $σ(p^a) = (1+p+p^2+...+p^a) = \frac{p^{a+1} - 1}{p-1}$ and $τ(p^a) = a + 1$.
+
+**Theorem**: Consider a positive integer $n$ with prime factorization $n = p_1^{a_1} p_2^{a_2} ... p_s^{a_s}$. Then $σ(n) = Π_{j=1}^s \frac{p_j^{a_j+1} - 1}{p_j-1}$ and $τ(n) = Π_{j=1}^s (a_j + 1)$.
+- Proof: Trivial.
+
+## Perfect Numbers and Mersenne Primes
+
+**Definition**: If $n$ is a positive integer and $σ(n) = 2n$, then $n$ is called a **perfect number**.
+- ex. $σ(6) = 1 + 2 + 3 + 6 = 12$, so 6 is perfect.
+- ex. $σ(28) = 1 + 2 + 4 + 7 + 14 + 28 = 56$, so 28 is perfect.
+
+**Theorem**: The positive integer $n$ is an even perfect number if and only if $n = 2^{m-1}(2^m - 1)$ where $m$ is a positive integer such that $2^m - 1$ is prime.
+- ex. $m = 2$: $2^1(2^2 - 1) = 6$; $m = 3$: $2^2(2^3 - 1) = 28$.
+
+**Theorem**: If $m$ is a positive integer and $2^m - 1$ is prime, then $m$ must be prime.
+- Proof: If $m = ab$ is not prime, $2^m - 1$ must be composite: $2^{ab} - 1 = (2^a - 1)(2^{a(b-1)} + 2^{a(b-2)} + ... + 2^a + 1)$.
+
+**Definition**: Given a positive integer $m$, $M_m = 2^m - 1$ is called the $m$th **Mersenne number**. Given a prime $p$, if $M_p = 2^p - 1$ is also prime, $M_p$ is called a **Mersenne prime**.
+- ex. $M_7 = 2^7 - 1 = 127$ is a Mersenne prime
+- ex. $M_{11} = 2^{11} - 1 = 2047 = 23⋅89$ is not
+
+**Theorem**: If $p$ is an odd prime, then any divisor of the Mersenne number $M_p = 2^p - 1$ is of the form $2kp + 1$, for some positive integer $k$.
+
+**The Lucas-Lehmer Test**: Let $p$ be a prime and $M_p = 2^p - 1$ be the $p$th Mersenne number. Consider a recursive sequence of integers with $r_1 = 4$ and $r_k ≡ r_{k-1}^2 - 2$ (mod $M_p$), $0 ≤ r_k < M_p$. Then, $M_p$ is prime if and only if $r_{p-1} ≡ 0$ (mod $M_p$).
+- ex. Consider $M_5 = 2^5 - 1 = 31$. Then $r_1 = 4$, $r_2 ≡ 4^2 - 2 = 14$ (mod 31), $r_3 ≡ 14^2 - 2 ≡ 8$ (mod 31), $r_4 ≡ 8^2 - 2 ≡ 0$ (mod 31). $r_{5-1} ≡ 0$ (mod 31), so $M_5 = 31$ is prime.
+
+**Corollary**: Let $p$ be prime and let $M_p = 2^p - 1$ denote the $p$th Mersenne number. It is possible to determine whether $M_p$ is prime using $O(p^3)$ bit operations.
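+
+The test is easy to implement (a minimal sketch, assuming $p$ is an odd prime):
+
+```python
+# Lucas-Lehmer: M_p = 2^p - 1 is prime iff r_{p-1} ≡ 0 (mod M_p),
+# with r_1 = 4 and r_k = r_{k-1}^2 - 2.
+def lucas_lehmer(p):
+    m = (1 << p) - 1
+    r = 4
+    for _ in range(p - 2):  # steps from r_1 up to r_{p-1}
+        r = (r * r - 2) % m
+    return r == 0
+
+print([p for p in [3, 5, 7, 11, 13, 17, 19] if lucas_lehmer(p)])
+# [3, 5, 7, 13, 17, 19] — M_11 = 2047 = 23*89 is correctly excluded
+```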
+
+known mersenne primes table
+
+Mersenne primes are of interest because of their ease in location and verification: the largest known prime, at any given time, is typically a Mersenne prime. It has been conjectured, but not proven, that there are infinitely many Mersenne primes.
+
+# Chapter 7: Cryptology
+
+**Caesar Cipher**: $C ≡ P + 3$ (mod 26)
+- ex. todo
+
+Ciphers may be described by a **shift transformation**: $C ≡ P+k$ (mod 26), for some key $k$. More generally, ciphers of the form $C ≡ aP+b$ (mod 26) for some integers $a,b$ such that $(a,26)=1$ are considered **affine transformations**. This $(a,26)=1$ is so that as $P$ runs through a complete system of residues, $C$ also does.
+
+The *inverse* relationship is given by $P ≡ \hat{a} (C-b)$ (mod 26), where $\hat{a}$ is an inverse of $a$ (mod 26).
+
+Ciphers like these encrypt *each character individually*, making them 1) trivially easy to bruteforce and 2) subject to *frequency analysis*.
+- ex. if we know we are looking for $C ≡ P+k$ (mod 26): say the most common ciphertext letter is P. We then set it equal to $e$, the most common letter in English. So $15_P ≡ 4_e + k$ (mod 26), so $k=11$ and we have our solution.
+- ex. if we know we are looking for $C ≡ aP+b$ (mod 26): say the most common ciphertext letters are $L$ and $U$. We then set those equal to $E$ and $T$, the first and second most common letters in English, leaving us with two equations: $4_E a + b ≡ 11_L$ and $19_T a + b ≡ 20_U$ (mod 26). This system is solvable by the method from Chapter 3: $Δ = 4⋅1 - 1⋅19 ≡ 11$ (mod 26), so $a ≡ \hat{Δ}(1⋅11 - 1⋅20) ≡ 11$ and $b ≡ \hat{Δ}(4⋅20 - 19⋅11) ≡ 19$ (mod 26).
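+
+The same computation in code, reusing the $2×2$ modular-inverse idea from Chapter 3 (illustrative sketch):
+
+```python
+# Recover the affine key (a, b) from 4a + b ≡ 11 and 19a + b ≡ 20 (mod 26).
+m = 26
+delta_inv = pow((4 * 1 - 1 * 19) % m, -1, m)  # inverse of the determinant
+a = delta_inv * (1 * 11 - 1 * 20) % m
+b = delta_inv * (4 * 20 - 19 * 11) % m
+print(a, b)  # 11 19
+assert (4 * a + b) % m == 11 and (19 * a + b) % m == 20
+```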
+
+# Chapter 8: Primitive Roots
+
+## The Order of an Integer and Primitive Roots
+
+**Definition**: Let $a$ and $m$ be relatively prime positive integers. Then, the least positive integer $x$ such that $a^x ≡ 1$ (mod $m$) is called the **order of a modulo m**, written $ord_m a$.
+- ex. $2^3 ≡ 1$ (mod 7), so $ord_7 2 = 3$.
+
+**Theorem**: If $a$ and $n$ are relatively prime integers with $n > 0$, then the positive integer $x$ is a solution of the congruence $a^x ≡ 1$ (mod $n$) if and only if $ord_n a ∣ x$.
+- Proof: trivial.
+
+**Corollary**: If $a$ and $n$ are relatively prime integers with $n > 0$, then $ord_n a ∣ ϕ(n)$.
+- Proof: $(a,n) = 1$, so $a^{ϕ(n)} ≡ 1$ (mod $n$) by Euler's theorem. So by the above theorem, $ord_n a ∣ ϕ(n)$.
+
+**Theorem**: If $a$ and $n$ are relatively prime integers with $n > 0$, then $a^i ≡ a^j$ (mod $n$) where $i$ and $j$ are nonnegative integers if and only if $i ≡ j$ (mod $ord_n a$).
+- ex. todo
+
+**Definition**: If $r$ and $n$ are relatively prime integers with $n > 0$ and $ord_n r = ϕ(n)$, then $r$ is called a **primitive root modulo n**. Not all integers have primitive roots.
+- ex. $ord_7 3 = 6 = ϕ(7)$, so $3$ is a primitive root modulo 7.
+
+**Theorem**: If $r$ and $n$ are relatively prime positive integers with $n > 0$, and if $r$ is a primitive root modulo $n$, then the integers $r^1, r^2, ..., r^{ϕ(n)}$ form a reduced residue system modulo $n$.
+- ex. todo
+
+**Theorem**: If $ord_m a = t$ and $u$ is a positive integer, then $ord_m (a^u) = t/(t,u)$.
+- ex. todo
+
+**Corollary**: Let $r$ be a primitive root modulo $m$, where $m$ is an integer greater than 1. Then $r^u$ is a primitive root modulo $m$ if and only if $(u, ϕ(m)) = 1$.
+
+**Theorem**: If the positive integer $m$ has a primitive root, then it has a total of $ϕ(ϕ(m))$ incongruent primitive roots.
+
+## Primitive Roots for Primes
+
+**Definition**: Let $f(x)$ be a polynomial with integer coefficients. We say that an integer $c$ is a **root of f(x) modulo m** if $f(c) ≡ 0$ (mod $m$). It is easy to see that if $c$ is a root of $f(x)$ modulo $m$, then every integer congruent to $c$ modulo $m$ is also a root.
+
+**Lagrange's Theorem**: Let $f(x) = a_n x^n + a_{n-1} x^{n-1} + ... + a_1 x + a_0$ be a polynomial of degree $n$ with integer coefficients and with leading coefficient $a_n$ not divisible by $p$. Then $f(x)$ has at most $n$ incongruent roots modulo $p$.
+
+**Theorem**: Let $p$ be prime and let $d$ be a divisor of $p-1$. Then the polynomial $x^d - 1$ has exactly $d$ incongruent roots modulo $p$.
+
+**Theorem**: Let $p$ be prime and let $d$ be a positive divisor of $p-1$. Then the number of incongruent integers of order $d$ modulo $p$ is equal to $ϕ(d)$.
+
+**Corollary**: Every prime has a primitive root.
+
+## Existence of Primitive Roots
+
+**Theorem**: If $p$ is an odd prime with a primitive root $r$, then either $r$ or $r+p$ is a primitive root modulo $p^2$.
+
+**Theorem**: Let $p$ be an odd prime. Then $p^k$ has a primitive root for all positive integers $k$. Moreover, if $r$ is a primitive root modulo $p^2$, then $r$ is a primitive root modulo $p^k$, for all positive integers $k$.
+
+**Theorem**: If $a$ is an odd integer and $k ≥ 3$ is an integer, then $a^{ϕ(2^k)/2} = a^{2^{k-2}} ≡ 1$ (mod $2^k$).
+
+**Theorem**: Let $k ≥ 3$ be an integer. Then $ord_{2^k} 5 = ϕ(2^k)/2 = 2^{k-2}$.
+
+**Theorem**: If $n$ is a positive integer that is not a prime power or twice a prime power, then $n$ does not have a primitive root.
+
+**Theorem**: If $p$ is an odd prime and $t$ is a positive integer, then $2p^t$ has a primitive root. Specifically, if $r$ is a primitive root modulo $p^t$ and $r$ is odd, then $r$ is also a primitive root modulo $2p^t$; if $r$ is even, then $r + p^t$ is a primitive root modulo $2p^t$.
+
+**Theorem**: The positive integer $n$ has a primitive root if and only if $n = 2, 4, p^t, 2p^t$ for an odd prime $p$ and positive integer $t$.
+
+## Index Arithmetic
+
+**Definition**: Let $m$ be a positive integer with primitive root $r$. If $a$ is a positive integer with $(a,m) = 1$, then the unique integer $x$ with $1 ≤ x ≤ ϕ(m)$ and $r^x ≡ a$ (mod $m$) is called the **index of a to the base r modulo m**, written $ind_r a$. With this definition, we have $a ≡ r^{ind_r a}$ (mod $m$).
+
+**Theorem**: Let $m$ be a positive integer with primitive root $r$, and let $a$ and $b$ be integers relatively prime to $m$. Then:
+- $ind_r 1 ≡ 0$ (mod $ϕ(m)$)
+- $ind_r (ab) ≡ ind_r a + ind_r b$ (mod $ϕ(m)$)
+- $ind_r a^k ≡ k⋅ind_r a$ (mod $ϕ(m)$) for a positive integer $k$.
+
+**Definition**: If $m$ and $k$ are positive integers and $a$ is an integer relatively prime to $m$, then $a$ is a **kth power residue of m** if the congruence $x^k ≡ a$ (mod $m$) has a solution.
+
+**Theorem**: Let $m$ be a positive integer with a primitive root. If $k$ is a positive integer and $a$ is an integer relatively prime to $m$, then the congruence $x^k ≡ a$ (mod $m$) has a solution if and only if $a^{ϕ(m)/d} ≡ 1$ (mod $m$), where $d = (k, ϕ(m))$.
+
+**Corollary**: If there are solutions of $x^k ≡ a$ (mod $m$), then there are exactly $d$ incongruent solutions modulo $m$.
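+
+The definitions above are easy to check by brute force; a sketch (assuming valid inputs, e.g. $(a, n) = 1$ and $r$ a primitive root):
+
+```python
+from math import gcd
+
+def order(a, n):
+    """ord_n(a): the least positive x with a^x ≡ 1 (mod n)."""
+    x, power = 1, a % n
+    while power != 1:
+        power = power * a % n
+        x += 1
+    return x
+
+def primitive_roots(n, phi_n):
+    """All r in [1, n) with ord_n(r) = φ(n): the primitive roots modulo n."""
+    return [r for r in range(1, n) if gcd(r, n) == 1 and order(r, n) == phi_n]
+
+def index(a, r, m):
+    """ind_r(a): the x in [1, φ(m)] with r^x ≡ a (mod m)."""
+    x, power = 1, r % m
+    while power != a % m:
+        power = power * r % m
+        x += 1
+    return x
+
+assert order(2, 7) == 3 and order(3, 7) == 6  # 3 is a primitive root mod 7
+assert primitive_roots(7, 6) == [3, 5]        # φ(φ(7)) = φ(6) = 2 of them
+assert pow(3, index(2, 3, 7), 7) == 2         # 3^(ind_3 2) ≡ 2 (mod 7)
+```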
+
+**Theorem**: If $n$ is an odd composite positive integer, then $n$ passes Miller's test for at most $(n-1)/4$ bases $b$ with $1 ≤ b < n-1$.
+
+**Lemma**: Let $p$ be an odd prime and let $e,q$ be positive integers. Then the number of incongruent solutions of the congruence $x^{q-1} ≡ 1$ (mod $p^e$) is $(q, p^{e-1}(p-1))$.
+
+## Primality Testing Using Primitive Roots
+
+**Theorem**: If $n$ is a positive integer and if an integer $x$ exists such that $x^{n-1} ≡ 1$ (mod $n$) and $x^{(n-1)/q} ≢ 1$ (mod $n$) for all prime divisors $q$ of $n-1$, then $n$ is prime.
+
+**Corollary**: If $n$ is an odd positive integer and $x$ is a positive integer such that $x^{(n-1)/2} ≡ -1$ (mod $n$) and $x^{(n-1)/q} ≢ 1$ (mod $n$) for all odd prime divisors $q$ of $n-1$, then $n$ is prime.
+
+**Theorem**: If $n$ is composite, there is a certificate of this fact that can be verified with $O((\log_2 n)^2)$ bit operations.
+
+**Theorem**: If $n$ is prime, there is a certificate of this fact that can be verified with $O((\log_2 n)^4)$ bit operations.
+
+## Universal Exponents
+
+## Pseudo-random Numbers
+
+## The Splicing of Telephone Cables
diff --git a/math/proof.md b/math/proof.md
new file mode 100644
index 0000000..17548dd
--- /dev/null
+++ b/math/proof.md
@@ -0,0 +1,5 @@
+---
+layout: foundations
+title: mathematics/proof
+---
+
diff --git a/mathematics/algebra.md b/mathematics/algebra.md
deleted file mode 100644
index 3717445..0000000
--- a/mathematics/algebra.md
+++ /dev/null
@@ -1,69 +0,0 @@
----
-layout: algebra
-title: mathematics/algebra
----
-
-# algebra
-
-Modern algebra is the study of **algebraic structures**: groups, rings, fields, modules, and the like. These structures are highly abstract, and so results about them apply in a wide variety of situations.
-
-## structures
-
-An **algebraic structure** is a set with a collection of *operations* and a finite set of *axioms* those operations must satisfy.
-
-A [**group**](group-theory) $G$ is a set with a single binary operation $⋆$ satisfying the following axioms:
-- associativity: $∀a,b,c : (a⋆b)⋆c = a⋆(b⋆c)$
-- identity: $∃e, ∀a : e⋆a = a⋆e = a$
-- inverse: $∀a, ∃a^{-1} : a⋆a^{-1} = e$
-- An Abelian or **commutative group** satisfies an additional axiom:
-  - commutativity: $∀a,b : a⋆b=b⋆a$
-
-A **monoid** is a group without the inverse axiom.
-
-A [**ring**](ring-theory) $R$ is a set with two binary operations $+$ and $×$ satisfying the following axioms:
-- $(R, +)$ is a *commutative group*:
-  - associativity: $∀a,b,c : (a+b)+c = a+(b+c)$
-  - additive identity: $∃0, ∀a : 0+a = a+0 = a$
-  - additive inverse: $∀a, ∃-a : a+(-a) = 0$
-  - commutativity: $∀a,b : a+b=b+a$
-- $(R, ×)$ is a *monoid*
-  - associativity: $∀a,b,c : (a×b)×c = a×(b×c)$
-  - multiplicative identity: $∃1, ∀a : 1×a = a×1 = a$
-- The *distributive laws* hold for + and ×:
-  - $∀a,b,c : (a+b) × c = (a×c)+(b×c)$
-  - $∀a,b,c : a × (b+c) = (a×b) + (a×c)$
-- An Abelian or **commutative ring** satisfies an additional axiom:
-  - commutativity (of $×$): $∀a,b : a×b=b×a$
-
-A **field** is a *commutative ring* where all elements sans $0$ have an inverse $a^{-1}$ under multiplication. Subsequently, $0 ≠ 1$.
-A field may also be thought of as a set on which addition, subtraction, multiplication, and division are defined and behave as they do on $ℝ$.
-
-A [**vector space**](linear-algebra) $V$ over a field $F$ of scalars is a set with a binary operation $+$ and a scalar multiplication function satisfying the following axioms:
-- $(V, +)$ is a *commutative group*:
-  - associativity: $∀u,v,w : (u+v)+w = u+(v+w)$
-  - additive identity: $∃0, ∀v: 0+v = v+0 = v$
-  - additive inverse: $∀v, ∃-v: v+(-v) = 0$
-  - commutativity: $∀u,v : u+v=v+u$
-- $(V, ⋅)$ is a *scalar operation*:
-  - scalar identity: $∃1 ∈ F, ∀v ∈ V : 1v = v1 = v$
-  - compatibility: $∀a,b ∈ F, ∀v ∈ V : (ab)v = a(bv)$
-- The *distributive laws* hold:
-  - $∀a ∈ F, ∀u,v ∈ V : a(u+v) = au+av$
-  - $∀a,b ∈ F, ∀v ∈ V : (a+b)v = av + bv$
-
-A **module** $M$ is a generalization of a *vector space* to one defined over a ring $R$ instead of a field.
-
-A [**lattice**](order-theory) $L$ is a set with two binary operations $∧$ and $∨$ satisfying the following axioms:
-- commutativity:
-  - $∀a,b : a ∧ b = b ∧ a$
-  - $∀a,b : a ∨ b = b ∨ a$
-- associativity:
-  - $∀a,b,c : a ∧ (b ∧ c) = (a ∧ b) ∧ c$
-  - $∀a,b,c : a ∨ (b ∨ c) = (a ∨ b) ∨ c$
-- absorption:
-  - $∀a,b : a ∧ (a ∨ b) = a$
-  - $∀a,b : a ∨ (a ∧ b) = a$
-- idempotence:
-  - $∀a : a ∧ a = a$
-  - $∀a : a ∨ a = a$
-
-An **algebra** $A$ over a field $F$ is a *vector space* equipped with an additional *bilinear product*. It is also common to consider algebras over a *ring* (and thus $A$ as a *module* with an additional product).
diff --git a/mathematics/analysis.md b/mathematics/analysis.md
deleted file mode 100644
index 7bfef62..0000000
--- a/mathematics/analysis.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-layout: analysis
-title: mathematics/analysis
----
-
-# analysis
diff --git a/mathematics/category-theory.md b/mathematics/category-theory.md
deleted file mode 100644
index 19100a7..0000000
--- a/mathematics/category-theory.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-layout: foundations
-title: mathematics/foundations/category theory
----
-
diff --git a/mathematics/coding-theory.md b/mathematics/coding-theory.md
deleted file mode 100644
index b080ed6..0000000
--- a/mathematics/coding-theory.md
+++ /dev/null
@@ -1,1049 +0,0 @@
----
-layout: algebra
-title: mathematics/algebra/coding theory
----
-
-# MATH 342: Coding Theory
-
-Sections 1-9, 11-12, 14
-
-## Things I forget
-- $M$ = number of codewords (cf. the sphere packing bound)
-- $n$ = length of codewords
-- $d$ = distance between codewords, or the minimal distance of a code
-- $k$ = the dimension of the code, number of row vectors in a generator matrix
-- $q$ = the (size of the) alphabet
-- $V(n, q) = F_q^n$
-- (n, ?, ?)-code
-- [?, ?, ?]-code
-- r = ???
-- $C$: a code.
-- $G$: the generator matrix.
-- $H$: the parity-check matrix.
-
-## What is coding theory?
-
-Coding theory deals with **codes**: ways to represent data robustly & securely.
-MATH 342 will mostly cover the theory behind *detecting & correcting* errors.
-
-Definition: A **code** C is a set of codewords, for example:
-
-```
-C = {
-  w1 = 00000
-  w2 = 01011
-  w3 = 10101
-  w4 = 11110
-}
-```
-
-A goal of codes is to *detect and correct errors*. How can we do this? We will come back to it later.
-
-Definition: A **q-ary** (n, M, d) code C is a set of codewords such that:
-- $q$ = size of alphabet
-- $n$ = length of codewords (when consistent)
-- $M$ = number of codewords
-- $d$ = *minimal distance* of the code
-
-A *2-ary code* is a **binary code**, a *10-ary code* is a **decimal code**, etc.
-
-Let $(F_q)^n$ denote the set of all ordered $n$-tuples $a = a_1 a_2 ... a_n$, where each $a_i \in F_q$.
-The elements of $(F_q)^n$ are called *vectors* or *words*.
-The order of the set $(F_q)^n$ is $q^n$.
-A $q$-ary code of length $n$ is a subset of $(F_q)^n$.
-
-### Distance functions & Hamming distance
-
-Consider the distance:
-- $d(x, y) = \text{# of places where they differ}$
-- $d(C) = min(d(x, y))$ over $x ≠ y$ in $C$
-
-Definition: The **Hamming distance** of two words a, b is the number of *places* where a and b differ.
-
-A **distance function** $dist(x, y)$ on a set $S$ satisfies:
-- $dist(x, y) = dist(y, x)$
-- $dist(x, y) ≥ 0$, and $dist(x, y) = 0 ⟺ x = y$
-- $dist(x, z) ≤ dist(x, y) + dist(y, z)$
-
-The last one is known as the *triangle inequality*.
-
-Aside: A set $S$ plus a distance function $dist$ is known as a **metric space**.
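-
-These definitions are mechanical; a Python sketch, using the example code from above (words as strings):
-
-```python
-from itertools import combinations
-
-def hamming(x, y):
-    """d(x, y): the number of places where the words x and y differ."""
-    assert len(x) == len(y)
-    return sum(a != b for a, b in zip(x, y))
-
-def min_distance(code):
-    """d(C): the minimum Hamming distance over distinct codewords."""
-    return min(hamming(x, y) for x, y in combinations(code, 2))
-
-C = ["00000", "01011", "10101", "11110"]
-d = min_distance(C)
-assert d == 3
-assert (d - 1) // 2 == 1  # detect up to d-1 = 2 errors, correct 1 (next section)
-```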
-### Error detection & correction
-
-Definition: The **minimum distance** between distinct codewords is denoted $d(C)$, and given by $d(C) = min \{dist(x, y) | x, y ∈ C, x ≠ y\}$, for some code $C$.
-
-Given a code $C$ with minimum distance $d(C) = d$:
-1. The code can *detect* up to $d-1$ errors.
-2. The code can *correct* up to $\lfloor (d-1)/2 \rfloor$ errors.
-
-### Nearest neighbour decoding
-
-Just how should we go about *decoding* these codes, particularly when there is error correction involved? The obvious answer:
-**nearest neighbour decoding** decodes a received word to the codeword whose distance from it is as *small* as possible.
-
-This *maximizes* the likelihood of correcting errors given the following assumptions:
-- Every symbol is equally likely to have been received in error
-- If a symbol is received in error, then each of the $q-1$ possible errors is equally likely.
-
-These assumptions describe a $q$-ary **symmetric channel**.
-
-### What makes a good code?
-
-**The main coding theory problem.** A good (n, M, d)-code has:
-- a small $n$: for compactness & subsequently speed
-- a large $M$: to fit many codewords & subsequently a variety of messages
-- a large $d$: to correct many errors
-
-What is known as **the main coding theory problem** is a solution of how to balance these conflicting aims, by way of *optimizing* one of the parameters for *given values* of the other two.
-
-The **usual version of the main coding problem** is to find the largest possible $M$, i.e. the code with the most codewords, given a fixed $n, d, q$.
-
-We denote $A_q(n, d)$ to be the largest $M$ s.t. there exists a $q$-ary $(n, M, d)$-code.
-- This is known for $d=1$: $A_q(n, 1) = q^n$
-- This is known for $d=n$: $A_q(n, n) = q$
-
-### Equivalence of codes
-
-**Definition**: Two codes are *equivalent* if we can go from one to the other using elementary operations. Elementary operations are:
-1. Permute rows.
-2. Permute columns.
-3. Permute all the symbols in a column.
-
-Any $q$-ary $(n,M,d)$-code over an alphabet $\{0,1,...,q-1\}$ is also equivalent to an $(n,M,d)$-code which contains the zero vector.
-- Proof: This is trivial: pick any codeword, and in each column permute its symbol to zero.
-
-### Weight of a vector
-
-The weight of a vector in $(F_2)^n$ is simply the count of $1$ bits.
-- If $x, y \in (F_2)^n$, then $d(x,y) = w(x+y)$.
-- If $x, y \in (F_2)^n$, then $d(x,y) = w(x) + w(y) - 2w(x ∩ y)$.
-
-Some theorems about oddness and evenness:
-- If $d$ is odd, then a binary $(n,M,d)$-code exists if and only if a binary $(n+1, M, d+1)$-code exists.
-- If $d$ is odd, then $A_2(n+1, d+1) = A_2(n,d)$. Equivalently, if $d$ is even, then $A_2(n,d) = A_2(n-1,d-1)$.
-
-### Binomial coefficients
-
-Note the definition of the binomial:
-$$\binom{n}{m} = \frac{n!}{m!(n-m)!}$$
-
-An ordered selection of $m$ distinct objects from a set of $n$ distinct objects can be made in $\frac{n!}{(n-m)!}$ ways.
-
-An unordered selection of $m$ distinct objects from a set of $n$ distinct objects can be made in $\binom{n}{m} = \frac{n!}{m!(n-m)!}$ ways.
-
-todo: binomial coefficients,
-sphere packing bounds,
-perfect codes,
-balanced blocks,
-baby finite fields
-
-### Sphere Packing Bound
-
-Consider $(n, M, d)$ for some q-ary code C.
-- n: length of words
-- M: number of words
-- d: minimum distance
-- q: size of the alphabet
-
-(n, M, d)-codes.
-[n, k, d]-codes are the same as (n, q^k, d)-codes that are linear, so $M = q^k$.
-[n, k]-codes leave d implied (also linear).
-
-Ham(r, q)
-
-Sphere packing is an application of the main coding theory problem. Fix q, n, d. Then $A_q(n, d) = max(M)$.
-
-Assume d is odd, with d = 2t + 1.
-
-$F_q^n$ = all words of length n
-
-Consider the idea of fitting codewords in a box: we can represent codewords as the centers of disjoint spheres, whose radius is governed by the minimum distance.
-
-$S(w_i, t)$ = all words of distance ≤ t from $w_i$
-- where $w_i$ = the center
-- t = the radius
-
-$A_q(n, d)$ = the maximum number of disjoint spheres of radius t that fit into $F_q^n$
-
-$N_t$ = the number of points in a sphere of radius t.
-$F_q^n$ has $q^n$ points.
-
-**Sphere packing bound**:
-M = number of spheres ≤ (# points in $F_q^n$) / (# points in a sphere) = $q^n/N_t$
-
-The **sphere-packing bound** or **Hamming bound** states that a q-ary $(n, M, 2t+1)$-code satisfies $M ⋅ N_t = M \sum_{i=0}^{t} \binom{n}{i}(q-1)^i ≤ q^n$.
-
-A code that achieves the sphere-packing bound is called a **perfect code**. Intuitively, for a perfect code, the $M$ spheres of radius $t$ centred on the codewords "fill" the whole space $(F_q)^n$ without overlapping.
-
-Example: Binary repetition codes: $n=3$, $d=3$, $q=2$: i.e. $C = \{000, 111\}$.
-- $M ≤ 2^3 / N_1 = 8/4 = 2$
-
-All binary repetition codes of odd length are perfect (here $n = d$).
-
-Finding $N_t$ for general n, q, t: to reach a word at distance exactly $i$, choose which $i$ of the $n$ positions to change ($\binom{n}{i}$ ways), then choose a new symbol for each: $q-1$ choices. So $N_t = \sum_{i=0}^{t} \binom{n}{i}(q-1)^i$.
-
-Example: finding the bound for $q=3$, $n=7$, $d=5$. How big of a code can we find for these parameters? $d=5$, so $t=2$ (as $d=2t+1$).
-
-$N_2 = 1 + \binom{7}{1}(q-1) + \binom{7}{2}(q-1)^2 = 1 + 14 + 84 = 99$
-$M ≤ 3^7/99 = 2187/99 ≈ 22.09$
-$A_3(n=7, d=5) ≤ 22$
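-
-The bound is quick to compute; a sketch reproducing both examples above:
-
-```python
-from math import comb
-
-def sphere_points(n, q, t):
-    """N_t: the number of words within distance t of a fixed word."""
-    return sum(comb(n, i) * (q - 1) ** i for i in range(t + 1))
-
-def hamming_bound(n, q, d):
-    """Largest M allowed by the sphere-packing bound, for odd d = 2t + 1."""
-    t = (d - 1) // 2
-    return q ** n // sphere_points(n, q, t)
-
-assert sphere_points(3, 2, 1) == 4    # binary repetition code, n = 3
-assert hamming_bound(3, 2, 3) == 2    # ...which meets the bound: perfect
-assert sphere_points(7, 3, 2) == 99
-assert hamming_bound(7, 3, 5) == 22   # so A_3(7, 5) ≤ 22
-```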
-
-## A Taste of Abstract Algebra
-
-Recall sphere packing: in $ℝ^2$, it's easy: points form a lattice.
-If we fix the zero point, the set of centers is closed under addition.
-A **lattice** is a set of points closed under addition.
-
-$ℤ^n ⊆ ℝ^n$
-
-We want to do the same for any code C that is closed under addition.
-How do we add two vectors?
-
-$C ⊆ F_q^n$ (analogous to $ℤ^n ⊆ ℝ^n$)
-
-### Rings and Fields
-
-A **ring** $R$ is a set of elements with *two* operations + (addition) and • (multiplication) satisfying:
-- $R$ is *closed* under $+$ and $•$
-- + is commutative, associative, and distributive; • is associative and distributive
-  - commutative on addition: $a+b = b+a$
-  - associative: $(a+b)+c = a+(b+c)$, $a•(b•c) = (a•b)•c$
-  - distributive: $a•(b+c) = a•b + a•c$
-- The additive identity element $0$ must exist
-  - $∀a ∈ R : a + 0 = a$
-- The multiplicative identity element $1$ must exist
-- The additive inverse element $(-a)$ must exist
-  - $∀a ∈ R : a + (-a) = 0$
-
-We will take "ring" to refer to **abelian/commutative rings**. This adds commutativity on multiplication ($∀a,b ∈ R : a•b = b•a$); the multiplicative identity element $1$ ($∀a ∈ R : a • 1 = a$) makes the ring *unital*.
-
-- why with an identity????? do rings not normally have both additive and multiplicative identities?????????
-
-In every ring, 0 times anything is 0: from $(0 + a)b = ab$, we get $0b + ab = ab$, so $0b = 0$.
-$R = \{0\}$ is a ring, not a field.
-
-$2ℤ$ has no 1.
-
-A **field** $F$ is a set of elements with *two* operations + (addition) and • (multiplication) satisfying:
-- $F$ is *closed* under + and •
-- + and • are commutative, associative, and distributive
-  - commutative: $a+b = b+a$, $a•b = b•a$
-  - associative: $(a+b)+c = a+(b+c)$, $a•(b•c) = (a•b)•c$
-  - distributive: $a•(b+c) = a•b + a•c$
-- The additive identity element $0$ must exist
-  - $∀a ∈ F : a + 0 = a$
-- The multiplicative identity element $1$ must exist
-  - $∀a ∈ F : a • 1 = a$
-- The additive inverse element $(-a)$ must exist
-  - $∀a ∈ F : a + (-a) = 0$
-- The multiplicative inverse element $a^{-1}$ must exist, when $a ≠ 0$
-  - $∀a ≠ 0 ∈ F : a • a^{-1} = 1$
-  - is this true for 0???? (no: only nonzero elements need inverses)
-
-Field operations behave as the standard operations on the rationals and reals do. The only difference between a field and an (abelian) ring is the existence of a *multiplicative inverse*.
-
-A ring can also be regarded as having *three* operations, with - given by its status as an additive inverse, i.e. $a-b = a+(-b)$.
-
-A field can also be regarded as having *four* operations, with - and ÷ given by their definitions as inverses, i.e. $a-b = a+(-b)$ and $a÷b = a(b^{-1})$ for $b ≠ 0$.
-
-The operation $a•b$ can be more succinctly written as $ab$. The operation $a÷b$ can be alternatively written as $a/b$.
-
-### Euclidean Arithmetic
-
-Writing $n/m = q + r/m$, $r$ is the **principal remainder**.
-
-$m$ **divides** $n$, written $m|n$, if $n/m$ is an integer.
-
-A number $n$ is **prime** if its divisors are $1, n$ only.
-
-Theorem: $gcd(m, n) = gcd(m, n-q•m)$
-
-### The modular integers
-
-Consider $ℤ_m = (\{0,1,...,m-1\}, +, •)$.
-For example: $ℤ_2 = \{0,1\}$.
-- $0+1=1$
-- $1+1=0$
-- $0•1=0$
-- $1•1=1$
-
-Another example: $ℤ_4 = \{0,1,2,3\}$
-
-`+` | 0 | 1 | 2 | 3
-----|---|---|---|---
-0   | 0 | 1 | 2 | 3
-1   | 1 | 2 | 3 | 0
-2   | 2 | 3 | 0 | 1
-3   | 3 | 0 | 1 | 2
-
-`•` | 0 | 1 | 2 | 3
-----|---|---|---|---
-0   | 0 | 0 | 0 | 0
-1   | 0 | 1 | 2 | 3
-2   | 0 | 2 | 0 | 2
-3   | 0 | 3 | 2 | 1
-
-Note the difference between the integers and the reals. In ℤ, $a/b$ may not exist: ℤ is a *ring*. In ℝ, $a/b$ always exists if $b ≠ 0$: ℝ is a *field*.
-
-### Division in $ℤ_m$
-
-When does $1/n$ exist in $ℤ_m$?
-- $1/n = a ⟺ a•n ≡ 1 (mod m)$
-- $⟺ a•n + b•m = 1$ for some b
-- $⟺ gcd(n,m) = 1$
-
-Some examples of modular math:
-- $a•5 + b•7 = 1$: $3•5 - 2•7 = 1$
-
-Ex. in $ℤ_4$, $1/2$ does not exist:
-- $gcd(2, 4) ≠ 1$
-
-Theorem: $ℤ_m$ is a field if and only if $m$ is prime.
-Proof:
-- Assume $m$ is prime. Then every $n$ in $\{1, 2, ..., m-1\}$ has $gcd(m, n) = 1$, and so is invertible.
-- If $m$ is not prime, then $m = m_1 m_2$, with $m_1, m_2 > 1$. Then $m_1 ≠ 0$ has no inverse, as $gcd(m_1, m) = m_1 > 1$.
-
-The **invertible elements** of $ℤ_m$ (the units of $ℤ_m$) are written $ℤ_m^×$: the **group of units**.
-- ex. $ℤ_7^× = \{1, 2, 3, 4, 5, 6\} = ℤ_7 ∖ \{0\}$
-- ex. $ℤ_8^× = \{1, 3, 5, 7\} ⊂ ℤ_8$
-
-A **finite or Galois field** $𝔽_p, GF(p)$ is a field that, well, is finite. Every $ℤ_p$ with $p$ prime is a finite field. There is also a finite field $𝔽_{p^n}$ of order $p^n$ for every positive integer $n$; but it is *not* $ℤ_{p^n}$, which is not even a field for $n > 1$.
-
-$𝔽_4$ is a four element field. It is *not* $ℤ_4$!
-- clarify: what? ℤ_7 counts as 𝔽_7? exact numbers do not matter? does 𝔽_4 exist, then?
-
-Consider $ℤ_6$: $2•3 = 0$, a pair of *zero divisors*.
-A field has *no* zero divisors. Why?
-- Suppose $a•b=0$, with a and b both nonzero.
-- Since $a ≠ 0$, multiply $a•b = 0$ by $1/a$: then $b = 0/a = 0$, a contradiction.
-
-The **cancellation property** of a field states that if $a•b = a•c$ and $a ≠ 0$, then $b=c$.
-
-### Back to coding theory
-
-We can now bridge our knowledge of fields back to coding theory: consider *alphabets* to be *finite fields*, and $𝔽_q^n$ to be a *vector space*, with *codewords* as vectors $w = (a_1, a_2, ..., a_n)$.
-
-An example is an ISBN code.
-- textbook: $0-19-853803-0$
-- language, publisher, book number, checksum
-- codewords: all vectors $(x_1, ..., x_{10})$ that satisfy the linear equation
-- $1x_1 + 2x_2 + ... + 10x_{10} ≡ 0$ (mod 11)
-- because the checksum changes to satisfy that equation
-
-checksum: set such that it satisfies the equation
-
-Why is the distance between ISBNs two? Because any single changed digit breaks the check equation, two valid ISBNs can never differ in just one place.
-
-The ISBN code can:
-- Detect a single error
-- Detect if we switch two numbers
-- Correct a single error *if we know where it is*
-
-Given the equation $1x_1 + 2x_2 + ... + 10x_{10} ≡ 0$, $x_7 = -\frac{1}{7}(1x_1 + 2x_2 + ... + 10x_{10} - 7x_7)$.
-
-These equations are linear, so linear algebra comes in handy...
-
-... missed a bunch ...
-
-### Vector Spaces on Finite Fields
-
-Our finite fields can be viewed as **vector spaces**, with the *elements* as **scalars** and the ordered $n$-tuples over $GF(q)$ as **vectors**. We denote the set of all vectors of length $n$ as $V(n,q)$. We must define two additional operations within $V(n,q)$:
-- *addition* of vectors: if $x = (x_1, x_2, ..., x_n)$ and $y = (y_1, y_2, ..., y_n)$ and $x,y \in V(n, q)$, then $x+y = (x_1 + y_1, x_2 + y_2, ..., x_n + y_n)$.
-- *multiplication* of a vector by a scalar: if $x = (x_1, x_2, ..., x_n) \in V(n, q)$, $a \in GF(q)$, then $ax = (ax_1, ax_2, ..., ax_n)$.
-
-The formal axioms for a vector space are as follows. Given $\textbf{u} = (u_1, u_2, ..., u_n)$, $\textbf{v} = (v_1, v_2, ..., v_n)$, $u,v \in V(n, q)$:
-- $\textbf{u}+\textbf{v} \in V(n, q)$
-- $(\textbf{u}+\textbf{v}) + \textbf{w} = \textbf{u} + (\textbf{v}+\textbf{w})$
-- $\textbf{0} = (0, 0, ..., 0) \in V(n, q)$ satisfying $\textbf{u} + \textbf{0} = \textbf{0} + \textbf{u} = \textbf{u}$.
-- $-\textbf{u} = (-u_1, -u_2, ..., -u_n) \in V(n, q)$
-- $\textbf{u} + \textbf{v} = \textbf{v} + \textbf{u}$
-- (note that the above show $V(n, q)$ forms a commutative group under addition)
-- $a\textbf{v} \in V(n, q)$
-- $a(\textbf{u} + \textbf{v}) = a\textbf{u} + a\textbf{v}$, $(a+b)\textbf{u} = a\textbf{u} + b\textbf{u}$
-- $(ab)\textbf{u} = a(b\textbf{u})$
-- $1\textbf{u} = \textbf{u}$
-
-A (non-empty) *subset* $C$ of $V(n,q)$ is called a **subspace** of $V(n,q)$ if and only if $C$ is closed under addition and scalar multiplication. i.e:
-- if $x,y \in C$, then $x+y \in C$
-- if $a \in GF(q)$ and $x \in C$, then $ax \in C$.
-
-The set $\{0\}$ and the set $V(n, q)$ are **trivial subspaces** and present for every vector space. A subspace is *non-trivial* if it contains at least one vector other than 0.
-- q: is the subspace that is just the space trivial?
-
-A **linear combination** of $r$ vectors $\vec{v_1}, \vec{v_2}, ..., \vec{v_r}$ in $V(n, q)$ is a vector of the form $a_1\vec{v_1} + a_2\vec{v_2} + ... + a_r\vec{v_r}$ where $a_i$ are scalars.
-
-A set of vectors is **linearly dependent** if they can form the zero vector as a linear combination, without all scalars being 0.
-
-A set of vectors is **linearly independent** if they *cannot* form the zero vector as a linear combination without all scalars 0.
-Let $C$ be a subspace of $V(n,q)$. Then a *subset* of $C$ is called a **generating set** (or spanning set) of $C$ if every vector in $C$ can be expressed as a linear combination of the vectors in the subset. A generating set which is linearly independent is also called a **basis**.
-
-For a (non-trivial) subspace $C$ of $V(n,q)$, any generating set of $C$ contains a basis of $C$.
-
-For a basis of $k$ vectors of a subspace $C$ of $V(n,q)$, every vector of $C$ can be expressed *uniquely* as a linear combination of the basis vectors, and $C$ contains exactly $q^k$ vectors. It follows that any two bases of $C$ contain the same number $k$ of vectors: this is called the **dimension** of $C$ and denoted $dim(C)$.
-
-chapter 4 done
-
-## Linear Codes
-
-A **linear code** over $GF(q)$ is a subspace of $V(n,q)$ for some positive integer $n$. Thus, a *subset* $C$ of $V(n,q)$ is a linear code if and only if it is:
-- closed under addition: $u + v \in C$ for all $u,v \in C$
-- closed under scalar multiplication: $au \in C$, for all $u \in C$, $a \in GF(q)$
-- q: why subset?
-
-If $C$ is a k-dimensional subspace of $V(n,q)$, then the linear code $C$ is also called an **[n,k]-code** or a **[n,k,d]-code**.
-- n: length of codewords
-- k: dimension of $C$
-- d: minimum distance of $C$
-
-Note: a q-ary [n,k,d]-code is also a q-ary (n,q^k,d)-code, but not every (n,q^k,d)-code is an [n,k,d]-code: a code of size $q^k$ need not be linear.
-- also known as group codes
-
-The **weight** $w(\vec{x})$ of a vector $\vec{x} \in V(n,q)$ is the number of non-zero entries of $\vec{x}$.
-
-For $x,y \in V(n,q)$, $d(x,y) = w(x-y)$. From this, the **minimum distance** of a linear code is equal to the smallest non-zero weight among its codewords.
-
-Linear codes are nice because we can determine the minimum distance by simply examining the weights of all $M$ codewords. They're also nice because we can describe them by simply giving a basis.
-
-A $k×n$ matrix whose rows form a basis of a linear $[n,k]$-code is called a **generator matrix** of the code.
-- ex. the generator matrix $\begin{bmatrix} 0 & 1 & 1 \\ 1 & 0 & 1 \end{bmatrix}$ describes a certain $[3,2,2]$-code
-
-Linear $q$-ary codes *cannot* be defined unless $q$ is a prime power. However, it's pretty easy to transform non-prime-power $q$-ary codes into prime-power $q$-ary codes.
-- q: what's q again? (size of alphabet) what does a prime power entail again?
-
-### Equivalence of Linear Codes
-
-Two linear codes over $GF(q)$ are called **equivalent** if one can be obtained from the other by a combination of operations being:
-- permutation of positions of the code
-- multiplication of the symbols in a fixed position by a (non-zero) scalar
-- q: ??? what?
-
-Two $k×n$ matrices generate equivalent linear $[n,k]$-codes over $GF(q)$ if one matrix can be obtained from the other by a sequence of operations:
-- (R1) Permutation of rows
-- (R2) Multiplication of a row by a (non-zero) scalar
-- (R3) Addition of a scalar multiple of one row to another
-- (C1) Permutation of columns
-- (C2) Multiplication of any column by a (non-zero) scalar
-
-Let $G$ be a generator matrix of an $[n,k]$-code. Then by performing the above operations, $G$ can be transformed to the **standard form** $[I_k | A]$, where $I_k$ is the $k×k$ identity matrix, and $A$ is a $k×(n-k)$ matrix.
-
-If two generator matrices can be transformed to the same standard form by *row operations* only, they generate the **same** code. If they must use the column operations to reach the same standard form, they generate **equivalent**, but not necessarily the *same*, codes.
-- q: not *necessarily* the same? if there is no way without column operations, must they be different?
-
-chapter 5 done
-
-### Encoding with linear codes
-
-Suppose that $C$ is an $[n,k]$-code over $GF(q)$ and that $\vec{a}$ is any vector in $V(n, q)$. Then the set $\vec{a} + C$ defined by $\vec{a} + C = \{\vec{a}+\vec{x} | \vec{x} \in C\}$ is called a **coset** of $C$.
-
-For an $[n,k]$-code $C$ over $GF(q)$:
-- *every vector* of $V(n,q)$ is in some coset of $C$
-- every coset contains *exactly* $q^k$ vectors
-- any *two cosets* are either disjoint or identical (no partial overlap)
-  - Proof: todo...
-
-The **coset leader** is the vector having minimum weight in a coset. (there may be multiple. it is generally fine to pick one randomly.)
-
-A (Slepian) **standard array** for an $[n,k]$-code $C$ is a $q^{n-k} × q^k$ matrix of *all* of the vectors in $V(n,q)$, with the first row consisting of the code $C$, and the following rows consisting of the cosets of $C$, each arranged with the coset leader first ($\vec{0}$ for $C$) and the codewords in consistent order, so that they line up by columns.
-
-$$\begin{bmatrix}
-  0000 & 1011 & 0101 & 1110\\
-  1000 & 0011 & 1101 & 0110\\
-  0100 & 1111 & 0001 & 1010\\
-  0010 & 1001 & 0111 & 1100\\
-\end{bmatrix}$$
-
-Simple decoding of linear codes rests on the observation that a received vector lies in the same coset as its error vector (received = codeword + error). By choosing coset leaders to be minimum weight vectors, our best guess for the error, standard array decoding is a nearest neighbour decoding scheme. A received word may be looked up in the standard array, and the corresponding codeword may be found by looking at the *top entry* of its column.
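-
-A sketch of standard array decoding for the $[4,2]$-code above (coset leaders chosen greedily by weight, ties broken arbitrarily):
-
-```python
-from itertools import product
-
-C = ["0000", "1011", "0101", "1110"]
-
-def add(x, y):
-    """Componentwise addition in V(4, 2)."""
-    return "".join(str((int(a) + int(b)) % 2) for a, b in zip(x, y))
-
-weight = lambda x: x.count("1")
-
-# partition V(4, 2) into cosets a + C; the first row is C itself (leader 0000)
-remaining = {"".join(bits) for bits in product("01", repeat=4)}
-rows = []
-while remaining:
-    leader = min(remaining, key=weight)
-    coset = [add(leader, c) for c in C]
-    rows.append(coset)
-    remaining -= set(coset)
-assert len(rows) == 4  # q^(n-k) = 2^2 cosets
-
-def decode(received):
-    """Find the received word's coset, subtract its leader (= add, over GF(2))."""
-    for coset in rows:
-        if received in coset:
-            return add(received, coset[0])
-    raise ValueError("not a word of V(4, 2)")
-
-assert decode("1011") == "1011"  # codewords decode to themselves
-assert decode("1001") in C       # a corrupted word decodes to some codeword
-```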
-
-### Probability of error correction
-
-### Probability of error detection
-
-### cosets
-
-### Parity-check matrices
-
-... missed a bunch ...
-
-## Chapter 7: The dual code etc
-
-### Dual Codes
-
-The **inner product** $u ⋅ v$ of vectors $u$ and $v$ is the *dot product*. (todo: learn relation) If $u ⋅ v = 0$, $u$ and $v$ are **orthogonal**.
-- $u ⋅ v = v ⋅ u$
-- $(λu + μv) ⋅ w = λ(u ⋅ w) + μ(v ⋅ w)$
-
-For some linear $[n,k]$-code $C$, the **dual code** of $C$ (denoted $C^⊥$) is the set of vectors in $V(n,q)$ which are *orthogonal to every codeword of $C$*. We may generalize this to say: a vector $v ∈ V(n,q)$ belongs to $C^⊥$ if and only if $v$ is orthogonal to every row of $G$, the generator matrix for $C$.
-- $C^⊥ = \{v ∈ V(n,q) \mid v ⋅ u = 0 \text{ for all } u ∈ C\}$
-
-$C^⊥$ has dimension $n-k$: in other words, if $C$ is an $[n,k]$-code over $GF(q)$, the dual code $C^⊥$ is a linear $[n,n-k]$-code.
-
-We know that $(C^⊥)^⊥ = C$.
-
-A **parity-check matrix** $H$ for an $[n,k]$-code $C$ is a generator matrix of $C^⊥$. Note that this means any linear code is completely specified by a parity-check matrix. If $G = [I_k | A]$ is the standard form generator matrix of an $[n,k]$-code $C$, then a parity-check matrix in **standard form** is $H = [-A^T | I_{n-k}]$.
-
-## Syndrome Decoding
-
-For a parity-check matrix $H$, the **syndrome** of a given row vector $y$ is the vector $S(y) = yH^T$.
-
-## Hamming Codes
-
-The Hamming codes are an important family of single-error-correcting codes which are easy to encode and decode. They are linear and definable over any finite field $GF(q)$.
-
-Let $r$ be a positive integer and let $H$ be an $r×(2^r - 1)$ matrix whose columns are the distinct non-zero vectors of $V(r, 2)$. Then the code having $H$ as its parity-check matrix is called a **binary Hamming code** and is denoted by $Ham(r,2)$.
-
-To find a Hamming code, you start with the parity-check matrix $H$, then find the generator matrix $G$, then find the code $C$.
-
-$$H = [-A^⊤ | I_{n-k}]$$
-$$G = [I_k | A]$$
-
-For a binary Hamming code of $r ≥ 2$:
-- it is a $[2^r - 1, 2^r - 1 - r]$-code
-- it has minimum distance $d=3$
-- it is a perfect code.
-
-### How to find $dist(C)$ from H
-
-$C = Nul(H)$, with codewords $[a_1 a_2 ... a_n]$. Write $H$ in terms of its columns $H_i$: $H = [H_1 | H_2 | ... | H_n]$. Then
-
-$$H \begin{bmatrix}a_1\\ a_2\\ \vdots\\ a_n\end{bmatrix} = a_1 H_1 + a_2 H_2 + ... + a_n H_n = \vec{0}$$
-
-So a codeword is a linear relation among the $H_i$, and
-the smallest weight of a codeword is the shortest linear relation among the $H_i$.
-
-ex. Consider a codeword of weight 1, i.e. $\vec{w} = [0 ... 0\ a_i\ 0 ... 0]$.
-Relation: $a_i H_i = \vec{0} \iff H_i = \vec{0}$.
-There is a codeword of weight 1 iff some $H_i = \vec{0}$.
-
-ex. Consider a codeword of weight 2, i.e. $\vec{w} = [0 ... a_i ... a_j ... 0]$.
-Relation: $a_i H_i + a_j H_j = \vec{0} \iff H_i = -\frac{a_j}{a_i} H_j$, i.e. two columns are parallel.
-
-### Theorems about distance
-
-For an [n, k] q-ary code of distance $d$, any $d-1$ columns of the parity check matrix H have to be linearly independent.
-
-A Hamming code has to have distance 3: its columns are distinct and non-zero, so no one or two columns are dependent, but some three are (e.g. $H_1$, $H_2$, and the column equal to $H_1 + H_2$).
-
-Suppose $C$ is a linear $[n, k]$-code over $GF(q)$ with parity-check matrix $H$. Then the minimum distance of $C$ is $d$ if and only if any $d-1$ columns of $H$ are linearly independent but some $d$ columns are linearly dependent.
-
-... missed a bunch...
-
-## Polynomials
-
-We will denote the field $GF(q)$ by $F_q$, or frequently $F$ (with $q$ implicit). Then, $F[x]$ is the set of polynomials in $x$ with **coefficients** in $F$.
-
-Polynomials in $F[x]$ form a **ring**. They:
-- Contain the zero polynomial $0$
-- Are closed under addition: $(5x^2 + x) + (x^3 + 2x^2) = x^3 + 7x^2 + x$
-- Are closed under subtraction: $(5x^2 + x) - (x^3 + 2x^2) = -x^3 + 3x^2 + x$
-- Are closed under multiplication: $(5x^2 + x) · (x^3 + 2x^2) = 5x^5 + 11x^4 + 2x^3$
-- *Do not* have multiplicative inverses! So they are not a field.
-
-Just as $Z_m$ is a ring, so also is $F[x] / f(x)$: it is called a quotient (polynomial) ring, covered below.
-
-For some polynomial $f(x) = f_m x^m + f_{m-1} x^{m-1} + ... f_1 x + f_0$, $m$ is called the **degree** of $f(x)$, and $f_m$ is called the **leading coefficient** of $f(x)$. By convention, the degree of the zero polynomial is $-∞$ (so that the degree formula below holds without exception). A polynomial is called **monic** if its leading coefficient is 1. Note that if $f(x), g(x) \in F[x]$, $deg(f(x)g(x)) = deg(f(x)) + deg(g(x))$.
-
-The division algorithm exists for polynomials just as it exists for the integers. For any pair of polynomials $a(x), b(x) \in F[x]$ with $b(x) ≠ 0$, there exists a unique pair of polynomials $q(x)$ and $r(x)$ such that $a(x) = q(x)b(x) + r(x)$, with $deg(r(x)) < deg(b(x))$.
-
-### The division algorithm for polynomials
-
-An element $a$ of $Z_p$ is a **root** of $f(x) \in Z_p [x]$ if $f(a) = 0$.
-An element $a$ is a root of $f(x)$ if and only if $x-a$ divides $f(x)$.
-
-A polynomial $f(x) \in F[x]$ is said to be **reducible** if $f(x) = a(x)b(x)$, where $a(x), b(x) \in F[x]$ and $deg(a(x)) < deg(f(x))$ and $deg(b(x)) < deg(f(x))$, i.e. it is a product of smaller degree polynomials. If $f(x)$ is not reducible, it is called **irreducible**.
-Just as any positive integer can be factorized uniquely into a product of prime numbers, any *monic* polynomial in $F[x]$ can be factorized uniquely into a product of irreducible monic polynomials. Some useful lemmas:
-- A polynomial $f(x)$ has a linear factor $x-a$ if and only if $f(a) = 0$.
-- A polynomial $f(x) \in F[x]$ of degree 2 or 3 is irreducible if and only if $f(a) ≠ 0$ for all $a \in F$.
-- Over any field, $x^n - 1 = (x-1)(x^{n-1} + x^{n-2} + ... + x + 1)$.
-
-Proof for a polynomial of degree 2 or 3 being irreducible: any factorization of such an $f(x)$ must include a factor of degree 1, i.e. a root.
-
-The property of a polynomial being irreducible corresponds directly to the property in $ℤ$ of a number being prime.
-- The ring $Z_m$ is a field if and only if $m$ is prime.
-- The ring $F[x]/f(x)$ is a field if and only if $f(x)$ is irreducible in $F[x]$.
-- For any prime number $p$ and any positive integer $h$, there exists an irreducible polynomial over $GF(p)$ of degree $h$. This corresponds to the existence of fields $GF(p^h)$ for all integers $h ≥ 1$.
-
----
-
-Factoring polynomials
-
-$f(x) \in Z_p [x]$
-$f(x) = C·p_1(x) p_2(x) ... p_m(x)$, where each $p_i(x)$ is monic and irreducible
-
-$n \in ℤ$
-$n = \pm p_1 p_2 ... p_m$
-
-Irreducible polynomials in $Z_2 [x]$, read off as binary numbers:
-- $x$, $x+1$: $10_2 = 2$, $11_2 = 3$
-- $x^2 + x + 1$: $111_2 = 7$
-- $x^3 + x + 1$: $1011_2 = 11$
-- $x^3 + x^2 + 1$: $1101_2 = 13$
-- but no 5???? huh: $x^2 + 1$ ($101_2 = 5$) is reducible, see below
-
-### Factorization of polynomials
-
-Factorization: a polynomial is fully reduced when it is a product of irreducible polynomials.
-How to factor: just, kinda do it... try dividing by every irreducible polynomial.
-
-All (monic) irreducible polynomials in $Z_2 [x]$:
-
-deg. | all. | reducible. | irreducible
------|------|------------|------------
-1 | $x$, $x+1$ | none | $x$, $x+1$
-2 | $x^2$, $x^2 + 1$, $x^2 + x$, $x^2 + x + 1$ | $x^2 = x·x$, $x^2 + x = x(x+1)$, $x^2 + 1 = x^2 + 2x + 1 = (x+1)(x+1)$ | $x^2 + x + 1$
-3 | 8 poly | 6 poly | $x^3 + x + 1$, $x^3 + x^2 + 1$
-
-Note: while these irreducible polynomials can be written as binary numbers, ex. $x+1 ~ 11_2 = 3$, and all of the above examples are prime, there is *not* a correspondence with primes. See $x^2 + 1$: $101_2 = 5$, but $x^2 + 1 ≡ (x+1)(x+1)$ (in $Z_2 [x]$).
-
-ex. Factor
-$$f(x) = x^6 + x^5 + x^4 + x^3 + x^2 + x$$
-
-1. Check whether an irreducible polynomial divides $f(x)$.
-   - Does $f$ have roots? Yes, $f(0) = 0$. So $x-0$ divides $f$.
-   - $f(x) = x g(x) = x(x^5 + x^4 + x^3 + x^2 + x + 1)$
-   - $g(x) = (x^5 + x^4 + x^3 + x^2 + x + 1)$
-
-2. Keep checking whether a linear polynomial divides $f(x)$.
-   - Does $f$ have roots? Yes, $f(1) = 0$. So $x-1$ divides $f$.
-   - Note that $x-1 ≡ x+1$ mod $2$.
-   - $f(x) = x (x+1) h(x) = x(x+1)(x^4 + x^2 + 1)$
-   - $h(x) = (x^4 + x^2 + 1)$
-
-3. Now no linear polynomials divide $f(x)$. What to do?
-   - $h(x)$ may still be factored...
-   - By an identity in $Z_2$, $(a + b + c)^2 ≡ (a^2 + b^2 + c^2)$ (in $Z_2$)
-   - Verify by long division: $(x^4 + x^2 + 1) = (x^2 + x + 1)^2$
-   - $f(x) = x(x+1)(x^2+x+1)^2$
-
-### Modular polynomials
-
-The idea behind modular polynomials: just modular arithmetic! $Z_p [x] / f(x)$ denotes the set of polynomials in $Z_p [x]$ that are congruent mod $f(x)$, i.e. with $f(x) ≡ 0$.
-
-Definition: $g(x), h(x) \in Z_p [x]$ are congruent (mod $f(x)$), i.e.
-$$g ≡ h (\mod f)$$
-if $g(x) = h(x) + t(x) f(x)$ for some $t(x)$.
-
-Every $g(x)$ is congruent to a unique polynomial $r(x)$ with $\deg r(x) < \deg f(x)$.
-- Proof: just divide $g(x)$ by $f(x)$!
-- $\frac{g}{f} = q(x) + \frac{r(x)}{f(x)}$
-- $g = f·q + r$
-- $f·q ≡ 0$
-
-Note: for $ℤ/(m)$, every $n$ is congruent to an element in $\{0, 1, ..., m-1\}$,
-i.e. $Z_m = ℤ/(m)$.
-
-Definition: Let $f(x) \in Z_p [x]$, $\deg f = n > 0$.
-Then $Z_p [x] / f(x)$ = the set of all polynomials $r(x)$ with $\deg r < n$.
-
-Operations `+`, `*`:
-- Do the usual operations with polynomials
-- If the result has degree ≥ n, then take its residue mod $f(x)$.
-- This is a *ring*! We have addition, subtraction (additive inverse), multiplication... no division
-- *also* a vector space! isomorphic to $Z_p^n$
-  - In a vector space: need addition and *scalar multiplication*
-
-ex. $Z_2 [x] / (x^3 + x)$
-(we'll consider $f(x) = x^3 + x$)
-
-Addition: polynomial addition
-- $(1+x^2) + (x+x^2) = 1+x$ (as $2x^2 ≡ 0$ in $Z_2$)
-
-Multiplication:
-- $(1+x^2)(x+x^2) = x+x^2+x^3+x^4 ≡ r(x)$ (mod $f$)
-- by long division, $x^4 + x^3 + x^2 + x = (x+1)(x^3 + x)$, with remainder 0
-- So $(1+x^2)(x+x^2) ≡ 0$: since $x^3 + x$ is reducible, this ring has zero divisors
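-
-A sketch of this arithmetic with coefficient lists (lowest degree first), verifying the multiplication example above:
-
-```python
-def polymul(a, b, p):
-    """Multiply two polynomials over Z_p."""
-    out = [0] * (len(a) + len(b) - 1)
-    for i, ai in enumerate(a):
-        for j, bj in enumerate(b):
-            out[i + j] = (out[i + j] + ai * bj) % p
-    return out
-
-def polymod(a, f, p):
-    """The residue of a modulo f over Z_p (f monic), by long division."""
-    a = a[:]
-    while len(a) >= len(f):
-        if a[-1] == 0:
-            a.pop()
-            continue
-        shift, lead = len(a) - len(f), a[-1]
-        for i, fi in enumerate(f):
-            a[shift + i] = (a[shift + i] - lead * fi) % p
-        a.pop()
-    return a
-
-# in Z_2[x]/(x^3 + x): (1 + x^2)(x + x^2) ≡ 0, a pair of zero divisors
-f = [0, 1, 0, 1]                            # x^3 + x, monic
-product = polymul([1, 0, 1], [0, 1, 1], 2)  # (1 + x^2)(x + x^2)
-assert not any(polymod(product, f, 2))
-```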
-
-### Quotient polynomial rings
-
-We've previously defined quotient rings: $Z_m = ℤ/(m)$, where:
-- $m ≡ 0$
-- $n ≡ r$ (mod $m$)
-- $r =$ the principal residue
-- $n/m = q + r/m$, for $0 ≤ r < m$
-- $Z_m$ then equals the set of *principal residues*, i.e. $\{0, 1, 2, ..., m-1\}$ for $m$
-
-We can also have quotient polynomial rings: $Z_p [x] / f(x)$, where
-- $f(x) ≡ 0$
-- $g(x) ≡ r(x)$ (mod $f$)
-- $r(x) =$ the principal residue
-- $g(x)/f(x) = q(x) + r(x)/f(x)$ for some $deg(r) < deg(f)$
-- $Z_p [x] / f(x)$ then equals the set of *principal residues*, i.e. the $r(x)$ with $deg(r) < deg(f)$
-
-Operations on quotient rings are the usual: `+`, `·`. But when can we divide?
-- We can divide by $n$ when $1/n$ exists, which is equivalent to $\gcd(n, m) = 1$
-- To have a field: $m = p$ prime
-
-Ex. $Z_2 [x] / (x^3 + x + 1)$
-- Note: this is a field! $(x^3 + x + 1)$ is irreducible
-- Goal: find $1/(x+1)$
-- $gcd(x^3 + x + 1, x+1)$
-- $gcd(x^3 + x + 1 - (x+1)(x^2 + x), x+1) = gcd(1, x+1)$
-- $gcd(1, x+1 - 1·(x+1)) = gcd(1, 0)$
-- in terms of $f$ and $g$: $gcd(f, g) = gcd(f - g(x^2 + x), g)$
-- $1 = f - g(x^2 + x)$
-- $1 ≡ -(x^2 + x) g$ (mod $f$)
-- $1/g = x^2 + x$
-
-Ex. $Z_3 [x] / (x^2 + x + 2)$
-- Goal: find $1/x$
-- $gcd(x^2 + x + 2, x)$
-- $gcd(x^2 + x + 2 - (x+1)·x, x)$
-- $gcd(2, x)$
-- in terms of $f$ and $g$: $gcd(f, g) = gcd(f - (x+1)·g, g)$
-- $2 = f - (x+1)·g$
-- $1 = 2(f - (x+1)·g)$ (as $2·2 ≡ 1$ in $Z_3$)
-- $1/g = -2(x+1) = x+1$
-
-So what about fields? $Z_p [x] / f(x)$ is a field, where:
-- The characteristic $p$ is prime
-- $f(x)$ (of degree n) is irreducible
-- $\{a_0 + a_1 x + a_2 x^2 + ... + a_{n-1} x^{n-1} | a_i \in Z_p\}$ $\simeq$ $Z_p^n$
-- The field has $q = p^n$ elements
-- Fact: every finite field is of this type, $𝔽_q$
-- Two fields with the same number of elements turn out to be the same field! They're *isomorphic*.
-
-Ex. $Z_2 [x] / (x^3 + x + 1) = 𝔽_8 \simeq Z_2 [x] / (x^3 + x^2 + 1) = 𝔽_8$
-- an isomorphism is given by $x ↦ x+1$
-
-Ex. $F_4 = Z_2 [x] / (x^2 + x + 1) \neq Z_4$
-- $\{0, 1, x, x+1\} \neq \{0, 1, 2, 3\}$
-
-𝔽_4 | 0 | 1 | x | x+1
-----|---|---|---|----
-0 | 0 | 0 | 0 | 0
-1 | 0 | **1** | x | x+1
-x | 0 | x | x+1 | **1**
-x+1 | 0 | x+1 | **1** | x
-
-Z_4 | 0 | 1 | 2 | 3
-----|---|---|---|---
-0 | 0 | 0 | 0 | 0
-1 | 0 | **1** | 2 | 3
-2 | 0 | 2 | 0 | 2
-3 | 0 | 3 | 2 | **1**
-
-Every $𝔽_q$ has a primitive element $a(x)$:
-$$a(x), a^2(x), ..., a^{q-1}(x)$$
-are all distinct nonzero elements of $𝔽_q$
-
-Ex. In $𝔽_4$, is $x$ primitive?
-- $x$, $x^2 = x+1$, $x^3 = x(x+1) = 1$: yes, all distinct
-- $b^{q-1} = 1$ for any $0 \neq b \in 𝔽_q$
-
-Fermat's little theorem:
-- $b = [a(x)]^i$, so $b^{q-1} = [a(x)^{q-1}]^i = 1^i = 1$
-
-Example from first midterm:
-- In $Z_m$, $b^{m-1} = 1$ (for all $b ≠ 0$) holds iff $m=p$ is prime
-- ex. in $Z_{123}$, $b^{122} = 1$ fails: $123 = 3 · 41$ is not prime
-- $Z_m^× = \{n \in Z_m \mid 1/n \text{ exists}\}$
-- $b^{|Z_m^×|} = 1$ if $b \in Z_m^×$
-- $Z_4^× = \{1,3\}$: $b^2 = 1$, while $3^3 \neq 1$
-
-## Cyclic codes
-
-A code $C$ is **cyclic** if $C$ is a linear code, and any cyclic shift of a codeword is also a codeword. In other words, whenever $a_0 a_1 ... a_{n-1} \in C$, also $a_{n-1} a_0 a_1 ... a_{n-2} \in C$.
-- ex. the binary code $\{0000, 0101, 1010, 1111\}$ is cyclic.
-
-Consider the *ring* $R_n = 𝔽_q [x] / (x^n - 1)$: as a vector space, $R_n \simeq 𝔽_q^n$
-
-A cyclic code $C$ is a (nonempty) $C \subseteq R_n$, where:
-- $C$ is a subspace closed under `+`
-- $C$ is closed under scalar multiplication
-- $C$ is closed under `·` by any element of $R_n$
-- In particular, this is a *linear code*
-
-We can consider a cyclic shift to be a *multiplication* by $x$.
-- ex. $a_0 + a_1 x + a_2 x^2 + ... + a_{n-1} x^{n-1}$ = $(a_0, a_1, ..., a_{n-1})$
-- $x·(a_0 + a_1 x + a_2 x^2 + ... + a_{n-1} x^{n-1})$
-- $= a_0 x + a_1 x^2 + a_2 x^3 + ... + a_{n-1} x^n$
-- $x^n ≡ 1$, so...
-- $= a_{n-1} + a_0 x + a_1 x^2 + ... + a_{n-2} x^{n-1}$
-
-$C$ is a **module** over the ring $R_n$, where a **module** is a vector space working over a ring, not a field
-...
-
-missed some stuff...
-
-Every cyclic code is of the form $⟨g(x)⟩$. The polynomial $g(x)$ is uniquely determined by:
-- $g(x)$ is monic
-- $g(x)$ divides $x^n - 1$ in $𝔽_q [x]$
-
-As a corollary, cyclic codes correspond with monic divisors of $x^n - 1$.
-
-Ex. find all binary cyclic codes of length $n=3$.
-$$x^3 - 1 = (x+1)(x^2 + x + 1)$$
-So our two non-trivial codes are:
-- $⟨x+1⟩ = \{\text{all even weight words}\}$
-- $⟨x^2 + x + 1⟩ = \{000, 111\}$
-
-Ex. find all binary cyclic codes of length 4
-- $x^4 - 1 = (x-1)^4$
-So our codes are:
-- $⟨1⟩ = R_n$
-- $⟨x-1⟩ = \{\text{even weight words}\}$
-- $⟨(x-1)^2⟩ = ?$
-- $⟨(x-1)^3⟩ = \{0000, 1111\}$
-- $⟨(x-1)^4⟩ = ⟨0⟩ = \{0000\}$
-
-Let $C \subseteq R_n$ be a cyclic code. Let $g(x) \in C$ be a polynomial of lowest degree in $C$.
-Then, $⟨g⟩ = C$. Proof:
-- To show $⟨g⟩ \subseteq C$: $g(x) \in C \implies f·g \in C$, as $C$ is closed under multiplication.
-- To show $⟨g⟩ \supseteq C$: we must show that any $h(x) \in C$ equals $f(x)·g(x)$ for some $f(x)$.
-  - $\frac{h}{g} = f + \frac{r}{g}$, so $h = f·g + r$, and $r = h-fg \in C$.
-  - $deg(r) < deg(g)$ by the definition of division.
-  - $r=0$, as otherwise its degree would undercut the lowest degree polynomial in $C$.
-
-Ex. $R_3 = 𝔽_2 [x] / (x^3 - 1)$
-- $g(x) = (x^2 + 1) = (x+1)^2$
-- note $(x^3 - 1) = (x+1)(x^2 + x + 1)$
-- $⟨g⟩ =$ a cyclic code:
-- $0 · g = 0$, $1·(x^2 + 1) = x^2 + 1$
-- $x(x^2 + 1) = x^3 + x \equiv 1+x$
-- $(x+1)(x^2 + 1) = x^3 + x^2 + x + 1 \equiv x^2 + x$
-- $x^2 (x^2 + 1) = x^4 + x^2 \equiv x + x^2$
-- $(x^2 + 1)(x^2 + 1) = x^4 + 1 \equiv x + 1$
-- $(x^2 + x + 1) (x^2 + 1) = x^4 + x^3 + x + 1 \equiv x + 1 + x + 1 \equiv 0$
-- $C = \{0, 1+x, x^2 + 1, x^2 + x\} = ⟨1+x⟩$
-
-Let $g(x)$ divide $x^n - 1$ in $𝔽_q [x]$, with $deg(g) = m$. Then $dim ⟨g⟩ = dim(C) = n-m$.
-- Matrix rows: $g(x) = a_0 + a_1 x + a_2 x^2 + ... + a_m x^m$ and its shifts,
-- i.e. $g(x), x·g(x), x^2 g(x), ..., x^{n-m-1} g(x)$, form a basis for the vector space $C \subseteq R_n$.
-
-($⟨g⟩$ is the *ideal* generated by $g$.)
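-
-A sketch that generates a cyclic code by closing a word under shifts and sums, reproducing $⟨1+x⟩ ⊆ R_3$ from the example above:
-
-```python
-def shift(word):
-    """One cyclic shift: multiplication by x in R_n."""
-    return word[-1:] + word[:-1]
-
-def add(u, v):
-    """Componentwise addition over F_2."""
-    return "".join(str((int(a) + int(b)) % 2) for a, b in zip(u, v))
-
-def cyclic_code(g):
-    """<g(x)> in F_2[x]/(x^n - 1), with g given as the string a_0 a_1 ... a_(n-1)."""
-    code = {g}
-    while True:
-        bigger = {shift(w) for w in code} | {add(u, v) for u in code for v in code}
-        if bigger <= code:
-            return sorted(code)
-        code |= bigger
-
-# g(x) = 1 + x, i.e. "110": generates the even weight words of length 3
-assert cyclic_code("110") == ["000", "011", "101", "110"]
-```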
-
----
-
-### Vandermonde matrix
-
-Let $𝔽$ be a field and $x_1, ..., x_n ∈ 𝔽$. Define
-
-$$V = \begin{bmatrix} 1 & 1 & ... & 1 \\ x_1 & x_2 & ... & x_n \\ x_1^2 & x_2^2 & ... & x_n^2 \\ \vdots & \vdots & & \vdots \\ x_1^{n-1} & x_2^{n-1} & ... & x_n^{n-1} \end{bmatrix}$$
-
-If the $x_i$ are distinct, then $V$ is invertible (i.e. the determinant is nonzero).
-
-The determinant: $\det V = Π_{i>j} (x_i - x_j)$
-examples...
-The determinant of a Vandermonde matrix equals that of its transpose.
-
-### Lagrange interpolation
-
-ex. interpolating a continuous *function*: distinct $x_i$, not necessarily distinct $y_i$
-
-Theorem: If the $x_i$ are distinct (for any numbers $y_i$), then there is a unique $p(x)$ of degree ≤ $n-1$ that interpolates the points $(x_i, y_i)$.
-
-Proof: $P_{n-1} = \{a_0 + a_1x + ... + a_{n-1} x^{n-1} | a_i ∈ 𝔽\}$
-$P_{n-1} \simeq 𝔽^n$, via
-$p(x) \leftrightarrow (a_0, a_1, ..., a_{n-1})$
-
-We want a $p(x)$ such that when evaluated at $x_1, x_2, ...$ it equals $y_1, y_2, ...$
-
-The theorem is equivalent to saying that the evaluation map $T : P_{n-1} → 𝔽^n$, $T(p) = (p(x_1), ..., p(x_n))$, is invertible.
-
-Invertible means 1-1 and onto (equivalent here: for a linear map between spaces of the same dimension, if you have one property you have the other).
-
-Rank-Nullity theorem: $\dim P_{n-1} = \dim N(T) + \dim R(T)$
-
-1-1 means that the null space is zero. Why? If $T(p) = 0$, then $p$ has $n$ distinct roots but degree ≤ $n-1$, so $p = 0$.
-
-Onto means that the dimension of the range is all of $\dim 𝔽^n = n$.
-
-The matrix of $T: P_{n-1} → 𝔽^n$:
-$T = [T(1), T(x), T(x^2), ...]$, which is the transpose of the Vandermonde matrix above.
-
-### BCH codes
-
-Fix distinct $x_1, x_2, ..., x_n$ in $𝔽_q$.
-Fix some redundancy $r ≤ n$.
-
-$$H = \begin{bmatrix} 1 & 1 & ... & 1 \\ x_1 & x_2 & ... & x_n \\ \vdots & \vdots & & \vdots \\ x_1^{r-1} & x_2^{r-1} & ... & x_n^{r-1} \end{bmatrix}$$
-
-$\dim C^⟂ = r$, $\dim C = n - r$.
-
-The data of a codeword: $n - r$ symbols; the redundancy of a codeword: $r$ symbols.
-Distance of C: any $r$ columns of $H$ are independent ($r$ columns form an $r×r$ Vandermonde matrix), so $d ≥ r + 1$.
-
-ex. construct a length 10 ($n$) code of distance 5 ($r + 1$), which can correct 2 errors.
-Choose $𝔽_q$ and $x_1, ..., x_{10}$ in $𝔽_q$:
-take $𝔽_{11} = Z_{11}$, $x_1 = 1, x_2 = 2, ..., x_{10} = 10$.
-
-$$H = \begin{bmatrix} 1 & 1 & ... & 1 \\ 1 & 2 & ... & 10 \\ 1 & 2^2 & ... & 10^2 \\ 1 & 2^3 & ... & 10^3 \end{bmatrix}$$ (# of rows: 4)
-
-BCH codes are usually not perfect, but they do achieve the Singleton bound.
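-
-A sketch verifying the distance claim for the length-10 code above, by checking every choice of $r$ columns of $H$:
-
-```python
-from itertools import combinations
-
-p, n, r = 11, 10, 4               # F_11, length 10, redundancy 4: distance 5
-xs = list(range(1, 11))           # x_1, ..., x_10: distinct and nonzero mod 11
-H = [[pow(x, i, p) for x in xs] for i in range(r)]  # rows x^0, x^1, x^2, x^3
-
-def vandermonde_det(pts):
-    """Π (x_j - x_i) over i < j, mod p: the Vandermonde determinant up to sign."""
-    d = 1
-    for i, j in combinations(range(len(pts)), 2):
-        d = d * (pts[j] - pts[i]) % p
-    return d
-
-# distinct x's give nonzero determinants: any r columns of H are independent,
-# so no nonzero codeword has weight ≤ r, i.e. d(C) ≥ r + 1 = 5
-assert all(vandermonde_det(sub) != 0 for sub in combinations(xs, r))
-```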
-
-## Singleton bound
-
-C: length n, distance d, q-ary. Then:
-$M ≤ q^{n-d+1}$, which (if C is linear) is equivalent to: $\dim C ≤ n-d+1$
-
-BCH code: $d = r+1$, and the Singleton bound gives
-$\dim C ≤ n-(d-1) = n-r$, which is attained.
-
-M ≤ Singleton bound ≤ sphere packing bound (in this example):
-$M ≤ 11^{10-5+1} = 11^6$
-
-next time: how to decode BCH codes
-
-## The main linear coding theory problem
-
-The main linear coding theory problem: What is the "best" linear code?
-
-Answer: we construct H over 𝔽_q such that
-
-$$H = [H_1 H_2 \ldots H_n]$$
-
-with $n$ as large as possible for some fixed $r$.
-
-How many vectors $H_1 ... H_n$ in $𝔽_q^r$ can we find, such that any $d-1$ are linearly independent? Call the maximum $n_{max}(q, r)$.
-
-Last time: for $d=3$: the best codes are $Ham(r, q)$
-
-Today: for $d=4$: we need to think about algebraic geometry??
-
-How many vectors $H_1, ... H_n$ in $𝔽_q^r$ such that no 3 vectors lie on a plane?
-
-### Projective spaces
-
-$$ℙ^{n-1} = \{\text{all lines through 0 in } 𝔽_q^n\}$$
-
-For r=3:
-
-Ex. for $r=3$, $q=2$: we have a cube with its corners as $000, 010, 100, 110, 001, 011, 101, 111$
-
-```
-010
-|
-|        110
-|
-|
-011------111
-|        |
-|        |
-|        |
-|        |
-001------101-----------100
-```
-
-A projective space reduces the dimension by 1.
-- ex. a line $L$ in $𝔽_q^n$ = a point in $ℙ^{n-1}$
-- ex. a plane $W$ in $𝔽_q^n$ = a line in $ℙ^{n-1}$
-- ex. a 3d subspace in $𝔽_q^n$ = a plane in $ℙ^{n-1}$
-
-In ℙ^2, any two different lines intersect at a point.
-
-Ex. $ℙ^2$ over $𝔽_2$
-```
-   010
-
-  011 111 110
-
-001 101 100
-```
-(the triangle with points intersecting)
-
-### MLCT problem d=4
-Find the maximum number of lines L_1, L_2, ..., L_n in $𝔽_q^r$ such that no 3 lines lie on a plane.
-This is the same as our earlier problem!
-This is also the same as finding the maximum number of points in the projective space $ℙ^{r-1}$ such that no 3 points are on a line.
-
-Ex. for redundancy 3, $𝔽_q = 𝔽_2$:
-- Take all 4 finite points.
-For larger redundancy $r+1$, $𝔽_q = 𝔽_2$:
-- Take all $2^r$ finite points.
-- This is $Ham(r, 2)$ plus a parity bit: $2^r - 1 + 1 = 2^r = \widehat{Ham}(r, 2)$.
-
-Ex. for the case q ≠ 2:
-q = 3
-Cannot take all the finite points.
-We instead consider the points of a conic (e.g. a parabola) in the plane: no 3 of them are collinear.
-For q > 2, r = 3, the maximum n is q+1 if q is odd, and q+2 if q is even.
-
diff --git a/mathematics/foundations.md b/mathematics/foundations.md
deleted file mode 100644
index 9057bea..0000000
--- a/mathematics/foundations.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-layout: foundations
-title: mathematics/foundations
----
-
-# foundations
diff --git a/mathematics/index.md b/mathematics/index.md
deleted file mode 100644
index 7e03385..0000000
--- a/mathematics/index.md
+++ /dev/null
@@ -1,12 +0,0 @@
----
-layout: mathematics
-title: mathematics
----
-
-
-
-
-
-
-# mathematics is the art of *proof*
diff --git a/mathematics/information-theory.md b/mathematics/information-theory.md
deleted file mode 100644
index d0cd75c..0000000
--- a/mathematics/information-theory.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-layout: mathematics
-title: mathematics/information theory
----
-
-# information theory
diff --git a/mathematics/linear-algebra.md b/mathematics/linear-algebra.md
deleted file mode 100644
index 3d58b39..0000000
--- a/mathematics/linear-algebra.md
+++ /dev/null
@@ -1,666 +0,0 @@
----
-layout: algebra
-title: mathematics/linear algebra
----
-
-# Linear Algebra
-
-$$ℝ^n$$
-
-## Fields and Vector Spaces
-
-A **field** $F$ is a *set* with two *binary operations* $+$ and $×$ satisfying the following axioms:
-- $(F, +)$ is a *commutative group*:
-  - associativity: $∀a,b,c : a + (b + c) = (a + b) + c$
-  - additive identity: $∃0, ∀a : 0 + a = a + 0 = a$
-  - additive inverse: $∀a, ∃-a : a + (-a) = 0$
-  - commutativity: $∀a,b : a + b = b + a$
-- $(F ∖ \\{0\\}, ×)$ is a *commutative group*
-  - associativity: $∀a,b,c : a(bc) = (ab)c$
-  - multiplicative identity: $∃1, ∀a : 1a = a1 = a$
-  - multiplicative inverse: $∀a ≠ 0, ∃\frac{1}{a} : a\frac{1}{a} = 1$
-  - commutativity: $∀a,b : ab = ba$
-- $×$ is distributive with respect to $+$
-  - distributivity: $∀a,b,c : a(b + c) = (ab) + (ac)$
-
-Intuitively, a field is a set on which addition, subtraction, multiplication and division are defined and behave as they do on $ℝ$. We often omit the multiplication sign, and write $a × a$ as simply $aa$. It can also be thought of as a *commutative ring* with a multiplicative inverse (sans 0).
-
-A **vector space** $V$ *over* a field $F$ is a set with a binary operation $+$ and a scalar multiplication function satisfying the following axioms:
-- $(V, +)$ is a *commutative group*:
-  - associativity: $∀u,v,w : u + (v + w) = (u + v) + w$
-  - additive identity: $∃0, ∀v : 0 + v = v + 0 = v$
-  - additive inverse: $∀v, ∃-v : v + (-v) = 0$
-  - commutativity: $∀u,v : u + v = v + u$
-- $(V, ⋅)$ is a *scalar operation*
-  - scalar identity: $∃1 ∈ F, ∀v : 1v = v1 = v$
-  - compatibility: $∀a,b ∈ F, ∀v : (ab)v = a(bv)$
-- The *distributive laws* hold:
-  - $∀a ∈ F, ∀u,v ∈ V : a(u + v) = au + av$
-  - $∀a,b ∈ F, ∀v ∈ V : (a + b)v = av + bv$
-
-Our definition of our vector space leads us to some facts:
-- The zero vector is *unique* and always present.
-  - Proof: Suppose there were two zero vectors: $0$ and $0'$. Then $0' = 0 + 0' = 0' + 0 = 0$.
-- Vector spaces are *non-empty*.
-  - Proof: By definition, the zero vector exists.
-- The additive inverse for some $x$ is *unique*.
-  - Proof: Suppose there were two inverses: $-x$ and $-x'$. Then $-x = -x + 0 = -x + (x + (-x')) = (-x + x) + (-x') = 0 + (-x') = -x'$.
-- If $V$ is a vector space over an *infinite* field $F$ and $V ≠ \\{0\\}$, then $V$ is an *infinite set*.
-  - Proof: the scalar multiples $av$ of any $v ≠ 0$ are all distinct.
-Examples
-
-Let $S = \\{(a_1, a_2) : a_1, a_2 ∈ ℝ\\}$.
-For $(a_1, a_2), (b_1, b_2) ∈ S$ and $c ∈ ℝ$, we define:
-- $(a_1, a_2) + (b_1, b_2) = (a_1 + b_1, a_2 - b_2)$
-- $c(a_1, a_2) = (ca_1, ca_2)$.
-
-This fails commutativity! It is thus not a vector space.
-
-Let $S = \\{(a_1, a_2) : a_1, a_2 ∈ ℝ\\}$. We define:
-- $(a_1, a_2) + (b_1, b_2) = (a_1 + b_1, 0)$
-- $c(a_1, a_2) = (ca_1, 0)$
-
-This fails the existence of zero: no $e$ satisfies $e + (a_1, a_2) = (a_1, a_2)$ when $a_2 ≠ 0$! It is thus not a vector space.
-
-## Subspaces
-
-A subset $W$ of a vector space $V$ over a field 𝔽 is called a **subspace** of $V$ if $W$ is itself a *vector space* over 𝔽 with the operations of addition and scalar multiplication from $V$.
-
-A subset of $V$ is a **subspace** of V iff:
-- the subset is non-empty
-- the subset contains the zero vector
-- it is closed under addition and scalar multiplication
-
-Let $V$ be a vector space over $F$ and $S$ a nonempty subset of $V$. A vector $v \in V$ is a **linear combination** of vectors of $S$ if there exists a *finite* number of vectors $u_1, u_2, ..., u_n ∈ S$ and scalars $a_1, a_2, ..., a_n ∈ F$ such that $v = a_1 u_1 + a_2 u_2 + ... a_n u_n$.
-
-We call $a_1 ... a_n$ the *coefficients* of the linear combination.
-
----
-
-## Introduction: The Complex Numbers
-
-A **complex number** is of the form $a + b\text{i}$, where $a, b$ are real numbers and $\text{i}$ is the imaginary unit.
-
-We denote the set of complex numbers as $ℂ$.
-The complex numbers form a *vector space* over $ℝ$:
-every element can be expressed as a *linear combination* of $1$ and $\text{i}$.
-
-- ℂ: the set of complex numbers
-- ℝ: the set of real numbers
-- ℚ: the set of rational numbers
-
-Elementary operations on the complex numbers are as follows:
-- $(a + bi) ± (c + di) = (a ± c) + (b ± d)i$
-- $(a + bi)(c + di) = (ac - bd) + (ad + bc)i$
-- $i^2 = -1$
-
-The **conjugate** of $z = a + bi$ is defined as $\bar{z} = a - bi$.
-As long as $a^2 + b^2 ≠ 0$, the inverse of $z = a + bi$ is given by $z^{-1} = \frac{a - bi}{a^2 + b^2} = \frac{\bar{z}}{a^2 + b^2}$.
-
-**Theorem**: Let $z, w ∈ ℂ$. Then:
-- $\bar{\bar{z}} = z$ (the conjugate of the conjugate)
-- $\bar{z ± w} = \bar{z} ± \bar{w}$
-- $\bar{zw} = \bar{z}\bar{w}$
-- $\bar{\frac{z}{w}} = \frac{\bar{z}}{\bar{w}}$ (if $w ≠ 0$)
-  - (where $\frac{z}{w} = z ⋅ w^{-1} = z \frac{1}{w}$)
-- $z$ is a *real number* iff $\bar{z} = z$
-
-Let $z = a + bi$, where $a, b ∈ ℝ$. The **absolute value** of $z$ is defined as the real number $\sqrt{a^2 + b^2}$.
-
-This absolute value shares many properties. For $z, w ∈ ℂ$:
-- $z \bar{z} = a^2 + b^2 = \|z\|^2$, where $\|z\| = \sqrt{a^2 + b^2}$
-- $\left\|\frac{z}{w}\right\| = \frac{\|z\|}{\|w\|}$ where $w ≠ 0$
-- $\|z + w\| ≤ \|z\| + \|w\|$
-- $\|z\| - \|w\| ≤ \|z + w\|$
-
-**The Fundamental Theorem of Algebra**:
-Consider the polynomial $p(z) = a_n z^n + a_{n-1}z^{n-1} + ... + a_1 z + a_0$ where all $a$ are complex numbers.
-If $n ≥ 1$, then $p(z)$ has a zero (*root*), i.e. there exists a complex number $c$ such that $p(c) = 0$.
-
-If the coefficients are all real and $c$ is a root, $\bar{c}$ is also a root!
-
-Let 𝔽 be one of the following sets: ℚ, ℝ, ℂ
-
-https://math.stackexchange.com/questions/3492590/linear-combination-span-independence-and-bases-for-infinite-dimensional-vector
-
-Let $S$ be a nonempty subset of a vector space $V$. The **span** of $S$, denoted $span(S)$, is the set consisting of all linear combinations of vectors in S. For convenience, we define $span(∅) = \\{0\\}$.
-
-The span of any subset $S$ of a vector space $V$ is a subspace of $V$.
-
----
-
-A subset $S$ of a vector space $V$ is **linearly dependent** if there exists a finite number of distinct vectors $u_1, u_2, ..., u_n ∈ S$ and scalars $a_1, a_2, ..., a_n ∈ F$, not all zero, such that $a_1 u_1 + a_2 u_2 + ... a_n u_n = 0$.
-
-A subset $S$ of a vector space that is not linearly dependent is **linearly independent**.
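-
-Questions of dependence reduce to row reduction; a Python sketch over ℚ (using the set $S$ from the example that follows):
-
-```python
-from fractions import Fraction
-
-def rank(rows):
-    """Rank of a matrix, by Gaussian elimination over the rationals."""
-    m = [[Fraction(x) for x in row] for row in rows]
-    r, col = 0, 0
-    while r < len(m) and col < len(m[0]):
-        pivot = next((i for i in range(r, len(m)) if m[i][col] != 0), None)
-        if pivot is None:
-            col += 1
-            continue
-        m[r], m[pivot] = m[pivot], m[r]
-        for i in range(len(m)):
-            if i != r and m[i][col] != 0:
-                factor = m[i][col] / m[r][col]
-                m[i] = [a - factor * b for a, b in zip(m[i], m[r])]
-        r, col = r + 1, col + 1
-    return r
-
-S = [(1, 0, 0, -1), (0, 1, 0, -1), (0, 0, 1, -1), (0, 0, 0, 1)]
-assert rank(S) == 4               # full rank: S is linearly independent
-
-v = (1, 1, 1, -3)                 # = v_1 + v_2 + v_3
-assert rank(S + [v]) == rank(S)   # adding v leaves the rank alone: v ∈ span(S)
-```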
-
-Example: Consider the following set: $S = \\{(1, 0, 0, -1), (0, 1, 0, -1), (0, 0, 1, -1), (0, 0, 0, 1)\\}$
-Assume that $a_1 v_1 + a_2 v_2 + a_3 v_3 + a_4 v_4 = 0$. Then, treating the vectors as the rows of a matrix:
-as the determinant is nonzero, all $a_i$ must be 0, and so S is linearly independent.
-
-Let $V$ be a vector space, and let $S_1 ⊆ S_2 ⊆ V$. If $S_1$ is linearly dependent, then $S_2$ is linearly dependent. If $S_2$ is linearly independent, then $S_1$ is also linearly independent.
-
-Let $S$ be a linearly independent subset of a vector space $V$, and let $v ∈ V : v ∉ S$. Then $S ∪ \\{v\\}$ is linearly independent iff $v ∉ span(S)$.
-
-A basis $B$ for a vector space $V$ is a *linearly independent* subset of $V$ that *spans* $V$. If $B$ is a basis for $V$, we also say that the vectors of $B$ form a basis for $V$.
-
-Let $V$ be a vector space and $β = \\{v_1, ..., v_n\\}$ be a subset of V. Then β is a basis for V iff every $v ∈ V$ can be **uniquely expressed** as a linear combination of vectors of β: that is, can be written in the form $v = a_1 v_1 + a_2 v_2 ... a_n v_n$ for unique scalars a.
-
-If a vector space V is spanned by a finite set S, then some subset of S is a basis of V. So, V has a finite basis.
-Proof: If $S = ∅$, then $V = span(S) = span(∅) = \\{0\\}$, in which case ∅ is a basis for $V$.
-If S ≠ ∅, then ∃ u_1 ∈ S : u_1 ≠ 𝕆, and we have two cases: if span(u_1) = V we are done...
-
-(Replacement theorem) Let $V$ be a vector space that is spanned by a set G containing exactly n vectors, and let L be a linearly independent subset of V containing exactly m vectors. Then m ≤ n. Moreover, there is a subset H of G containing exactly $n-m$ vectors such that $L ∪ H$ spans $V$.
-
-Let V be a vector space with dimension n.
-- any finite spanning set for V contains at least n vectors, and a spanning set for V that contains exactly n vectors is a basis for V.
-
-Theorem 1.4: Let $W$ be a subspace of a finite-dimensional vector space $V$. Then $W$ is also finite-dimensional (and dim W ≤ dim V). Moreover, if dim W = dim V, then V = W.
-
----
-
-## Linear Transformations
-
-Let $V$ and $W$ be vector spaces (over a field $F$).
-
-A function $T: V → W$ is a **linear transformation** from $V$ into $W$ if $∀x,y ∈ V, c ∈ F$ we have $T(cx + y) = cT(x) + T(y)$. It follows that:
-- $T(x + y) = T(x) + T(y)$
-- $T(cx) = cT(x)$
-- $T(0) = 0$
-- $T(\sum_{i=1}^n a_i x_i) = \sum_{i=1}^n a_i T(x_i)$
-
-Let $T: V → W$ be a linear transformation.
-
-The **kernel** (or null space) $N(T)$ of $T$ is the set of all vectors in $V$ such that $T(x) = 0$: that is, $N(T) = \\{ x ∈ V : T(x) = 0 \\}$.
-
-The **image** (or range) $R(T)$ of $T$ is the subset of $W$ consisting of all images (under $T$) of elements of $V$: that is, $R(T) = \\{ T(x) : x ∈ V \\}$.
-
-Theorem: The kernel $N(T)$ and image $R(T)$ are subspaces of $V$ and $W$, respectively.
-Proof -We shall denote the zero vector of $V$ and $W$ as $0_v$ and $0_w$, respectively. - -Let $x,y ∈ N(T)$ and $c ∈ F$. As $T(0_v) = 0_w$, we have $0_v ∈ N(T)$. Then $T(cx + y) = cT(x) + T(y) = 0_w + 0_w = 0_w$, as $x$ and $y$ are in the null space. Hence any linear combination of vectors in the null space remains in the null space. So as $N(T) ⊆ V$ by definition, $N(T)$ is a subspace of $V$. ∎ - -Let $x,y ∈ R(T)$ and $c ∈ F$. As $T(0_v) = 0_w$, we have $0_w ∈ R(T)$. There exist $v,w ∈ V$ such that $T(v) = x$ and $T(w) = y$. So $T(v + cw) = T(v) + cT(w) = x + cy$. Hence any linear combination of $x$ and $y$ is in the image. So as $R(T) ⊆ W$ by definition, $R(T)$ is a subspace of $W$. ∎ - -
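A small computational companion to this theorem — a sketch using sympy, with an arbitrary example matrix standing in for $T = L_A$:

```python
from sympy import Matrix

# T = L_A : R^3 -> R^2, an arbitrary example.
A = Matrix([[1, 0, -1],
            [0, 1, -1]])

kernel = A.nullspace()     # basis for N(T): here [(1, 1, 1)^t]
image = A.columnspace()    # basis for R(T): here the standard basis of R^2

# Both carry the subspace structure proven above: linear combinations
# of the basis vectors stay inside each space.
print(kernel, image)
```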
- -Theorem: If $β = \\{ v_1, v_2, ... v_n \\}$ is a basis for $V$, then $R(T) = span(\\{ T(v_1), T(v_2), ..., T(v_n) \\})$. -
-Proof - -Clearly, $T(v_i) ∈ R(T)$ for all $i$. As $R(T)$ is a subspace of $W$ by the previous theorem, $R(T)$ *contains* $span(\\{ T(v_1), T(v_2), ..., T(v_n) \\}) = span(T(β))$. - -Now consider an arbitrary $w ∈ R(T)$. By definition, there exists a $v ∈ V$ such that $w = T(v)$. As $β$ is a basis for $V$, we can write $v$ as a linear combination of basis vectors in $β$. As $T$ is linear, it thus must be the case that $T(v) ∈ span(T(β))$. So $R(T) = span(T(β))$. ∎ - -
- -For a finite-dimensional $N(T)$ and $R(T)$: the **nullity** and **rank** of $T$ are the dimensions of $N(T)$ and $R(T)$, respectively. - -**Rank-Nullity Theorem**: If $V$ is *finite-dimensional*, then $dim(V) = nullity(T) + rank(T)$. -
-Proof - -Let $dim(V) = n$, let $nullity(T) = k$, and let $β_N = \\{v_1, ..., v_k\\}$ be a basis for $N(T)$. As $N(T) ⊆ V$, we may extend $β_N$ to a basis $β = \\{v_1, ..., v_n\\}$ for $V$. - -We assert that $S = \\{T(v_{k+1}), T(v_{k+2}), ..., T(v_n) \\}$ is a basis for $R(T)$. We must prove this. - -... - -So $S$ is linearly independent. As it consists of $n-k$ linearly independent vectors that span $R(T)$, it is a basis for $R(T)$, and so $rank(T) = n - k = dim(V) - nullity(T)$. ∎ - -
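A numeric sanity check of the Rank-Nullity Theorem, again with sympy and an arbitrary matrix:

```python
from sympy import Matrix

A = Matrix([[1, 2, 3, 4],
            [2, 4, 6, 8],
            [0, 1, 1, 0]])             # a map F^4 -> F^3

n = A.cols                             # dim(V) = 4
rank = A.rank()                        # dim(R(T)) = 2
nullity = len(A.nullspace())           # dim(N(T)) = 2
assert rank + nullity == n             # dim(V) = rank(T) + nullity(T)
```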
- -Recall that a *function* definitionally maps *each* element of its domain to *exactly* one element of its codomain. -- A function is **injective** (or **one-to-one**) iff each element of its domain maps to a *distinct* element of its codomain, that is, $f(x) = f(y) → x = y$. -- A function is **surjective** (or **onto**) iff each element of the codomain is mapped to by *at least* one element in the domain, that is, $R(T) = W$. -- A function is **bijective** iff it is surjective and injective. Necessarily, a bijective function is invertible, which will be formally stated & proven later. - -Theorem: $T$ is injective iff $N(T) = \\{0\\}$. -
-Proof - -Suppose that $T$ is injective. Consider some $x ∈ N(T)$. Then $T(x) = 0 = T(0)$. As $T$ is injective, $x = 0$, and so $N(T) = \\{0\\}$. ∎ - -Now suppose that $N(T) = \\{0\\}$. Consider some $T(x) = T(y)$. Then $T(x) - T(y) = T(x-y) = 0$. So $x-y ∈ N(T) = \\{0\\}$, and so $x-y = 0$ and $x = y$. So $T$ is injective. ∎ - -
- -Theorem: For $V$ and $W$ of equal (and finite) dimension: $T$ is injective iff it is surjective. -
-Proof - -We have that $T$ is injective iff $N(T) = \\{0\\}$ and thus iff $nullity(T) = 0$. So $T$ is injective iff $rank(T) = dim(R(T)) = dim(W)$, from the Rank-Nullity Theorem. $dim(R(T)) = dim(W)$ is equivalent to $R(T) = W$, which is the definition of surjectivity. So $T$ is injective iff it is surjective. ∎ - -
- -Theorem: Suppose that $V$ has a finite basis $\\{ v_1, v_2, ..., v_n \\}$. For any vectors $w_1, w_2, ... w_n$ in $W$, there exists *exactly* one linear transformation such that $T(v_i) = w_i$ for $i = 1, 2, ..., n$. -
-Proof -... -
- -Theorem: Suppose that $V$ has a finite basis $\\{ v_1, v_2, ..., v_n \\}$. If $U, T : V → W$ are linear and $U(v_i) = T(v_i)$ for all $i$, then $U = T$. -
-Proof -... -
- -## Composition of Linear Transformations - -Let $V$, $W$, and $Z$ be vector spaces. - -Theorem: Let $T, U : V → W$ be linear. Then their sum and scalar products are linear. -
-Proof -... -
- -Theorem: The set of all linear transformations $V → W$ (via the definitions of addition and scalar multiplication below) forms a vector space over $F$. We denote this as $\mathcal{L}(V, W)$. -- If $V = W$, we write $\mathcal{L}(V)$. -
-Proof -... -
- -Let $T, U : V → W$ be arbitrary functions. We define **addition** $T + U : V → W$ as $∀x ∈ V : (T + U)(x) = T(x) + U(x)$, and **scalar multiplication** $aT : V → W$ as $∀x ∈ V : (aT)(x) = aT(x)$ for all $a ∈ F$. - -Theorem: Let $T : V → W$ and $U : W → Z$ be linear. Then their composition $UT : V → Z$ is linear. -
-Proof -Let $x,y ∈ V$ and $c ∈ F$. Then: - -$$UT(cx + y)$$ - -$$= U(T(cx + y)) = U(cT(x) + T(y))$$ - -$$= cU(T(x)) + U(T(y)) = c(UT)(x) + (UT)(y)$$ - -So $UT$ is linear. ∎ - -
- -Theorem: Let $T, U_1, U_2 ∈ \mathcal{L}(V)$, and $a ∈ F$. Then: -- $T(U_1 + U_2) = TU_1 + TU_2$ -- $(U_1 + U_2)T = U_1 T + U_2 T$ -- $T(U_1 U_2) = (TU_1) U_2$ -- $TI = IT = T$ -- $a(U_1 U_2) = (aU_1) U_2 = U_1 (aU_2)$ -
-Proof -... - -
- -## Invertibility and Isomorphism - -- Let $V$ and $W$ be vector spaces. -- Let $T: V → W$ be a linear transformation. -- Let $I_V: V → V$ and $I_W: W → W$ denote the identity transformations within $V$ and $W$, respectively. - -A function $U: W → V$ is an **inverse** of $T$ if $TU = I_W$ and $UT = I_V$.
-If $T$ has an inverse, then $T$ is **invertible**. - -Theorem: Consider a linear function $T: V → W$. -- If $T$ is invertible, it has a *unique* inverse $T^{-1}$. -- If $T$ is invertible, $T^{-1}$ is invertible with the inverse $T$. -- A function is invertible if and only if it is bijective. -
-Proof -... -
- -Theorem: If $T$ is linear and invertible, $T^{-1}$ is linear and invertible. -
-Proof - -Let $y_1, y_2 ∈ W$ and $c ∈ F$. As $T$ is bijective, there exist unique vectors $x_1$ and $x_2$ such that $T(x_1) = y_1$ and $T(x_2) = y_2$. So $x_1 = T^{-1}(y_1)$ and $x_2 = T^{-1}(y_2)$. So: - -$$T^{-1}(y_1 + cy_2) = T^{-1}[T(x_1) + cT(x_2)] = T^{-1}[T(x_1 + cx_2)] = x_1 + cx_2 = T^{-1}(y_1) + cT^{-1}(y_2)$$ - -Thus $T^{-1}$ is linear. We may see it is invertible by definition. ∎ - -
- -$V$ is **isomorphic** to $W$ if there exists an *invertible* linear $T : V → W$ (an **isomorphism**). - -Lemma: For finite-dimensional $V$ and $W$: If $T: V → W$ is invertible, then $dim(V) = dim(W)$. -
-Proof - -As $T$ is invertible, it is bijective. So $nullity(T) = 0$, $rank(T) = dim(R(T)) = dim(W)$. So by the Rank-Nullity Theorem $dim(V) = dim(W)$. ∎ - -
- -Theorem: $V$ is isomorphic to $W$ iff $dim(V) = dim(W)$. Subsequently: -- $V$ is isomorphic to $F^n$ iff $dim(V) = n$. -
-Proof - -Suppose that $V$ is isomorphic to $W$ and that $T : V → W$ is an isomorphism. As $T$ is an isomorphism, it is invertible, and so by our earlier lemma $dim(V) = dim(W)$. - -Now suppose that $dim(V) = dim(W)$. Let $β = \\{v_1, v_2, ..., v_n\\}$ and $γ = \\{w_1, w_2, ..., w_n\\}$ be bases for $V$ and $W$, respectively. Then as $V$ and $W$ are of equal dimension there must exist $T : V → W$ such that $T$ is linear and $T(v_i) = w_i$ for all $i$. Thus $R(T) = span(T(β)) = span(γ) = W$, and $T$ is surjective. As $V$ and $W$ are of equal dimension, $T$ is also injective. So $T$ is bijective, and so $T$ is an isomorphism. ∎ - -
- -## Linear Transformations as Matrices - -- Let $V, W$ be finite-dimensional vector spaces (over a field $F$). -- Let $T, U : V → W$ be linear transformations from $V$ to $W$. -- Let $β$ and $γ$ be ordered bases of $V$ and $W$, respectively. -- Let $a ∈ F$ be a scalar. - -An **ordered basis** of a finite-dimensional vector space $V$ is, well, an ordered basis of $V$. We represent this with exactly the same notation as a standard unordered basis, but will call attention to it whenever necessary. -- For the vector space $F^n$ we call $\\{ e_1, e_2, ..., e_n \\}$ the **standard ordered basis** for $F^n$. -- For the vector space $P_n(F)$ we call $\\{ 1, x, ..., x^n \\}$ the **standard ordered basis** for $P_n(F)$. - -Let $β = \\{ u_1, u_2, ..., u_n \\}$. For each $x ∈ V$, let $a_1, a_2, ..., a_n$ be the unique scalars such that $x = Σ_{i=1}^n a_i u_i$. The **coordinate vector** of $x$ relative to $β$ is the column vector $\begin{pmatrix} a_1 \\ a_2 \\ ⋮ \\ a_n \end{pmatrix}$, denoted $[x]_β$. - -Now let $γ = \\{ w_1, w_2, ..., w_m \\}$, and let $a_{ij}$ be the unique scalars such that $T(u_j) = Σ_{i=1}^m a_{ij} w_i$ for $1 ≤ j ≤ n$. The $m × n$ matrix $A$ defined by $A_{ij} = a_{ij}$ is called the **matrix representation of $T$ in the ordered bases $β$ and $γ$**, and denoted as $A = [T]_β^γ$. If $V = W$ and $β = γ$, we write $A = [T]_β$. - -Theorem: $[T + U]_β^γ = [T]_β^γ + [U]_β^γ$ and $[aT]_β^γ = a[T]_β^γ$. -
-Proof -... -
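To make the construction of $[T]_β^γ$ concrete, here is a sketch for the differentiation map $T = d/dx : P_3(F) → P_2(F)$ in the standard ordered bases (the helper `coords` is my own):

```python
from sympy import Matrix, Poly, diff, symbols

x = symbols('x')
beta  = [x**0, x, x**2, x**3]          # standard ordered basis of P_3
gamma = [x**0, x, x**2]                # standard ordered basis of P_2

def coords(p, basis):
    """Coordinate vector of a polynomial relative to a monomial basis."""
    c = Poly(p, x).all_coeffs()[::-1]  # coefficients, lowest degree first
    return c + [0] * (len(basis) - len(c))

# Column j of [T]_beta^gamma is [T(v_j)]_gamma.
M = Matrix([coords(diff(v, x), gamma) for v in beta]).T
print(M)   # Matrix([[0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3]])
```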
- --- - -Let $A$ be an $n × n$ matrix. Then $A$ is **invertible** iff there exists an $n × n$ matrix $B$ such that $AB = BA = I$. - -Theorem: If $A$ is invertible, the matrix $B$ is unique, and denoted $A^{-1}$. -
-Proof -Suppose there existed another inverse matrix $C$. Then $C = CI = C(AB) = (CA)B = IB = B$. -
- -Theorem: For finite-dimensional $V$ and $W$: $T$ is invertible iff $[T]_β^γ$ is invertible, and $[T^{-1}]_γ^β = ([T]_β^γ)^{-1}$. Subsequently: -- Let $U : V → V$ be linear. $U$ is invertible iff $[U]_β$ is invertible, and $[U^{-1}]_β = ([U]_β)^{-1}$. -- Let $A$ be an $n × n$ matrix. $A$ is invertible iff $L_A$ is invertible, and $(L_A)^{-1} = L_{A^{-1}}$. -
-Proof -... -
- -Theorem: Let $V$ and $W$ be of finite dimensions $n$ and $m$ with ordered finite bases $β$ and $γ$, respectively. The function $Φ : \mathcal{L}(V,W) → M_{m×n}(F)$ where $Φ(T) = [T]_β^γ$ for all $T ∈ \mathcal{L}(V,W)$ is an isomorphism. -That is, the set (really *vector space*) of all linear transformations between two vector spaces $V$ and $W$ itself is isomorphic to the vector space of all $m × n$ matrices. Subsequently: -- For $V$ and $W$ of finite dimensions $n$ and $m$, $\mathcal{L}(V,W)$ is of finite dimension $mn$. -
-Proof -... -
- -The **standard representation** of an $n$-dimensional $V$ with respect to its ordered basis $β$ is the function $ϕ_β : V → F^n$ where $ϕ_β(x) = [x]_β$ for all $x ∈ V$. - -Theorem: For any $V$ with ordered basis $β$, $ϕ_β$ is an isomorphism. -
-Proof -... -
- -## The Change of Coordinate Matrix - -- Let $V$ be a finite-dimensional vector space (over a field $F$). -- Let $β$ and $β'$ be two ordered bases for $V$. -- Let $T$ be a linear operator on $V$. - -Theorem: Let $Q = [I_V]_{β'}^β$. Then $Q$ is invertible, and for any $v ∈ V$, $[v]_β = Q[v]_{β'}$. -
-Proof -... -
- -We call such a matrix $Q = [I_V]_{β'}^β$ a **change of coordinate matrix** and say that $Q$ **changes β'-coordinates into β-coordinates**. - -Let $Q = [I_V]_{β'}^β$. - -Theorem: $Q^{-1}$ changes β-coordinates into β'-coordinates. -
-Proof -... -
- -Theorem: $[T]_{β'} = Q^{-1} [T]_β Q$ -
-Proof -... -
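A quick numeric check of this theorem — an arbitrary operator and second basis, verified with sympy:

```python
from sympy import Matrix

T_beta = Matrix([[2, 1],
                 [0, 3]])      # [T]_beta, an arbitrary example
Q = Matrix([[1, 1],
            [1, -1]])          # columns: the beta'-vectors in beta-coordinates

T_beta_prime = Q.inv() * T_beta * Q    # [T]_beta' = Q^{-1} [T]_beta Q

# Both matrices represent the same operator, so similarity invariants agree.
assert T_beta.trace() == T_beta_prime.trace()
assert T_beta.det() == T_beta_prime.det()
```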
- -## Dual Spaces - -The **dual space** of a vector space $V$ is the vector space $\mathcal{L}(V,F)$ and is denoted by $V^*$. - -- Let $V, W$ be finite-dimensional vector spaces (over a field $F$). -- Let $T, U : V → W$ be linear transformations from $V$ to $W$. -- Let $β$ and $γ$ be ordered bases of $V$ and $W$, respectively. - -Theorem: For a finite-dimensional $V$: $dim(V^*) = dim(\mathcal{L}(V,F)) = dim(V) ∙ dim(F) = dim(V)$. - -Corollary: $V$ and $V^*$ are isomorphic. -
-Proof -$dim(V) = dim(V^*)$ and so they are isomorphic. -
- -The **dual basis** of a basis $β$ (of a vector space $V$) is the ordered basis $β^* = \\{ f_1, f_2, ..., f_n \\}$ of $V^*$, where $f_i$ is the $i$th coordinate function: $f_i(x_j) = δ_{ij}$ $(1 ≤ i, j ≤ n)$. Recall that $δ_{ij} = 1$ if $i=j$, and $0$ otherwise. - -Theorem: Let $β = \\{ x_1, x_2, ..., x_n \\}$. Let $f_i (1 ≤ i ≤ n)$ be the $i$th coordinate function wrt. $β$, and let $β^* = \\{ f_1, f_2, ..., f_n \\}$. Then $β^*$ is an ordered basis for $V^*$, and for any $f ∈ V^*$ we have $f = \sum_{i=1}^n f(x_i) f_i$. - -... - -Theorem: For any linear transformation $T : V → W$, the mapping $T^t : W^\* → V^\*$ defined by $T^t(g) = gT$ for all $g ∈ W^\*$ is a linear transformation, and $[T^t]_{γ^\*}^{β^\*} = ([T]_β^γ)^t$ -
-Proof -... -
- - -## Systems of Linear Equations - -This section is mostly review. - - - ---- - -## Determinants, summarized - -Determinants are important for future sections. We state facts here without proof. - -Let $A$ be a matrix containing entries from a field $F$. - -The **determinant** of an $n×n$ (square) matrix $A$ is a scalar in $F$, denoted $\|A\|$. The determinant is calculated as follows: -- For a $1 × 1$ matrix $A = [a]$, $\|A\| = a$ -- For a $2 × 2$ matrix $A = \begin{bmatrix} a & b \\\\ c & d \end{bmatrix}$, $\|A\| = ad - bc$ -- For an $n × n$ matrix $A = \begin{bmatrix} a_{1,1} & a_{1,2} & ... & a_{1,n} \\\\ a_{2,1} & a_{2,2} & ... & a_{2,n} \\\\ ⋮ & ⋮ & ⋱ & ⋮ \\\\ a_{n,1} & a_{n,2} & ... & a_{n,n} \end{bmatrix}$, $\|A\| = Σ^n_{i=1} (-1)^{i+j} A_{i,j} \|\tilde{A}_{i,j}\|$ for any fixed column $j$, where $\tilde{A}_{i,j}$ denotes the matrix obtained by deleting row $i$ and column $j$ from $A$. - -The last one deserves some additional exposition: todo - -The determinant has a number of nice properties that make it of fair interest. -1. $\|B\| = -\|A\|$ if $B$ is a matrix obtained by exchanging any two rows or columns of $A$ -2. $\|B\| = k\|A\|$ if $B$ is a matrix obtained by multiplying a single row or column of $A$ by some scalar $k$ -3. $\|B\| = \|A\|$ if $B$ is a matrix obtained by adding a multiple of a column or row to a *different* column or row -4. $\|A\| = 1$ if $A$ is the identity matrix -5. $\|A\| = 0$ if either the rows or columns of $A$ are not linearly independent -6. $\|AB\| = \|A\|\|B\|$ if $A$ and $B$ are both $n×n$ matrices -7. $\|A\| ≠ 0$ iff $A$ is invertible -8. $\|A\| = \|A^t\|$ -9. $\|A\| = \|B\|$ if $A$ and $B$ are *similar* - -Thus, we can say that the determinant *characterizes* square matrices (and thus linear operators), somewhat. It is a scalar value with a deep relation to the core identity of the matrix, and changes regularly as the matrix changes. - -## Eigenvalues and Eigenvectors - -- Let $V$ be a finite-dimensional vector space over a field $F$. -- Let $T: V → V$ be a linear operator on $V$. -- Let $β$ be an ordered basis on $V$. -- Let $A$ be in $M_{n×n}(F)$ (a square $n×n$ matrix with entries in $F$). - -$T$ is **diagonalizable** if there exists an ordered basis $β$ for $V$ such that $[T]_β$ is a *diagonal matrix*. -$A$ is **diagonalizable** if $L_A$ is diagonalizable. - -A nonzero vector $v ∈ V$ is an **eigenvector** of $T$ if $∃λ ∈ F$: $T(v) = λv$. -The corresponding scalar $λ$ is the **eigenvalue** corresponding to the eigenvector $v$. -A nonzero vector $v ∈ F^n$ is an **eigenvector** of $A$ if $v$ is an eigenvector of $L_A$ (that is, $∃λ ∈ F$: $Av = λv$) -The corresponding scalar $λ$ is the **eigenvalue** of $A$ corresponding to the eigenvector $v$. - -The terms *characteristic vector* and *proper vector* are also used in place of *eigenvector*. -The terms *characteristic value* and *proper value* are also used in place of *eigenvalue*. - -Theorem: $T: V → V$ is diagonalizable if and only if $V$ has an ordered basis $β$ consisting of eigenvectors of $T$. -
-Proof -... -
- -Corollary: If $T$ is diagonalizable, and $β = \\{v_1, v_2, ..., v_n\\}$ is an ordered basis of eigenvectors of $T$, and $D = \[T\]_β$, then $D$ is a diagonal matrix with $D_{i,i}$ being the eigenvalue corresponding to $v_i$ for each $i ≤ n$. -
-Proof -... -
- -To *diagonalize* a matrix (or a linear operator) is to find a basis of eigenvectors and the corresponding eigenvalues. - -Theorem: A scalar $λ$ is an eigenvalue of $A$ if and only if $\|A - λI_n\| = 0$, that is, the eigenvalues of a matrix are exactly the zeros of its characteristic polynomial. -
-Proof -A scalar $λ$ is an eigenvalue of $A$ iff $∃v ≠ 0 ∈ F^n$: $Av = λv$, that is, $(A - λI_n)(v) = 0$. -... todo -
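As a concrete check, the zeros of $f(t) = \|A - tI_n\|$ do recover the eigenvalues — a sketch with an arbitrary matrix:

```python
from sympy import Matrix, symbols, solve

t = symbols('t')
A = Matrix([[4, 1],
            [2, 3]])

f = (A - t * Matrix.eye(2)).det()      # t**2 - 7*t + 10
roots = solve(f, t)                    # [2, 5]
assert set(roots) == set(A.eigenvals().keys())
```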
- -The **characteristic polynomial** of $A$ is the polynomial $f(t) = \|A - tI_n\|$. -The **characteristic polynomial** of $T$ is the characteristic polynomial of $[T]_β$, often denoted $f(t) = \|T - tI\|$. - -Theorem: The characteristic polynomial of $A$ is a polynomial of degree $n$ with leading coefficient $(-1)^n$. -
-Proof -... -
- -Corollary: $A$ has at most $n$ distinct eigenvalues. -
-Proof -... -
- -Theorem: A vector $v ∈ V$ is an eigenvector of $T$ corresponding to an eigenvalue $λ$ iff $v ∈ N(T - λI)$ and $v ≠ 0$. -
-Proof -... -
- - - -## Diagonalizability - -- Let $V$ be a finite-dimensional vector space over a field $F$. -- Let $T: V → V$ be a linear operator on $V$. -- Let $A$ be in $M_{n×n}(F)$ (a square $n×n$ matrix with entries in $F$). -- Let $λ$ be an eigenvalue of $T$. -- Let $λ_1, λ_2, ..., λ_k$ be distinct eigenvalues of $T$. - -Theorem: Let $λ_1, λ_2, ..., λ_k$ be distinct eigenvalues of $T$. If $v_1, v_2, ..., v_k$ are eigenvectors of $T$ such that $λ_i$ corresponds to $v_i$ (for all $i ≤ k$), then $\\{v_1, v_2, ..., v_k\\}$ is linearly independent. In fewer words, eigenvectors with distinct eigenvalues are all linearly independent from one another. -
-Proof -... -
- - -Corollary: If $T$ has $n$ distinct eigenvalues, where $n = dim(V)$, then $T$ is diagonalizable. -
-Proof -... -
- -A polynomial $f(t) ∈ P(F)$ **splits over** $F$ if there are scalars $c, a_1, a_2, ..., a_n ∈ F$: $f(t) = c(a_1 - t)(a_2 - t)...(a_n - t)$. - -Theorem: The characteristic polynomial of any diagonalizable linear operator splits. -
-Proof -... -
- -Let $λ$ be an eigenvalue of a linear operator or matrix with characteristic polynomial $f(t)$. The **(algebraic) multiplicity** of $λ$ is the largest positive integer $k$ for which $(t-λ)^k$ is a factor of $f(t)$. - -Let $λ$ be an eigenvalue of $T$. The set $E_λ = \\{x ∈ V: T(x) = λx\\} = N(T - λI_V)$ is called the **eigenspace** of $T$ with respect to the eigenvalue $λ$. Similarly, the **eigenspace** of $A$ is the eigenspace of $L_A$. - -Theorem: If $λ$ is an eigenvalue of $T$ with multiplicity $m$, then $1 ≤ dim(E_λ) ≤ m$. -
-Proof -... -
- -Lemma: Let $S_i$ be a finite linearly independent subset of the eigenspace $E_{λ_i}$ (for all $i ≤ k$). Then $S = S_1 ∪ S_2 ∪ ... ∪ S_k$ is a linearly independent subset of $V$. -
-Proof -... -
- -Theorem: If the characteristic polynomial of $T$ splits, then $T$ is diagonalizable iff the multiplicity of $λ_i$ is equal to $dim(E_{λ_i})$ (for all $i ≤ k$). -
-Proof -... -
- -Corollary: If the characteristic polynomial of $T$ splits, $T$ is diagonalizable, $β_i$ is an ordered basis for $E_{λ_i}$ (for all $i ≤ k$), then $β = β_1 ∪ β_2 ∪ ... ∪ β_k$ is an ordered basis for $V$ consisting of eigenvectors of $T$. -
-Proof -... -
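A concrete check of the two-condition test restated below — sympy diagonalizes a matrix exactly when the characteristic polynomial splits and each multiplicity matches the eigenspace dimension (the matrix is an arbitrary example):

```python
from sympy import Matrix

A = Matrix([[3, 1, 1],
            [2, 4, 2],
            [1, 1, 3]])

assert A.is_diagonalizable()
P, D = A.diagonalize()    # columns of P: a basis of eigenvectors (here for 2, 2, 6)
assert P * D * P.inv() == A
```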
- -$T$ is diagonalizable iff both of the following conditions hold: -1. The characteristic polynomial of $T$ splits. -2. For each eigenvalue $λ$ of $T$, the multiplicity of $λ$ equals $n - rank(T-λI)$, where $n = dim(V)$. - -## Direct Sums - -- Let $V$ be a finite-dimensional vector space over a field $F$. -- Let $T: V → V$ be a linear operator on $V$. -- Let $W_1, W_2, ..., W_k$ be subspaces of $V$. - -The **sum** of some subspaces $W_i$ (for $1 ≤ i ≤ k$) is the set $\\{v_1 + v_2 + ... + v_k : v_i ∈ W_i \\}$, denoted $W_1 + W_2 + ... + W_k$ or $Σ^k_{i=1} W_i$. - -The subspaces $W_i$ (for $1 ≤ i ≤ k$) form a **direct sum** of $V$, denoted $W_1 ⊕ W_2 ⊕ ... ⊕ W_k$, if $V = Σ^k_{i=1} W_i$ and $W_j ∩ Σ_{i≠j} W_i = \\{0\\}$ for all $j ≤ k$. - -Theorem: The following conditions are equivalent: -1. $V = W_1 ⊕ W_2 ⊕ ... ⊕ W_k$. -2. $V = Σ^k_{i=1} W_i$ and, whenever $v_1 + v_2 + ... + v_k = 0$ with each $v_i ∈ W_i$, every $v_i = 0$. -3. Every vector $v ∈ V$ can be uniquely written as $v = v_1 + v_2 + ... + v_k$ where $v_i ∈ W_i$. -4. If $γ_i$ is an ordered basis for $W_i$ (for $1 ≤ i ≤ k$), then $γ_1 ∪ γ_2 ∪ ... ∪ γ_k$ is an ordered basis for $V$. -5. There exists an ordered basis $γ_i$ for $W_i$ for every $1 ≤ i ≤ k$ such that $γ_1 ∪ γ_2 ∪ ... ∪ γ_k$ is an ordered basis for $V$. -
-Proof -... -
- -Theorem: $T: V → V$ is diagonalizable if and only if $V$ is the direct sum of the eigenspaces of $T$. -
-Proof -... -
diff --git a/mathematics/logic.md b/mathematics/logic.md deleted file mode 100644 index b07e867..0000000 --- a/mathematics/logic.md +++ /dev/null @@ -1,154 +0,0 @@ ---- -layout: foundations -title: mathematics/logic ---- - -# logic, mathematics, & that around + in between - -## kinds of logic - -there are different types of logic! - -some such systems: -- classical logic -- intuitionistic logic -- paraconsistent logic - -and many others. - -## orders of logic - -### propositional logic - -[**propositional logic**](https://ncatlab.org/nlab/show/propositional+logic) or **zeroth-order logic** deals with raw *propositions*. -**propositions** are statements that *reduce* to a **truth value**. -truth values are classically either true or false. in non-classical logics, this can differ. - -the basic foundations of propositional logic are as follows: - -notation | definition ----------|-------------- -0 | *false* -1 | *true* -p | a *proposition* -¬p | *not* p -p → q | *if* p *then* q, p *implies* q - -several logical connectives are *derivable* from the above: - -notation | derivation | definition -------|---------------------|---- -p ∨ q | ¬p → q | p *or* q, *disjunction* -p ∧ q | ¬(p → ¬q) | p *and* q, *conjunction* -p → q | ¬p ∨ q | p *implies* q, (material) *implication* -p ↔ q | (p → q) ∧ (q → p) | p *if and only if* q, p *iff* q -p ⊕ q | (p ∨ q) ∧ ¬(p ∧ q) | p *exclusively or* q, p *xor* q -p ↑ q | ¬(p ∧ q) | p *not both* q, p *nand* q -p ↓ q | ¬(p ∨ q) | *neither* p *nor* q, p *nor* q - -note that several of these definitions are circular. -our choice in ¬ and → as the primitive connectives is thus arbitrary. -interestingly, ↑ and ↓ are *functionally complete*: we may define all other connectives in terms of them. - -
-aside: nand and nor - -notation | definition ----------|----------- -¬p | p ↑ p -p → q | p ↑ ¬q -p ∨ q | ¬p ↑ ¬q -p ∧ q | (p ↑ q) ↑ (p ↑ q) -p ↔ q | (p ↑ q) ↑ (p ∨ q) - -notation | definition ----------|----------- -¬p | p ↓ p -p → q | (¬p ↓ q) ↓ (¬p ↓ q) -p ∨ q | (p ↓ q) ↓ (p ↓ q) -p ∧ q | ¬p ↓ ¬q -p ↔ q | ... - -
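A quick mechanical check of the aside above — truth tables verifying (in python) that the nand derivations agree with the usual connectives:

```python
from itertools import product

nand = lambda p, q: not (p and q)

derived = {
    "not p":   (lambda p, q: not p,          lambda p, q: nand(p, p)),
    "p -> q":  (lambda p, q: (not p) or q,   lambda p, q: nand(p, nand(q, q))),
    "p or q":  (lambda p, q: p or q,         lambda p, q: nand(nand(p, p), nand(q, q))),
    "p and q": (lambda p, q: p and q,        lambda p, q: nand(nand(p, q), nand(p, q))),
}

for name, (direct, via_nand) in derived.items():
    assert all(direct(p, q) == via_nand(p, q)
               for p, q in product([False, True], repeat=2))
```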
- -### [predicate logic](https://ncatlab.org/nlab/show/predicate+logic) - -**predicate logic** or **first-order logic** adds variables and quantifiers to propositions: -* ∀x: for all x -* ∃y: there exists a y - -these additions are enough to serve as a foundation of mathematics... - -## philosophy of mathematics - -what mathematics *is* is not universally agreed upon. - -there are two primary - - -logicism is widely regarded as correct - -these philosophies - -there are others, but none that i am interested in. - -## foundations of mathematics - -### set theory - -there have been a number of set theories, of varying consistency - - -naive set theory was shown inconsistent - - -### type theory - -*type theories* can serve as an alternative (equivalent) foundation of mathematics. - - -but what is the point in fashioning equivalent foundations when standard ZFC works just fine? - - -## the curry-howard correspondence - -the **curry-howard correspondence** or *propositions-as-types* - - -symbol | logic | types ---- | ------------- | -------------- -⊤ | true | top type -⊥ | false | bottom type -→ | implication | function type -∧ | conjunction | product type -∨ | disjunction | sum type -∀ | universal | dependent product type -∃ | existential | dependent sum type -¬ | negation | many such representations - -a couple of other notes. -programs as proofs comes into play when -you MUST have a constructed thing with a type - -no negation?? -don't fully understand the distinction between dependent and nondependent types -implication over types? - - - -## [principle of explosion](https://en.wikipedia.org/wiki/Principle_of_explosion) - -what's wrong with inconsistency, anyway? - -it may or may not be intuitive that a contradiction can prove any arbitrary statement. -either way, an example can be helpful. -the following logic proves an arbitrary statement B: - -```logic -A = ⊤ and ¬A = ⊤ (a contradiction) -A ∨ B = ⊤ (from A, by disjunction introduction) -A = ⊥ (from ¬A) -B = ⊤ (from A ∨ B and A = ⊥, by disjunctive syllogism) -``` diff --git a/mathematics/number-theory.md b/mathematics/number-theory.md deleted file mode 100644 index 47e63e7..0000000 --- a/mathematics/number-theory.md +++ /dev/null @@ -1,441 +0,0 @@ ---- -layout: algebra -title: mathematics/number theory ---- - -# MATH 312: Number Theory - -## Chapter 1: The Integers - -## Chapter 2: Greatest Common Divisors and Prime Factorization - -## Greatest Common Divisors - -**Definition**: The **greatest common divisor** of two integers $a$ and $b$ not both zero is the largest integer which divides both $a$ and $b$. It is written as $(a,b)$. - -**Definition**: The integers $a$ and $b$ are called **relatively prime** if $a$ and $b$ have greatest common divisor $(a,b) = 1$. - -**Proposition**: Let $a,b,c$ be integers with $(a,b) = d$. Then -- $(a/d, b/d) = 1$ -- $(a+cb, b) = (a,b)$. - -**Definition**: If $a,b$ are integers, then a **linear combination** of $a$ and $b$ is a sum of the form $ma + nb$, where $m,n$ are integers. - -**Theorem**: The greatest common divisor of the integers $a,b$ not both zero is the least positive integer that is a linear combination of $a$ and $b$. - -**Definition**: Let $a_1, a_2, ... a_n$ be integers not all zero. The **greatest common divisor** of those integers is the largest integer which is a divisor of all of the integers in the set. It is written as $(a_1, a_2, ..., a_n)$. - -**Lemma**: If $a_1, a_2, ... a_n$ are integers not all zero, then $(a_1, a_2, ..., a_{n-1}, a_n)$ = $(a_1, a_2, ..., (a_{n-1}, a_n))$. 
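The lemma above justifies computing the gcd of many integers by folding pairwise gcds — a small sketch (the helper name is my own):

```python
from functools import reduce
from math import gcd

def gcd_all(*nums):
    # (a_1, ..., a_{n-1}, a_n) = (a_1, ..., (a_{n-1}, a_n)), applied repeatedly
    return reduce(gcd, nums)

assert gcd_all(12, 18, 30) == 6
assert gcd_all(15, 21, 35) == 1   # mutually relatively prime, though no pair is
```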
- -**Definition**: We say that integers $a_1, a_2, ..., a_n$ are **mutually relatively prime** if their greatest common divisor is 1. These integers are **pairwise relatively prime** if for each pair of integers their greatest common divisor is 1. - -## The Euclidean Algorithm - -**The Euclidean Algorithm**: Let $r_0 = a$ and $r_1 = b$ be nonnegative integers with $b ≠ 0$. If the division algorithm is successively applied to obtain $r_j = r_{j+1} q_{j+1} + r_{j+2}$ with $0 < r_{j+2} < r_{j+1}$ for $j = 0,1,2,...,n-2$ and $r_n = 0$, then $(a,b) = r_{n-1}$, the last nonzero remainder. - -**Lemma**: If $c$ and $d$ are integers and $c = dq + r$, then $(c,d) = (d,r)$. - -**Definition**: The **Fibonacci numbers**... todo - -**Theorem**: Let $n$ be a positive integer and let $α = (1 + \sqrt{5})/2$. Then $u_n > α^{n-2}$ for $n ≥ 3$. - -**Theorem**: todo - -**Lame's Theorem**: The number of divisions needed to find the greatest common divisor of two positive integers using the Euclidean algorithm does not exceed five times the number of digits in the smaller of the two integers. - -**Corollary**: The number of bit operations needed to find the greatest common divisor of two positive integers $a$ and $b$ with $b > a$ is $O((log_2 a)^3)$. - -**Theorem**: Let $a$ and $b$ be positive integers. Then $(a,b) = s_n a + t_n b$ for $n = 0,1,2,...$ where $s_n$ and $t_n$ are the $n$th terms of the sequences defined by $s_0 = 1, s_1 = 0, t_0 = 0, t_1 = 1$ and $s_j = s_{j-2} - q_{j-1} s_{j-1}, t_j = t_{j-2} - q_{j-1} t_{j-1}$ for $j = 2,3,...,n$, where the $q_j$ are the quotients in the divisions of the Euclidean algorithm when it is used to find $(a,b)$. - -## The Fundamental Theorem of Arithmetic - -**The Fundamental Theorem of Arithmetic**: Every positive integer can be written uniquely as a product of primes, in order of nondecreasing size. - -**Lemma**: If $a,b,c$ are positive integers such that $(a,b) = 1$ and $a | bc$, then $a | c$. - -**Corollary**: If $p$ divides $a_1 a_2 ... a_n$ where $p$ is a prime and $a_1, a_2, ..., a_n$ are positive integers, then there is an integer $i$ with $1 ≤ i ≤ n$ such that $p$ divides $a_i$. - -**Definition**: The **least common multiple** of two positive integers $a$ and $b$ is the smallest positive integer that is divisible by both $a$ and $b$. It is written as $[a,b]$. - -**Lemma**: If $x,y$ are real numbers, then $max(x,y) + min(x,y) = x+y$. - -**Theorem**: If $a,b$ are positive integers, then $[a,b] = ab/(a,b)$. - -**Lemma**: Let $m,n$ be relatively prime positive integers. Then if $d$ is a positive divisor of $mn$, there is a pair of unique positive divisors $d_1$ of $m$ and $d_2$ of $n$ such that $d = d_1 d_2$. Conversely, if $d_1$ and $d_2$ are positive divisors of $m$ and $n$ respectively, then $d = d_1 d_2$ is a positive divisor of $mn$. - -**Dirichlet's Theorem on Primes in Arithmetic Progressions**: Let $a,b$ be relatively prime positive integers. Then the arithmetic progression $an + b$, $n = 1,2,3,...$ contains infinitely many primes. - -**Proposition**: There are infinitely many primes of the form $4n + 3$, where $n$ is a positive integer. - -**Lemma**: If $a$ and $b$ are integers both of the form $4n + 1$, then the product $ab$ is also of this form. - -## Factorization of Integers and the Fermat Numbers - -**Lemma**: If $n$ is an odd positive integer, then there is a one-to-one correspondence between factorizations of $n$ into two positive integers and differences of two squares that equal $n$. 
- -**Proof**: The Fermat number $F_5 = 2^{2^5} + 1$ is divisible by 641. -$641 = 5⋅2^7 + 1 = 2^4 + 5^4$. So $2^{2^5} + 1 = 2^{32} + 1 = 2^4 ⋅ 2^{28} + 1 = (641 - 5^4)2^{28} + 1$. Subsequently, we can rearrange this to pull out 641. - -**Proposition**: Every prime divisor of the Fermat number $F_n = 2^{2^n} + 1$ is of the form $2^{n+2}k + 1$. - -**Lemma**: Let $F_k = 2^{2^k} + 1$ denote the $k$th Fermat number, where $k$ is a nonnegative integer. Then for all positive integers $n$, we have $F_0 F_1 F_2 ... F_{n-1} = F_n - 2$. - -**Theorem**: Let $m,n$ be distinct nonnegative integers. Then the Fermat numbers $F_m$ and $F_n$ are relatively prime. - -**Theorem**: A regular polygon of $n$ sides can be constructed using a ruler and compass if and only if $n$ is of the form $n = 2^a p_1 ... p_t$ where the $p_i$, $i = 1, 2, ..., t$, are distinct Fermat primes and $a$ is a nonnegative integer. - -## Linear Diophantine Equations - -**Theorem**: Let $a$ and $b$ be positive integers with $d = (a,b)$. The equation $ax + by = c$ has no integral solutions if $d ∤ c$. If $d | c$, then there are infinitely many integral solutions. If $x = x_0$, $y = y_0$ is a particular solution, then all solutions are given by $x = x_0 + (b/d) n$, $y = y_0 - (a/d) n$ for some integer $n$. - -## Chapter 3: Congruences - -## Introduction to Congruences - -**Definition**: If $a$ and $b$ are integers and $m$ is a positive integer, we say that $a$ is **congruent to b modulo m** if $m ∣ (a-b)$. - -**Proposition**: If $a$ and $b$ are integers, then $a ≡ b$ (mod $m$) if and only if there is an integer $k$ such that $a = b + km$. - -**Proposition**: Let $m$ be a positive integer. Congruences modulo $m$ satisfy the following properties: -- Reflexivity: $a ≡ a$ (mod $m$) -- Symmetry: $a ≡ b$ (mod $m$) ⟹ $b ≡ a$ (mod $m$) -- Transitivity: $a ≡ b$ and $b ≡ c$ (mod $m$) ⟹ $a ≡ c$ (mod $m$) - -**Definition**: A **complete system of residues modulo m** is a set of integers such that every integer is congruent modulo $m$ to exactly one integer of the set. -- ex. $\{0, 1, 2, ..., m-1\}$ is a complete set of residues modulo $m$. -- ex. $\{0, -1, -2, ... -(m-1)\}$ is a complete set of residues modulo $m$. - -**Theorem**: If $a, b, c, m$ are integers with $m > 0$ and $a ≡ b$ (mod $m$), then: -- $a + c ≡ b + c$ (mod $m$) -- $a - c ≡ b - c$ (mod $m$) -- $ac ≡ bc$ (mod $m$) - -**Theorem**: If $a,b,c,m$ are integers with $m > 0$, $d = (c,m)$, and $ac ≡ bc$ (mod $m$), then $a ≡ b$ (mod $m/d$). - -**Theorem**: If $a,b,c,d,m$ are integers with $m > 0$, $a ≡ b$ (mod $m$), and $c ≡ d$ (mod $m$), then -- $a + c ≡ b + d$ (mod $m$) -- $a - c ≡ b - d$ (mod $m$) -- $ac ≡ bd$ (mod $m$) - -**Theorem**: If $r_1, r_2, ..., r_m$ is a complete system of residues modulo $m$, $a$ is a positive integer satisfying $(a,m) = 1$, and $b$ is any integer, then $ar_1 + b, ar_2 + b, ..., ar_m + b$ is a complete system of residues modulo $m$. - -**Theorem**: If $a,b,k,m$ are integers such that $k > 0$, $m > 0$, $a ≡ b$ (mod $m$), then $a^k ≡ b^k$ (mod $m$). -- Proof: $a ≡ b$ (mod $m$), so $m ∣ (a - b)$. Since $(a-b)$ divides $a^k - b^k$, $m ∣ (a^k - b^k)$. So $a^k ≡ b^k$ (mod $m$). - -**Theorem**: If $a ≡ b$ (mod $m_1$), $a ≡ b$ (mod $m_2$), ..., $a ≡ b$ (mod $m_k$) for integers and positive $m$, then $a ≡ b$ (mod $[m_1,m_2,...,m_k]$) where $[m_1,m_2,...,m_k]$ is the least common multiple of $m_1,m_2,...,m_k$. -- Proof: $a ≡ b$ (mod $m_1$), $a ≡ b$ (mod $m_2$), ..., $a ≡ b$ (mod $m_k$). So $m_1 ∣ (a-b)$, $m_2 ∣ (a-b)$, ..., $m_k ∣ (a-b)$. Therefore, $[m_1,m_2,...,m_k] ∣ (a-b)$, and so $a ≡ b$ (mod $[m_1,m_2,...,m_k]$). 
- -**Corollary**: If $a ≡ b$ (mod $m_1$), $a ≡ b$ (mod $m_2$), ..., $a ≡ b$ (mod $m_k$) for integers and positive relatively prime $m$, then $a ≡ b$ (mod $m_1 m_2... m_k$). - -**Proposition**: Let $b,m,N$ be positive integers with $b < m$. Then the least positive residue of $b^N$ modulo $m$ can be computed using $O((log_2 m)^2 log_2 N)$ bit operations. - -## Linear Congruences - -A congruence of the form $ax ≡ b$ (mod $m$) for an unknown integer $x$ is called a **linear congruence in one variable**. - -**Theorem**: Let $a,b,m$ be integers with $m > 0$ and $(a,m) = d$. If $d ∤ b$, then $ax ≡ b$ (mod $m$) has no solutions. If $d | b$, then $ax ≡ b$ (mod $m$) has exactly $d$ incongruent solutions modulo $m$. - -**Proposition**: Let $p$ be prime. The positive integer $a$ is its own inverse modulo $p$ if and only if $a ≡ 1$ (mod $p$) or $a ≡ -1$ (mod $p$). - -## The Chinese Remainder Theorem - -**The Chinese Remainder Theorem**: Let $m_1, m_2, ..., m_r$ be *pairwise relatively prime* positive integers. Then the system of congruences -- $x ≡ a_1$ (mod $m_1$) -- $x ≡ a_2$ (mod $m_2$) -- $...$ -- $x ≡ a_r$ (mod $m_r$) - -has a unique solution modulo $M = m_1 m_2 ... m_r$. - -**Lemma**: If $a$ and $b$ are positive integers, then the least positive residue of $2^a - 1$ (mod $2^b - 1$) is $2^r - 1$, where $r$ is the least positive residue of $a$ (mod $b$). - -**Lemma**: If $a$ and $b$ are positive integers, then the greatest common divisor of $2^a - 1$ and $2^b - 1$ is $2^{(a,b)} - 1$. - -**Proposition**: The positive integers $2^a - 1$ and $2^b - 1$ are relatively prime if and only if $a$ and $b$ are relatively prime. - -## Systems of Linear Congruences - -**Theorem**: Let $a,b,c,d,e,f,m$ be integers with $m > 0$ such that $(Δ, m) = 1$, where $Δ = ad - bc$. Then, the system of congruences -- $ax + by ≡ e$ (mod $m$) -- $cx + dy ≡ f$ (mod $m$) - -has a unique solution modulo $m$ given by -- $x ≡ \hat{Δ} (de - bf)$ (mod $m$) -- $y ≡ \hat{Δ} (af - ce)$ (mod $m$) - -where $\hat{Δ}$ is an inverse of $Δ$ modulo $m$. - -**Definition**: Let $A$ and $B$ be $n×k$ matrices with integer entries, with $(i,j)$th entries $a_{ij}$ and $b_{ij}$ respectively. We say that **A is congruent to B modulo m** if $a_{ij} ≡ b_{ij}$ (mod $m$) for all pairs $(i,j)$ with $1 ≤ i ≤ n$ and $1 ≤ j ≤ k$. We write $A ≡ B$ (mod $m$) if $A$ is congruent to $B$ (mod $m$). - -**Proposition**: If $A$ and $B$ are $n×k$ matrices with $A ≡ B$ (mod $m$), $C$ is a $k×p$ matrix, and $D$ is a $p×n$ matrix, all with integer entries, then $AC ≡ BC$ (mod $m$) and $DA ≡ DB$ (mod $m$). - -**Definition**: Definition of $\hat{A}$ as an inverse. todo - -**Proposition**: Let $A = \begin{bmatrix} a & b \\ c & d\end{bmatrix}$ be a matrix of integers such that $Δ = det A = ad - bc$ is relatively prime to the positive integer $m$. Then, the matrix $\hat{A} = \hat{Δ} \begin{bmatrix} d & -b \\ -c & a\end{bmatrix}$ is an inverse of $A$ modulo $m$. - -**Definition**: The **adjoint** of an $n×n$ matrix $A$ is the $n×n$ matrix with $(i,j)$th entry $C_{ji}$, where $C_{ij}$ is $(-1)^{i+j}$ times the determinant of the matrix obtained by deleting the $i$th row and $j$th column from $A$. The adjoint of $A$ is denoted by $adj(A)$. - -**Theorem**: If $A$ is an $n×n$ matrix with $det(A) ≠ 0$, then $A(adj(A)) = (det A) I$. - -**Proposition**: If $A$ is an $n×n$ matrix with integer entries and $m$ is a positive integer such that $(det A, m) = 1$, then the matrix $\hat{A} = \hat{Δ}(adj A)$ is an inverse of $A$ modulo $m$. 
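A sketch of the last proposition in code, building $\hat{A} = \hat{Δ}(adj A)$ with sympy (the helper name is my own; sympy's `adjugate` is the adjoint above):

```python
from sympy import Matrix, mod_inverse

def inverse_mod(A, m):
    # requires (det A, m) = 1
    det_hat = mod_inverse(A.det() % m, m)
    return (det_hat * A.adjugate()).applyfunc(lambda e: e % m)

A = Matrix([[2, 1],
            [1, 1]])                   # det = 1
A_hat = inverse_mod(A, 26)
assert (A * A_hat).applyfunc(lambda e: e % 26) == Matrix.eye(2)
```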
- -# Chapter 4: Applications of Congruences - -## Divisibility Tests - -**Divisibility Test**: If $d|b$ and $j,k$ are positive integers with $j < k$, then $n = (a_k ... a_1 a_0)_b$ is divisible by $d^j$ iff $(a_{j-1} ... a_1 a_0)_b$ is divisible by $d^j$. -- ex. todo -- Proof: Since $b ≡ 0$ (mod $d$), $b^i ≡ 0$ (mod $d^j$) for all $i ≥ j$. So $n ≡ (a_{j-1} ... a_1 a_0)_b$ (mod $d^j$), and $d^j ∣ n$ if and only if $d^j ∣ (a_{j-1} ... a_1 a_0)_b$. - -**Divisibility Test**: If $d | (b-1)$, then $n = (a_k ... a_1 a_0)_b$ is divisible by $d$ if and only if $a_k + ... + a_1 + a_0$ is divisible by $d$. -- ex. todo - -**Divisibility Test**: If $d | (b+1)$, then $n = (a_k ... a_1 a_0)_b$ is divisible by $d$ if and only if $(-1)^k a_k + ... - a_1 + a_0$ is divisible by $d$. -- ex. Let $n = (1001001111)_2$. Then $3 ∣ n$ as $n ≡ 1 - 1 + 1 - 1 + 0 - 0 + 1 - 0 + 0 - 1 ≡ 0$ (mod 3) and $3 ∣ 2 + 1$. - -# Chapter 5: Some Special Congruences - -## Wilson's Theorem and Fermat's Little Theorem - -**Wilson's Theorem**: If $p$ is prime, then $(p-1)! ≡ -1$ (mod $p$). -- Proof: inverses cancel. The only numbers that are their own inverses in a prime field are $1$ and $p-1$. So $(p-1)! ≡ 1 ⋅ (p-1) ≡ -1$ (mod $p$). - -**Corollary**: If $n$ is a positive integer satisfying $(n-1)! ≡ -1$ (mod $n$), $n$ is prime. -- Proof: Assume $n$ is composite. Proof by contradiction. todo - -**Fermat's Little Theorem**: If $p$ is prime and $a$ is a positive integer with $p ∤ a$, then $a^{p-1} ≡ 1$ (mod $p$). - -**Corollary**: If $p$ is a prime and $a$ is a positive integer with $p ∤ a$, then $a^p ≡ a$ (mod $p$). -- Proof: trivial. - -**Corollary**: If $p$ is prime and $a$ is an integer with $p ∤ a$, then $a^{p-2}$ is an inverse of $a$ modulo $p$. -- Proof: Fermat's little theorem tells us that $a^{p-1} ≡ 1$ (mod $p$), and $a^{p-1} = a ⋅ a^{p-2}$. - -**Corollary**: If $p$ is prime and $a,b$ are positive integers with $p ∤ a$, then the solutions of $ax ≡ b$ (mod $p$) are the integers $x ≡ a^{p-2}b$ (mod $p$). -- Proof: Take $ax ≡ b$ (mod $p$). Since $p$ does not divide $a$, we know $a^{p-2}$ is an inverse of $a$ (mod $p$). So $x ≡ a^{p-2}b$ (mod $p$). - -## Pseudoprimes - -**Definition**: Let $b$ be a positive integer. If $n$ is a composite positive integer and $b^n ≡ b$ (mod $n$), then $n$ is a **pseudoprime to the base b**. -- ex. 341 is a pseudoprime to the base 2. $2^{340} ≡ 1$ (mod 341), and so $2^{341} ≡ 2$ (mod 341). - -**Lemma**: If $d$ and $n$ are positive integers with $d ∣ n$, then $2^d - 1 ∣ 2^n - 1$. -- Proof: Since $d ∣ n$, $dt = n$ for some $t$. We have the identity $x^t - 1 = (x-1)(x^{t-1} + x^{t-2} + ... + 1)$, and so with $x = 2^d$, $2^n - 1 = 2^{dt} - 1 = (2^d - 1)(2^{d(t-1)} + 2^{d(t-2)} + ... + 1)$. - -**Proof**: There are infinitely many pseudoprimes to the base 2. -We may prove this by showing that if $n$ is an odd pseudoprime to the base 2, $m = 2^n - 1$ is also an odd pseudoprime to the base 2. -Let $n$ be an odd pseudoprime. Then $2^{n-1} ≡ 1$ (mod $n$) and $n$ is composite by definition. Let $m = 2^n - 1$. It is composite, as $n$ is composite (by the above lemma). As $2^n ≡ 2$ (mod $n$), there is an integer $k$ s.t. $2^n - 2 = kn$. So $2^{m-1} = 2^{2^n - 2} = 2^{kn} = (2^n)^k ≡ 1$ (mod $m$), since $2^n ≡ 1$ (mod $2^n - 1$). Hence $m$ is a pseudoprime to the base 2. - -**Definition**: A composite integer that satisfies $b^{n-1} ≡ 1$ (mod $n$) for all positive integers $b$ with $(b,n)=1$ is called a **Carmichael number**. -- ex. $561 = (3)(11)(17)$ is a Carmichael number. If $(b, 561) = 1$, then $(b,3)=(b,11)=(b,17)=1$. So $b^2 ≡ 1$ (mod 3), $b^{10} ≡ 1$ (mod 11), $b^{16} ≡ 1$ (mod 17). So $b^{560} = (b^2)^{280} = (b^{10})^{56} = (b^{16})^{35} ≡ 1$ modulo 3, 11, and 17 respectively, and so $b^{560} ≡ 1$ (mod 561). - -**Theorem**: If $n = q_1 q_2 ... 
q_k$, where the $q_j$ are distinct primes satisfying $(q_j - 1) | (n - 1)$ for all $j$, then $n$ is a Carmichael number. - -**Definition**: Let $n$ be a positive integer with $n-1 = 2^s t$, where $s$ is a nonnegative integer and $t$ is an odd positive integer. We say that $n$ **passes Miller's test for the base b** if either $b^t ≡ 1$ (mod $n$) or $b^{2^j t} ≡ -1$ (mod $n$) for some $j$ with $0 ≤ j ≤ s - 1$. -- ex. todo - -**Theorem**: If $n$ is prime and $b$ is a positive integer with $n ∤ b$, then $n$ passes Miller's test for the base $b$. - -**Definition**: If $n$ is composite and passes Miller's test for the base $b$, then we say $n$ is a **strong pseudoprime to the base b**. Strong pseudoprimes are exceedingly rare. -- ex. Let $n = 2047 = 23 ⋅ 89$. Then $2^{2046} = (2^{11})^{186} = 2048^{186} ≡ 1$ (mod 2047), so 2047 is a pseudoprime to the base 2. Further, $2046 = 2 ⋅ 1023$ with 1023 odd, and $2^{1023} = (2^{11})^{93} = 2048^{93} ≡ 1$ (mod 2047), so 2047 passes Miller's test and is a *strong* pseudoprime to the base 2. - -**Proof**: There are infinitely many strong pseudoprimes to the base 2. -- We may prove this by showing that if $n$ is a pseudoprime to the base 2, $N = 2^n - 1$ is a *strong* pseudoprime to the base 2. -- todo - -**Theorem**: If $n$ is an odd composite positive integer, then $n$ passes Miller's test for at most $(n-1)/4$ bases $b$ with $1 ≤ b ≤ n-1$. - -**Rabin's Probabilistic Primality Test** -- Let $n$ be a positive integer. Pick $k$ different positive integers less than $n$ and perform Miller's test on $n$ for each of those bases. If $n$ is composite, the probability that $n$ passes all $k$ tests is less than $(1/4)^k$. - -**Conjecture**: For every composite positive integer $n$, there is a base $b$ with $b < 70 (log_2 n)^2$ such that $n$ fails Miller's test for the base $b$. If this conjecture is true, the following proposition provides a rapid primality test: - -**Proposition**: If the generalized Riemann hypothesis is valid, then there exists an algorithm to determine whether a positive integer $n$ is prime using $O((log_2 n)^5)$ bit operations. -- Proof: Let $b$ be a positive integer less than $n$. Note that Miller's test for the base $b$ takes $O((log_2 n)^3)$ bit operations. Assuming the generalized Riemann hypothesis is true, if $n$ is composite, there is a base $b$ with $b < 70 (log_2 n)^2$ such that $n$ fails Miller's test for $b$. To discover this $b$, then, requires $O((log_2 n)^5)$ bit operations. - -## Euler's theorem - -**Euler's Totient Function**: Let $n$ be a positive integer. **Euler's totient function** / the **Euler phi-function** $ϕ(n)$ is defined to be the number of positive integers not exceeding $n$ which are relatively prime to $n$. - -**Definition**: A **reduced residue system modulo n** is a set of $ϕ(n)$ integers such that each element of the set is relatively prime to $n$, and no two different elements of the set are congruent modulo $n$. -- ex. $\{1, 3, 5, 7\}$ is a reduced residue system modulo 8. - -**Theorem**: If $r_1, r_2, ..., r_{ϕ(n)}$ is a reduced residue system modulo $n$, and if $a$ is a positive integer with $(a, n) = 1$, then the set $ar_1, ar_2, ..., ar_{ϕ(n)}$ is also a reduced residue system modulo $n$. -- Proof: Consider that $(a,n) = 1$... intuitive. todo. - -**Euler's Theorem**: If $m$ is a positive integer and $a$ is an integer with $(a,m) = 1$, then $a^{ϕ(m)} ≡ 1$ (mod $m$). -- ex. $\{1,3,5,7\}$ (mod 8): $3^{ϕ(8)} = 3^4 ≡ 1$ (mod 8) - -**Corollary**: Euler's Theorem is useful for finding inverses modulo $m$. If $a$ and $m$ are relatively prime, $a ⋅ a^{ϕ(m)-1} = a^{ϕ(m)} ≡ 1$ (mod $m$), so $a^{ϕ(m)-1}$ is an inverse of $a$ modulo $m$. 
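A one-line computational check of this corollary, using sympy's totient (the helper name is my own):

```python
from sympy import totient

def inverse(a, m):
    # a^(phi(m) - 1) is an inverse of a modulo m, provided (a, m) = 1
    return pow(a, int(totient(m)) - 1, m)

assert inverse(3, 8) == 3              # phi(8) = 4; 3 * 3 = 9 ≡ 1 (mod 8)
assert 5 * inverse(5, 26) % 26 == 1
```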
- -# Chapter 6: Multiplicative Functions - -## The Euler ϕ-Function - -**Definition**: An **arithmetic function** is a function defined for all positive integers. - -**Definition**: An arithmetic function $f$ is **multiplicative** if $f(mn) = f(m)f(n)$ whenever $m$ and $n$ are relatively prime. - -**Theorem**: If $f$ is a multiplicative function and if $n = p_1^{a_1} p_2^{a_2} ... p_s^{a_s}$ is the prime power factorization of the positive integer $n$, then $f(n) = f(p_1^{a_1})f(p_2^{a_2})...f(p_s^{a_s})$. -- Proof: trivial. Powers of distinct primes are relatively prime to each other. - -**Theorem**: If $p$ is prime, then $ϕ(p) = p - 1$. Conversely, if $p$ is a positive integer with $ϕ(p) = p - 1$, then $p$ is prime. -- Proof: Every positive integer less than a prime is relatively prime to it. Conversely, if $p$ were composite, it would have a divisor strictly between 1 and $p$, giving $ϕ(p) < p - 1$. - -**Theorem**: Let $p$ be a prime and $a$ a positive integer. Then $ϕ(p^a) = p^a - p^{a-1} = p^{a-1}(p-1)$. -- Proof: The integers up to $p^a$ *not* relatively prime to $p^a$ are exactly the $p^{a-1}$ multiples of $p$. So $ϕ(p^a) = p^a - p^{a-1}$. - -**Theorem**: The Euler ϕ-function is multiplicative. That is, for some relatively prime positive integers $m$ and $n$, $ϕ(mn) = ϕ(m)ϕ(n)$. -- Proof: somewhat intuitive. todo - -**Theorem**: Let $n = p_1^{a_1} p_2^{a_2} ... p_k^{a_k}$ be the prime power factorization of the positive integer $n$. Then $ϕ(n) = n(1-\frac{1}{p_1})(1-\frac{1}{p_2})...(1-\frac{1}{p_k})$. -- Proof: ϕ is multiplicative, so $ϕ(n) = ϕ(p_1^{a_1}) ϕ(p_2^{a_2}) ... ϕ(p_k^{a_k})$. From an above theorem, we know $ϕ(p_j^{a_j}) = p_j^{a_j} - p_j^{a_j - 1}$. $p_j^{a_j} - p_j^{a_j - 1} = p_j^{a_j}(1-\frac{1}{p_j})$, and the product of all $p_j^{a_j}$ is $n$, so $ϕ(n) = n(1-\frac{1}{p_1})(1-\frac{1}{p_2})...(1-\frac{1}{p_k})$. - -**Theorem**: Let $n$ be a positive integer. Then $\sum_{d ∣ n} ϕ(d) = n$. -- ex. Consider $n=18$. Then... todo. - -## The Sum and Number of Divisors - -**Definition**: The **sum of the divisors function** $σ$ is defined by setting $σ(n)$ equal to the sum of all the positive divisors of $n$. - -**Definition**: The **number of the divisors function** $τ$ is defined by setting $τ(n)$ equal to the number of positive divisors of $n$. - -**Theorem**: If $f$ is a multiplicative function, then the arithmetic function $F(n) = \sum_{d ∣ n} f(d)$ is also multiplicative. -- Proof: Trivial. - -**Lemma**: For a prime $p$ and positive integer $a$, $σ(p^a) = (1+p+p^2+...+p^a) = \frac{p^{a+1} - 1}{p-1}$ and $τ(p^a) = a + 1$. - -**Theorem**: Consider a positive integer $n$ with prime factorization $n = p_1^{a_1} p_2^{a_2} ... p_s^{a_s}$. Then $σ(n) = Π_{j=1}^s \frac{p_j^{a_j+1} - 1}{p_j-1}$ and $τ(n) = Π_{j=1}^s (a_j + 1)$. -- Proof: Trivial. - -## Perfect Numbers and Mersenne Primes - -**Definition**: If $n$ is a positive integer and $σ(n) = 2n$, then $n$ is called a **perfect number**. -- ex. $σ(6) = 1 + 2 + 3 + 6 = 12$, so 6 is perfect. -- ex. $σ(28) = 1 + 2 + 4 + 7 + 14 + 28 = 56$, so 28 is perfect. - -**Theorem**: The positive integer $n$ is an even perfect number if and only if $n = 2^{m-1}(2^m - 1)$ where $m$ is a positive integer such that $2^m - 1$ is prime. -- ex. - -**Theorem**: If $m$ is a positive integer and $2^m - 1$ is prime, then $m$ must be prime. -- Proof: If $m$ is not prime, write $m = ab$ with $1 < a, b < m$; then $2^m - 1$ must be composite: $2^{ab} - 1 = (2^a - 1)(2^{a(b-1)} + 2^{a(b-2)} + ... + 2^a + 1)$. - -**Definition**: Given a positive integer $m$, $M_m = 2^m - 1$ is called the $m$th **Mersenne number**. If $p$ is prime and $M_p = 2^p - 1$ is also prime, then $M_p$ is called a **Mersenne prime**. -- ex. 
$M_7 = 2^7 - 1 = 127$, which is prime -- ex. $M_{11} = 2^{11} - 1 = 2047 = 23 ⋅ 89$, which is not - -**Theorem**: If $p$ is an odd prime, then any divisor of the Mersenne number $M_p = 2^p - 1$ is of the form $2kp + 1$, for some positive integer $k$. - -**The Lucas-Lehmer Test**: Let $p$ be a prime and $M_p = 2^p - 1$ be the $p$th Mersenne number. Consider the recursive sequence of integers with $r_1 = 4$ and $r_k ≡ r_{k-1}^2 - 2$ (mod $M_p$), $0 ≤ r_k < M_p$. Then, $M_p$ is prime if and only if $r_{p-1} ≡ 0$ (mod $M_p$). -- ex. Consider $M_5 = 2^5 - 1 = 31$. Then $r_1 = 4$, $r_2 ≡ 4^2 - 2 = 14$ (mod 31), $r_3 ≡ 14^2 - 2 ≡ 8$ (mod 31), $r_4 ≡ 8^2 - 2 ≡ 0$ (mod 31). $r_{5-1} ≡ 0$ (mod 31), so $M_5 = 31$ is prime. - -**Corollary**: Let $p$ be prime and let $M_p = 2^p - 1$ denote the $p$th Mersenne number. It is possible to determine whether $M_p$ is prime using $O(p^3)$ bit operations. - -known mersenne primes table - -Mersenne primes are of interest because of their ease in location and verification: the largest known prime, at any given time, is typically a Mersenne prime. It has been conjectured, but not proven, that there are infinitely many Mersenne primes. - -# Chapter 7: Cryptology - -**Caesar Cipher**: $C ≡ P + 3$ (mod 26) -- ex. todo - -Ciphers may be described by a **shift transformation**: $C ≡ P+k$ (mod 26), for some key $k$. More generally, ciphers of the form $C ≡ aP+b$ (mod 26) for some integers $a,b$ such that $(a,26)=1$ are considered **affine transformations**. This $(a,26)=1$ is so that as $P$ runs through a complete system of distinct residues, $C$ also does. - -The *inverse* relationship is given by $P ≡ \hat{a} (C-b)$ (mod 26), where $\hat{a}$ is an inverse of $a$ (mod 26). - -Ciphers like these encrypt *each character individually*, making them 1) trivially easy to bruteforce and 2) subject to *frequency analysis*. -- ex. if we know we are looking for $C ≡ P+k$ (mod 26): say the most common ciphertext letter is P. we then set that equal to $e$, the most common letter in English. So $15_P ≡ 4_e + k$ (mod 26), so $k=11$ and we have our solution. -- ex. if we know we are looking for $C ≡ aP+b$ (mod 26): say the most common ciphertext letters are $L$ and $U$. We then set those equal to $E$ and $T$, the first and second most common letters in English. Leaving us with two equations: $4_E a + b ≡ 11_L$ and $19_T a + b ≡ 20_U$ (mod 26). This is solvable by the earlier theorem on systems of linear congruences: $a ≡ \hat{Δ}(1 ⋅ 11 - 1 ⋅ 20)$ and $b ≡ \hat{Δ}(4 ⋅ 20 - 19 ⋅ 11)$ (mod 26), where $Δ = 4 ⋅ 1 - 1 ⋅ 19 ≡ 11$ (mod 26). - -# Chapter 8: Primitive Roots - -## The Order of an Integer and Primitive Roots - -**Definition**: Let $a$ and $m$ be relatively prime positive integers. Then, the least positive integer $x$ such that $a^x ≡ 1$ (mod $m$) is called the **order of a modulo m**. -- ex. $2^3 ≡ 1$ (mod 7), so $ord_7 2 = 3$. - -**Theorem**: If $a$ and $n$ are relatively prime integers with $n > 0$, then the positive integer $x$ is a solution of the congruence $a^x ≡ 1$ (mod $n$) if and only if $ord_n a ∣ x$. -- Proof: trivial. - -**Corollary**: If $a$ and $n$ are relatively prime integers with $n > 0$, then $ord_n a | ϕ(n)$. -- Proof: $(a,n) = 1$, so $a^{ϕ(n)} ≡ 1$ (mod $n$) by Euler's theorem. So by the above theorem, $ord_n a ∣ ϕ(n)$. - -**Theorem**: If $a$ and $n$ are relatively prime integers with $n > 0$, then $a^i ≡ a^j$ (mod $n$) where $i$ and $j$ are nonnegative integers if and only if $i ≡ j$ (mod $ord_n a$). -- ex. todo - -**Definition**: If $r$ and $n$ are relatively prime integers with $n > 0$ and $ord_n r = ϕ(n)$, then $r$ is called a **primitive root modulo n**. Not all integers have primitive roots. -- ex. 
$ord_7 3 = 6 = ϕ(7)$, so $3$ is a primitive root modulo 7. - -**Theorem**: If $r$ and $n$ are relatively prime positive integers, and if $r$ is a primitive root modulo $n$, then the integers $r^1, r^2, ..., r^{ϕ(n)}$ form a reduced residue set modulo $n$. -- ex. todo - -**Theorem**: If $ord_m a = t$ and $u$ is a positive integer, then $ord_m (a^u) = t/(t,u)$. -- ex. todo - -**Corollary**: Let $r$ be a primitive root modulo $m$, where $m$ is an integer greater than 1. Then $r^u$ is a primitive root modulo $m$ if and only if $(u, ϕ(m)) = 1$. - -**Theorem**: If the positive integer $m$ has a primitive root, then it has a total of $ϕ(ϕ(m))$ incongruent primitive roots. - -## Primitive Roots for Primes - -**Definition**: Let $f(x)$ be a polynomial with integer coefficients. We say that an integer $c$ is a **root of f(x) modulo m** if $f(c) ≡ 0$ (mod $m$). It is easy to see that if $c$ is a root of $f(x)$ modulo $m$, then every integer congruent to $c$ modulo $m$ is also a root. - -**Lagrange's Theorem**: Let $f(x) = a_n x^n + a_{n-1} x^{n-1} + ... + a_1 x + a_0$ be a polynomial of degree $n$ with integer coefficients and with leading coefficient $a_n$ not divisible by $p$. Then $f(x)$ has at most $n$ incongruent roots modulo $p$. - -**Theorem**: Let $p$ be prime and let $d$ be a divisor of $p-1$. Then the polynomial $x^d - 1$ has exactly $d$ incongruent roots modulo $p$. - -**Theorem**: Let $p$ be prime and let $d$ be a positive divisor of $p-1$. Then the number of incongruent integers of order $d$ modulo $p$ is equal to $ϕ(d)$. - -**Corollary**: Every prime has a primitive root. - -## Existence of Primitive Roots - -**Theorem**: If $p$ is an odd prime with a primitive root $r$, then either $r$ or $r+p$ is a primitive root modulo $p^2$. - -**Theorem**: Let $p$ be an odd prime; then $p^k$ has a primitive root for all positive integers $k$. Moreover, if $r$ is a primitive root modulo $p^2$, then $r$ is a primitive root modulo $p^k$, for all positive integers $k$. - -**Theorem**: If $a$ is an odd integer and $k ≥ 3$ is an integer, then $a^{ϕ(2^k)/2} = a^{2^{k-2}} ≡ 1$ (mod $2^k$). - -**Theorem**: Let $k ≥ 3$ be an integer. Then $ord_{2^k} 5 = ϕ(2^k)/2 = 2^{k-2}$. - -**Theorem**: If $n$ is a positive integer that is not a prime power or twice a prime power, then $n$ does not have a primitive root. - -**Theorem**: If $p$ is an odd prime and $t$ is a positive integer, then $2p^t$ has a primitive root. Specifically, if $r$ is a primitive root modulo $p^t$ and $r$ is odd, then $r$ is also a primitive root modulo $2p^t$; if $r$ is even, $r + p^t$ is a primitive root modulo $2p^t$. - -**Theorem**: The positive integer $n$ has a primitive root if and only if $n = 2,4,p^t,2p^t$ for an odd prime $p$ and positive integer $t$. - -## Index Arithmetic - -**Definition**: Let $m$ be a positive integer with primitive root $r$. If $a$ is a positive integer with $(a,m) = 1$, then the unique integer $x$ with $1 ≤ x ≤ ϕ(m)$ and $r^x ≡ a$ (mod $m$) is called the **index of a to the base r modulo m**. With this definition, we have $a ≡ r^{ind_r a}$ (mod $m$). - -**Theorem**: Let $m$ be a positive integer with primitive root $r$, and let $a$ and $b$ be integers relatively prime to $m$. Then: -- $ind_r 1 ≡ 0$ (mod $ϕ(m)$) -- $ind_r (ab) ≡ ind_r a + ind_r b$ (mod $ϕ(m)$) -- $ind_r a^k ≡ k⋅ind_r a$ (mod $ϕ(m)$) for a positive integer $k$. - -**Definition**: If $m$ and $k$ are positive integers and $a$ is an integer relatively prime to $m$, then $a$ is a **kth power residue of m** if the congruence $x^k ≡ a$ (mod $m$) has a solution. 
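To make these definitions concrete — a brute-force sketch of orders, indices, and the power-residue criterion stated in the next theorem (function names are my own):

```python
from math import gcd

def order(a, m):
    """ord_m a: least x > 0 with a^x ≡ 1 (mod m); requires (a, m) = 1."""
    x, power = 1, a % m
    while power != 1:
        x, power = x + 1, power * a % m
    return x

def index(r, a, m):
    """ind_r a: the x with r^x ≡ a (mod m), for r a primitive root mod m."""
    x, power = 1, r % m
    while power != a % m:
        x, power = x + 1, power * r % m
    return x

phi7 = 6
assert order(3, 7) == phi7                  # 3 is a primitive root modulo 7
assert pow(3, index(3, 6, 7), 7) == 6       # 3^3 = 27 ≡ 6 (mod 7)

# kth power residues mod 7 for k = 2: a is a residue iff a^(phi/d) ≡ 1, d = (k, phi)
d = gcd(2, phi7)
assert [a for a in range(1, 7) if pow(a, phi7 // d, 7) == 1] == [1, 2, 4]
```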
- -**Theorem**: Let $m$ be a positive integer with a primitive root. If $k$ is a positive integer and $a$ is an integer relatively prime to $m$, then the congruence $x^k ≡ a$ (mod $m$) has a solution if and only if $a^{ϕ(m)/d} ≡ 1$ (mod $m$), where $d = (k, ϕ(m))$. - -**Corollary**: If there are solutions of $x^k ≡ a$ (mod $m$), then there are exactly $d$ incongruent solutions modulo $m$. - -**Theorem**: If $n$ is an odd composite positive integer, then $n$ passes Miller's test for at most $(n-1)/4$ bases $b$ with $1 ≤ b < n-1$. - -**Lemma**: Let $p$ be an odd prime and let $e,q$ be positive integers. Then the number of incongruent solutions of the congruence $x^q ≡ 1$ (mod $p^e$) is $(q,p^{e-1}(p-1))$. - -## Primality Testing Using Primitive Roots - -**Theorem**: If $n$ is a positive integer and if an integer $x$ exists such that $x^{n-1} ≡ 1$ (mod $n$) and $x^{(n-1)/q} ≢ 1$ (mod $n$) for all prime divisors $q$ of $n-1$, then $n$ is prime. - -**Corollary**: If $n$ is an odd positive integer and $x$ is a positive integer such that $x^{(n-1)/2} ≡ -1$ (mod $n$) and $x^{(n-1)/q} ≢ 1$ (mod $n$) for all odd prime divisors $q$ of $n-1$, then $n$ is prime. - -**Theorem**: If $n$ is composite, this fact can be proved with $O((log_2 n)^2)$ bit operations. - -**Theorem**: If $n$ is prime, this fact can be proved with $O((log_2 n)^4)$ bit operations. - -## Universal Exponents - -## Pseudo-random Numbers - -## The Splicing of Telephone Cables diff --git a/mathematics/proof.md b/mathematics/proof.md deleted file mode 100644 index 17548dd..0000000 --- a/mathematics/proof.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -layout: foundations -title: mathematics/proof ---- - -- cgit v1.2.3-70-g09d2