English
Let X_n: Ω → E be a sequence of independent identically distributed vector-valued random variables with X_0 integrable and strongly measurable. Then μ-a.e. ω, the empirical means converge to μ[X_0], i.e., lim_{n→∞} (1/n) ∑_{i=0}^{n-1} X_i(ω) = μ[X_0].
Русский
Пусть X_n: Ω → E — независимая последовательность вектор-случайных величин с интегрируемостью X_0 и сильной измеримостью, распределение X_i идентично X_0. Тогда почти surely среднее сходится к μ[X_0].
LaTeX
$$$\forall\, \text{ae } \omega ∂μ,\; \displaystyle \lim_{n \to \infty} \frac{1}{n} \sum_{i=0}^{n-1} X_i(\omega) = μ[X_0]$$$
Lean4
/-- Preliminary lemma for the strong law of large numbers for vector-valued random variables,
assuming measurability in addition to integrability. This is weakened to ae measurability in
the full version `ProbabilityTheory.strong_law_ae`. -/
theorem strong_law_ae_of_measurable (X : ℕ → Ω → E) (hint : Integrable (X 0) μ) (h' : StronglyMeasurable (X 0))
(hindep : Pairwise ((IndepFun · · μ) on X)) (hident : ∀ i, IdentDistrib (X i) (X 0) μ μ) :
∀ᵐ ω ∂μ, Tendsto (fun n : ℕ ↦ (n : ℝ)⁻¹ • (∑ i ∈ range n, X i ω)) atTop (𝓝 μ[X 0]) := by
/- Choose a simple function `φ` such that `φ (X 0)` approximates well enough `X 0` -- this is
possible as `X 0` is strongly measurable. Then `φ (X n)` approximates well `X n`.
Then the strong law for `φ (X n)` implies the strong law for `X n`, up to a small
error controlled by `n⁻¹ ∑_{i=0}^{n-1} ‖X i - φ (X i)‖`. This one is also controlled thanks
to the one-dimensional law of large numbers: it converges ae to `𝔼[‖X 0 - φ (X 0)‖]`, which
is arbitrarily small for well-chosen `φ`. -/
let s : Set E := Set.range (X 0) ∪ {0}
have zero_s : 0 ∈ s := by simp [s]
have : SeparableSpace s := h'.separableSpace_range_union_singleton
have : Nonempty s :=
⟨0, zero_s⟩
-- sequence of approximating simple functions.
let φ : ℕ → SimpleFunc E E := SimpleFunc.nearestPt (fun k => Nat.casesOn k 0 ((↑) ∘ denseSeq s) : ℕ → E)
let Y : ℕ → ℕ → Ω → E := fun k i ↦
(φ k) ∘
(X i)
-- strong law for `φ (X n)`
have A : ∀ᵐ ω ∂μ, ∀ k, Tendsto (fun n : ℕ ↦ (n : ℝ)⁻¹ • (∑ i ∈ range n, Y k i ω)) atTop (𝓝 μ[Y k 0]) :=
ae_all_iff.2
(fun k ↦ strong_law_ae_simpleFunc_comp X h'.measurable hindep hident (φ k))
-- strong law for the error `‖X i - φ (X i)‖`
have B :
∀ᵐ ω ∂μ,
∀ k, Tendsto (fun n : ℕ ↦ (∑ i ∈ range n, ‖(X i - Y k i) ω‖) / n) atTop (𝓝 μ[(fun ω ↦ ‖(X 0 - Y k 0) ω‖)]) :=
by
apply ae_all_iff.2 (fun k ↦ ?_)
let G : ℕ → E → ℝ := fun k x ↦ ‖x - φ k x‖
have G_meas : ∀ k, Measurable (G k) := fun k ↦ (measurable_id.sub_stronglyMeasurable (φ k).stronglyMeasurable).norm
have I : ∀ k i, (fun ω ↦ ‖(X i - Y k i) ω‖) = (G k) ∘ (X i) := fun k i ↦ rfl
apply strong_law_ae_real (fun i ω ↦ ‖(X i - Y k i) ω‖)
· exact (hint.sub ((φ k).comp (X 0) h'.measurable).integrable_of_isFiniteMeasure).norm
· unfold Function.onFun
simp_rw [I]
intro i j hij
exact (hindep hij).comp (G_meas k) (G_meas k)
· intro i
simp_rw [I]
apply
(hident i).comp
(G_meas k)
-- check that, when both convergences above hold, then the strong law is satisfied
filter_upwards [A, B] with ω hω h'ω
rw [tendsto_iff_norm_sub_tendsto_zero, tendsto_order]
refine
⟨fun c hc ↦ Eventually.of_forall (fun n ↦ hc.trans_le (norm_nonneg _)), ?_⟩
-- start with some positive `ε` (the desired precision), and fix `δ` with `3 δ < ε`.
intro ε (εpos : 0 < ε)
obtain ⟨δ, δpos, hδ⟩ : ∃ δ, 0 < δ ∧ δ + δ + δ < ε :=
⟨ε / 4, by positivity, by linarith⟩
-- choose `k` large enough so that `φₖ (X 0)` approximates well enough `X 0`, up to the
-- precision `δ`.
obtain ⟨k, hk⟩ : ∃ k, ∫ ω, ‖(X 0 - Y k 0) ω‖ ∂μ < δ :=
by
simp_rw [Pi.sub_apply, norm_sub_rev (X 0 _)]
exact ((tendsto_order.1 (tendsto_integral_norm_approxOn_sub h'.measurable hint)).2 δ δpos).exists
have : ‖μ[Y k 0] - μ[X 0]‖ < δ := by
rw [norm_sub_rev, ← integral_sub hint]
· exact (norm_integral_le_integral_norm _).trans_lt hk
· exact ((φ k).comp (X 0) h'.measurable).integrable_of_isFiniteMeasure
have I : ∀ᶠ n in atTop, (∑ i ∈ range n, ‖(X i - Y k i) ω‖) / n < δ := (tendsto_order.1 (h'ω k)).2 δ hk
have J : ∀ᶠ (n : ℕ) in atTop, ‖(n : ℝ)⁻¹ • (∑ i ∈ range n, Y k i ω) - μ[Y k 0]‖ < δ :=
by
specialize hω k
rw [tendsto_iff_norm_sub_tendsto_zero] at hω
exact (tendsto_order.1 hω).2 δ δpos
filter_upwards [I, J] with n hn h'n
calc
‖(n : ℝ)⁻¹ • ∑ i ∈ Finset.range n, X i ω - μ[X 0]‖ =
‖(n : ℝ)⁻¹ • ∑ i ∈ Finset.range n, (X i ω - Y k i ω) + ((n : ℝ)⁻¹ • ∑ i ∈ Finset.range n, Y k i ω - μ[Y k 0]) +
(μ[Y k 0] - μ[X 0])‖ :=
by
congr
simp only [sum_sub_distrib, smul_sub]
abel
_ ≤
‖(n : ℝ)⁻¹ • ∑ i ∈ Finset.range n, (X i ω - Y k i ω)‖ + ‖(n : ℝ)⁻¹ • ∑ i ∈ Finset.range n, Y k i ω - μ[Y k 0]‖ +
‖μ[Y k 0] - μ[X 0]‖ :=
norm_add₃_le
_ ≤ (∑ i ∈ Finset.range n, ‖X i ω - Y k i ω‖) / n + δ + δ :=
by
gcongr
simp only [norm_smul, norm_inv, RCLike.norm_natCast, div_eq_inv_mul]
gcongr
exact norm_sum_le _ _
_ ≤ δ + δ + δ := by
gcongr
exact hn.le
_ < ε := hδ