Ethan P. Marzban
2023-05-02
Rule-of-Thumb
Here is a quick way to determine whether a random variable is continuous or discrete:
Properties of a P.D.F.
viewof a = Inputs.range(
[-3, 3],
{value: 0, step: 0.1, label: "a="}
)
viewof b = Inputs.range(
[-3, 3],
{value: 1, step: 0.1, label: "b="}
)
margin2 = ({top: 20, right: 30, bottom: 30, left: 40})
height2 = 400
x_values2 = d32.scaleLinear()
.domain(d32.extent(data2, d => d.x))
.range([margin2.left, width - margin2.right])
y_values2 = d32.scaleLinear()
.domain([Math.min(d32.min(data2, d => d.y),0), Math.max(1,d32.max(data2, d => d.y))]).nice()
.range([height2 - margin2.bottom, margin2.top])
line2 = d32.line()
.x(d => x_values2(d.x))
.y(d => y_values2(d.y))
xAxis2 = g => g
.attr("transform", `translate(0,${height2 - margin2.bottom})`)
.call(d32.axisBottom(x_values2)
.ticks(width / 80)
.tickSizeOuter(0))
yAxis2 = g => g
.attr("transform", `translate(${margin2.left},0)`)
.call(d32.axisLeft(y_values2)
.tickValues(d32.scaleLinear().domain(y_values2.domain()).ticks()))
/**
 * Density of the Unif(a, b) distribution evaluated at a single point.
 *
 * The endpoints are read from the notebook cells `a` and `b`, not from the
 * arguments.
 *
 * @param {number} input_value - Point at which the density is evaluated.
 * @param {number} [mu] - Unused; kept so three-argument calls keep working.
 * @param {number} [sigsq] - Unused; kept so three-argument calls keep working.
 * @returns {number} 1 / (b - a) when a < b and a <= input_value <= b, else 0.
 */
function unif_pdf (input_value, mu, sigsq) {
  // A degenerate or inverted interval has no density. Returning 0 here also
  // prevents 1 / 0 = Infinity from reaching the chart's y-scale when a equals b.
  if (b <= a) {
    return 0;
  }
  if (input_value < a || input_value > b) {
    return 0;
  }
  return 1 / (b - a);
}
abs_x2=6
data2 = {
let values = [];
for (let x = -abs_x2; x < abs_x2; x=x+0.01) values.push({"x":x,"y":unif_pdf(x, µ, sigsquared)});
return values;
}
d32 = require("https://d3js.org/d3.v5.min.js")
chart2 = {
const svg = d32.select(DOM.svg(width, height2));
svg.append("g")
.call(xAxis2);
svg.append("g")
.call(yAxis2);
svg.append("path")
.datum(data2)
.attr("fill", "none")
.attr("stroke", "steelblue")
.attr("stroke-width", 4)
.attr("stroke-linejoin", "round")
.attr("stroke-linecap", "round")
.attr("d", line);
return svg.node();
}
Credit to https://observablehq.com/@dswalter/normal-distribution for the base of the applet code
Recall, from our initial discussion on continuous random variables, that probabilities are found as areas underneath the density curve.
Due to the rectangular shape of the Uniform density curves, finding probabilities under the Uniform distribution ends up being relatively straightforward (so long as we remember how to find the area of a rectangle!)
Let’s work through an example together.
Worked-Out Example 1
If \(X \sim \mathrm{Unif}(-1, \ 1)\), compute \(\mathbb{P}(X \leq 0.57)\).
Worked-Out Example 2
If \(X \sim \mathrm{Unif}(0, 1)\), compute \(\mathbb{P}(0.25 \leq X \leq 0.75)\).
\[ \huge - \]
can be decomposed as
\[ \huge - \]
Important
\[ \mathbb{P}(x_1 \leq X \leq x_2) = \mathbb{P}(X \leq x_2) - \mathbb{P}(X \leq x_1) \]
Exercise 1
The time (in minutes) spent waiting in line at Starbucks is found to vary uniformly between 5mins and 15mins.
Define the random variable of interest, and call it \(X\).
If a person is selected at random from the line at Starbucks, what is the probability that they spend between 3 and 7 minutes waiting in line?
What is the c.d.f. of wait times? (I.e., find the probability that a randomly selected person spends less than \(x\) minutes waiting in line, for an arbitrary value \(x\). Yes, your final answer will depend on \(x\); that’s why the c.d.f. is a function!)
Probability of Attaining an Exact Value
If \(X\) is a continuous random variable, \(\mathbb{P}(X = x) = 0\) for any value \(x\).
Exercise 2
Consider again the setup of Exercise 1: the time (in minutes) spent waiting in line at Starbucks is found to vary uniformly between 5mins and 15mins.
If we select a person at random, what is the expected amount of time (in minutes) they will spend waiting in line? What about the variance and standard deviation of the time (in minutes) they will spend waiting in line?
The normal distribution takes two parameters \(\mu\) and \(\sigma\). We use the notation \(X \sim \mathcal{N}(\mu, \ \sigma)\) to denote “\(X\) follows the normal distribution with parameters \(\mu\) and \(\sigma\).”
The normal distribution has probability density function (p.d.f.) given by \[ f(x) = \frac{1}{\sigma \cdot \sqrt{2 \pi}} \cdot \exp\left\{ - \frac{1}{2} \cdot \left( \frac{x - \mu}{\sigma} \right)^2 \right\} \]
Let’s determine how the parameters affect the shape of the density curve.
viewof µ = Inputs.range(
[-3, 3],
{value: 0, step: 0.1, label: "µ:"}
)
viewof σ = Inputs.range(
[0.2, 3.1],
{value: 1, step: 0.01, label: "σ:"}
)
sigsquared = σ**2
margin = ({top: 20, right: 30, bottom: 30, left: 40})
height = 400
x_values = d3.scaleLinear()
.domain(d3.extent(data, d => d.x))
.range([margin.left, width - margin.right])
y_values = d3.scaleLinear()
.domain([Math.min(d3.min(data, d => d.y),0), Math.max(1,d3.max(data, d => d.y))]).nice()
.range([height - margin.bottom, margin.top])
line = d3.line()
.x(d => x_values(d.x))
.y(d => y_values(d.y))
xAxis = g => g
.attr("transform", `translate(0,${height - margin.bottom})`)
.call(d3.axisBottom(x_values)
.ticks(width / 80)
.tickSizeOuter(0))
yAxis = g => g
.attr("transform", `translate(${margin.left},0)`)
.call(d3.axisLeft(y_values)
.tickValues(d3.scaleLinear().domain(y_values.domain()).ticks()))
/**
 * Normal density evaluated at a single point.
 *
 * @param {number} input_value - Point at which the density is evaluated.
 * @param {number} mu - Mean of the distribution.
 * @param {number} sigsq - Variance of the distribution (sigma squared).
 * @returns {number} exp(-(x - mu)^2 / (2 * sigsq)) / sqrt(2 * pi * sigsq).
 */
function normal_pdf (input_value, mu, sigsq) {
  const deviation = input_value - mu;
  const exponent = -(deviation ** 2) / (2 * sigsq);
  const normalizer = 1 / Math.sqrt(2 * Math.PI * sigsq);
  return normalizer * Math.exp(exponent);
}
abs_x=6
data = {
let values = [];
for (let x = -abs_x; x < abs_x; x=x+0.01) values.push({"x":x,"y":normal_pdf(x, µ, sigsquared)});
return values;
}
d3 = require("https://d3js.org/d3.v5.min.js")
chart = {
const svg = d3.select(DOM.svg(width, height));
svg.append("g")
.call(xAxis);
svg.append("g")
.call(yAxis);
svg.append("path")
.datum(data)
.attr("fill", "none")
.attr("stroke", "steelblue")
.attr("stroke-width", 4)
.attr("stroke-linejoin", "round")
.attr("stroke-linecap", "round")
.attr("d", line);
return svg.node();
}
Credit to https://observablehq.com/@dswalter/normal-distribution for the majority of the applet code
Holding \(\sigma = 1\) fixed and varying \(\mu\), we find:
Holding \(\mu = 0\) fixed and varying \(\sigma\), we find:
Definition
The standard normal distribution is the normal distribution with \(\mu = 0\) and \(\sigma = 1\); i.e. \(\mathcal{N}(0, 1)\).
Worked-Out Example 3
If \(Z \sim \mathcal{N}(0, 1)\), compute \(\mathbb{P}(Z \leq 0.83)\).
Worked-Out Example 4
If \(Z \sim \mathcal{N}(0, 1)\), find
Standardization
If \(X \sim \mathcal{N}(\mu, \ \sigma)\), then \[ \left( \frac{X - \mu}{\sigma} \right) \sim \mathcal{N}(0, 1) \] That is, if we take a normally distributed random variable, subtract off its mean, and divide by its standard deviation, we obtain a random variable whose distribution is the standard normal distribution.
Thus, if \(X \sim \mathcal{N}(\mu, \ \sigma)\), here are the steps we use to compute \(\mathbb{P}(X \leq x)\):
Worked-Out Example 5
If \(X \sim \mathcal{N}(5, \ 1.21)\), compute \(\mathbb{P}(X \leq 6)\).
The \(z-\)score of \(6\) is \[ z = \frac{6- 5}{1.21} \approx 0.83 \]
Looking up the probability corresponding to \(0.83\) on a standard normal table (which we did in Worked-Out Example 3), we see that the desired probability is \(\boxed{0.7967 = 79.67\%}\)
Exercise 3
It is found that the scores on a particular exam are normally distributed with a mean of 83 and a standard deviation of 5.
Define the random variable of interest, and call it \(X\).
If a student is selected at random, what is the probability that they scored 81 or lower?
If a student is selected at random, what is the probability that they scored 75 or higher?
If \(X \sim \mathcal{N}(\mu, \ \sigma)\), we have the following results:
So, the two parameters we use to describe the normal distribution are the mean \(\mu\) and the standard deviation \(\sigma\) (equivalently, the variance \(\sigma^2\)).
We’ll talk more about parameters in the next lecture.