[
    {
        "problem_id": 773,
        "domain": [
            "Mathematics -> Applied Mathematics -> Statistics -> Probability -> Other"
        ],
        "difficulty": 4.0,
        "problem_text": "Max repeatedly throws a fair coin in a hurricane. For each throw, there is a $4 \\%$ chance that the coin gets blown away. He records the number of heads $H$ and the number of tails $T$ before the coin is lost. (If the coin is blown away on a toss, no result is recorded for that toss.) What is the expected value of $|H-T|$?",
        "sample_id": 1,
        "final_answer": "The final answer is \\boxed{\\frac{24}{7}}",
        "steps": [
            {
                "step_id": 1,
                "edge": "We establish $p$ as the fundamental probability parameter for the coin being blown away per toss, directly given by the problem's $4\\%$ specification. This definition anchors all subsequent probabilistic calculations and aligns with standard probability modeling where $p$ represents the termination probability of a Bernoulli trial sequence.",
                "direct_dependent_steps": null,
                "node": "Define $p$ as the probability that the coin is blown away on a single toss."
            },
            {
                "step_id": 2,
                "edge": "Converting the problem's $4\\%$ chance to a fraction yields $p = \\frac{4}{100} = \\frac{1}{25}$. This exact rational form avoids decimal approximation errors and facilitates precise algebraic manipulation later, as required for symbolic computation of expectations.",
                "direct_dependent_steps": [
                    1
                ],
                "node": "We have $p=\\frac{1}{25}$."
            },
            {
                "step_id": 3,
                "edge": "We define $H$ to formalize the count of recorded heads, as explicitly described in the problem statement. This variable captures the stochastic outcome of successful tosses before termination, essential for constructing the absolute difference $|H-T|$.",
                "direct_dependent_steps": null,
                "node": "Define $H$ as the number of recorded heads before the coin is lost."
            },
            {
                "step_id": 4,
                "edge": "Similarly, $T$ is defined to represent the number of recorded tails, mirroring the problem's description. This symmetric definition with $H$ ensures we can model the imbalance between heads and tails that drives the absolute difference calculation.",
                "direct_dependent_steps": null,
                "node": "Define $T$ as the number of recorded tails before the coin is lost."
            },
            {
                "step_id": 5,
                "edge": "Combining the definitions of $H$ (Step 3) and $T$ (Step 4), we introduce $D = |H - T|$ as the target random variable. This absolute difference quantifies the discrepancy between heads and tails, which is the core quantity whose expectation we seek to compute.",
                "direct_dependent_steps": [
                    3,
                    4
                ],
                "node": "Define $D$ as the absolute difference $|H-T|$."
            },
            {
                "step_id": 6,
                "edge": "Reiterating the problem's mechanism using the defined parameter $p$ (Step 1), each toss independently terminates the process with probability $p$. This models the hurricane's disruptive effect as a geometric stopping condition for the coin-flipping sequence.",
                "direct_dependent_steps": [
                    1
                ],
                "node": "On each toss the coin is blown away with probability $p$."
            },
            {
                "step_id": 7,
                "edge": "Using $p$ from Step 1, the probability that the coin is not blown away on a given toss is the complement $1-p$, since the two outcomes are exhaustive and mutually exclusive ($p + (1-p) = 1$). This $1-p$ value is critical for computing the likelihood of extended sequences of valid tosses before termination.",
                "direct_dependent_steps": [
                    1
                ],
                "node": "On each toss the coin is not blown away with probability $1-p$."
            },
            {
                "step_id": 8,
                "edge": "Building on Step 6's termination rule, the process ends precisely when the blow-away event occurs. This clarifies that all recorded tosses (contributing to $H$ and $T$) happen strictly before this terminal event, defining the domain for $H$ and $T$.",
                "direct_dependent_steps": [
                    6
                ],
                "node": "The process terminates on the toss when the coin is blown away."
            },
            {
                "step_id": 9,
                "edge": "When $D \\neq 0$ (Step 5), a toss that is not blown away lands heads or tails with equal probability $\\frac{1}{2}$, changing $D$ by $+1$ or $-1$. Conditional on the toss being recorded, the expected change is $\\frac{1}{2}(+1) + \\frac{1}{2}(-1) = 0$, and a blown-away toss leaves $D$ unchanged, so the expected change is zero in either case—only tosses made when $D=0$ contribute to the growth of $\\mathbf{E}[D]$.",
                "direct_dependent_steps": [
                    5
                ],
                "node": "If $D\\neq0$ at a given toss then the expected change in $D$ from that toss is zero."
            },
            {
                "step_id": 10,
                "edge": "At $D=0$ (Step 5), any non-terminating toss (occurring with probability $1-p$ from Step 7) must produce either heads or tails, both yielding $|H-T|=1$. Thus $D$ increases deterministically by $1$, creating the only scenario where the absolute difference grows in expectation.",
                "direct_dependent_steps": [
                    5,
                    7
                ],
                "node": "If $D=0$ at a toss then a non-blown toss increases $D$ by 1 deterministically."
            },
            {
                "step_id": 11,
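                "edge": "Integrating Step 8 (termination timing), Step 9 (zero expected change when $D\\neq0$), and Step 10 (deterministic increase when $D=0$), we write $D$ as the sum of its per-toss increments. By linearity of expectation, $\\mathbf{E}[D]$ is the sum of the expected increments, and a toss has a nonzero expected increment only when $D=0$ just before it and the coin is not blown away on it (probability $1-p$), in which case the increment is exactly $1$. Summing these per-toss contributions across all possible tosses gives the expectation.",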
                "direct_dependent_steps": [
                    8,
                    9,
                    10
                ],
                "node": "Therefore $\\mathbf{E}[D]$ equals the sum over all tosses of the probability that $D=0$ just before the toss and the toss is not blown away."
            },
            {
                "step_id": 12,
                "edge": "For $D=0$ before toss $n$, the prior $n-1$ tosses must all have been recorded with equal numbers of heads and tails, requiring $n-1$ to be even. Setting $n-1=2k$ (so $n=2k+1$) parameterizes the toss indices at which $D$ can equal zero immediately beforehand, enabling systematic summation over $k$.",
                "direct_dependent_steps": [
                    11
                ],
                "node": "The event $D=0$ just before toss $n$ requires $n-1$ to be even, so write $n-1=2k$."
            },
            {
                "step_id": 13,
                "edge": "The coin surviving $2k$ tosses (Step 12) demands $2k$ consecutive non-terminations, each with probability $1-p$ (Step 7). By independence of tosses, this joint probability is $(1-p)^{2k}$, the survival likelihood for the initial segment.",
                "direct_dependent_steps": [
                    7,
                    12
                ],
                "node": "The probability that the coin survives the first $2k$ tosses is $(1-p)^{2k}$."
            },
            {
                "step_id": 14,
                "edge": "Given $2k$ non-blown tosses (Step 12), the count of sequences with equal heads/tails is $\\binom{2k}{k}$, and with fair coin flips ($\\frac{1}{2}$ per outcome), the probability is $\\binom{2k}{k}/2^{2k}$. This is the binomial probability for exactly $k$ successes in $2k$ trials.",
                "direct_dependent_steps": [
                    12
                ],
                "node": "The probability of equal heads and tails in $2k$ non-blown tosses is $\\binom{2k}{k}/2^{2k}$."
            },
            {
                "step_id": 15,
                "edge": "Combining Step 13 (survival through $2k$ tosses) and Step 14 (balanced heads/tails in those tosses), the joint probability of $D=0$ before toss $2k+1$ is their product: $(1-p)^{2k} \\cdot \\binom{2k}{k}/2^{2k}$. This captures both the process surviving long enough and achieving balance.",
                "direct_dependent_steps": [
                    13,
                    14
                ],
                "node": "Hence the probability that $D=0$ just before toss $2k+1$ is $(1-p)^{2k}\\frac{\\binom{2k}{k}}{2^{2k}}$."
            },
            {
                "step_id": 16,
                "edge": "The contribution to $\\mathbf{E}[D]$ from toss $2k+1$ requires both $D=0$ just before the toss (Step 15) and the toss itself not being blown away (probability $1-p$ from Step 7). Multiplying these gives $(1-p)^{2k+1} \\binom{2k}{k}/2^{2k}$, the expected increment of $D$ from this specific toss.",
                "direct_dependent_steps": [
                    15,
                    7
                ],
                "node": "The contribution to $\\mathbf{E}[D]$ from toss $2k+1$ equals this probability times the probability $1-p$ that the toss is not blown away."
            },
            {
                "step_id": 17,
                "edge": "Summing Step 16's contribution over all $k \\geq 0$ (since $k=0$ corresponds to the first toss) yields $\\mathbf{E}[D] = (1-p) \\sum_{k=0}^{\\infty} (1-p)^{2k} \\binom{2k}{k}/2^{2k}$. Factoring out $1-p$ simplifies the series expression for further manipulation.",
                "direct_dependent_steps": [
                    16
                ],
                "node": "Therefore $\\mathbf{E}[D]=(1-p)\\sum_{k=0}^{\\infty}(1-p)^{2k}\\frac{\\binom{2k}{k}}{2^{2k}}$."
            },
            {
                "step_id": 18,
                "edge": "We recall the standard generating function identity for central binomial coefficients, $\\sum_{k=0}^{\\infty} \\binom{2k}{k} x^k = \\frac{1}{\\sqrt{1-4x}}$, valid for $|x| < \\frac{1}{4}$. This combinatorial tool is essential for evaluating the infinite series in Step 17.",
                "direct_dependent_steps": null,
                "node": "Recall the generating function $\\sum_{k=0}^{\\infty}\\binom{2k}{k}x^k=\\frac{1}{\\sqrt{1-4x}}$."
            },
            {
                "step_id": 19,
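                "edge": "Matching Step 17's series structure to Step 18's generating function, we set $x = \\left( \\frac{1-p}{2} \\right)^2$. This substitution aligns $(1-p)^{2k}/2^{2k} = \\left[ (1-p)^2 / 4 \\right]^k$ with $x^k$, enabling direct application of the generating function. Because $p$ is positive, $(1-p)^2$ is strictly less than $1$, so $x$ lies strictly inside the radius of convergence $\\frac{1}{4}$ of the series and the identity applies.",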
                "direct_dependent_steps": [
                    17,
                    18
                ],
                "node": "Substitute $x=\\left(\\frac{1-p}{2}\\right)^2$ into the generating function."
            },
            {
                "step_id": 20,
                "edge": "Applying Step 19's substitution to Step 18's identity gives $\\sum_{k=0}^{\\infty} \\binom{2k}{k} \\left( \\frac{1-p}{2} \\right)^{2k} = \\frac{1}{\\sqrt{1 - 4 \\cdot (1-p)^2/4}} = \\frac{1}{\\sqrt{1 - (1-p)^2}}$. The simplification $4x = (1-p)^2$ streamlines the denominator.",
                "direct_dependent_steps": [
                    19
                ],
                "node": "This gives $\\sum_{k=0}^{\\infty}\\binom{2k}{k}\\left(\\frac{1-p}{2}\\right)^{2k}=\\frac{1}{\\sqrt{1-(1-p)^2}}$."
            },
            {
                "step_id": 21,
                "edge": "Substituting Step 20's closed-form sum into Step 17's expression yields $\\mathbf{E}[D] = (1-p) \\cdot \\frac{1}{\\sqrt{1 - (1-p)^2}}$. Algebraic simplification confirms this equals $\\frac{1-p}{\\sqrt{1 - (1-p)^2}}$, providing a compact formula for the expectation.",
                "direct_dependent_steps": [
                    17,
                    20
                ],
                "node": "Hence $\\mathbf{E}[D]=(1-p)\\frac{1}{\\sqrt{1-(1-p)^2}}=\\frac{1-p}{\\sqrt{1-(1-p)^2}}$."
            },
            {
                "step_id": 22,
                "edge": "We prepare for numerical evaluation by inserting $p = \\frac{1}{25}$ (Step 2) into Step 21's symbolic expression. This substitution initiates the concrete computation required for the final answer.",
                "direct_dependent_steps": [
                    2,
                    21
                ],
                "node": "Substitute $p=\\frac{1}{25}$ into the expression for $\\mathbf{E}[D]$."
            },
            {
                "step_id": 23,
                "edge": "Computing $1 - p$ from Step 22 gives $1 - \\frac{1}{25} = \\frac{24}{25}$. This fraction is exact and avoids floating-point errors, preserving precision for subsequent steps.",
                "direct_dependent_steps": [
                    22
                ],
                "node": "Then $1-p=\\frac{24}{25}$."
            },
            {
                "step_id": 24,
                "edge": "Squaring Step 23's result: $\\left( \\frac{24}{25} \\right)^2 = \\frac{576}{625}$. Verification: $24^2 = 576$ and $25^2 = 625$, confirming the arithmetic is correct.",
                "direct_dependent_steps": [
                    23
                ],
                "node": "Then $(1-p)^2=\\left(\\frac{24}{25}\\right)^2=\\frac{576}{625}$."
            },
            {
                "step_id": 25,
                "edge": "Calculating $1 - (1-p)^2$ using Step 24: $1 - \\frac{576}{625} = \\frac{625 - 576}{625} = \\frac{49}{625}$. Cross-check: $625 - 576 = 49$, which is $7^2$, ensuring the numerator is a perfect square.",
                "direct_dependent_steps": [
                    24
                ],
                "node": "Therefore $1-(1-p)^2=1-\\frac{576}{625}=\\frac{49}{625}$."
            },
            {
                "step_id": 26,
                "edge": "Taking the square root of Step 25's result: $\\sqrt{\\frac{49}{625}} = \\frac{7}{25}$. This follows since $\\sqrt{49} = 7$ and $\\sqrt{625} = 25$, yielding a rational value as expected.",
                "direct_dependent_steps": [
                    25
                ],
                "node": "Hence $\\sqrt{1-(1-p)^2}=\\sqrt{\\frac{49}{625}}=\\frac{7}{25}$."
            },
            {
                "step_id": 27,
                "edge": "Combining Step 23 ($\\frac{24}{25}$) and Step 26 ($\\frac{7}{25}$) in Step 21's formula: $\\frac{24/25}{7/25} = \\frac{24}{7}$. The $25$ denominators cancel cleanly, producing the simplified fraction $\\frac{24}{7}$ with no approximation.",
                "direct_dependent_steps": [
                    23,
                    26
                ],
                "node": "It follows that $\\mathbf{E}[D]=\\frac{\\frac{24}{25}}{\\frac{7}{25}}=\\frac{24}{7}$."
            }
        ]
    }
]
