11## DESCRIPTION
2+ ## For a dataset, find the correlation coefficient and the regression line.
3+ ## ENDDESCRIPTION
24## DBsubject(Statistics)
35## DBchapter(Simple linear regression)
46## DBsection(Regression)
1214## Problem1('')
1315## KEYWORDS('statistic', 'regression','correlation')
1416
15- DOCUMENT(); # This should be the first executable line in the problem.
17+ DOCUMENT(); # This should be the first executable line in the problem.
1618
1719loadMacros(
18- "PGstandard.pl",
19- "MathObjects.pl",
20- "answerHints.pl",
21- "contextCurrency.pl",
22- "niceTables.pl",
23- "parserPopUp.pl",
24- "PGchoicemacros.pl"
25- );
26-
27- TEXT(beginproblem());
28-
29- #################################################
20+ 'PGstandard.pl', 'PGML.pl',
21+ 'contextCurrency.pl',
22+ 'PGstatisticsmacros.pl', 'parserPopUp.pl',
23+ 'PGcourse.pl'
24+ );
25+
3026# Set-up
3127
32- @b = (32.98, 49.72, 88.01, 97.34, 64.30, 106.27, 52.44, 70.29, 43.58);
33- @ab=('32.98', '49.72', '88.01', '97.34', '64.30', '106.27', '52.44', '70.29', '43.58');
34- @t = ( 4.50, 5.28, 10.00, 16.00, 7.70, 16.00, 7.00, 10.00, 5.50);
35- @at =('4.50', '5.28', '10.00', '16.00', '7.70', '16.00', '7.00', '10.00', '5.50');
28+ @b = (32.98, 49.72, 88.01, 97.34, 64.30, 106.27, 52.44, 70.29, 43.58);
29+ @ab = (
30+ '32.98', '49.72', '88.01', '97.34', '64.30', '106.27',
31+ '52.44', '70.29', '43.58'
32+ );
33+ @t = (4.50, 5.28, 10.00, 16.00, 7.70, 16.00, 7.00, 10.00, 5.50);
34+ @at = (
35+ '4.50', '5.28', '10.00', '16.00', '7.70', '16.00',
36+ '7.00', '10.00', '5.50'
37+ );
3638
37- @slice = NchooseK(9,6 );
39+ @slice = random_subset(6, 0 .. 8 );
3840
39- @sb = @b[@slice];
41+ @sb = @b[@slice];
4042@sab = @ab[@slice];
41- @st = @t[@slice];
43+ @st = @t[@slice];
4244@sat = @at[@slice];
4345
44- $sx = 0;
45- $sy = 0;
46- $sxy = 0;
47- $sx2 = 0;
48- $sy2 = 0;
49-
50- for($i=0;$i<6;$i++){
51- $sx = $sx + $sb[$i];
52- $sy = $sy + $st[$i];
53- $sxy = $sxy + $sb[$i] * $st[$i];
54- $sx2 = $sx2 + ($sb[$i])**2;
55- $sy2 = $sy2 + ($st[$i])**2;
56- }
57- $ssxy = $sxy-(($sx*$sy)/6);
58- $ssx = $sx2-(($sx**2)/6);
59- $ssy = $sy2-(($sy**2)/6);
60-
61- $r = $ssxy/sqrt($ssx*$ssy);
62-
63- $explainb = "\(r\) is close to \(1.\)";
64- $popupb = PopUp(["?", "No", "Yes"], "Yes");
65- $popupe = PopUp(["?", "Decrease", "Increase"], "Increase");
66-
67- $b0 = sprintf("%0.5f",($sy * $sx2 - $sx * $sxy)/(6 * $sx2 - ($sx)**2));
68-
69- $b1 = sprintf("%0.5f",(6 * $sxy - $sx * $sy)/(6 * $sx2 - ($sx)**2));
70-
71- $bill = random(40,100,5);
72- $tip = $b0 + $b1*$bill;
73-
74- $increase = random(5,10,5);
75- $changetip = $increase * $b1;
76-
77- #################################################
78- # Main
79-
80- BEGIN_TEXT
81- The amounts of 6 restaurant bills and the corresponding amounts of the tips are given in the below. Assume that bill amount is the explanatory variable and tip amount the response variable. Use RStudio to find the following.
82- $BR
83- \{
84- LayoutTable(
85- [
86- [['Bill',b =>1, noencase=>1],"$sab[0]", "$sab[1]", "$sab[2]", "$sab[3]", "$sab[4]", "$sab[5]"],
87- [['Tip',b =>1, noencase=>1],"$sat[0]", "$sat[1]", "$sat[2]", "$sat[3]", "$sat[4]", "$sat[5]"],
88- ],
89- align => ' l |rrrrrr',
90- encase => ['\(','\)'],
91- allcellcss => 'padding:8px; '
92- );
93- \}
94- $PAR
95- (a) Compute the correlation: \( r =\) \{ans_rule(15)\}
96- $PAR
97- (b) Does there appear to be a significant correlation?
98- $BR $SPACE $SPACE
99- Answer: \{ $popupb->menu() \}
100- $PAR
101- (c) The regression equation is \(\hat{y}=\) \{ans_rule(15)\}.
102- $BR $BR
103- ${BITALIC}For parts (d) and (e): Enter your answer in dollars: ${DOLLAR}xx.xx$EITALIC
104- $PAR
105- (d) If the amount of the bill is $DOLLAR\($bill,\) the best prediction for the amount of the tip is
106- \{ans_rule(10)\}.
107- $PAR
108- (e) According to the regression equation, for every $DOLLAR\($increase \) increase in the bill, the tip should \{ $popupe->menu() \} by \{ans_rule(10)\}.
109- END_TEXT
110-
111- #################################################
112- # Answers
113-
114- $showPartialCorrectAnswers = 1;
115-
116- $ans_a = Compute($r)->with(tolType=>'absolute', tolerance=>'0.005');
117- ANS($ans_a->cmp->withPostFilter(AnswerHints(
118- sub {
119- my ($correct,$student,$anshash) = @_;
120- return abs($student-$correct) < .02;
121- } => ["Your answer is close. Try the calculation using more accuracy."])));
122-
123- ANS( $popupb->cmp() );
124-
125- $ans_c = Formula("$b0 + $b1 x");
126- ANS($ans_c->with(tolType=>'absolute', tolerance=>'0.001')->cmp);
46+ $r = Real(sample_correlation(~~@sat, ~~@sab));
47+ ($m, $b) = linear_regression(~~@sab, ~~@sat);
48+ $m=Real($m); $b=Real($b);
49+
50+ $explainb = "Yes, \(r\) is close to \(1.\)";
51+ $popupb = DropDown([ "No", "Yes" ], "Yes");
52+ $popupe = DropDown([ "Decrease", "Increase" ], "Increase");
53+
54+ $line = Formula("$m *x + $b");
55+
56+ $bill = random(40, 100, 5);
57+ $tip = $line->eval(x => $bill);
58+
59+ $increase = random(5, 10, 5);
60+ $changetip = $increase * $m;
12761
12862Context("Currency");
129- $ans_d = Currency($tip);
130- ANS($ans_d->with(tolType=>'absolute', tolerance=>'0.04')->cmp);
131-
132- ANS( $popupe->cmp() );
133-
134- $ans_e2 = Currency($changetip);
135- ANS($ans_e2->with(tolType=>'absolute', tolerance=>'0.04')->cmp);
136-
137- #################################################
138- # Solution
139-
140- Context()->texStrings;
141- BEGIN_SOLUTION
142- $BR
143- (a) \(r = $ans_a\)
144- $BR
145- (b) ${BBOLD}\{ $popupb->correct_ans() \}${EBOLD}, $explainb
146- $BR
147- (c) \(\hat{y} = $ans_c \)
148- $BR
149- (d) The predicted amount is \($b0 + $b1($bill) = $tip\) \(\longrightarrow\) \($ans_d\).
150- $BR
151- (e) The tip should ${BBOLD}\{ $popupe->correct_ans() \}$EBOLD by \($ans_e2\).
152- END_SOLUTION
153-
154- ENDDOCUMENT(); # This should be the last executable line in the problem.
63+ $ans_d = Currency($tip)->with(tolType => 'absolute', tolerance => '0.04');
64+
65+ # ANS( $popupe->cmp() );
66+
67+ $ans_e2 =
68+ Currency($changetip)->with(tolType => 'absolute', tolerance => '0.04');
69+
70+ BEGIN_PGML
71+ The amounts of 6 restaurant bills and the corresponding amounts of the tips are given in the below. Assume that bill amount is the explanatory variable and tip amount the response variable.
72+
73+ [#
74+ [. **Bill** .] [. [$sab[0]] .][. [$sab[1]] .][. [$sab[2]] .]
75+ [. [$sab[3]] .][. [$sab[4]] .][. [$sab[5]] .]*
76+ [. **Tip** .] [. [$sat[0]] .][. [$sat[1]] .][. [$sat[2]] .]
77+ [. [$sat[3]] .][. [$sat[4]] .][. [$sat[5]] .]*
78+ #]{align => 'r|rrrrrr', padding => [0.5,0.5]}
79+
80+ a) Compute the correlation: [` r \approx`] [_]{$r}{15}
81+
82+ b) Does there appear to be a significant correlation? [_]{$popupb}
83+
84+ c) The regression equation is [`\hat{y}=`] [_]{$line}{15}.
85+
86+ _For parts (d) and (e): Enter your answer in dollars: $xx.xx_
87+
88+ d) If the amount of the bill is $[`[$bill],`] the best prediction for the amount of the tip is [_]{$ans_d->cmp}{10}.
89+
90+ e) According to the regression equation, for every $[`[$increase] `] increase in the bill, the tip should [_]{$popupe} by [_]{$ans_e2->cmp}{10}.
91+ END_PGML
92+
93+ BEGIN_PGML_SOLUTION
94+
95+ a) [`r \approx [$r]`]
96+
97+ b) [$explainb]*
98+
99+ c) [`\hat{y} = [$line] `]
100+
101+ d) The predicted amount is [`[$b] + [$m]([$bill]) \approx [$ans_d]`]
102+
103+ e) The tip should increase by [$ans_e2]
104+ END_PGML_SOLUTION
105+
106+ ENDDOCUMENT(); # This should be the last executable line in the problem.
0 commit comments