randFromArray([-1, 1]) * randRange(10, 80) / 20 randRange(-5, 5) function(x) { return SLOPE * x + INTERCEPT } ((SLOPE < 0 ? -1 : 1) * 11 - INTERCEPT) / SLOPE ((SLOPE < 0 ? 1 : -1) * 11 - INTERCEPT) / SLOPE function(x, low, high) { return (x < low) ? low : (x > high) ? high : x; } random() + 0.5 (BOTTOM < -11) ? -11 : BOTTOM (TOP > 11) ? 11 : TOP sortNumbers((function() { var range = (RIGHT - LEFT) / 20; return _.map(shuffle(_.range(-8, 9), 9), function(x) { return x * range; }); })()) function(a) { // make sure the points are unique in a for (var i = 0; i < a.length; ++i) { for (var j = i + 1; j < a.length; ++j) { if (_.isEqual(a[i], a[j])) { a.splice(j, 1); --j; } } } return a; } atan2(SLOPE, 1) + PI / 2 [] UNIQARRAYS(_.map(XS, function(x, index) { // Calculate the sum of the offsets so far var total = _.reduce(TOTALOFFSET, function(sum, num) { return sum + num; }, 0); var randomRange = function(low, high) { return random() * (high - low) + low; } var offset; if (index < 2 || index > 6) { // make sure the first two and last // two points are on the same side offset = randomRange(OFFSET/2, OFFSET); } else if (total < 0) { // if the offset is negative, choose less // negative numbers offset = randomRange(-OFFSET - total, OFFSET); } else { // if it is positive, choose less //positive numbers offset = randomRange(-OFFSET, OFFSET - total); } // store this offset TOTALOFFSET.push(offset); // calculate the rounded point created by this offset return [BOUND(round(x + cos(ANG) * offset), -9, 9), BOUND(round(FUNC(x) + sin(ANG) * offset), -9, 9)]; })) (function() { // calculate the linear square regression line for our points var n = POINTS.length; var meanX = _.reduce(POINTS, function(sum, pt) { return sum + pt[0]; }, 0) / n; var meanX2 = pow(meanX, 2); var meanY = _.reduce(POINTS, function(sum, pt) { return sum + pt[1]; }, 0) / n; var meanY2 = pow(meanY, 2); var xi2 = _.reduce(POINTS, function(sum, pt) { return sum + pow(pt[0], 2); }, 0); var yi2 = _.reduce(POINTS, 
function(sum, pt) { return sum + pow(pt[1], 2); }, 0); var xiyi = _.reduce(POINTS, function(sum, pt) { return sum + pt[0] * pt[1]; }, 0); var error = xiyi - n * meanX * meanY; var realIntercept = (meanY * xi2 - meanX * xiyi) / (xi2 - n * meanX2); var realSlope = error / (xi2 - n * meanX2); // Sample standard deviations var correction = n / (n - 1); var stdevX = correction * pow(xi2 / n - meanX2, 0.5); var stdevY = correction * pow(yi2 / n - meanY2, 0.5); var p = _.map(POINTS, function(pt) {return pt[0]; }); var r2 = error / ((n - 1) * stdevX * stdevY); return [realSlope, realIntercept, r2]; })() function(slope, intercept) { var low = _.first(POINTS), high = _.last(POINTS); var slopeadd = 1 / REALSLOPE + REALSLOPE; lowx = (1 / REALSLOPE * low[0] + low[1] - REALINTERCEPT) / slopeadd; highx = (1 / REALSLOPE * high[0] + high[1] - REALINTERCEPT) / slopeadd; var lowfunc = function(x) { return -1 / REALSLOPE * (x - low[0]) + low[1]; }; var highfunc = function(x) { return -1 / REALSLOPE * (x - high[0]) + high[1]; }; lowIntersectx = (1 / REALSLOPE * low[0] + low[1] - intercept) / (slope + 1 / REALSLOPE); highIntersectx = (1 / REALSLOPE * high[0] + high[1] - intercept) / (slope + 1 / REALSLOPE); // the differences between the least squares line and the // given line, at the highest and lowest points var lowDiff = sqrt(pow(lowfunc(lowx) - lowfunc(lowIntersectx), 2) + pow(lowx - lowIntersectx, 2)); var highDiff = sqrt(pow(highfunc(highx) - highfunc(highIntersectx), 2) + pow(highx - highIntersectx, 2)); // whether or not each of the points are above or below // the given line var updown = _.map(POINTS, function(pt) { var x = pt[0], y = pt[1], est = slope * x + intercept; return y >= est ? 
1 : -1; }); // sort and reverse-sort updown var updownSorted = sortNumbers(updown); var updownReversed = updownSorted.slice(0).reverse(); // ensure: // all the points are not up, ..., up, down, ..., down return !_.isEqual(updown, updownSorted) && // all the points are not down, ..., down, up, ..., up !_.isEqual(updown, updownReversed) && // one point is above/below _.include(updown, 1) && _.include(updown, -1) && // the differences are between some proportion // of the offset lowDiff < 1.3 * OFFSET && highDiff < 1.3 * OFFSET; }

In the graph below, can a line fit the data well?

If yes, fit a line to the data.

// Main interactive graph: plots POINTS and a movable line defined by two
// draggable points, and attaches show/hide/solution helpers to `graph`.
graphInit({
    range: 11,
    scale: 20,
    axisArrows: "<->",
    tickStep: 1,
    labelStep: 1,
    gridOpacity: 0.05,
    axisOpacity: 0.2,
    tickOpacity: 0.4,
    labelOpacity: 0.5
});

addMouseLayer();

// add the points
_.each(POINTS, function(pt) {
    circle(pt, 0.2, { fill: "black" });
});

// add our movable line
graph.pointA = addMovablePoint({
    coord: [-5, 5],
    snapX: 0.5,
    snapY: 0.5,
    normalStyle: {
        stroke: KhanUtil.BLUE,
        fill: KhanUtil.BLUE
    }
});

graph.pointB = addMovablePoint({
    coord: [5, 5],
    snapX: 0.5,
    snapY: 0.5,
    normalStyle: {
        stroke: KhanUtil.BLUE,
        fill: KhanUtil.BLUE
    }
});

graph.bestFitLine = addMovableLineSegment({
    pointA: graph.pointA,
    pointZ: graph.pointB,
    fixed: true,
    extendLine: true
});

// A and B can't be in the same place: each handler returns false when the
// proposed position coincides with the other point
graph.pointA.onMove = function(x, y) {
    return (x !== graph.pointB.coord[0] || y !== graph.pointB.coord[1]);
};

graph.pointB.onMove = function(x, y) {
    return (x !== graph.pointA.coord[0] || y !== graph.pointA.coord[1]);
};

graph.pointA.toFront();
graph.pointB.toFront();

// guards showSolution so the solution line is only drawn once
var shown = false;

// show both handles and the line through them
graph.showLine = function() {
    graph.pointA.visibleShape.show();
    graph.pointA.mouseTarget.show();
    graph.pointB.visibleShape.show();
    graph.pointB.mouseTarget.show();
    graph.bestFitLine.show();
};

// hide both handles and the line through them
graph.hideLine = function() {
    graph.pointA.visibleShape.hide();
    graph.pointA.mouseTarget.hide();
    graph.pointB.visibleShape.hide();
    graph.pointB.mouseTarget.hide();
    graph.bestFitLine.hide();
};

// show the true least square regression line
graph.showSolution = function() {
    if (shown) {
        return;
    } else {
        shown = true;
    }

    var realFunc = function(x) {
        return REALSLOPE * x + REALINTERCEPT;
    };

    // scroll to the question first, then fade the regression line in
    $("html, body").animate({
        scrollTop: $(".question").offset().top
    }, {
        duration: 500,
        easing: "swing",
        complete: function() {
            line([-11, realFunc(-11)], [11, realFunc(11)], {
                stroke: ORANGE,
                opacity: 0
            }).animate({ opacity: 1 }, 750);
        }
    });
};
[ KhanUtil.graphs.main.graph.pointA.coord, KhanUtil.graphs.main.graph.pointB.coord, $("input[name='linear']:checked").attr("id") ]
if (_.isEqual(guess, [[-5, 5], [5, 5], "exists"])) { return ""; } // Check that the right checkbox is checked if (guess[2] !== "exists") { return false; } var slope = (guess[1][1] - guess[0][1]) / (guess[1][0] - guess[0][0]); var intercept = slope * -guess[0][0] + guess[0][1]; // Validate the line return VALIDATOR(slope, intercept);
KhanUtil.graphs.main.graph.pointA.setCoord(guess[0]); KhanUtil.graphs.main.graph.pointB.setCoord(guess[1]); KhanUtil.graphs.main.graph.bestFitLine.transform(true);
$("#"+guess[2]).attr('checked', 'checked');

A line of fit is a line that approximates the data points.

There are three main criteria to use when finding a line of fit.

First, make sure that your line passes among the points and does not lie completely above or below all of them.

// Tutorial figure captioned "Bad": five sample points and a line that lies
// below every one of them.
init({
    range: [[-5, 5], [-5, 5]],
    scale: [20, 20]
});

var samplePoints = [[-2, -3], [0, -1], [-1, 1], [1, 1], [0, 2]];
for (var k = 0; k < samplePoints.length; k++) {
    circle(samplePoints[k], 0.2, { fill: "black" });
}

line([-5, -4], [5, -3], { stroke: BLUE });

label([0, 4], i18n._("Bad"), "center", false)
    .css("color", "red")
    .css("font-size", "20px");
// Tutorial figure captioned "Okay": the same five sample points with a
// shallow line that has one point below it and the rest above.
init({
    range: [[-5, 5], [-5, 5]],
    scale: [20, 20]
});

var samplePoints = [[-2, -3], [0, -1], [-1, 1], [1, 1], [0, 2]];
for (var k = 0; k < samplePoints.length; k++) {
    circle(samplePoints[k], 0.2, { fill: "black" });
}

line([-5, -2.5], [5, -1.5], { stroke: BLUE });

label([0, 4], i18n._("Okay"), "center", false)
    .css("color", "orange")
    .css("font-size", "20px");

Next, make sure that your line alternates between passing above and then below points, and doesn't simply go above some points and then below the rest.

// Tutorial figure captioned "Okay" (left-hand side of the second comparison):
// five sample points and a shallow line.
init({
    range: [[-5, 5], [-5, 5]],
    scale: [20, 20]
});

var samplePoints = [[-2, -3], [0, -1], [-1, 1], [1, 1], [0, 2]];
for (var k = 0; k < samplePoints.length; k++) {
    circle(samplePoints[k], 0.2, { fill: "black" });
}

line([-5, -2.5], [5, -1.5], { stroke: BLUE });

label([0, 4], i18n._("Okay"), "center", false)
    .css("color", "orange")
    .css("font-size", "20px");
// Tutorial figure captioned "Better": five sample points and a steeper line
// with points on both sides of it.
init({
    range: [[-5, 5], [-5, 5]],
    scale: [20, 20]
});

var samplePoints = [[-2, -3], [0, -1], [-1, 1], [1, 1], [0, 2]];
for (var k = 0; k < samplePoints.length; k++) {
    circle(samplePoints[k], 0.2, { fill: "black" });
}

line([-5, -3.5], [3, 5], { stroke: BLUE });

label([0, 4], i18n._("Better"), "center", false)
    .css("color", "#8EEB00")
    .css("font-size", "20px");

Finally, make sure that the line goes through the middle of all the points, so that it is close to all of the points.

// Tutorial figure captioned "Better" (left-hand side of the third comparison).
// This repeats the "Better" figure shown earlier in the hints, so the caption
// uses the same colour (#8EEB00) as that earlier copy.
init({
    range: [[-5, 5], [-5, 5]],
    scale: [20, 20]
});

circle([-2, -3], 0.2, { fill: "black" });
circle([ 0, -1], 0.2, { fill: "black" });
circle([-1,  1], 0.2, { fill: "black" });
circle([ 1,  1], 0.2, { fill: "black" });
circle([ 0,  2], 0.2, { fill: "black" });

line([-5, -3.5], [3, 5], { stroke: BLUE });

label([0, 4], i18n._("Better"), "center", false)
    .css("color", "#8EEB00")
    .css("font-size", "20px");
// Tutorial figure captioned "Good": five sample points and the line from
// (-4, -5) to (3, 5).
init({
    range: [[-5, 5], [-5, 5]],
    scale: [20, 20]
});

var samplePoints = [[-2, -3], [0, -1], [-1, 1], [1, 1], [0, 2]];
for (var k = 0; k < samplePoints.length; k++) {
    circle(samplePoints[k], 0.2, { fill: "black" });
}

line([-4, -5], [3, 5], { stroke: BLUE });

label([0, 4], i18n._("Good"), "center", false)
    .css("color", "#00C322")
    .css("font-size", "20px");

There are several lines that satisfy all three criteria. Click here to show one of them.

0 0 11
[ KhanUtil.graphs.main.graph.pointA.coord, KhanUtil.graphs.main.graph.pointB.coord, $("input[name='nonlinear']:checked").attr("id") ]
if (_.isEqual(guess, [[-5, 5], [5, 5], "exists"])) { return ""; } return guess[2] === "notexists";
KhanUtil.graphs.main.graph.pointA.setCoord(guess[0]); KhanUtil.graphs.main.graph.pointB.setCoord(guess[1]); KhanUtil.graphs.main.graph.bestFitLine.transform(true);
$("#"+guess[2]).attr('checked', 'checked');

Does the data look like it follows a linear relationship?

The data is mostly random, so there is no line that fits the data well.

randRange(-3, 3) randRange(-25, 25) / 10 randRange(-30, 30) / 10 function(minX, maxX, slope) { var arr = []; var midX = (minX + maxX) / 2; var midY = randRange(2 * abs(slope) - 8, 8 - 2 * abs(slope)) var xs = []; while (minX < maxX) { xs.push(minX++); } xs = shuffle(xs); var n = randRange(4, 5); for (var i = 0; i < xs.length; i++) { var y = (xs[i] - midX) * slope + midY + 2 * random() - 1; if (abs(y) < 10) { arr.push([xs[i], round(y)]); if (arr.length === n) { break; } } } return arr; } GETPOINTS(-10, BREAK - 1, SLOPE1) GETPOINTS(BREAK + 1, 10, SLOPE2) [].concat(POINTS1).concat(POINTS2)
[ KhanUtil.graphs.main.graph.pointA.coord, KhanUtil.graphs.main.graph.pointB.coord, $("input[name='nonlinear']:checked").attr("id") ]
if (_.isEqual(guess, [[-5, 5], [5, 5], "exists"])) { return ""; } return guess[2] === "notexists";
KhanUtil.graphs.main.graph.pointA.setCoord(guess[0]); KhanUtil.graphs.main.graph.pointB.setCoord(guess[1]); KhanUtil.graphs.main.graph.bestFitLine.transform(true);
$("#"+guess[2]).attr('checked', 'checked');

Does the data look like it follows a linear relationship?

The data forms two separate linear trends, so there is no single line that fits the data well.

function() { var xs = shuffle([-10, -9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); var n = randRange(9, 12); var a = randRange(4, 10); var b = randRange(-10, 10) / 10; var c = randRange(-8, 8); if (c > 2) { a *= -1; } else if (c > -2) { a *= randFromArray([-1, 1]); } var arr = []; for (var i = 0; i < xs.length; i++) { var x = xs[i]; var y = round(x * x / a + b * x + c + 2 * random() - 1); if (abs(y) < 10) { arr.push([x, y]); if (arr.length === n) { break; } } } return arr; } GETPOINTS() [].concat(POINTS1)
[ KhanUtil.graphs.main.graph.pointA.coord, KhanUtil.graphs.main.graph.pointB.coord, $("input[name='nonlinear']:checked").attr("id") ]
if (_.isEqual(guess, [[-5, 5], [5, 5], "exists"])) { return ""; } return guess[2] === "notexists";
KhanUtil.graphs.main.graph.pointA.setCoord(guess[0]); KhanUtil.graphs.main.graph.pointB.setCoord(guess[1]); KhanUtil.graphs.main.graph.bestFitLine.transform(true);
$("#"+guess[2]).attr('checked', 'checked');

Does the data look like it follows a linear relationship?

The data does not form a linear trend, so there is no single line that fits the data well.