HTML tutorial
CSS3 tutorial
Bootstrap tutorial
JavaScript tutorial
JQuery tutorial
AngularJS tutorial
React tutorial
NodeJS tutorial
PHP tutorial
Python tutorial
Python3 tutorial
Django tutorial
Linux tutorial
Docker tutorial
Ruby tutorial
Java tutorial
C tutorial
C++ tutorial
Perl tutorial
JSP tutorial
Lua tutorial
Scala tutorial
Go tutorial
ASP.NET tutorial
C# tutorial
A regression is a method to determine the relationship between one variable (y) and other variables (x).
This is the scatter plot (from the previous chapter):
// Scatter plot of the sample data, rendered into the element with id "myPlot1".
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

// One trace, drawn in "markers" mode.
var data = [
  {x: xArray, y: yArray, mode: "markers"}
];

// Axis ranges, axis titles and the chart title.
var layout = {
  xaxis: {range: [40, 160], title: "Square Meters"},
  yaxis: {range: [5, 16], title: "Price in Millions"},
  title: "House Prices vs. Size"
};

Plotly.newPlot("myPlot1", data, layout);
// Sample data: x values are plotted on the "Square Meters" axis,
// y values on the "Price in Millions" axis, paired by index.
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

// Define Data: a single trace drawn in "markers" mode
var data = [];
data.push({x: xArray, y: yArray, mode: "markers"});

// Define Layout: built property by property (same keys, same order)
var layout = {};
layout.xaxis = {range: [40, 160], title: "Square Meters"};
layout.yaxis = {range: [5, 16], title: "Price in Millions"};
layout.title = "House Prices vs. Size";

// Render into the element with id "myPlot"
Plotly.newPlot("myPlot", data, layout);
From the scattered data above, how can we predict future prices?
This is a linear graph predicting prices based on the lowest and the highest price:
// Scatter plot plus a straight line joining the first and last data points,
// rendered into the element with id "myPlot2".
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

var data = [
  // The raw data points.
  {x: xArray, y: yArray, mode: "markers"},
  // Line from (50, 7) to (150, 15). Plotly's scatter mode flag is "lines";
  // "line" is not a valid value and would be ignored in favour of the default.
  {x: [50, 150], y: [7, 15], mode: "lines"}
];

var layout = {
  xaxis: {range: [40, 160], title: "Square Meters"},
  yaxis: {range: [5, 16], title: "Price in Millions"},
  title: "House Prices vs. Size"
};

Plotly.newPlot("myPlot2", data, layout);
// Sample data: 11 x values and 11 y values, paired by index.
// (yArray previously held an extra 9, giving 12 values for 11 x values.)
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

var data = [
  // The raw data points.
  {x:xArray, y:yArray, mode:"markers"},
  // Line from (50, 7) to (150, 15). Plotly's scatter mode flag is "lines";
  // "line" is not a valid value and would be ignored in favour of the default.
  {x:[50,150], y:[7,15], mode:"lines"}
];

var layout = {
  xaxis: {range: [40, 160], title: "Square Meters"},
  yaxis: {range: [5, 16], title: "Price in Millions"},
  title: "House Prices vs. Size"
};

Plotly.newPlot("myPlot", data, layout);
A linear graph can be written as y = ax + b
Where a is the slope of the line and b is the intercept (the value of y when x = 0).
This Model predicts prices using a linear relationship between price and size:
// Plots the sample data together with a straight line through the origin
// (y = slope * x), rendered into the element with id "myPlot3".
// Each statement sits on its own line so the "//" comments no longer
// swallow the code that follows them.
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

// Calculate Slope: sum of all y values divided by sum of all x values
var xSum = xArray.reduce(function(a, b){return a + b;}, 0);
var ySum = yArray.reduce(function(a, b){return a + b;}, 0);
var slope = ySum / xSum;

// Generate values: one point per integer x from 50 to 150 inclusive
var xValues = [];
var yValues = [];
for (var x = 50; x <= 150; x += 1) {
  xValues.push(x);
  yValues.push(x * slope);
}

var data = [
  {x: xArray, y: yArray, mode: "markers"},
  // Plotly's scatter mode flag is "lines"; "line" is not a valid value.
  {x: xValues, y: yValues, mode: "lines"}
];

var layout = {
  xaxis: {range: [40, 160], title: "Square Meters"},
  yaxis: {range: [5, 16], title: "Price in Millions"},
  title: "House Prices vs. Size"
};

Plotly.newPlot("myPlot3", data, layout);
// Sample data: 11 x values and 11 y values, paired by index.
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

// Calculate Slope: accumulate both totals, then divide total y by total x.
var xSum = 0;
var ySum = 0;
xArray.forEach(function (value) { xSum += value; });
yArray.forEach(function (value) { ySum += value; });
var slope = ySum / xSum;

// Generate values: one (x, slope * x) pair per integer x in [50, 150].
var xValues = [];
var yValues = [];
var x = 50;
while (x <= 150) {
  xValues.push(x);
  yValues.push(slope * x);
  x += 1;
}
In the example above, the slope is the sum of all prices divided by the sum of all sizes (an average price per square meter), and the intercept is 0.
This Model predicts prices using a linear regression function:
// Plots the sample data together with its least-squares regression line
// (y = slope * x + intercept), rendered into the element with id "myPlot4".
// Each statement sits on its own line so the "//" comments no longer
// swallow the code that follows them.
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

// Calculate sums of x, y, x*x and x*y over all data points
var xSum = 0;
var ySum = 0;
var xySum = 0;
var xxSum = 0;
var count = xArray.length;
for (var i = 0; i < count; i++) {
  xSum += xArray[i];
  ySum += yArray[i];
  xxSum += xArray[i] * xArray[i];
  xySum += xArray[i] * yArray[i];
}

// Calculate slope and intercept from the sums
var slope = (count * xySum - xSum * ySum) / (count * xxSum - xSum * xSum);
var intercept = (ySum / count) - (slope * xSum) / count;

// Generate values: one point per integer x from 50 to 150 inclusive
var xValues = [];
var yValues = [];
for (var x = 50; x <= 150; x += 1) {
  xValues.push(x);
  yValues.push(x * slope + intercept);
}

var data = [
  {x: xArray, y: yArray, mode: "markers"},
  // Plotly's scatter mode flag is "lines"; "line" is not a valid value.
  {x: xValues, y: yValues, mode: "lines"}
];

var layout = {
  xaxis: {range: [40, 160], title: "Square Meters"},
  yaxis: {range: [5, 16], title: "Price in Millions"},
  title: "House Prices vs. Size"
};

Plotly.newPlot("myPlot4", data, layout);
// Sample data: 11 x values and 11 y values, paired by index.
var xArray = [50,60,70,80,90,100,110,120,130,140,150];
var yArray = [7,8,8,9,9,9,10,11,14,14,15];

// Calculate Sums of x, y, x*x and x*y over all data points
var xSum = 0;
var ySum = 0;
var xxSum = 0;
var xySum = 0;
var count = xArray.length;
for (var i = 0; i < count; i++) {
  xSum += xArray[i];
  ySum += yArray[i];
  xxSum += xArray[i] * xArray[i];
  xySum += xArray[i] * yArray[i];
}

// Calculate slope and intercept of the least-squares line from the sums
var slope = (count * xySum - xSum * ySum) / (count * xxSum - xSum * xSum);
var intercept = (ySum / count) - (slope * xSum) / count;

// Generate values: one (x, slope * x + intercept) pair per integer x in [50, 150]
var xValues = [];
var yValues = [];
for (var x = 50; x <= 150; x += 1) {
  xValues.push(x);
  yValues.push(x * slope + intercept);
}
If scattered data points do not fit a linear regression (a straight line through the points), the data may fit a polynomial regression.
A Polynomial Regression, like linear regression,
uses the relationship between the variables x and y to find the best way to draw a line through the data points.