Spaces:
Runtime error
Runtime error
| # src: https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e | |
class LinearRegressionGradient:
    """Univariate linear regression fitted with batch gradient descent.

    The model is ``h(x) = theta[0] + theta[1] * x`` where ``theta`` is a
    mutable two-element sequence ``[intercept, slope]``.
    """

    def __init__(self, theta=None):
        """Store the initial parameters and reset the recorded loss.

        Args:
            theta: Mutable two-element sequence ``[intercept, slope]``. May be
                left as ``None`` and supplied later to ``gradientDescent``.
        """
        self.theta = theta
        self.loss_ = float("inf")

    @staticmethod
    def _sample_count(X, y):
        """Return the number of samples after checking ``X`` and ``y`` agree."""
        m = len(X)
        if m == 0:
            raise ValueError("X must contain at least one sample")
        if len(y) != m:
            raise ValueError("X and y must have the same length")
        return m

    def hypothesis(self, x):
        """Return the prediction ``theta[0] + theta[1] * x`` for input ``x``."""
        return self.theta[0] + self.theta[1] * x

    def loss(self, X, y):
        """Return the cost ``J = 1/(2m) * sum((h(x_i) - y_i) ** 2)``.

        Args:
            X: Sequence of input values.
            y: Sequence of target values, same length as ``X``.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        m = self._sample_count(X, y)
        # The error is measured between the *prediction* and the target, not
        # between the raw input and the target.
        squared_errors = (
            (self.hypothesis(x_i) - y_i) ** 2 for x_i, y_i in zip(X, y)
        )
        return sum(squared_errors) / (2 * m)

    def gradientDescent(self, X, y, theta, num_iter=3000, alpha=0.01):
        """Optimise ``theta`` in place with batch gradient descent.

        Args:
            X: Sequence of input values.
            y: Sequence of target values, same length as ``X``.
            theta: Mutable two-element sequence ``[intercept, slope]``; it is
                updated in place and becomes ``self.theta``.
            num_iter: Number of gradient steps to take.
            alpha: Learning rate.

        Raises:
            ValueError: If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        m = self._sample_count(X, y)
        # Predictions are computed from self.theta, so it must be the very
        # object being updated; otherwise the gradient would never change.
        self.theta = theta
        for j in range(num_iter):
            # predict with the current parameters
            residuals = [self.hypothesis(x_i) - y_i for x_i, y_i in zip(X, y)]
            # compute slope, aka derivative with current params (theta)
            deri_th0 = sum(residuals) / m
            deri_th1 = sum(r * x_i for r, x_i in zip(residuals, X)) / m
            # update parameters (moving against the gradient 'derivative')
            theta[0] = theta[0] - alpha * deri_th0
            theta[1] = theta[1] - alpha * deri_th1
            # report progress every 200 iterations
            if j % 200 == 0:
                self.loss_ = self.loss(X, y)
                print(f"loss: {self.loss_}")
        # Record the loss of the final parameters, not of the last report.
        self.loss_ = self.loss(X, y)
def app():
    """Render the page: heading, two-column body, and a link to the source."""
    import streamlit as st

    # Page heading followed by a one-line description.
    st.subheader("Linear Regression using Gradient Descent")
    st.markdown(
        """> Plain Python (vanilla version) i.e. without importing any library"""
    )

    # Left column shows the math and code; right column runs the example.
    left_col, right_col = st.columns(2)
    with left_col:
        code_math()
    with right_col:
        interactive_run()

    source_link = "> source [notebook](https://gist.github.com/iamaziz/ea5863beaee090937fd6828e88653f5e)."
    st.markdown(source_link)
def code_math():
    """Render each formula of the method next to the code implementing it.

    Shows, in order: the constructor, the hypothesis, the loss function and
    the gradient-descent update rule, each followed by the source of the
    corresponding ``LinearRegressionGradient`` method.
    """
    import inspect

    import streamlit as st

    tex = st.latex
    write = st.write
    mark = st.write

    def show_source(func):
        """Display the source code of ``func`` as a Python code block."""
        st.code(inspect.getsource(func), language="python")

    # An instance is only needed to reach the methods for source inspection.
    model = LinearRegressionGradient(theta=[0, 0])

    write("The class")
    show_source(model.__init__)

    write("the Hypothesis")
    tex(r"""h_\theta(x) = \theta_0 + \theta_1x""")
    show_source(model.hypothesis)

    mark('The Loss/Objective/Cost function "_minimize_"')
    tex(r"""J(\theta_0, \theta_1) = \frac{1}{2m}\sum(h_\theta(x^{(i)}) - y^{(i)})^2""")
    show_source(model.loss)

    write("The Gradient Descent algorithm")
    mark("> repeat until converge {")
    tex(
        r"""\theta_0 = \theta_0 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)} )"""
    )
    # Parentheses are balanced here: the residual is grouped, then multiplied
    # by x^{(i)}.
    tex(
        r"""\theta_1 = \theta_1 - \alpha \frac{1}{m} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)}) x^{(i)}"""
    )
    mark("> }")
    show_source(model.gradientDescent)
def interactive_run():
    """Render the interactive example: generate data, fit, report and plot.

    A default data set of 10 samples is used until the button is pressed, at
    which point a data set of the size chosen on the slider replaces it. The
    model is then fitted and the solution, loss, usage snippet and a plot of
    the data with the fitted line are shown.
    """
    import matplotlib.pyplot as plt
    import numpy as np
    import streamlit as st

    mark = st.markdown
    tex = st.latex

    def random_data(n=10):
        """Return ``n`` noisy, linearly related samples as 1-D arrays ``(X, y)``.

        Both arrays are rounded to two decimals.
        """
        # src: https://www.gaussianwaves.com/2020/01/generating-simulated-dataset-for-regression-problems-sklearn-make_regression/
        from sklearn import datasets

        features, targets = datasets.make_regression(
            n_samples=n,  # number of samples
            n_features=1,  # number of features
            n_informative=1,  # number of useful features
            noise=40,  # standard deviation of the Gaussian noise
            random_state=0,  # fixed seed: same points for a given n each run
        )
        # Flatten both arrays to 1-D so they can be iterated value by value.
        return (
            np.around(features.reshape(-1), 2),
            np.around(targets.reshape(-1), 2),
        )

    X, y = random_data()
    theta = [0, 0]  # initial values
    model = LinearRegressionGradient(theta)

    mark("# Example")
    n = st.slider("Number of samples", min_value=10, max_value=200, step=10)
    if st.button("generate new data and solve"):
        X, y = random_data(n=n)

    mark("_Input_")
    mark(f"_X_ = {X}")
    mark(f"_y_ = {y}")
    model.gradientDescent(X, y, theta)  # run to optimize thetas

    mark("_Solution_")
    tex(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")  # print solution
    tex(f"loss = {model.loss_}")

    mark("> How to run")
    # Plain (non-f) strings: the braces are shown literally in the snippet.
    mark(
        "\n"
        "```python\n"
        "X, y = random_data()\n"
        "theta = [0, 0] # initial values\n"
        "model = LinearRegressionGradient(theta)\n"
        'model.gradientDescent(X, y, theta) # run "i.e. optimize thetas"\n'
        "# print solution\n"
        '# print(f"y = {model.theta[0]:.1f} + {model.theta[1]:.1f} x")\n'
        '# print(f"loss = {model.loss_}")\n'
        "```\n"
    )

    # -- visualize
    fig, ax = plt.subplots()
    ax.scatter(X, y, label="Linear Relation")
    y_pred = model.theta[0] + model.theta[1] * np.array(X)
    ax.plot(X, y_pred)
    ax.grid(color="black", linestyle="--", linewidth=0.5)
    ax.legend(loc=2)
    st.pyplot(fig)
    # pyplot keeps every figure alive until closed; release it once rendered
    # so figures do not pile up across Streamlit reruns.
    plt.close(fig)