In [1]:
from ipywidgets import interact, interactive, fixed, interact_manual, interactive_output
import ipywidgets as widgets

import numpy as np
import matplotlib.pyplot as plt
import random as rand
from sklearn import datasets, linear_model
import copy as cp

import numpy.random as randn
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn import preprocessing

In [2]:
def generate_dataLasso(m, n_samples=100, noise_std=0.05):
    """Draw a synthetic sparse linear-regression dataset.

    Every feature is sampled uniformly in [0, 2). Only three of the ``m``
    features drive the target; all the others are irrelevant:

        Y = X[:, 2] - 2.0 * X[:, 4] + 1.5 * X[:, m - 2] + eps

    where ``eps`` is Gaussian noise with zero mean and standard deviation
    ``noise_std``.

    Parameters
    ----------
    m : int
        Number of features (columns of ``X``). Column 4 must exist, so
        ``m`` has to be at least 5.
    n_samples : int, optional
        Number of samples (rows of ``X``). Defaults to 100.
    noise_std : float, optional
        Standard deviation of the additive noise on ``Y``. Defaults to 0.05.

    Returns
    -------
    X : ndarray of shape (n_samples, m)
    Y : ndarray of shape (n_samples,)

    Raises
    ------
    IndexError
        If ``m`` is too small for columns 2 and 4 to exist.
    """
    # The noise is drawn before the features so that, for a given seed of the
    # global NumPy generator, the draw order stays: noise first, then features.
    eps = randn.normal(0.0, noise_std, n_samples)
    # One vectorised draw fills X row by row from the random stream.
    X = randn.uniform(0.0, 2.0, (n_samples, m))
    Y = X[:, 2] - 2.0 * X[:, 4] + 1.5 * X[:, m - 2] + eps
    return X, Y

def running_mean(x, N):
    """Moving average of ``x`` over a sliding window of ``N`` samples.

    For each index ``i`` the window starts at ``i - (N - 1) // 2``; for a
    positive ``N`` it covers ``N`` consecutive positions, which means an even
    window reaches one sample further to the right than to the left. The
    window is clipped to the valid index range, so entries near either end
    are averaged over fewer samples.

    Returns a float64 array shaped like ``x``.
    """
    smoothed = np.zeros_like(x, dtype=np.float64)
    n_points = len(x)
    reach = (N - 1) // 2
    # Odd windows are symmetric; even windows take one extra sample on the right.
    right_extra = 1 if N % 2 else 2
    for center in range(n_points):
        # Clip the window to the bounds of the sequence.
        start = max(0, center - reach)
        stop = min(n_points, center + reach + right_extra)
        smoothed[center] = np.mean(x[start:stop])
    return smoothed

In [13]:
def Ridge_Lasso_error(lam,X,Y,m):
    """Fit Ridge and Lasso with penalty ``lam`` and score them on fresh data.

    A noise-free test set of 50 samples is drawn with features uniform in
    [0, 2) and targets ``x[2] - 2.0 * x[4] + 1.5 * x[m - 2]``. Both models are
    trained on ``(X, Y)`` and evaluated on that test set.

    Parameters
    ----------
    lam : float
        Regularisation strength passed as ``alpha`` to both estimators.
    X, Y : array-like
        Training features and targets.
    m : int
        Number of features; must match the number of columns of ``X``.

    Returns
    -------
    tuple
        ``(ridge_mse, ridge_coef_norm, lasso_mse, n_zero)`` where ``n_zero``
        counts the Lasso parameters (coefficients and intercept) whose
        absolute value is below 1e-9.
    """
    # Noise-free reference test set.
    Xnew = randn.uniform(0.0, 2.0, (50, m))
    Y_test_ref = Xnew[:, 2] - 2.0 * Xnew[:, 4] + 1.5 * Xnew[:, m - 2]

    ridge_model = Ridge(alpha=lam, solver="svd", tol=0.0000001).fit(X, Y)
    # Lasso is only fitted for a strictly positive penalty; otherwise an
    # alpha=0 Ridge fit stands in for it.
    lasso_model = (
        Lasso(alpha=lam, tol=0.000001).fit(X, Y)
        if lam > 0.0
        else Ridge(alpha=0.0, solver="svd", tol=0.000001).fit(X, Y)
    )

    ridge_mse = np.mean((Y_test_ref - ridge_model.predict(Xnew)) ** 2)
    ridge_coef_norm = np.linalg.norm(ridge_model.coef_, 2)
    lasso_mse = np.mean((Y_test_ref - lasso_model.predict(Xnew)) ** 2)

    # Count parameters that are numerically zero (the intercept is included).
    zero_threshold = 0.000000001
    n_zero = sum(1 for weight in lasso_model.coef_ if abs(weight) < zero_threshold)
    if abs(lasso_model.intercept_) < zero_threshold:
        n_zero += 1

    return ridge_mse, ridge_coef_norm, lasso_mse, n_zero

In [17]:
def plot_overfitting_lasso(sigma,dim):
    """Plot Ridge and Lasso behaviour over a range of regularisation strengths.

    One dataset with ``dim`` features is generated, then Ridge and Lasso are
    fitted for 100 penalty values evenly spaced in [0, 0.05]. Four panels are
    drawn:

    * top-left: Ridge generalisation error (raw and smoothed over 10 points),
    * top-right: L2 norm of the Ridge weights,
    * bottom-left: Lasso generalisation error (raw and smoothed over 10 points),
    * bottom-right: fraction of Lasso parameters that are zero.

    Parameters
    ----------
    sigma : float
        NOTE(review): accepted but never used in this function; it has no
        effect on the generated data or on the figure.
    dim : int
        Number of features of the synthetic dataset.
    """
    lamb_list = np.linspace(0.0, 0.05, 100)
    X, Y = generate_dataLasso(dim)

    ridge_errors, ridge_norms, lasso_errors, zero_fractions = [], [], [], []
    for lam in lamb_list:
        ErrorR, normp, ErrorL, coefnuls = Ridge_Lasso_error(lam, X, Y, dim)
        ridge_errors.append(ErrorR)
        ridge_norms.append(normp)
        lasso_errors.append(ErrorL)
        # The zero count covers the dim weights plus the intercept.
        zero_fractions.append(coefnuls / (dim + 1))

    raw_style = dict(alpha=0.8, color='#0571b0', linewidth=3)
    smooth_style = dict(color='#ca0020', alpha=0.8, linewidth=3)

    fig, axes = plt.subplots(2, 2, figsize=(18, 8))
    axes[0,0].plot(lamb_list, ridge_errors, 'o-', **raw_style)
    axes[0,0].plot(lamb_list, running_mean(ridge_errors, 10), **smooth_style)
    axes[0,0].title.set_text('Erreur Ridge généralisation')

    axes[0,1].plot(lamb_list, ridge_norms, 'o-', **raw_style)
    axes[0,1].title.set_text('Norme des poids')

    axes[1,0].plot(lamb_list, lasso_errors, 'o-', **raw_style)
    axes[1,0].plot(lamb_list, running_mean(lasso_errors, 10), **smooth_style)
    axes[1,0].title.set_text('Erreur Lasso généralisation')

    axes[1,1].plot(lamb_list, zero_fractions, 'o-', **raw_style)
    # The plotted quantity is the share of zero parameters, so the title must
    # say "nuls" (the previous title said "non nuls" and was misspelt).
    axes[1,1].title.set_text('Proportion de poids nuls')
    plt.show()

In [18]:
# Slider controls feeding the two arguments of plot_overfitting_lasso.
sigma = widgets.FloatSlider(
    value=0.01,
    min=0.01,
    max=0.05,
    step=0.01,
    description="sigma",
)
dim = widgets.IntSlider(
    value=50,
    min=20,
    max=400,
    step=5,
    description="dim",
)

# Lay the two sliders out side by side.
ui = widgets.HBox(children=[sigma, dim])

# Bind the sliders to the plotting function; the figure is rendered into `out`.
out = widgets.interactive_output(
    plot_overfitting_lasso,
    dict(sigma=sigma, dim=dim),
)

In [19]:
# Show the slider row (ui) followed by the linked plot output area (out).
display(ui,out)

HBox(children=(FloatSlider(value=0.01, description='sigma', max=0.05, min=0.01, step=0.01), IntSlider(value=50…

Output()