Create your own scorer¶
Create the scorer¶
Step 1: Create a new directory for your scorer¶
- Go in the
score-engine/src/scorers
directory - Create a new directory for your scorer
mkdir myscorer
Step 2: Create a new scorer¶
Create a new scorer file __init__.py
:
# -*- coding: utf-8 -*-
from .. import BaseScorer
import pandas as pd
class Scorer(BaseScorer):
def __init__(self):
super().__init__()
def score(self, data_submission):
df_submission = pd.read_csv(data_submission)
score = # Score processing
return score
Re-Deploy the project¶
Go in the deployment/simple
directory:
docker-compose down
docker-compose build
docker-compose up -d
Use the new scorer in your competition¶
- Go to http://localhost:3000
- Open the competition
- Select Edit info on the sidebar
- Write
scorers.myscorer.Scorer
in Scorer Class - Click on Update
Example 1: Scorer of MDSF 2016¶
Here is the scorer of the competition “Le Meilleur Data Scientist de France 2016”.
We use a MAPE metric:
# -*- coding: utf-8 -*-
from .. import BaseScorer
import pandas as pd
import numpy as np
# Mean Absolute Percentage Error
def mape_error(y_true, y_pred):
return np.mean(np.abs((y_true - y_pred) / y_true))[0]
class Scorer(BaseScorer):
def __init__(self):
super().__init__()
def score(self, data_submission):
df_submission = pd.read_csv(
data_submission,
sep=';',
decimal='.',
index_col=0,
header=0,
names=['id', 'price'],
)
submission_columns_count = df_submission.shape[1]
if submission_columns_count != 1:
raise Exception('Submission has {} columns and should have 1 columns with ";" separator'.format(
submission_columns_count
))
df_reference = pd.read_csv(
'scorers/mdsf2016/y_test.csv',
sep=';',
decimal='.',
index_col=0,
header=0,
names=['id', 'price'],
)
reference_rows_count = df_reference.shape[0]
submission_rows_count = df_submission.shape[0]
if submission_rows_count != reference_rows_count:
raise Exception('Submission has {} rows and should have {} rows'.format(
submission_rows_count, reference_rows_count)
)
df_reference.sort_index(inplace=True)
df_submission.sort_index(inplace=True)
score = mape_error(df_reference, df_submission)
return score
Example 2: Scorer of MDSF 2018¶
Here is the scorer of the competition “Le Meilleur Data Scientist de France 2018”.
We use a Logloss metric:
# -*- coding: utf-8 -*-
from .. import BaseScorer
from sklearn.metrics import log_loss
import pandas as pd
class Scorer(BaseScorer):
def __init__(self):
super().__init__()
def score(self, data_submission):
df_submission = pd.read_csv(
data_submission,
sep=',',
decimal='.',
header=0,
names=['id', 'cl1', 'cl2', 'cl3'],
index_col=0,
)
submission_columns_count = df_submission.shape[1]
if submission_columns_count != 3:
raise Exception('Submission has {} columns and should have 3 columns with comma separator'.format(
submission_columns_count
))
df_reference = pd.read_csv(
'scorers/mdsf2018/y_test.csv',
sep=',',
decimal='.',
index_col=0,
header=0,
names=['id', 'delai_vente'],
)
reference_rows_count = df_reference.shape[0]
submission_rows_count = df_submission.shape[0]
if submission_rows_count != reference_rows_count:
raise Exception('Submission has {} rows and should have {} rows'.format(
submission_rows_count, reference_rows_count)
)
df_reference.sort_index(inplace=True)
df_submission.sort_index(inplace=True)
score = log_loss(df_reference, df_submission)
return score