Work in Progress
This page is currently under construction.
comparison_viewer_dashboard
¶
At a glance
Useful for:
API Documentation: comparison_viewer_dashboard()
What is needed to generate the chart?
Worked Example¶
from splink.duckdb.linker import DuckDBLinker
import splink.duckdb.comparison_library as cl
import splink.duckdb.comparison_template_library as ctl
from splink.duckdb.blocking_rule_library import block_on
from splink.datasets import splink_datasets
import logging, sys
logging.disable(sys.maxsize)
df = splink_datasets.fake_1000
settings = {
"link_type": "dedupe_only",
"blocking_rules_to_generate_predictions": [
block_on("first_name"),
block_on("surname"),
],
"comparisons": [
ctl.name_comparison("first_name"),
ctl.name_comparison("surname"),
ctl.date_comparison("dob", cast_strings_to_date=True),
cl.exact_match("city", term_frequency_adjustments=True),
ctl.email_comparison("email", include_username_fuzzy_level=False),
],
"retain_intermediate_calculation_columns": True,
"retain_matching_columns":True,
}
linker = DuckDBLinker(df, settings)
linker.estimate_u_using_random_sampling(max_pairs=1e6)
blocking_rule_for_training = block_on(["first_name", "surname"])
linker.estimate_parameters_using_expectation_maximisation(blocking_rule_for_training)
blocking_rule_for_training = block_on("dob")
linker.estimate_parameters_using_expectation_maximisation(blocking_rule_for_training)
df_predictions = linker.predict(threshold_match_probability=0.2)
linker.comparison_viewer_dashboard(df_predictions, "img/scv.html", overwrite=True)
# You can view the scv.html file in your browser, or inline in a notbook as follows
from IPython.display import IFrame
IFrame(
src="./img/scv.html", width="100%", height=1200
)
FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))