Skip to content

VVP

Functions

cosine_similarity(list1, list2)

Calculates cosine similarity.

Source code in VVP/vvp.py
def cosine_similarity(list1, list2):
    """
    Calculates the cosine similarity of two vectors.

    The value is `1 - scipy.spatial.distance.cosine(list1, list2)`.

    Returns:
        0 if either input is `None`, or if the computed similarity is NaN;
        otherwise the similarity value.

    Raises:
        AssertionError: if either input contains a NaN or an infinite value.
    """
    if list1 is None or list2 is None:
        return 0

    # both vectors must be free of NaN / inf entries (list2 is checked first)
    for vector in (list2, list1):
        assert not np.isnan(vector).any() and not np.isinf(vector).any()

    distance = scipy.spatial.distance.cosine(list1, list2)
    sim = 1 - distance
    # a NaN similarity is reported as "no similarity"
    return 0 if np.isnan(sim) else sim

ensemble_vvp(results_dirs, sample_testing_function, aggregation_function, **kwargs)

Goes through all the output directories and calculates the scores.

  • results_dirs: list of result dirs to analyse.
  • sample_testing_function: analysis/validation/verification function to be performed on each subdirectory of the results_dirs.
  • aggregation_function: function to combine all results
  • **kwargs : custom parameters. The items parameter can be used to give explicit ordering of the various subdirectories.

Returns:

Type Description
dict

ensemble_vvp_results

Source code in VVP/vvp.py
def ensemble_vvp(results_dirs, sample_testing_function,
                 aggregation_function, **kwargs):
    """
    Goes through all the output directories and calculates the scores.

    Arguments:
    - `results_dirs`: list of result dirs to analyse. A single result dir
        may also be passed directly, as a `str` or an `os.PathLike` object.
    - `sample_testing_function`: analysis/validation/verification function to
        be performed on each subdirectory of the results_dirs. It is called
        as `sample_testing_function(sample_dir, **kwargs)`.
    - `aggregation_function`: function to combine all results. It is called
        as `aggregation_function(scores, **kwargs)` once per result dir.
    - `**kwargs` : custom parameters, forwarded to both functions. The
        `items` parameter can be used to give explicit ordering of the
        various subdirectories; entries that are not existing directories
        inside the result dir are skipped.

    Returns:
        dict: ensemble_vvp_results, keyed by result dir. Each value is a
        dict with the keys `scores` (list of per-subdirectory results) and
        `scores_aggregation` (the value returned by `aggregation_function`).

    Authors: Derek Groen, Wouter Edeling, and Hamid Arabnejad
    """
    # if a single result_dir is specified, still add it to a list;
    # isinstance also covers str subclasses and pathlib-style paths, which
    # would otherwise be iterated (or fail to iterate) as if they were a list
    if isinstance(results_dirs, (str, os.PathLike)):
        results_dirs = [results_dirs]

    ensemble_vvp_results = {}

    for results_dir in results_dirs:
        # use user-specified sample directories if specified,
        # otherwise look for uq results in all directories in results_dir
        # (os.listdir gives no ordering guarantee)
        if 'items' in kwargs:
            items = kwargs['items']
        else:
            items = os.listdir(results_dir)

        scores = []
        for item in items:
            sample_dir = os.path.join(results_dir, item)
            if not os.path.isdir(sample_dir):
                continue
            print(sample_dir)
            scores.append(sample_testing_function(sample_dir, **kwargs))

        ensemble_vvp_results[results_dir] = {
            'scores': scores,
            'scores_aggregation': aggregation_function(scores, **kwargs),
        }

    return ensemble_vvp_results

ensemble_vvp_LoR(results_dirs_PATH, load_QoIs_function, aggregation_function, **kwargs)

  • results_dirs_PATH: list of result dirs, one directory for each resolution and each one containing the same QoIs stored to disk
  • load_QoIs_function: a function which loads the QoIs from each subdirectory of the results_dirs_PATH.
  • aggregation_function: function to combine all results
  • **kwargs: custom parameters. The 'items' parameter must be used to give explicit ordering of the various subdirectories, indicating the order of the refinement.

Returns:

Type Description
dict

Returns the result scores as a dict.

The scores dict has this structure

result_dir_1_name:
    order: <polynomial_order>
    runs: <num_runs>
    value:
        vary_param_1: {<sobol_func_name>:<value>}
        ...
        vary_param_X: {<sobol_func_name>:<value>}
...
result_dir_N_name:
      order: <polynomial_order>
      runs: <num_runs>
      value:
            vary_param_1: {<sobol_func_name>:<value>}
            ...
            vary_param_X: {<sobol_func_name>:<value>}

Tip

  • To see how the input functions should be defined for your app, please look at the implemented examples in the [FabFlee repo](https://github.com/djgroen/FabFlee/blob/master/VVP/flee_vvp.py#L752)

Author: Hamid Arabnejad

Source code in VVP/vvp.py
def ensemble_vvp_LoR(results_dirs_PATH, load_QoIs_function,
                     aggregation_function,
                     **kwargs):
    """
    Loads the QoIs of every result directory, orders them by polynomial
    order and hands them to the aggregation function.

    Arguments:
    ----------
    - `results_dirs_PATH`:
            path of a directory holding the result dirs, one directory for
            each resolution and each one containing the same QoIs stored
            to disk
    - `load_QoIs_function`:
            a function which loads the QoIs from each subdirectory of
            the results_dirs_PATH. It is called with the subdirectory path
            and must return the tuple `(value, order, num_runs)`.
    - `aggregation_function`:
            function to combine all results. It is called as
            `aggregation_function(scores, **kwargs)`.
    - `**kwargs`:
            custom parameters, forwarded unchanged to
            `aggregation_function`. NOTE(review): earlier documentation
            asked for an 'items' parameter giving the refinement order;
            this function does not read it itself, the scores are ordered
            by their `order` value.

    Returns:
        dict: returns the results score in `dict` format, sorted by
        ascending `order` (the last key has the highest order).

        The scores dict has this structure
        ```python
        result_dir_1_name:
            order: <polynomial_order>
            runs: <num_runs>
            value:
                vary_param_1: {<sobol_func_name>:<value>}
                ...
                vary_param_X: {<sobol_func_name>:<value>}
        ...
        result_dir_N_name:
              order: <polynomial_order>
              runs: <num_runs>
              value:
                    vary_param_1: {<sobol_func_name>:<value>}
                    ...
                    vary_param_X: {<sobol_func_name>:<value>}
        ```

    Raises:
        ValueError: if `results_dirs_PATH` contains no subdirectories.

    !!! tip
        - to see how input functions should be defined for your app, please
        look at the implemented examples in the [FabFlee repo](https://github.com/djgroen/FabFlee/blob/master/VVP/flee_vvp.py#L752)

    Author: Hamid Arabnejad
    """
    results_dirs = [
        dirname for dirname in os.listdir(results_dirs_PATH)
        if os.path.isdir(os.path.join(results_dirs_PATH, dirname))
    ]
    if not results_dirs:
        # one-element tuple so that a tuple-valued path cannot break the
        # % formatting
        raise ValueError('\nThere is not subdirectories in the passed '
                         'results_dirs_PATH arguments.'
                         '\nresults_dirs_PATH = %s' % (results_dirs_PATH,))

    # one entry per result dir: {'value': ..., 'runs': ..., 'order': ...}
    scores = {}
    for result_dir in results_dirs:
        value, order, num_runs = load_QoIs_function(
            os.path.join(results_dirs_PATH, result_dir))
        scores[result_dir] = {
            'value': value,
            'runs': num_runs,
            'order': order,
        }

    # sort scores by order value, ascending, i.e. the last key in scores
    # has the highest order value (pass reverse=True to sorted() for
    # descending order); a plain dict keeps this insertion order
    scores = dict(sorted(scores.items(), key=lambda entry: entry[1]['order']))

    # call aggregation_function to compare the sobol indices
    aggregation_function(scores, **kwargs)

    # hand the scores back to the caller, as the documentation promises
    return scores

ensemble_vvp_QoI(simulation_result_QoI, uncertainty_result_QoI, QoI_name)

The currently supported similarity measures are:

  • Jensen-Shannon divergence
  • Renyi divergence
  • Cosine similarity
  • Euclidean distance
  • Kullback-Leibler divergence

Parameters:

Name Type Description Default
simulation_result_QoI

Experimental QoI data.

required
uncertainty_result_QoI

uncertainty QoI results

required
QoI_name

the name of QoI

required

Returns:

Type Description
dict

returns a dictionary with the following structure:

{
    "similarity measure function name":
    {
        "QoI_name" : [similarity_measure_function_result]
    }
}

Source code in VVP/vvp.py
def ensemble_vvp_QoI(simulation_result_QoI,
                     uncertainty_result_QoI,
                     QoI_name
                     ):
    """
    Compares simulation QoI data with uncertainty QoI results, row by row,
    using every supported similarity measure.

    The result keys and the functions that produce them are:

    * "Jensen-Shannon Divergence" : `jensen_shannon_divergence`
    * "Cosine similarity" : `cosine_similarity`
    * "KL divergence" : `kl_divergence`
    * "Renyi Divergence" : `renyi_divergence`
    * "Euclidean Distance" : `euclidean_distance`

    Args:
        simulation_result_QoI : Experimental QoI data.
        uncertainty_result_QoI : uncertainty QoI results
        QoI_name : the name of QoI

    Both inputs are converted to numpy arrays and must have the same shape.
    A 1-D input is treated as a single row.

    Returns:
        dic : returns a dictionary with the following structure, holding
        one list entry per compared row:
        ```json
        {
            "similarity measure function name":
            {
                "QoI_name" : [similarity_measure_function_result]
            }
        }
        ```

    Raises:
        RuntimeError: if the two inputs do not have the same shape.

    Author: Hamid Arabnejad
    """
    sim_rows = np.array(simulation_result_QoI)
    uq_rows = np.array(uncertainty_result_QoI)

    if sim_rows.shape != uq_rows.shape:
        raise RuntimeError("The dimension of two input array are not equal !")

    # a single 1-D series is handled as a one-row 2-D array
    if sim_rows.ndim == 1:
        sim_rows = np.array([sim_rows])
    if uq_rows.ndim == 1:
        uq_rows = np.array([uq_rows])

    # (result key, measure function), in the order the keys are created
    measures = (
        ("Jensen-Shannon Divergence", jensen_shannon_divergence),
        ("Cosine similarity", cosine_similarity),
        ("KL divergence", kl_divergence),
        ("Renyi Divergence", renyi_divergence),
        ("Euclidean Distance", euclidean_distance),
    )

    similarity_measure_results = {}
    for sim_row, uq_row in zip(sim_rows, uq_rows):
        for measure_name, measure in measures:
            # the entry for a measure is created the first time it is needed
            per_qoi = similarity_measure_results.setdefault(
                measure_name, {QoI_name: []})
            per_qoi[QoI_name].append(measure(sim_row, uq_row))

    return similarity_measure_results

euclidean_distance(list1, list2)

Calculates Euclidean distance.

Source code in VVP/vvp.py
def euclidean_distance(list1, list2):
    """
    Calculates Euclidean distance.

    The inputs are paired element by element with `zip`, so when their
    lengths differ only the leading elements of the longer one are used.

    Returns:
        the square root of the sum of the squared element-wise differences.
    """
    squared_gaps = [np.power(a - b, 2) for a, b in zip(list1, list2)]
    return np.sqrt(np.sum(squared_gaps))

jensen_shannon_divergence(list1, list2)

Calculates a similarity score based on the Jensen-Shannon divergence (1 minus the divergence).

Source code in VVP/vvp.py
def jensen_shannon_divergence(list1, list2):
    """
    Calculates a Jensen-Shannon based similarity score.

    The returned value is `1 - 0.5 * (KL(list1 || m) + KL(list2 || m))`
    where `m` is the element-wise mean of the two inputs and `KL` is
    `scipy.stats.entropy`. Identical distributions therefore score 1.

    NOTE(review): `scipy.stats.entropy` normalises each of its arguments on
    its own, so when `list1` and `list2` do not have the same sum, `m` is
    not the plain average of the two normalised distributions -- confirm
    that callers pass normalised data.

    Returns:
        0 if the computed score is infinite, otherwise the score.
    """
    # accept plain sequences as well as numpy arrays
    first = np.asarray(list1)
    second = np.asarray(list2)

    midpoint = (first + second) / 2

    # the two relative-entropy terms of the Jensen-Shannon divergence
    first_term = scipy.stats.entropy(first, midpoint)
    second_term = scipy.stats.entropy(second, midpoint)
    sim = 1 - 0.5 * (first_term + second_term)

    # an infinite score is reported as "no similarity"
    return 0 if np.isinf(sim) else sim

kl_divergence(list1, list2)

Calculates Kullback-Leibler divergence.

Source code in VVP/vvp.py
def kl_divergence(list1, list2):
    """
    Calculates Kullback-Leibler divergence.

    Thin wrapper that returns `scipy.stats.entropy(list1, list2)`.
    """
    return scipy.stats.entropy(list1, list2)

renyi_divergence(list1, list2, alpha=0.99)

Calculates Renyi divergence.

Source code in VVP/vvp.py
def renyi_divergence(list1, list2, alpha=0.99):
    """
    Calculates Renyi divergence.

    Computes `1 / (alpha - 1) * log(sum(p**alpha / q**(alpha - 1)))` over
    the element pairs `(p, q)` of `list1` and `list2`.

    Args:
        list1 : first sequence of values (the `p` terms).
        list2 : second sequence of values (the `q` terms).
        alpha : order of the divergence.

    Returns:
        0 if the computed value is infinite, otherwise the value.
    """
    ratios = [
        np.power(p, alpha) / np.power(q, alpha - 1)
        for p, q in zip(list1, list2)
    ]
    total = np.sum(ratios)
    scale = 1 / (alpha - 1)
    sim = scale * np.log(total)

    # an infinite result (e.g. the log of a zero sum) is reported as 0
    return 0 if np.isinf(sim) else sim