VVP
Functions¶
cosine_similarity(list1, list2)
¶
Calculates cosine similarity.
Source code in fabsim/VVP/vvp.py
def cosine_similarity(list1, list2):
    """
    Calculates cosine similarity.

    Args:
        list1 : first vector (a sequence numpy can inspect), or `None`.
        list2 : second vector, or `None`.

    Returns:
        `1 - scipy.spatial.distance.cosine(list1, list2)`; `0` when either
        input is `None` or when the computed similarity is NaN.

    Raises:
        AssertionError: if either input contains NaN or infinite entries.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.spatial` is reachable as an attribute.
    from scipy.spatial import distance

    if list1 is None or list2 is None:
        return 0
    assert not (np.isnan(list2).any() or np.isinf(list2).any())
    assert not (np.isnan(list1).any() or np.isinf(list1).any())
    sim = 1 - distance.cosine(list1, list2)
    if np.isnan(sim):
        # an undefined (NaN) similarity is reported as "no similarity"
        return 0
    return sim
ensemble_vvp(results_dirs, sample_testing_function, aggregation_function, **kwargs)
¶
Goes through all the output directories and calculates the scores.
- results_dirs: list of result dirs to analyse.
- sample_testing_function: analysis/validation/verification function to be performed on each subdirectory of the results_dirs.
- aggregation_function: function to combine all results.
- **kwargs: custom parameters. The `items` parameter can be used to give explicit ordering of the various subdirectories.
Returns:
Type | Description |
---|---|
dict | ensemble_vvp_results |
Source code in fabsim/VVP/vvp.py
def ensemble_vvp(
    results_dirs, sample_testing_function, aggregation_function, **kwargs
):
    """
    Scores every sample sub-directory of each results directory and
    aggregates the scores per results directory.

    Arguments:
    - `results_dirs`: a single result dir (string) or a list of result dirs
        to analyse.
    - `sample_testing_function`: analysis/validation/verification function,
        called as `sample_testing_function(sample_path, **kwargs)` for each
        sub-directory of a results dir.
    - `aggregation_function`: called as
        `aggregation_function(scores, **kwargs)` once per results dir.
    - `**kwargs` : custom parameters, forwarded to both functions. The
        `items` parameter can be used to give explicit ordering of the
        various subdirectories; without it the directory listing is used.

    Returns:
        dict: ensemble_vvp_results, keyed by results dir; each value holds
        `names`, `scores` and `scores_aggregation`.

    Authors: Derek Groen, Wouter Edeling, and Hamid Arabnejad
    """
    # a lone directory name is handled as a one-element list
    if isinstance(results_dirs, str):
        results_dirs = [results_dirs]

    ensemble_vvp_results = {}
    for results_dir in results_dirs:
        # explicit sample ordering wins over the raw directory listing
        if "items" in kwargs:
            candidates = kwargs["items"]
        else:
            candidates = os.listdir("{}".format(results_dir))

        names = []
        scores = []
        for candidate in candidates:
            sample_path = os.path.join(results_dir, candidate)
            # anything that is not a directory is silently skipped
            if not os.path.isdir(sample_path):
                continue
            print(sample_path)
            names.append(candidate)
            scores.append(sample_testing_function(sample_path, **kwargs))

        scores_aggregation = aggregation_function(scores, **kwargs)
        ensemble_vvp_results[results_dir] = {
            "names": names,
            "scores": scores,
            "scores_aggregation": scores_aggregation,
        }

    return ensemble_vvp_results
ensemble_vvp_LoR(results_dirs_PATH, load_QoIs_function, aggregation_function, **kwargs)
¶
- results_dirs_PATH: list of result dirs, one directory for each resolution and each one containing the same QoIs stored to disk.
- load_QoIs_function: a function which loads the QoIs from each subdirectory of the results_dirs_PATH.
- aggregation_function: function to combine all results.
- **kwargs: custom parameters. The 'items' parameter must be used to give explicit ordering of the various subdirectories, indicating the order of the refinement.
Returns:
Type | Description |
---|---|
dic | returns the results score in `dic` format. The scores dict is keyed by result directory name, and each entry holds `order`, `runs` and `value`. |
Tip
- to see how input functions should be defined for your app, please look at the implemented examples in the [FabFlee repo](https://github.com/djgroen/FabFlee/blob/master/VVP/flee_vvp.py#L752)
Author: Hamid Arabnejad
Source code in fabsim/VVP/vvp.py
def ensemble_vvp_LoR(
    results_dirs_PATH, load_QoIs_function, aggregation_function, **kwargs
):
    """
    Loads the QoIs of every sub-directory of `results_dirs_PATH`, orders
    them by polynomial order and hands them to `aggregation_function`.

    Arguments:
    ----------
    - `results_dirs_PATH`:
            list of result dirs, one directory for each resolution and
            each one containing the same QoIs stored to disk
    - `load_QoIs_function`:
            a function which loads the QoIs from each subdirectory of
            the results_dirs_PATH. It is called with the sub-directory path
            and must return a `(value, order, num_runs)` triple.
    - `aggregation_function`:
            function to combine all results, called as
            `aggregation_function(scores, **kwargs)`
    - `**kwargs`:
            custom parameters. The 'items' parameter must be used to give
            explicit ordering of the various subdirectories, indicating
            the order of the refinement.

    Returns:
        dic: returns the results score in `dic` format, sorted by ascending
            `order`.

            The scores dict has this structure
            ```python
            result_dir_1_name:
                order: <polynomial_order>
                runs: <num_runs>
                value:
                    vary_param_1: {<sobol_func_name>:<value>}
                    ...
                    vary_param_X: {<sobol_func_name>:<value>}
            ...
            result_dir_N_name:
                order: <polynomial_order>
                runs: <num_runs>
                value:
                    vary_param_1: {<sobol_func_name>:<value>}
                    ...
                    vary_param_X: {<sobol_func_name>:<value>}
            ```

    Raises:
        ValueError: if `results_dirs_PATH` contains no sub-directories.

    !!! tip
        - to see how input functions should be defined for your app, please
        look at the implemented examples in [FabFlee repo]
        (https://github.com/djgroen/FabFlee/blob/master/VVP/flee_vvp.py#L752)

    Author: Hamid Arabnejad
    """
    results_dirs = [
        dirname
        for dirname in os.listdir(results_dirs_PATH)
        if os.path.isdir(os.path.join(results_dirs_PATH, dirname))
    ]
    if len(results_dirs) == 0:
        raise ValueError(
            "\nThere is not subdirectories in the passed "
            "results_dirs_PATH arguments."
            "\nresults_dirs_PATH = %s" % (results_dirs_PATH)
        )

    # one entry per result dir, in the layout described in the docstring
    scores = {}
    for result_dir in results_dirs:
        value, order, num_runs = load_QoIs_function(
            os.path.join(results_dirs_PATH, result_dir)
        )
        scores[result_dir] = {
            "value": value,
            "runs": num_runs,
            "order": order,
        }

    #################################################################
    # sort scores dict based on order value in ascending            #
    # i.e., the last key in scores will have the higher order value #
    # to have Descending order, set reverse=True                    #
    #################################################################
    scores = dict(
        sorted(scores.items(), key=lambda x: x[1]["order"], reverse=False)
    )

    ###########################################################
    # call aggregation_function to compares the sobol indices #
    ###########################################################
    aggregation_function(scores, **kwargs)

    # hand the ordered scores back, as promised by the docstring
    return scores
ensemble_vvp_QoI(simulation_result_QoI, uncertainty_result_QoI, QoI_name)
¶
The currently supported similarity measures are:
- Jensen-Shannon divergence
- Renyi divergence
- Cosine similarity
- Euclidean distance
- Kullback-Leibler divergence
Parameters:
Name | Type | Description | Default |
---|---|---|---|
simulation_result_QoI | | Experimental QoI data. | required |
uncertainty_result_QoI | | uncertainty QoI results | required |
QoI_name | | the name of QoI | required |
Returns:
Type | Description |
---|---|
dic |
returns a dictionary with the following structure:
|
Source code in fabsim/VVP/vvp.py
def ensemble_vvp_QoI(simulation_result_QoI, uncertainty_result_QoI, QoI_name):
    """
    Compares two QoI data sets row by row with every supported similarity
    measure.

    The measures, in the order they are evaluated, are:

    * Jensen-Shannon divergence
    * Cosine similarity
    * Kullback-Leibler divergence
    * Renyi divergence
    * Euclidean distance

    Args:
        simulation_result_QoI : Experimental QoI data.
        uncertainty_result_QoI : uncertainty QoI results
        QoI_name : the name of QoI

    Returns:
        dic : returns a dictionary with the following structure, holding one
            list entry per compared row:
            ```json
            {
                "similarity measure function name":
                {
                    "QoI_name" : [similarity_measure_function_result]
                }
            }
            ```

    Raises:
        RuntimeError: if the two inputs do not have the same shape.

    Author: Hamid Arabnejad
    """
    sim_rows = np.array(simulation_result_QoI)
    unc_rows = np.array(uncertainty_result_QoI)

    if sim_rows.shape != unc_rows.shape:
        raise RuntimeError("The dimension of two input array are not equal !")

    # a single 1-D sample is promoted to a one-row 2-D array
    if sim_rows.ndim == 1:
        sim_rows = np.array([sim_rows])
    if unc_rows.ndim == 1:
        unc_rows = np.array([unc_rows])

    # (result key, measure function) pairs, in evaluation order
    measures = (
        ("Jensen-Shannon Divergence", jensen_shannon_divergence),
        ("Cosine similarity", cosine_similarity),
        ("KL divergence", kl_divergence),
        ("Renyi Divergence", renyi_divergence),
        ("Euclidean Distance", euclidean_distance),
    )

    similarity_measure_results = {}
    for sim_row, unc_row in zip(sim_rows, unc_rows):
        for measure_name, measure in measures:
            # the per-measure bucket is created on first use only
            per_qoi = similarity_measure_results.setdefault(
                measure_name, {QoI_name: []}
            )
            per_qoi[QoI_name].append(measure(sim_row, unc_row))

    return similarity_measure_results
euclidean_distance(list1, list2)
¶
Calculates Euclidean distance.
Source code in fabsim/VVP/vvp.py
def euclidean_distance(list1, list2):
    """
    Calculates Euclidean distance.

    The two sequences are paired element by element; pairing stops at the
    end of the shorter one.
    """
    squared_gaps = []
    for left, right in zip(list1, list2):
        squared_gaps.append(np.power(left - right, 2))
    return np.sqrt(np.sum(squared_gaps))
jensen_shannon_divergence(list1, list2)
¶
Calculates a Jensen-Shannon similarity score (1 minus the Jensen-Shannon divergence).
Source code in fabsim/VVP/vvp.py
def jensen_shannon_divergence(list1, list2):
    """
    Calculates a Jensen-Shannon similarity score.

    The value returned is `1 - 0.5 * (KL(list1 || avg) + KL(list2 || avg))`
    where `avg` is the element-wise mean of the two inputs and `KL` is
    `scipy.stats.entropy` with two arguments.

    Args:
        list1 : first vector (converted to a numpy array).
        list2 : second vector (converted to a numpy array).

    Returns:
        the score described above, or `0` when that score is infinite.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` is reachable as an attribute.
    from scipy.stats import entropy

    # convert the vectors into numpy arrays in case that they aren't
    list1 = np.array(list1)
    list2 = np.array(list2)
    # calculate average
    avg_lists = (list1 + list2) / 2
    # one minus the mean of the two relative entropies against the average
    sim = 1 - 0.5 * (entropy(list1, avg_lists) + entropy(list2, avg_lists))
    if np.isinf(sim):
        # an infinite score is reported as "no similarity"
        return 0
    return sim
kl_divergence(list1, list2)
¶
Calculates Kullback-Leibler divergence.
Source code in fabsim/VVP/vvp.py
def kl_divergence(list1, list2):
    """
    Calculates Kullback-Leibler divergence.

    Args:
        list1 : the first distribution.
        list2 : the distribution `list1` is compared against.

    Returns:
        the result of `scipy.stats.entropy(list1, list2)`.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` is reachable as an attribute.
    from scipy.stats import entropy

    return entropy(list1, list2)
renyi_divergence(list1, list2, alpha=0.99)
¶
Calculates Renyi divergence.
Source code in fabsim/VVP/vvp.py
def renyi_divergence(list1, list2, alpha=0.99):
    """
    Calculates Renyi divergence.

    Args:
        list1 : first sequence of values (`p`).
        list2 : second sequence of values (`q`), paired element-wise with
            `list1`.
        alpha : exponent used in the `p**alpha / q**(alpha - 1)` terms.

    Returns:
        `1 / (alpha - 1) * log(sum of the terms)`, or `0` when that value is
        infinite.
    """
    terms = []
    for p, q in zip(list1, list2):
        terms.append(np.power(p, alpha) / np.power(q, alpha - 1))

    divergence = 1 / (alpha - 1) * np.log(np.sum(terms))
    # an infinite divergence is reported as 0
    return 0 if np.isinf(divergence) else divergence
sif_vvp(results_dirs, sif_dirs, sample_testing_function, aggregation_function, **kwargs)
¶
Goes through all the output directories and calculates the scores.
- results_dirs: list of result dirs to analyse.
- sample_testing_function: analysis/validation/verification function to be performed on each subdirectory of the results_dirs.
- aggregation_function: function to combine all results
- **kwargs: custom parameters. The 'items' parameter can be used to give explicit ordering of the various subdirectories.
return : sif_vvp_results (dict)
Authors: Derek Groen, Wouter Edeling, and Hamid Arabnejad
Source code in fabsim/VVP/vvp.py
def sif_vvp(
    results_dirs,
    sif_dirs,
    sample_testing_function,
    aggregation_function,
    **kwargs
):
    """
    Goes through all the output directories and calculates the scores.

    Each results directory is compared against the first entry of
    `sif_dirs`: once at the top level, then once per sub-directory that
    exists in both trees.

    Arguments:
    - results_dirs: a single result dir (string) or a list of result dirs to
        analyse.
    - sif_dirs: a single SIF dir (string) or a list of SIF dirs; only the
        first entry is used.
    - sample_testing_function: analysis/validation/verification function,
        called as `sample_testing_function(result_path, sif_path, **kwargs)`.
    - aggregation_function: function to combine all results, called as
        `aggregation_function(scores, **kwargs)` once per results dir.
    - **kwargs: custom parameters. The 'items' parameter can be used to give
        explicit ordering of the various subdirectories.

    return : sif_vvp_results (dict), keyed by results dir; each value holds
        `scores` and `scores_aggregation`.

    The process exits through `sys.exit()` when `results_dirs` is empty.

    Authors: Derek Groen, Wouter Edeling, and Hamid Arabnejad
    """
    sif_vvp_results = {}

    # if a single result_dir is specified, still add it to a list
    if isinstance(results_dirs, str):
        results_dirs = [results_dirs]

    # if a single sif_dir is specified, still add it to a list
    if isinstance(sif_dirs, str):
        sif_dirs = [sif_dirs]

    print("SIF_VVP results dirs:", results_dirs, sif_dirs)

    if len(results_dirs) == 0:
        print("ERROR: SIF_VVP applied,")
        print("but no results directories of test_subject runs provided.")
        sys.exit()

    # every results dir is checked against the first SIF dir
    si = sif_dirs[0]

    for ri in results_dirs:
        scores = []

        # use user-specified sample directories if specified,
        # otherwise look for uq results in all directories in results_dir
        if "items" in kwargs:
            items = kwargs["items"]
        else:
            items = os.listdir("{}".format(ri))

        # top-level comparison first, then one per matching sub-directory
        scores.append(sample_testing_function(ri, si, **kwargs))

        for item in items:
            result_item = os.path.join(ri, item)
            sif_item = os.path.join(si, item)
            if not os.path.isdir(result_item):
                continue
            # was `sif_dir` (an undefined name) in the original code
            if os.path.isdir(sif_item):
                print(result_item)
                print(sif_item)
                scores.append(
                    sample_testing_function(result_item, sif_item, **kwargs)
                )
            else:
                print(
                    "ERROR: SIF dir structure doesn't match "
                    "results dir structure."
                )

        scores_aggregation = aggregation_function(scores, **kwargs)

        # update return results dict
        sif_vvp_results[ri] = {
            "scores": scores,
            "scores_aggregation": scores_aggregation,
        }

    return sif_vvp_results