File size: 1,645 Bytes
999b799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import glob
import json
import os
from io import StringIO

import flatten_dict
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Build one DataFrame per benchmark run found under awq_exllama/, then
# aggregate them into a single table and save it as CSV.
dfs = []
# Sorted so the row order of the aggregated CSV does not depend on the
# unspecified order in which glob returns filesystem entries.
for report_path in sorted(glob.glob("awq_exllama/**/benchmark_report.json", recursive=True)):
    # Pair each report with the config that lives in the SAME directory.
    # Zipping two independent glob results pairs files purely by position,
    # which silently attaches the wrong config to a report as soon as one
    # directory is missing either file.
    experiment_path = os.path.join(os.path.dirname(report_path), "experiment_config.json")
    if not os.path.isfile(experiment_path):
        print(f"Skipping {report_path}: no experiment_config.json next to it")
        continue

    with open(report_path, "r", encoding="utf-8") as f:
        report = json.load(f)

    with open(experiment_path, "r", encoding="utf-8") as f:
        experiment = json.load(f)

    # Flatten nested keys into dot-joined names ("a.b.c"), then transpose so
    # the flattened keys become columns.
    report = flatten_dict.flatten(report, reducer="dot")
    report_df = pd.read_json(StringIO(json.dumps(report)), orient="index").T

    experiment = flatten_dict.flatten(experiment, reducer="dot")
    experiment_df = pd.read_json(StringIO(json.dumps(experiment)), orient="index").T

    # Report columns and config columns side by side for this run.
    df = pd.concat([report_df, experiment_df], axis=1)
    dfs.append(df)

if not dfs:
    # pd.concat([]) would raise an opaque "No objects to concatenate";
    # keep the same exception type but say what is actually missing.
    raise ValueError(
        "No benchmark_report.json with a sibling experiment_config.json found under awq_exllama/"
    )

# aggregate
df = pd.concat(dfs)
# save
df.to_csv("awq_exllama/pytorch_awq_exllama.csv", index=False)

# One grouped bar chart per latency metric: batch size on the x axis, one
# bar colour per quantization-config version. Each chart is written to its
# own PNG next to the aggregated CSV.
titles = {
    "per_token.latency.mean": "Per Token Latency",
    "prefill.latency.mean": "Prefill Latency",
}
for metric in titles:
    sns.barplot(
        data=df,
        x="benchmark.input_shapes.batch_size",
        y=metric,
        hue="backend.quantization_config.version",
    )
    plt.title(titles[metric])
    plt.xlabel("Batch Size")
    # NOTE(review): the axis is labelled in milliseconds; confirm this matches
    # the unit the benchmark report actually stores.
    plt.ylabel("Latency (ms)")
    plt.legend(title="AWQ Version")
    plt.tight_layout()
    plt.savefig(f"awq_exllama/pytorch_awq_exllama_{metric}.png")
    # Close the current figure so the next metric starts from a clean canvas.
    plt.close()