|
|
import glob
import json
import os
from io import StringIO

import flatten_dict
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
|
|
|
|
|
# Aggregate every (benchmark_report.json, experiment_config.json) pair found
# under awq_exllama/ into a single flat DataFrame and dump it to CSV.
dfs = []

# BUGFIX: previously this zipped two independent recursive globs. glob order is
# filesystem-dependent, and one missing file would silently misalign every
# subsequent report/config pair. Instead, glob reports only and derive the
# config path from the same directory, so each pair is guaranteed to match.
for report_path in glob.glob("awq_exllama/**/benchmark_report.json", recursive=True):
    experiment_path = os.path.join(os.path.dirname(report_path), "experiment_config.json")

    with open(report_path, "r") as f:
        report = json.load(f)

    with open(experiment_path, "r") as f:
        experiment = json.load(f)

    # Flatten nested dicts into dotted column names
    # (e.g. {"a": {"b": 1}} -> {"a.b": 1}), then build a one-row frame directly
    # instead of round-tripping through json.dumps/StringIO/read_json.
    report_row = pd.DataFrame([flatten_dict.flatten(report, reducer="dot")])
    experiment_row = pd.DataFrame([flatten_dict.flatten(experiment, reducer="dot")])

    # One row per experiment: report metrics and config side by side.
    dfs.append(pd.concat([report_row, experiment_row], axis=1))

df = pd.concat(dfs)

df.to_csv("awq_exllama/pytorch_awq_exllama.csv", index=False)
|
|
|
|
|
# One bar chart per latency metric: batch size on the x-axis, grouped by the
# AWQ kernel version. Each figure is saved to its own PNG and then closed so
# the next iteration starts from a clean canvas.
metric_titles = {
    "per_token.latency.mean": "Per Token Latency",
    "prefill.latency.mean": "Prefill Latency",
}

for metric, chart_title in metric_titles.items():
    sns.barplot(
        data=df,
        x="benchmark.input_shapes.batch_size",
        y=metric,
        hue="backend.quantization_config.version",
    )

    plt.title(chart_title)
    plt.xlabel("Batch Size")
    plt.ylabel("Latency (ms)")
    plt.legend(title="AWQ Version")
    plt.tight_layout()

    plt.savefig(f"awq_exllama/pytorch_awq_exllama_{metric}.png")
    plt.close()
|
|
|