I probably need a perfect hash function. This code seems to do the job:
#######################################
# Encode a year and a per-year sequence number as a short reference "YY-SS".
#
# Alphabet: 31 symbols (23 letters without i, l, o; digits 2-9).
#   YY - the year as two base-31 digits. Only (year mod 31^2) is kept, so
#        the encoding repeats every 961 years.
#   SS - the sequence number as two base-31 digits, each digit then
#        substituted through a fixed permutation of the alphabet
#        (lookup_table) that is rebuilt deterministically on every call.
#
# Arguments:
#   $1 - year, non-negative decimal integer
#   $2 - sequence number, decimal integer in 0 .. 960 (31^2 - 1)
# Outputs:
#   stdout: "answer → YY-SS"
#   stderr: a diagnostic when an argument is rejected
# Returns:
#   0 on success, 1 when an argument is not a non-negative integer or the
#   sequence number does not fit in two symbols.
#######################################
encoded_reference()
{
  local -r yr=${1-}
  local -r seqno=${2-}
  local -ar symbolset=(a b c d e f g h j k m n p q r s t u v w x y z 2 3 4 5 6 7 8 9)
  local -r ln_symbolset=${#symbolset[@]}; # 31
  local -a seedset=("${symbolset[@]}")
  local -A lookup_table=()
  # Loop variables are local so they do not leak into (or clobber) the caller.
  local sym pos

  if ! [[ $yr =~ ^[0-9]+$ && $seqno =~ ^[0-9]+$ ]]
  then
    printf '%s\n' "encoded_reference: year and seqno must be non-negative integers (got '${yr}' '${seqno}')" >&2
    return 1
  fi

  # Force base 10 so zero-padded input such as 08 is not parsed as octal.
  local -r yr_num=$((10#$yr))
  local -r seq_num=$((10#$seqno))

  # Two symbols can only represent ln_symbolset^2 distinct sequence numbers;
  # anything larger would index past the end of symbolset.
  if ((seq_num >= ln_symbolset * ln_symbolset))
  then
    printf '%s\n' "encoded_reference: seqno ${seqno} out of range (0..$((ln_symbolset * ln_symbolset - 1)))" >&2
    return 1
  fi

  # Build the substitution table: each symbol, in order, is mapped to the
  # seed found at (50 mod remaining-seed-count); that seed is then removed so
  # every symbol maps to a distinct seed.
  for sym in "${symbolset[@]}"
  do
    pos=$((50 % ${#seedset[@]})); # 50 is just an arbitrary static number
    lookup_table[$sym]=${seedset[pos]}
    unset "seedset[$pos]"
    seedset=("${seedset[@]}"); # re-index so the positions stay contiguous
  done

  local -r yr_hi=$(((yr_num / ln_symbolset) % ln_symbolset))
  local -r yr_lo=$((yr_num % ln_symbolset))
  local -r seq_hi=$((seq_num / ln_symbolset))
  local -r seq_lo=$((seq_num % ln_symbolset))

  local -r yr_enc=${symbolset[yr_hi]}${symbolset[yr_lo]}
  local -r seq_enc=${lookup_table[${symbolset[seq_hi]}]}${lookup_table[${symbolset[seq_lo]}]}
  printf '%s\n' "answer → ${yr_enc}-$seq_enc"
};#encoded_reference
# Demo: print the encoded reference for sequence numbers 1..20 in each of
# the years 2024, 2025 and 2026 (one line per pair).
for yr in {2024..2026}; do
  for seqno in {1..20}; do
    encoded_reference "${yr}" "${seqno}"
  done
done
output
answer → js-wy answer → js-w2 answer → js-w4 answer → js-w6 answer → js-w8 answer → js-wa answer → js-wd answer → js-wg answer → js-wk answer → js-wp answer → js-ws answer → js-wv answer → js-w3 answer → js-w9 answer → js-we answer → js-wm answer → js-wt answer → js-w5 answer → js-wf answer → js-wr answer → jt-wy answer → jt-w2 answer → jt-w4 answer → jt-w6 answer → jt-w8 answer → jt-wa answer → jt-wd answer → jt-wg answer → jt-wk answer → jt-wp answer → jt-ws answer → jt-wv answer → jt-w3 answer → jt-w9 answer → jt-we answer → jt-wm answer → jt-wt answer → jt-w5 answer → jt-wf answer → jt-wr answer → ju-wy answer → ju-w2 answer → ju-w4 answer → ju-w6 answer → ju-w8 answer → ju-wa answer → ju-wd answer → ju-wg answer → ju-wk answer → ju-wp answer → ju-ws answer → ju-wv answer → ju-w3 answer → ju-w9 answer → ju-we answer → ju-wm answer → ju-wt answer → ju-w5 answer → ju-wf answer → ju-wr
This is close to ideal, but I just thought of another problem: what if a year–sequence pair were to produce an encoded reference like “fy-ou” or “us-uk” or “sh-it”? A bias that nearly ensures a digit is used would help avoid generating offensive words. But I guess I’m getting well into over-engineering territory.
This is the decode function if anyone is interested:
decoded_reference()
#######################################
# Decode a reference produced by encoded_reference back to "<year>-<seqno>".
#
# The function is self-contained: it rebuilds the same 31-symbol alphabet
# and the same permutation (seed position = 50 mod remaining-seed-count)
# that encoded_reference uses, so the two must be kept in sync.
#
# Arguments:
#   $1 - encoded reference "YY-SS": year symbols at offsets 0 and 1, any
#        single separator character at offset 2, sequence symbols at
#        offsets 3 and 4. Characters after offset 4 are ignored.
#   $2 - optional epoch multiplier, default 2. The encoder keeps only
#        (year mod 961), so the decoder adds back $2 * 961; the default
#        yields years 1922..2882.
# Outputs:
#   stdout: "<year>-<seqno>"
#   stderr: a diagnostic when the input is rejected
# Returns:
#   0 on success, 1 on a malformed reference or epoch.
#######################################
decoded_reference()
{
  local -r ref=${1-}
  local -r yr_epoch=${2:-2}
  local -ar symbolset=(a b c d e f g h j k m n p q r s t u v w x y z 2 3 4 5 6 7 8 9)
  local -r ln_symbolset=${#symbolset[@]}; # 31
  local -a seedset=("${symbolset[@]}")
  local -A symbol_index=(); # plain symbol     -> its position in symbolset
  local -A seq_index=();    # permuted symbol  -> position of the plain symbol
  local -r ref_re='^([a-z2-9])([a-z2-9]).([a-z2-9])([a-z2-9])'
  local i pos sym

  if ! [[ $yr_epoch =~ ^[0-9]+$ ]]
  then
    printf '%s\n' "decoded_reference: epoch must be a non-negative integer (got '${yr_epoch}')" >&2
    return 1
  fi

  if ! [[ $ref =~ $ref_re ]]
  then
    printf '%s\n' "decoded_reference: malformed reference '${ref}'" >&2
    return 1
  fi
  local -r yr_msd=${BASH_REMATCH[1]}
  local -r yr_lsd=${BASH_REMATCH[2]}
  local -r seq_enc_msd=${BASH_REMATCH[3]}
  local -r seq_enc_lsd=${BASH_REMATCH[4]}

  # Rebuild both tables in one pass; this replaces the per-symbol
  # "typeset -p | grep -oP" index search with plain associative lookups.
  for i in "${!symbolset[@]}"
  do
    sym=${symbolset[i]}
    symbol_index[$sym]=$i
    pos=$((50 % ${#seedset[@]})); # 50 is just an arbitrary static number
    sym=${seedset[pos]}
    seq_index[$sym]=$i
    unset "seedset[$pos]"
    seedset=("${seedset[@]}"); # re-index so the positions stay contiguous
  done

  # The regex admits every lowercase letter; reject the ones (i, l, o, ...)
  # that are not part of the alphabet. seq_index has the same key set.
  for sym in "$yr_msd" "$yr_lsd" "$seq_enc_msd" "$seq_enc_lsd"
  do
    if [[ -z ${symbol_index[$sym]+x} ]]
    then
      printf '%s\n' "decoded_reference: symbol '${sym}' is not in the alphabet" >&2
      return 1
    fi
  done

  local -r seq=$((${seq_index[$seq_enc_msd]} * ln_symbolset + ${seq_index[$seq_enc_lsd]}))
  local -r yr=$((yr_epoch * ln_symbolset * ln_symbolset + ${symbol_index[$yr_msd]} * ln_symbolset + ${symbol_index[$yr_lsd]}))
  printf '%s\n' "${yr}-$seq"
};#decoded_reference