STRATO-apps/wordpress_03/app/wp-content/plugins/aimogen-pro/res/tokenizer/Merges.php

SHA-256: 0950e6901c2c032fb6962f5e8ea4f8dfe8ea2732833e4a7cfe5cad2ff06d2f2c
<?php

namespace Gioni06\Gpt3Tokenizer;
/**
 * Reads a merges file and exposes its contents as a list of token groups.
 *
 * Each line of the file (after the first one) is split on whitespace; the
 * non-empty pieces of that line form one entry of the result.
 */
class Merges {
    /**
     * @param string $path Location of the merges file. Defaults to
     *                     `pretrained_vocab_files/merges.txt` next to this class.
     */
    public function __construct(private string $path = __DIR__ . '/pretrained_vocab_files/merges.txt')
    {
    }

    /**
     * Parses the merges file into one entry per line.
     *
     * The first line of the file is skipped. Every following line is split on
     * runs of whitespace and its non-blank pieces are returned as a
     * zero-indexed list. A blank line produces an empty list entry.
     *
     * If the file cannot be opened, an empty array is returned (the open
     * warning is suppressed on purpose; callers get a best-effort result).
     *
     * @return array<int, array<int, string>> One list of tokens per parsed line.
     *
     * @throws \Exception When reading stops before the end of the file is reached.
     */
    public function bpeMerges(): array
    {
        $lines = [];
        $fp = @fopen($this->path, "r");
        if (!$fp) {
            // Unreadable or missing file: keep the original best-effort contract.
            return $lines;
        }

        try {
            // Drop the first line of the file; it is not parsed as an entry.
            fgets($fp, 300);
            // Reads are capped at 299 bytes per call, matching the original buffer size.
            while (($buffer = fgets($fp, 300)) !== false) {
                // array_filter keeps the original indexes, so re-index to guarantee
                // every entry is a plain 0..n-1 list even when a line starts with
                // whitespace (which yields a leading empty piece).
                $lines[] = array_values(array_filter(
                    preg_split("/(\s+)/", $buffer),
                    static fn ($e) => strlen(trim($e)) > 0
                ));
            }
            if (!feof($fp)) {
                // Fully-qualified name: inside a namespace a bare `Exception`
                // would resolve to a class of that namespace, not PHP's built-in.
                throw new \Exception("Error: unexpected fgets() fail\n");
            }
        } finally {
            // Always release the handle, including when the exception above is thrown.
            fclose($fp);
        }

        return $lines;
    }
}