about | summary | refs | log | tree | commit | diff | stats
path: root/Llk
diff options
context:
space:
mode:
author: Ivan Enderlin <ivan.enderlin@hoa-project.net> 2016-02-05 17:03:53 +0100
committer: Ivan Enderlin <ivan.enderlin@hoa-project.net> 2016-07-15 16:50:21 +0200
commit: d083a0c353280bc9f6b68a4a00e444e87a0f6e0b (patch)
tree: 4287ad79ee9f05f1e0eb64dca66951e48f779b27 /Llk
parent: 41e424233ba9b3ee9ae74ecd74b398ae60af978c (diff)
download: Compiler-d083a0c353280bc9f6b68a4a00e444e87a0f6e0b.zip
Compiler-d083a0c353280bc9f6b68a4a00e444e87a0f6e0b.tar.gz
Compiler-d083a0c353280bc9f6b68a4a00e444e87a0f6e0b.tar.bz2
Llk: Implement pragmas.
Pragmas are useful when the grammar needs to change the behavior of the compiler-compiler. For instance, the first pragma we introduce is `unicode`. It is used by the lexer to turn the Unicode mode on for the regular expressions.
Diffstat (limited to 'Llk')
-rw-r--r-- Llk/Lexer.php 32
-rw-r--r-- Llk/Llk.php 38
-rw-r--r-- Llk/Parser.php 36
3 files changed, 87 insertions, 19 deletions
diff --git a/Llk/Lexer.php b/Llk/Lexer.php
index f137cec..4800760 100644
--- a/Llk/Lexer.php
+++ b/Llk/Lexer.php
@@ -53,31 +53,53 @@ class Lexer
*
* @var array
*/
- protected $_lexerState = null;
+ protected $_lexerState = null;
/**
* Text.
*
* @var string
*/
- protected $_text = null;
+ protected $_text = null;
/**
* Tokens.
*
* @var array
*/
- protected $_tokens = [];
+ protected $_tokens = [];
/**
* Namespace stacks.
*
* @var \SplStack
*/
- protected $_nsStack = null;
+ protected $_nsStack = null;
+ /**
+ * PCRE options.
+ *
+ * @var string
+ */
+ protected $_pcreOptions = null;
+
+ /**
+ * Constructor.
+ *
+ * @param array $pragmas Pragmas.
+ * @return void
+ */
+ public function __construct(array $pragmas = [])
+ {
+ if (!isset($pragmas['unicode']) || true === $pragmas['unicode']) {
+ $this->_pcreOptions .= 'u';
+ }
+
+ return;
+ }
+
/**
* Text tokenizer: splits the text in parameter in an ordered array of
* tokens.
@@ -253,7 +275,7 @@ class Lexer
{
$_regex = str_replace('#', '\#', $regex);
$preg = preg_match(
- '#\G(?|' . $_regex . ')#u',
+ '#\G(?|' . $_regex . ')#' . $this->_pcreOptions,
$this->_text,
$matches,
0,
diff --git a/Llk/Llk.php b/Llk/Llk.php
index c5ec2c5..d7108a9 100644
--- a/Llk/Llk.php
+++ b/Llk/Llk.php
@@ -112,6 +112,11 @@ class Llk
* <inner>
* ::lt:: ::slash:: ::tagname[0]:: ::gt::
*
+ * In addition to `%skip` and `%token`, we have the `%pragma` keyword to declare a
+ * pragma. Currently supported pragmas are:
+ * * `unicode`, used by the lexer to turn the Unicode mode on for the
+ * regular expressions.
+ *
* @param \Hoa\Stream\IStream\In $stream Stream that contains the
* grammar.
* @return \Hoa\Compiler\Llk\Parser
@@ -140,12 +145,12 @@ class Llk
throw new Compiler\Exception($message . '.', 0);
}
- static::parsePP($pp, $tokens, $rawRules, $stream->getStreamName());
+ static::parsePP($pp, $tokens, $rawRules, $pragmas, $stream->getStreamName());
$ruleAnalyzer = new Rule\Analyzer($tokens);
$rules = $ruleAnalyzer->analyzeRules($rawRules);
- return new Parser($tokens, $rules);
+ return new Parser($tokens, $rules, $pragmas);
}
public static function save(Parser $parser, $className)
@@ -287,15 +292,17 @@ class Llk
* @param string $pp PP.
* @param array $tokens Extracted tokens.
* @param array $rules Extracted raw rules.
+ * @param array $pragmas Extracted raw pragmas.
* @param string $streamName The name of the stream that contains the grammar.
* @return void
* @throws \Hoa\Compiler\Exception
*/
- public static function parsePP($pp, &$tokens, &$rules, $streamName)
+ public static function parsePP($pp, &$tokens, &$rules, &$pragmas, $streamName)
{
- $lines = explode("\n", $pp);
- $tokens = ['default' => []];
- $rules = [];
+ $lines = explode("\n", $pp);
+ $pragmas = [];
+ $tokens = ['default' => []];
+ $rules = [];
for ($i = 0, $m = count($lines); $i < $m; ++$i) {
$line = rtrim($lines[$i]);
@@ -305,7 +312,24 @@ class Llk
}
if ('%' == $line[0]) {
- if (0 !== preg_match('#^%skip\s+(?:([^:]+):)?([^\s]+)\s+(.*)$#u', $line, $matches)) {
+ if (0 !== preg_match('#^%pragma\s+([^\s]+)\s+(.*)$#u', $line, $matches)) {
+ switch ($matches[2]) {
+ case 'true':
+ $pragmaValue = true;
+
+ break;
+
+ case 'false':
+ $pragmaValue = false;
+
+ break;
+
+ default:
+ $pragmaValue = $matches[2];
+ }
+
+ $pragmas[$matches[1]] = $pragmaValue;
+ } else if (0 !== preg_match('#^%skip\s+(?:([^:]+):)?([^\s]+)\s+(.*)$#u', $line, $matches)) {
if (empty($matches[1])) {
$matches[1] = 'default';
}
diff --git a/Llk/Parser.php b/Llk/Parser.php
index 05f43ee..23bdd0e 100644
--- a/Llk/Parser.php
+++ b/Llk/Parser.php
@@ -50,6 +50,13 @@ use Hoa\Iterator;
class Parser
{
/**
+ * List of pragmas.
+ *
+ * @var array
+ */
+ protected $_pragmas = null;
+
+ /**
* List of skipped tokens.
*
* @var array
@@ -118,14 +125,19 @@ class Parser
/**
* Construct the parser.
*
- * @param array $tokens Tokens.
- * @param array $rules Rules.
+ * @param array $tokens Tokens.
+ * @param array $rules Rules.
+ * @param array $pragmas Pragmas.
* @return void
*/
- public function __construct(array $tokens = [], array $rules = [])
- {
- $this->_tokens = $tokens;
- $this->_rules = $rules;
+ public function __construct(
+ array $tokens = [],
+ array $rules = [],
+ array $pragmas = []
+ ) {
+ $this->_tokens = $tokens;
+ $this->_rules = $rules;
+ $this->_pragmas = $pragmas;
return;
}
@@ -141,7 +153,7 @@ class Parser
*/
public function parse($text, $rule = null, $tree = true)
{
- $lexer = new Lexer();
+ $lexer = new Lexer($this->_pragmas);
$this->_tokenSequence = new Iterator\Buffer(
$lexer->lexMe($text, $this->_tokens),
1024
@@ -694,6 +706,16 @@ class Parser
}
/**
+ * Get pragmas.
+ *
+ * @return array
+ */
+ public function getPragmas()
+ {
+ return $this->_pragmas;
+ }
+
+ /**
* Get tokens.
*
* @return array