Skip to content

Commit 763d456

Browse files
committed
Add index field to Position
1 parent 6013a3c commit 763d456

File tree

6 files changed

+73
-71
lines changed

6 files changed

+73
-71
lines changed

β€ŽCHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Breaking changes:
2626
prevents a compiler error (i.e. `MixedAssociativityError`)
2727
without causing issues with `<$>`.
2828
- Rename module prefix from `Text.Parsing.Parser` to `Parsing` (#169 by @jamesdbrock)
29+
- Add the `index` field to `Position`. (#171 by @jamesdbrock)
2930

3031
New features:
3132

β€Žbench/Main.purs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ import Bench.Json.Parsing as BenchParsing
5757
import Bench.Json.StringParser as BenchStringParser
5858
import Bench.Json.TestData (largeJson, mediumJson, smallJson)
5959
import Control.Monad.Trampoline (runTrampoline)
60-
import Control.Monad.Free (liftF)
6160
import Data.Array (fold, replicate)
6261
import Data.Either (either)
6362
import Data.List (many, manyRec)

β€Žsrc/Parsing/Indent.purs

Lines changed: 13 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -85,18 +85,6 @@ get' = do
8585
put' :: forall s. Position -> IndentParser s Unit
8686
put' p = lift (put p)
8787

88-
sourceColumn :: Position -> Int
89-
sourceColumn (Position { line: _, column: c }) = c
90-
91-
sourceLine :: Position -> Int
92-
sourceLine (Position { line: l, column: _ }) = l
93-
94-
setSourceLine :: Position -> Int -> Position
95-
setSourceLine (Position { line: _, column: c }) l = Position { line: l, column: c }
96-
97-
biAp :: forall a b c. (a -> b) -> (b -> b -> c) -> a -> a -> c
98-
biAp f c v1 v2 = c (f v1) (f v2)
99-
10088
many1 :: forall s m a. ParserT s m a -> ParserT s m (List a)
10189
many1 p = lift2 Cons p (many p)
10290

@@ -121,19 +109,17 @@ withBlock' = withBlock (flip const)
121109
-- | Parses only when indented past the level of the reference
122110
indented :: forall s. IndentParser s Unit
123111
indented = do
124-
pos <- position
125-
s <- get'
126-
if biAp sourceColumn (<=) pos s then fail "not indented"
127-
else do
128-
put' $ setSourceLine s (sourceLine pos)
129-
pure unit
112+
Position p <- position
113+
Position s <- get'
114+
if p.column <= s.column then fail "not indented"
115+
else put' $ Position { index: 0, line: p.line, column: s.column }
130116

131117
-- | Same as `indented`, but does not change internal state
132118
indented' :: forall s. IndentParser s Unit
133119
indented' = do
134-
pos <- position
135-
s <- get'
136-
if biAp sourceColumn (<=) pos s then fail "not indented" else pure unit
120+
Position p <- position
121+
Position s <- get'
122+
if p.column <= s.column then fail "not indented" else pure unit
137123

138124
-- | Parses only when indented past the level of the reference or on the same line
139125
sameOrIndented :: forall s. IndentParser s Unit
@@ -142,9 +128,9 @@ sameOrIndented = sameLine <|> indented
142128
-- | Parses only on the same line as the reference
143129
sameLine :: forall s. IndentParser s Unit
144130
sameLine = do
145-
pos <- position
146-
s <- get'
147-
if biAp sourceLine (==) pos s then pure unit else fail "over one line"
131+
Position p <- position
132+
Position s <- get'
133+
if p.line == s.line then pure unit else fail "over one line"
148134

149135
-- | Parses a block of lines at the same indentation level
150136
block1 :: forall s a. IndentParser s a -> IndentParser s (List a)
@@ -169,9 +155,9 @@ withPos x = do
169155
-- | Ensures the current indentation level matches that of the reference
170156
checkIndent :: forall s. IndentParser s Unit
171157
checkIndent = do
172-
s <- get'
173-
p <- position
174-
if biAp sourceColumn (==) p s then pure unit else fail "indentation doesn't match"
158+
Position p <- position
159+
Position s <- get'
160+
if p.column == s.column then pure unit else fail "indentation doesn't match"
175161

176162
-- | Run the result of an indentation sensitive parse
177163
runIndent :: forall a. State Position a -> a

β€Žsrc/Parsing/Pos.purs

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,30 @@ module Parsing.Pos where
33
import Prelude
44

55
import Data.Generic.Rep (class Generic)
6+
import Data.Show.Generic (genericShow)
67

78
-- | `Position` represents the position of the parser in the input.
89
-- |
9-
-- | - `line` is the current line in the input
10-
-- | - `column` is the column of the next character in the current line that will be parsed
10+
-- | - `index` is the position since the start of the input. Starts at 0.
11+
-- | - `line` is the current line in the input. Starts at 1.
12+
-- | - `column` is the column of the next character in the current line that
13+
-- | will be parsed. Starts at 1.
1114
newtype Position = Position
12-
{ line :: Int
15+
{ index :: Int
16+
, line :: Int
1317
, column :: Int
1418
}
1519

16-
derive instance genericPosition :: Generic Position _
20+
derive instance Generic Position _
21+
instance Show Position where
22+
show x = genericShow x
1723

18-
instance showPosition :: Show Position where
19-
show (Position { line: line, column: column }) =
20-
"(Position { line: " <> show line <> ", column: " <> show column <> " })"
24+
instance Eq Position where
25+
eq (Position l) (Position r) = l.index == r.index
2126

22-
derive instance eqPosition :: Eq Position
23-
derive instance ordPosition :: Ord Position
27+
instance Ord Position where
28+
compare (Position l) (Position r) = compare l.index r.index
2429

2530
-- | The `Position` before any input has been parsed.
2631
initialPos :: Position
27-
initialPos = Position { line: 1, column: 1 }
32+
initialPos = Position { index: 0, line: 1, column: 1 }

β€Žsrc/Parsing/String.purs

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@
1919
-- | The other primitive parsers, which return `CodePoint` and `String` types,
2020
-- | can parse the full Unicode character set. All of the primitive parsers
2121
-- | in this module can be used together.
22+
-- |
23+
-- | ### Position
24+
-- |
25+
-- | In a `String` parser, the `Position {index}` counts the number of
26+
-- | unicode `CodePoint`s since the beginning of the input string.
27+
-- |
28+
-- | Each tab character (`0x09`) encountered in a `String` parser will advance
29+
-- | the `Position {column}` by 8.
30+
-- |
31+
-- | These patterns will advance the `Position {line}` by 1 and reset
32+
-- | the `Position {column}` to 1:
33+
-- | - newline (`0x0A`)
34+
-- | - carriage-return (`0x0D`)
35+
-- | - carriage-return-newline (`0x0D 0x0A`)
2236
module Parsing.String
2337
( string
2438
, eof
@@ -190,14 +204,14 @@ updatePosString pos before after = case uncons before of
190204
-- | Updates a `Position` by adding the columns and lines in a
191205
-- | single `CodePoint`.
192206
updatePosSingle :: Position -> CodePoint -> String -> Position
193-
updatePosSingle (Position { line, column }) cp after = case fromEnum cp of
194-
10 -> Position { line: line + 1, column: 1 } -- "\n"
207+
updatePosSingle (Position { index, line, column }) cp after = case fromEnum cp of
208+
10 -> Position { index: index + 1, line: line + 1, column: 1 } -- "\n"
195209
13 ->
196210
case codePointAt 0 after of
197-
Just nextCp | fromEnum nextCp == 10 -> Position { line, column } -- "\r\n" lookahead
198-
_ -> Position { line: line + 1, column: 1 } -- "\r"
199-
9 -> Position { line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
200-
_ -> Position { line, column: column + 1 }
211+
Just nextCp | fromEnum nextCp == 10 -> Position { index: index + 1, line, column } -- "\r\n" lookahead
212+
_ -> Position { index: index + 1, line: line + 1, column: 1 } -- "\r"
213+
9 -> Position { index: index + 1, line, column: column + 8 - ((column - 1) `mod` 8) } -- "\t" Who says that one tab is 8 columns?
214+
_ -> Position { index: index + 1, line, column: column + 1 }
201215

202216
-- | Combinator which returns both the result of a parse and the slice of
203217
-- | the input that was consumed while it was being parsed.

β€Žtest/Main.purs

Lines changed: 24 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ stackSafeLoopsTest = do
114114
parseErrorTestPosition
115115
(many1TillRec (string "a") (string "b"))
116116
"baa"
117-
(Position { line: 1, column: 1 })
117+
(Position { index: 0, line: 1, column: 1 })
118118

119119
parseTest "a,a,a,b,a,a" (toUnfoldable [ "a", "a", "a" ]) $
120120
sepEndByRec (string "a") (string ",")
@@ -130,7 +130,7 @@ stackSafeLoopsTest = do
130130
parseErrorTestPosition
131131
(sepEndBy1Rec (string "a") (string ","))
132132
"b,a,a"
133-
(Position { line: 1, column: 1 })
133+
(Position { index: 0, line: 1, column: 1 })
134134

135135
-- 8 `div` (8 `div` 2) == 2
136136
parseTest "8x8x2" 2 $
@@ -142,7 +142,7 @@ stackSafeLoopsTest = do
142142
parseErrorTestPosition
143143
(chainr1Rec digit (string "x" $> div))
144144
""
145-
(Position { line: 1, column: 1 })
145+
(Position { index: 0, line: 1, column: 1 })
146146

147147
-- (8 `div` 2) `div` 2 == 2
148148
parseTest "8x2x2" 2 $
@@ -154,15 +154,15 @@ stackSafeLoopsTest = do
154154
parseErrorTestPosition
155155
(chainl1Rec digit (string "x" $> div))
156156
""
157-
(Position { line: 1, column: 1 })
157+
(Position { index: 0, line: 1, column: 1 })
158158

159159
parseTest "aaaabcd" "b"
160160
$ skipMany1Rec (string "a")
161161
*> string "b"
162162
parseErrorTestPosition
163163
(skipMany1Rec (string "a"))
164164
"bcd"
165-
(Position { line: 1, column: 1 })
165+
(Position { index: 0, line: 1, column: 1 })
166166

167167
parseTest "aaaabcd" "b"
168168
$ skipManyRec (string "a")
@@ -176,7 +176,7 @@ stackSafeLoopsTest = do
176176
parseErrorTestPosition
177177
(many1Rec (string "a"))
178178
""
179-
(Position { line: 1, column: 1 })
179+
(Position { index: 0, line: 1, column: 1 })
180180

181181
parseTest "a,a,ab" (toUnfoldable [ "a", "a", "a" ])
182182
$ sepByRec (string "a") (string ",")
@@ -190,11 +190,11 @@ stackSafeLoopsTest = do
190190
parseErrorTestPosition
191191
(sepBy1Rec (string "a") (string ","))
192192
""
193-
(Position { line: 1, column: 1 })
193+
(Position { index: 0, line: 1, column: 1 })
194194
parseErrorTestPosition
195195
(sepBy1Rec (string "a") (string ","))
196196
"a,"
197-
(Position { line: 1, column: 3 })
197+
(Position { index: 2, line: 1, column: 3 })
198198

199199
parseTest "a,a,a,b" (toUnfoldable [ "a", "a", "a" ])
200200
$ endByRec (string "a") (string ",")
@@ -208,11 +208,11 @@ stackSafeLoopsTest = do
208208
parseErrorTestPosition
209209
(endBy1Rec (string "a") (string ","))
210210
""
211-
(Position { line: 1, column: 1 })
211+
(Position { index: 0, line: 1, column: 1 })
212212
parseErrorTestPosition
213213
(endBy1Rec (string "a") (string ","))
214214
"a,a"
215-
(Position { line: 1, column: 4 })
215+
(Position { index: 3, line: 1, column: 4 })
216216

217217
data TestToken = A | B
218218

@@ -233,10 +233,7 @@ testTokenParser :: TokenParser
233233
testTokenParser = makeTokenParser haskellDef
234234

235235
mkPos :: Int -> Position
236-
mkPos n = mkPos' n 1
237-
238-
mkPos' :: Int -> Int -> Position
239-
mkPos' column line = Position { column: column, line: line }
236+
mkPos n = Position { index: n - 1, line: 1, column: n }
240237

241238
type TestM = Effect Unit
242239

@@ -562,12 +559,12 @@ main = do
562559
parseErrorTestPosition
563560
(many $ char 'f' *> char '?')
564561
"foo"
565-
(Position { column: 2, line: 1 })
562+
(Position { index: 1, column: 2, line: 1 })
566563

567564
parseErrorTestPosition
568565
(satisfy (_ == '?'))
569566
"foo"
570-
(Position { column: 1, line: 1 })
567+
(Position { index: 0, column: 1, line: 1 })
571568

572569
parseTest
573570
"foo"
@@ -592,17 +589,17 @@ main = do
592589

593590
parseTest "rest" "rest" rest
594591
parseTest "rest" unit (rest *> eof)
595-
parseTest "rest\nrest" (Position { line: 2, column: 5 }) (rest *> position)
592+
parseTest "rest\nrest" (Position { index: 9, line: 2, column: 5 }) (rest *> position)
596593

597594
parseErrorTestPosition
598595
(rest *> notFollowedBy eof)
599596
"aa\naa"
600-
(Position { column: 3, line: 2 })
597+
(Position { index: 5, column: 3, line: 2 })
601598

602599
parseErrorTestPosition
603-
anyChar
604-
"𝅑"
605-
(Position { column: 1, line: 1 })
600+
(string "π…Ÿπ… " *> string "π…Ÿπ… ")
601+
"π…Ÿπ… x𝅑"
602+
(Position { index: 2, column: 3, line: 1 })
606603

607604
parseTest "π…Ÿπ… x𝅑" [ "π…Ÿ", "𝅘𝅥𝅮", "x", "𝅑" ] do
608605
quarter <- anyCodePoint
@@ -618,8 +615,8 @@ main = do
618615

619616
parseTest "abcd" "ab" $ takeN 2
620617
parseTest "abcd" "" $ takeN 0
621-
parseErrorTestPosition (takeN 10) "abcd" (Position { column: 1, line: 1 })
622-
parseErrorTestPosition (takeN (-1)) "abcd" (Position { column: 1, line: 1 })
618+
parseErrorTestPosition (takeN 10) "abcd" (Position { index: 0, column: 1, line: 1 })
619+
parseErrorTestPosition (takeN (-1)) "abcd" (Position { index: 0, column: 1, line: 1 })
623620

624621
parseErrorTestMessage
625622
(noneOfCodePoints $ SCP.toCodePointArray "β“βœ…")
@@ -662,10 +659,10 @@ main = do
662659
parseTest "ababab" [ 'b', 'b', 'b' ] $ Array.many (char 'a' *> char 'b')
663660
parseTest "abaXab" [ 'b' ] $ Array.many (try (char 'a' *> char 'b'))
664661

665-
parseErrorTestPosition (string "abc") "bcd" (Position { column: 1, line: 1 })
666-
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { column: 4, line: 1 })
667-
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { column: 1, line: 4 })
668-
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { column: 10, line: 1 })
662+
parseErrorTestPosition (string "abc") "bcd" (Position { index: 0, column: 1, line: 1 })
663+
parseErrorTestPosition (string "abc" *> eof) "abcdefg" (Position { index: 3, column: 4, line: 1 })
664+
parseErrorTestPosition (string "a\nb\nc\n" *> eof) "a\nb\nc\nd\n" (Position { index: 6, column: 1, line: 4 })
665+
parseErrorTestPosition (string "\ta" *> eof) "\tab" (Position { index: 2, column: 10, line: 1 })
669666

670667
parseTest "Infinity" infinity number
671668
parseTest "+Infinity" infinity number

0 commit comments

Comments
Β (0)