PGN file parser using PCRE

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
Roland Chastain
Posts: 1003
Joined: Nov 24, 2011 19:49
Location: France
Contact:

PGN file parser using PCRE

Post by Roland Chastain »

Hello! Here are the elements of a PGN parser using regular expressions. It has been tested with only one file so far.

Code: Select all

#include "pcre.bi"

' Forward declarations: every procedure is defined further down in this file.
' ExtractGames     - appends one string per game, cut at each [Event " marker.
' ExtractStrings   - fills aStrings() with the regex matches and returns their count.
' ReadText         - returns the content of a file as a single string.
' Show             - prints each element of a string array on its own line.
' TryParsePGN      - prints the tags and the moves found in each game of a PGN text.
' TryParsePGNFile  - same as TryParsePGN, but takes the text from a file.
declare sub ExtractGames(aPgnText as const string, aStrings() as string)
declare function ExtractStrings(aSubject as const string, aPattern as const string, aStrings() as string) as integer
declare function ReadText(aFileName as const string) as string
declare sub Show(aStrings() as string)
declare sub TryParsePGN(aPgnText as const string)
declare sub TryParsePGNFile(aFileName as const string)

' Program entry point: parse the hard-coded sample file, then terminate.
TryParsePGNFile("myfile.pgn")
end

sub ExtractGames(aPgnText as const string, aStrings() as string)
  ' Cuts aPgnText into games and appends each one to aStrings().
  ' A game starts at an occurrence of the [Event " marker and runs up to the
  ' next occurrence (or to the end of the text for the last game). Anything
  ' located before the first marker is ignored.
  const MARKER = "[Event """
  dim as integer headPos = instr(aPgnText, MARKER)
  dim as integer nextPos, slot
  do until headPos <= 0
    ' Look for the start of the following game, if any.
    nextPos = instr(headPos + 1, aPgnText, MARKER)
    ' Grow the array by one element and remember the new last index.
    slot = ubound(aStrings) + 1
    redim preserve aStrings(lbound(aStrings) to slot)
    if nextPos = 0 then
      ' Last game: take everything up to the end of the text.
      aStrings(slot) = mid(aPgnText, headPos)
    else
      aStrings(slot) = mid(aPgnText, headPos, nextPos - headPos)
    end if
    headPos = nextPos
  loop
end sub

function ExtractStrings(aSubject as const string, aPattern as const string, aStrings() as string) as integer
  ' Collects every successive, non-overlapping match of aPattern in aSubject.
  '
  ' Parameters:
  '   aSubject  - text to search.
  '   aPattern  - PCRE regular expression.
  '   aStrings  - dynamic array; erased first, then filled with the text of
  '               each whole match (capture groups are not returned).
  ' Returns the number of matches stored, or 0 when the pattern does not
  ' compile or nothing matches.
  const OVECCOUNT = 300
  dim as zstring ptr erreur
  ' PCRE writes C "int" values: use the 32-bit LONG type so that the layout is
  ' also right on 64-bit targets, where INTEGER is 64 bits wide.
  dim as long error_offset
  dim as long rc
  dim as long ovector(OVECCOUNT - 1)
  dim as integer result = 0
  dim as integer startOffset = 0
  dim as integer subjectLen = len(aSubject)
  dim as pcre ptr re

  erase aStrings
  re = pcre_compile(aPattern, 0, @erreur, @error_offset, NULL)
  if re = NULL then
    return result
  end if

  do while startOffset <= subjectLen
    rc = pcre_exec(re, NULL, strptr(aSubject), subjectLen, startOffset, 0, @ovector(0), OVECCOUNT)
    ' Negative values mean "no match" or an error: stop searching.
    if rc < 0 then exit do
    ' rc = 0 only means that ovector was too small to hold every capture
    ' group; the whole-match pair ovector(0)/ovector(1) is still valid.
    redim preserve aStrings(lbound(aStrings) to ubound(aStrings) + 1)
    aStrings(ubound(aStrings)) = mid(aSubject, ovector(0) + 1, ovector(1) - ovector(0))
    result += 1
    if ovector(1) = ovector(0) then
      ' Empty match: step over one character, otherwise the same empty match
      ' would be found again forever.
      startOffset = ovector(1) + 1
    else
      startOffset = ovector(1)
    end if
  loop

  ' Release the compiled pattern allocated by pcre_compile.
  pcre_free(re)
  return result
end function

function ReadText(aFileName as const string) as string
  ' Returns the whole content of the file aFileName as a string.
  ' Returns an empty string when the file cannot be opened or is empty.
  dim result as string
  ' Ask the runtime for a free file number instead of assuming #1 is unused:
  ' a hard-coded number would clash with a file the caller already has open.
  dim as integer fileNum = freefile
  if open(aFileName for binary access read as #fileNum) <> 0 then
    return result
  end if
  if lof(fileNum) > 0 then
    ' GET fills a string up to its current length, so size the buffer first.
    result = space(lof(fileNum))
    get #fileNum, , result
  end if
  close #fileNum
  return result
end function

sub Show(aStrings() as string)
  ' Prints every element of aStrings(), one per line, in index order.
  dim as integer idx = lbound(aStrings)
  dim as integer lastIdx = ubound(aStrings)
  while idx <= lastIdx
    print aStrings(idx)
    idx += 1
  wend
end sub

sub TryParsePGN(aPgnText as const string)
  ' Prints, for each game found in aPgnText, the tag pairs (after a "(tags)"
  ' line) and the moves (after a "(moves)" line).
  ' The moves are taken from the first text span matching the moves-block
  ' pattern H; a game without such a span prints no moves.

  const A = "\[\w+ "".+""\]"                                       ' Tag
  ' Promotion and check/mate may both follow a move (e.g. e8=Q+), so they are
  ' two separate optional suffixes rather than alternatives of a single one.
  const B = "[PNBRQK]?[a-h]?[1-8]?x?[a-h][1-8](=[NBRQ])?(\+|#)?"   ' Move
  ' O-O-O must be tried before O-O: alternation is ordered, so the shorter
  ' form first would report queenside castling as "O-O". Castling may also
  ' give check or mate.
  const C = "(" & B & "|(O-O-O|O-O)(\+|#)?)"                       ' Move including castling
  const D = "\s+"                                                  ' Space (including end of line)
  const E = D & "\d+\." & D & C & "(" & D & C & ")?"               ' Full move sequence
  const F = D & "\{.+\}"                                           ' Comment
  const G = D & "(1-0|0-1|1/2-1/2|\*)"                             ' Result of the game
  const H = "(" & E & ")+(" & F & ")?" & G                         ' Moves block

  dim as string games(), subject, result()

  ExtractGames(aPgnText, games())

  for iGame as integer = lbound(games) to ubound(games)
    subject = games(iGame)
    if ExtractStrings(subject, A, result()) > 0 then
      ? "(tags)"
      Show(result())
    end if
    if ExtractStrings(subject, H, result()) >= 1 then
      ' Narrow the search to the moves block so that tag values are not
      ' scanned for moves.
      subject = result(lbound(result))
      if ExtractStrings(subject, C, result()) > 0 then
        ? "(moves)"
        Show(result())
      end if
    end if
  next iGame
end sub

sub TryParsePGNFile(aFileName as const string)
  ' Loads the text of aFileName with ReadText and passes it to TryParsePGN.
  dim as string pgnText = ReadText(aFileName)
  TryParsePGN(pgnText)
end sub
(tags)
[Event "Mosquito vs Soberango 0098 T06"]
[Site "LUIS-PC"]
[Date "2018.04.15"]
[Round "1"]
[White "Mosquito"]
[Black "Soberango0098"]
[Result "1-0"]
[BlackElo "2200"]
[ECO "A50"]
[Opening "Indian"]
[Time "17:25:22"]
[Variation "2.c4 a6"]
[WhiteElo "2200"]
[TimeControl "400/60:400/60:400/60"]
[Termination "normal"]
[PlyCount "79"]
[WhiteType "program"]
[BlackType "program"]
(moves)
c4
a6
d4
Nf6
Nc3
g6
Bg5
Nc6
d5
Na5
Qa4
c5
dxc6
bxc6
Nf3
Bg7
Ne5
O-O
b4
Nb7
Rd1
Ne4
Nxe4
Bxe5
f4
Bb2
Bh6
Bg7
Bxg7
Kxg7
e3
Kh6
c5
f5
Ng5
Rf6
Bxa6
Nxc5
bxc5
Bxa6
Rd4
Kg7
Qb3
Bb5
Ne6+
Rxe6
h4
Ba6
g4
fxg4
Rh2
Bb5
Rhd2
d5
cxd6
Rxd6
a4
Rxa4
Rxd6
exd6
Qb2+
Kh6
Rh2
Qd7
Rg2
Kh5
Qb3
Kxh4
Qd1
Ra5
Qd4
Qc7
Rc2
Ba4
Qd2
Kg3
Qf2+
Kh3
Qh2#
(tags)
[Event "Mosquito vs Soberango 0098 T06"]
...
Read here the PGN Specification. Some interesting PGN files.
Roland Chastain
Posts: 1003
Joined: Nov 24, 2011 19:49
Location: France
Contact:

Re: PGN file parser using PCRE

Post by Roland Chastain »

I reworked my program. Too many things were neglected in the first version. :)

Please see first post.
Post Reply