$ pdf-parser.py CLKF.pdf
PDF Comment '%PDF-1.3\n'
PDF Comment '%\xc7\xec\x8f\xa2\n'
obj 5 0
Type:
Referencing: 6 0 R
Contains stream
[(1, '\n'), (2, '<<'), (2, '/Length'), (1, ' '), (3, '6'), (1, ' '), (3, '0'), (1, ' '), (3, 'R'), (2, '/Filter'), (1, ' '), (2, '/FlateDecode'), (2, '>>'), (1, '\n')]
<<
/Length 6 0 R
/Filter /FlateDecode
>>
obj 6 0
Type:
Referencing:
[(1, '\n'), (3, '678'), (1, '\n')]
...
obj 4 0
Type: /Page
Referencing: 3 0 R, 11 0 R, 12 0 R, 13 0 R, 5 0 R
...
$ pdf-parser.py CLKF.pdf |
awk '/^obj/ { objnum = $2 };
/Referencing: [0-9]/ \
{ max = split($0, a);
for (i = 2; i < max; i += 3) { print objnum" -> "a[i]";" }
}'
5 -> 6;
...
4 -> 3;
4 -> 11;
4 -> 12;
4 -> 13;
4 -> 5;
...
PS C:\> C:\Python25\python.exe pdf-parser.py CLKF.pdf |
Select-String -Pattern "(?<=^obj\s)\d+" -Context 0,2
> obj 5 0
Type:
Referencing: 6 0 R
> obj 6 0
Type:
Referencing:
> obj 15 0
Type:
Referencing: 16 0 R
...
> obj 4 0
Type: /Page
Referencing: 3 0 R, 11 0 R, 12 0 R, 13 0 R, 5 0 R
PS C:\> C:\Python25\python.exe pdf-parser.py CLKF.pdf |
Select-String -Pattern "(?<=^obj\s)[0-9]+" -Context 0,2 | Format-List
IgnoreCase : True
LineNumber : 7
Line : obj 5 0
Filename : InputStream
Path : InputStream
Pattern : (?<=^obj\s)[0-9]+
Context : Microsoft.PowerShell.Commands.MatchInfoContext
Matches : {5}
...
PS C:\> ... | Select-Object -ExcludeProperty Context | Get-Member
TypeName: Microsoft.PowerShell.Commands.MatchInfoContext
Name MemberType Definition
---- ---------- ----------
Clone Method System.Object Clone()
Equals Method bool Equals(System.Object obj)
GetHashCode Method int GetHashCode()
GetType Method type GetType()
ToString Method string ToString()
DisplayPostContext Property System.String[] DisplayPostContext {get;set;}
DisplayPreContext Property System.String[] DisplayPreContext {get;set;}
PostContext Property System.String[] PostContext {get;set;}
PreContext Property System.String[] PreContext {get;set;}
PS C:\> C:\Python25\python.exe pdf-parser.py CLKF.pdf |
Select-String -Pattern "(?<=^obj\s)[0-9]+" -Context 0,2 |
ForEach-Object { $objnum = $_.matches[0].Value; $_.Context.PostContext[1] }
Referencing: 6 0 R
Referencing:
Referencing: 16 0 R
Referencing:
Referencing: 25 0 R
...
PS C:\> C:\Python25\python.exe pdf-parser.py CLKF.pdf |
Select-String -Pattern "(?<=^obj\s)[0-9]+" -Context 0,2 |
% { $objnum = $_.matches[0].Value; $_.Context.PostContext[1] |
Select-String "(\d+(?=\s0\sR))" -AllMatches | Select-Object -ExpandProperty matches |
ForEach-Object { "$($objnum)->$($_.Value)" } }
5 -> 6;
...
4 -> 3;
4 -> 11;
4 -> 12;
4 -> 13;
4 -> 5;
...
Click to Open Code Editor