| 45 | } |
| 46 | |
| 47 | func (cmd *Extract) Run() error { |
| 48 | if cmd.Input == "" { |
| 49 | return argp.ShowUsage |
| 50 | } |
| 51 | |
| 52 | f, err := os.Open(cmd.Input) |
| 53 | if err != nil { |
| 54 | return err |
| 55 | } |
| 56 | |
| 57 | pdf, err := NewPDFReader(f, cmd.Password) |
| 58 | if err != nil { |
| 59 | return err |
| 60 | } |
| 61 | |
| 62 | if cmd.Info { |
| 63 | fmt.Println("File name:", filepath.Base(cmd.Input)) |
| 64 | fmt.Println("Pages:", len(pdf.kids)) |
| 65 | if _, ok := pdf.trailer["Encrypt"]; ok { |
| 66 | fmt.Println("Encrypted: yes") |
| 67 | } else { |
| 68 | fmt.Println("Encrypted: no") |
| 69 | } |
| 70 | fmt.Println(pdf.GetInfo()) |
| 71 | return nil |
| 72 | } |
| 73 | |
| 74 | names, objects := getObjects(pdf, cmd.Page) |
| 75 | for i, obj := range objects { |
| 76 | if i == 0 { |
| 77 | fmt.Printf("Page %d:\n", cmd.Page) |
| 78 | } else { |
| 79 | fmt.Printf("\nXObject %s:\n", names[i]) |
| 80 | } |
| 81 | err = walkStrings(pdf, obj, func(index int, ops []textOperator, state textState) (int, error) { |
| 82 | var s string |
| 83 | op, vals := ops[0].Op, ops[0].Vals |
| 84 | if ops[0].Op == "Td" { |
| 85 | op, vals = ops[1].Op, ops[1].Vals |
| 86 | } |
| 87 | if op == "TJ" && len(vals) == 1 { |
| 88 | if array, ok := vals[0].(pdfArray); ok { |
| 89 | for _, item := range array { |
| 90 | if val, ok := item.([]byte); ok { |
| 91 | s += state.fonts[state.fontName].ToUnicode(val) |
| 92 | } |
| 93 | } |
| 94 | } |
| 95 | } else if (op == "Tj" || op == "'") && len(vals) == 1 { |
| 96 | if str, ok := vals[0].([]byte); ok { |
| 97 | s = state.fonts[state.fontName].ToUnicode(str) |
| 98 | } |
| 99 | } else if op == "\"" && len(vals) == 3 { |
| 100 | if str, ok := vals[2].([]byte); ok { |
| 101 | s = state.fonts[state.fontName].ToUnicode(str) |
| 102 | } |
| 103 | } |
| 104 | //if names[i] != "" { |