I have a simplified C code witch can extract text from pdf, and write into a file in XML format. The code is based on MuPDF library.
/*
* mudraw -- command line tool simplified
*/
#include "mupdf/fitz.h"
#include "mupdf/pdf.h"
int main(int argc, char **argv)
{
fz_document *doc = NULL;
fz_context *ctx;
fz_page *page;
fz_device *dev = NULL;
fz_text_sheet *sheet = NULL;
fz_text_page *text = NULL;
fz_cookie cookie = { 0 };
fz_output *out = NULL;
char buf[512];
FILE *file;
ctx = fz_new_context(NULL, NULL, FZ_STORE_DEFAULT);
doc = fz_open_document(ctx, "c:\test\test.pdf");
page = fz_load_page(doc, 0);
sheet = fz_new_text_sheet(ctx);
text = fz_new_text_page(ctx);
dev = fz_new_text_device(ctx, sheet, text);
fz_run_page(doc, page, dev, &fz_identity, &cookie);
fz_free_device(dev);
file = fopen(buf, "c:\test\test.xml");
out = fz_new_output_with_file(ctx, file);
fz_print_text_page_xml(ctx, out, text);
fz_free_text_page(ctx, text);
fz_free_text_sheet(ctx, sheet);
fz_free_page(doc, page);
fz_close_document(doc);
fz_close_output(out);
fz_free_context(ctx);
fclose(file);
}
My question is how can I implement this in C# with P/Invoke?
I tried the following C# code, but it does'n work. Can anybody help me?
using System.IO;
using System;
using System.Runtime.InteropServices;
using Test.Helpers;
namespace Test
{
public static class Test
{
const uint FZ_STORE_DEFAULT = 256 << 20;
public static void CopyTextPage()
{
const float zoomFactor = 1.0f;
var ctm = new Matrix();
var currentDpi = DpiHelpers.GetCurrentDpi();
var zoomX = zoomFactor * (currentDpi.HorizontalDpi / DpiHelpers.DEFAULT_DPI);
var zoomY = zoomFactor * (currentDpi.VerticalDpi / DpiHelpers.DEFAULT_DPI);
ctm.A = zoomX;
ctm.D = zoomY;
var context = NativeMethods.NewContext(IntPtr.Zero, IntPtr.Zero, FZ_STORE_DEFAULT);
var stream = NativeMethods.OpenFile(context, @"c:\test\test.pdf");
var document = NativeMethods.OpenDocumentStream(context, ".pdf", stream);
var page = NativeMethods.LoadPage(document, 0);
using (var filestream = File.Create(@"C:\test\test.xml"))
{
var text = NativeMethods.NewTextPage(context);
var sheet = NativeMethods.NewTextSheet(context);
var dev = NativeMethods.NewTextDevice(context, sheet, text);
if (filestream.SafeFileHandle != null)
{
var header = filestream.SafeFileHandle.DangerousGetHandle();
var output = NativeMethods.NewOutputFile(context, header);
NativeMethods.RunPage(document, page, dev, ctm, IntPtr.Zero);
NativeMethods.PrintTextPage(context, output, text);
NativeMethods.FreeOutputFile(output);
}
NativeMethods.FreeDevice(dev);
NativeMethods.FreeTextSheet(context, sheet);
NativeMethods.FreeTextPage(context, text);
}
NativeMethods.FreePage(document, page);
NativeMethods.CloseDocument(document);
NativeMethods.CloseStream(stream);
NativeMethods.FreeContext(context);
}
private static class NativeMethods
{
const string DLL = "libmupdf.dll";
[DllImport(DLL, EntryPoint = "fz_new_context", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr NewContext(IntPtr alloc, IntPtr locks, uint max_store);
[DllImport(DLL, EntryPoint = "fz_free_context", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr FreeContext(IntPtr ctx);
[DllImport(DLL, EntryPoint = "fz_open_file_w", CharSet = CharSet.Unicode, CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr OpenFile(IntPtr ctx, string fileName);
[DllImport(DLL, EntryPoint = "fz_open_document_with_stream", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr OpenDocumentStream(IntPtr ctx, string magic, IntPtr stm);
[DllImport(DLL, EntryPoint = "fz_close", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr CloseStream(IntPtr stm);
[DllImport(DLL, EntryPoint = "fz_close_document", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr CloseDocument(IntPtr doc);
[DllImport(DLL, EntryPoint = "fz_free_device", CallingConvention = CallingConvention.Cdecl)]
public static extern void FreeDevice(IntPtr dev);
[DllImport(DLL, EntryPoint = "fz_free_page", CallingConvention = CallingConvention.Cdecl)]
public static extern void FreePage(IntPtr doc, IntPtr page);
[DllImport(DLL, EntryPoint = "fz_load_page", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr LoadPage(IntPtr doc, int pageNumber);
[DllImport(DLL, EntryPoint = "fz_run_page", CallingConvention = CallingConvention.Cdecl)]
public static extern void RunPage(IntPtr doc, IntPtr page, IntPtr dev, Matrix transform, IntPtr cookie);
[DllImport(DLL, EntryPoint = "fz_new_text_sheet", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr NewTextSheet(IntPtr ctx);
[DllImport(DLL, EntryPoint = "fz_free_text_sheet", CallingConvention = CallingConvention.Cdecl)]
public static extern void FreeTextSheet(IntPtr ctx, IntPtr sheet);
[DllImport(DLL, EntryPoint = "fz_new_text_page", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr NewTextPage(IntPtr ctx);
[DllImport(DLL, EntryPoint = "fz_free_text_page", CallingConvention = CallingConvention.Cdecl)]
public static extern void FreeTextPage(IntPtr ctx, IntPtr page);
[DllImport(DLL, EntryPoint = "fz_new_text_device", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr NewTextDevice(IntPtr ctx, IntPtr sheet, IntPtr page);
[DllImport(DLL, EntryPoint = "fz_print_text_page_xml", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr PrintTextPage(IntPtr ctx, IntPtr file, IntPtr page);
[DllImport(DLL, EntryPoint = "fz_new_output_with_file", CallingConvention = CallingConvention.Cdecl)]
public static extern IntPtr NewOutputFile(IntPtr ctx, IntPtr file);
[DllImport(DLL, EntryPoint = "fz_close_output", CallingConvention = CallingConvention.Cdecl)]
public static extern void FreeOutputFile(IntPtr file);
}
}
internal struct Matrix
{
public float A, B, C, D, E, F;
}
}
|